Re: [Mesa-dev] [PATCH v2 08/20] i965/fs: Add emit_cs_terminate to emit CS_OPCODE_CS_TERMINATE

2015-04-24 Thread Jordan Justen
On 2015-04-24 23:11:13, Kenneth Graunke wrote:
> On Friday, April 24, 2015 04:33:00 PM Jordan Justen wrote:
> > v2:
> >  * Do more work at the visitor level. g0 is loaded and sent to the
> >generator now.
> > 
> > Signed-off-by: Jordan Justen 
> > ---
> >  src/mesa/drivers/dri/i965/brw_fs.h   |  1 +
> >  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 19 +++
> >  2 files changed, 20 insertions(+)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
> > b/src/mesa/drivers/dri/i965/brw_fs.h
> > index 4e17d44..b1e65cd 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs.h
> > +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> > @@ -387,6 +387,7 @@ public:
> >   bool use_2nd_half = false);
> > void emit_fb_writes();
> > void emit_urb_writes();
> > +   void emit_cs_terminate();
> >  
> > void emit_shader_time_begin();
> > void emit_shader_time_end();
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
> > b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> > index 11f38c2..ba8b811 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> > @@ -4154,6 +4154,25 @@ fs_visitor::resolve_ud_negate(fs_reg *reg)
> > *reg = temp;
> >  }
> >  
> > +void
> > +fs_visitor::emit_cs_terminate()
> > +{
> > +   assert(brw->gen >= 7);
> > +
> > +   /* We are getting the thread ID from the compute shader header */
> > +   assert(stage == MESA_SHADER_COMPUTE);
> > +
> > +   /* Copy g0 for the message payload */
> 
> Perhaps add a comment like:
> 
> /* We can't directly send from g0, since sends with EOT have to use
>  * g112-127.  So, copy it to a virtual register.  The register allocator
>  * will make sure it uses the appropriate register range.
>  */

Ok. It sounds good to me. Unless you can think of some better wording
based on the link below.

> I was going to suggest sending from g0, but I'm guessing this is why you
> don't.  (I wonder if it's really necessary for compute...seems wise to
> do for now though...)

Luckily someone already looked into this for us. ;)

http://lists.freedesktop.org/archives/mesa-dev/2015-March/079565.html

> Patches 1-9 are:
> Reviewed-by: Kenneth Graunke 

Thanks!

-Jordan

> > +   struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD);
> > +   fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
> > +   fs_inst *inst = emit(MOV(payload, g0));
> > +   inst->force_writemask_all = true;
> > +
> > +   /* Send a message to the thread spawner to terminate the thread. */
> > +   inst = emit(CS_OPCODE_CS_TERMINATE, reg_undef, payload);
> > +   inst->eot = true;
> > +}
> > +
> >  /**
> >   * Resolve the result of a Gen4-5 CMP instruction to a proper boolean.
> >   *
> > 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 11/20] i965/cs: Add brw_cs_prog_data, brw_cs_prog_key and brw_context::cs.

2015-04-24 Thread Kenneth Graunke
On Friday, April 24, 2015 04:33:03 PM Jordan Justen wrote:
> From: Paul Berry 
> 
> jordan.l.jus...@intel.com:
>  * Added brw_cs_prog_key structure
>  * Added brw_cs_prog_data::dispatch_grf_start_reg_16
>  * Added brw_cs_prog_data::no_8
>  * Added brw_cs_prog_data::local_size
>  * Added brw_cs_prog_data::simd_size
> 
> Signed-off-by: Jordan Justen 
> Reviewed-by: Kristian Høgsberg 
> ---
>  src/mesa/drivers/dri/i965/brw_context.h | 17 
>  src/mesa/drivers/dri/i965/brw_cs.h  | 46 
> +
>  2 files changed, 63 insertions(+)
>  create mode 100644 src/mesa/drivers/dri/i965/brw_cs.h
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index 134040e..07847cc 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -148,6 +148,7 @@ struct brw_vs_prog_key;
>  struct brw_vue_prog_key;
>  struct brw_wm_prog_key;
>  struct brw_wm_prog_data;
> +struct brw_cs_prog_data;
>  
>  enum brw_pipeline {
> BRW_RENDER_PIPELINE,
> @@ -429,6 +430,18 @@ struct brw_wm_prog_data {
> int urb_setup[VARYING_SLOT_MAX];
>  };
>  
> +/* Note: brw_cs_prog_data_compare() must be updated when adding fields to 
> this
> + * struct!
> + */
> +struct brw_cs_prog_data {
> +   struct brw_stage_prog_data base;
> +
> +   GLuint dispatch_grf_start_reg_16;
> +   bool no_8;
> +   unsigned local_size[3];
> +   unsigned simd_size;
> +};

Hey Jordan,

Could you clarify something for me?  For fragment shaders, we specify
SIMD8 and SIMD16 programs, and let the hardware pick which one it thinks
is more appropriate.

For compute, we may have to use SIMD8/SIMD16/SIMD32, but we only specify
one of those programs.  Am I right?

If so, I suspect we don't need the no_8 flag - we should just generate
the one program we need, and set simd_size to indicate whether it's
SIMD8/SIMD16/SIMD32.  (At least, I assume that's what this is for...)

> +
>  /**
>   * Enum representing the i965-specific vertex results that don't correspond
>   * exactly to any element of gl_varying_slot.  The values of this enum are
> @@ -1361,6 +1374,10 @@ struct brw_context
>uint32_t fast_clear_op;
> } wm;
>  
> +   struct {
> +  struct brw_stage_state base;
> +  struct brw_cs_prog_data *prog_data;
> +   } cs;
>  
> struct {
>uint32_t state_offset;
> diff --git a/src/mesa/drivers/dri/i965/brw_cs.h 
> b/src/mesa/drivers/dri/i965/brw_cs.h
> new file mode 100644
> index 000..0e9e65a
> --- /dev/null
> +++ b/src/mesa/drivers/dri/i965/brw_cs.h
> @@ -0,0 +1,46 @@
> +/*
> + * Copyright © 2014 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + */
> +
> +
> +#ifndef BRW_CS_H
> +#define BRW_CS_H
> +
> +#include "brw_program.h"
> +
> +struct brw_cs_prog_key {
> +   GLuint program_string_id:32;

We should just do:

   uint32_t program_string_id;

or unsigned, if you prefer...

> +   struct brw_sampler_prog_key_data tex;
> +};
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +void
> +brw_upload_cs_prog(struct brw_context *brw);
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* BRW_CS_H */
> 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 19/20] i965/cs: Upload brw_cs_state

2015-04-24 Thread Kenneth Graunke
On Friday, April 24, 2015 04:33:11 PM Jordan Justen wrote:
> Signed-off-by: Jordan Justen 
> ---
>  src/mesa/drivers/dri/i965/brw_cs.cpp | 80 
> 
>  src/mesa/drivers/dri/i965/brw_defines.h  |  3 ++
>  src/mesa/drivers/dri/i965/brw_state.h|  1 +
>  src/mesa/drivers/dri/i965/brw_state_upload.c |  2 +
>  4 files changed, 86 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp 
> b/src/mesa/drivers/dri/i965/brw_cs.cpp
> index ea74bf1..3f378a1 100644
> --- a/src/mesa/drivers/dri/i965/brw_cs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_cs.cpp
> @@ -298,3 +298,83 @@ brw_cs_precompile(struct gl_context *ctx,
>  
> return success;
>  }
> +
> +
> +static void
> +brw_upload_cs_state(struct brw_context *brw)
> +{
> +   if (!brw->cs.prog_data)
> +  return;
> +
> +   uint32_t offset;
> +   uint32_t *desc = (uint32_t*) brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
> +8 * 4, 64, &offset);
> +   struct brw_stage_state *stage_state = &brw->cs.base;
> +   struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data;
> +   struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
> +
> +   uint32_t *bind = (uint32_t*) brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
> +
> prog_data->binding_table.size_bytes,
> +32, 
> &stage_state->bind_bo_offset);
> +
> +   uint32_t dwords = brw->gen < 8 ? 8 : 9;
> +   BEGIN_BATCH(dwords);
> +   OUT_BATCH(MEDIA_VFE_STATE << 16 | (dwords - 2));
> +
> +   if (prog_data->total_scratch) {
> +  if (brw->gen >= 8)
> + OUT_RELOC64(stage_state->scratch_bo,
> + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
> + ffs(prog_data->total_scratch) - 11);
> +  else
> + OUT_RELOC(stage_state->scratch_bo,
> +   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
> +   ffs(prog_data->total_scratch) - 11);
> +   } else {
> +  OUT_BATCH(0);
> +  if (brw->gen >= 8)
> + OUT_BATCH(0);
> +   }
> +
> +   OUT_BATCH(((brw->max_cs_threads - 1) << 16) |
> + (brw->gen >= 8 ? (2 << 8) : 0) |
> + (brw->gen >= 8 ? 0xc0 : 0xc4));
> +
> +   OUT_BATCH(0);
> +   OUT_BATCH(brw->gen >= 8 ? (2 << 16) : 0);

Perhaps add #defines for some of these values?

> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   OUT_BATCH(0);
> +   ADVANCE_BATCH();
> +
> +   /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
> +   memcpy(bind, stage_state->surf_offset,
> +  prog_data->binding_table.size_bytes);
> +
> +   memset(desc, 0, 8 * 4);
> +
> +   int dw = 0;
> +   desc[dw++] = brw->cs.base.prog_offset;
> +   if (brw->gen >= 8)
> +  dw++; /* Kernel Start Pointer High */
> +   dw++;
> +   dw++;
> +   desc[dw++] = stage_state->bind_bo_offset;
> +

Could we just set desc[0], desc[1], etc. directly rather than having a
variable we increment?  That seems simpler to me, and matches what we do
elsewhere in the code.

> +   BEGIN_BATCH(4);
> +   OUT_BATCH(MEDIA_INTERFACE_DESCRIPTOR_LOAD << 16 | (4 - 2));
> +   OUT_BATCH(0);
> +   OUT_BATCH(8 * 4);
> +   OUT_BATCH(offset);
> +   ADVANCE_BATCH();
> +}
> +
> +
> +extern "C"
> +const struct brw_tracked_state brw_cs_state = {
> +   .dirty = {
> +  .mesa  = 0,
> +  .brw   = BRW_NEW_COMPUTE_PROGRAM,

I actually don't see brw->compute_program or ctx->Shader accesses, which
would be covered by BRW_NEW_COMPUTE_PROGRAM.  brw->cs.prog_data should
be covered by BRW_NEW_CS_PROG_DATA - I think you want that instead?
(Or, at least, in addition?)

> +   },
> +   .emit = brw_upload_cs_state
> +};
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
> b/src/mesa/drivers/dri/i965/brw_defines.h
> index f6f8962..36f46af 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -2449,4 +2449,7 @@ enum brw_wm_barycentric_interp_mode {
>  #define SKL_MOCS_WB 9
>  #define SKL_MOCS_WT 5
>  
> +#define MEDIA_VFE_STATE 0x7000
> +#define MEDIA_INTERFACE_DESCRIPTOR_LOAD 0x7002
> +
>  #endif
> diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
> b/src/mesa/drivers/dri/i965/brw_state.h
> index cfa67b6..c3a86e6 100644
> --- a/src/mesa/drivers/dri/i965/brw_state.h
> +++ b/src/mesa/drivers/dri/i965/brw_state.h
> @@ -93,6 +93,7 @@ extern const struct brw_tracked_state brw_drawing_rect;
>  extern const struct brw_tracked_state brw_indices;
>  extern const struct brw_tracked_state brw_vertices;
>  extern const struct brw_tracked_state brw_index_buffer;
> +extern const struct brw_tracked_state brw_cs_state;
>  extern const struct brw_tracked_state gen6_binding_table_pointers;
>  extern const struct brw_tracked_state gen6_blend_state;
>  extern const struct brw_tracked_state gen6_cc_state_pointers;
> diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c 
> b/src/mesa/drivers/dri/i965/brw_state_upload.c
> index d086f39..7d

Re: [Mesa-dev] [PATCH v2 18/20] i965: Remove comment about chv device numbers relating to hsw gt1

2015-04-24 Thread Kenneth Graunke
On Friday, April 24, 2015 04:33:10 PM Jordan Justen wrote:
> Remove comment "These were copied from Haswell GT1, above.". Many of
> these numbers have been modified by this point, so the HSW GT1
> reference no longer seems helpful.
> 
> The comment "Thread counts and URB limits are placeholders, and may
> not be accurate." is retained for now.
> 
> Signed-off-by: Jordan Justen 
> ---
>  src/mesa/drivers/dri/i965/brw_device_info.c | 4 +---
>  1 file changed, 1 insertion(+), 3 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c 
> b/src/mesa/drivers/dri/i965/brw_device_info.c
> index 7999ba9..8938384 100644
> --- a/src/mesa/drivers/dri/i965/brw_device_info.c
> +++ b/src/mesa/drivers/dri/i965/brw_device_info.c
> @@ -278,9 +278,7 @@ static const struct brw_device_info 
> brw_device_info_bdw_gt3 = {
> }
>  };
>  
> -/* Thread counts and URB limits are placeholders, and may not be accurate.
> - * These were copied from Haswell GT1, above.
> - */
> +/* Thread counts and URB limits are placeholders, and may not be accurate. */
>  static const struct brw_device_info brw_device_info_chv = {
> GEN8_FEATURES, .is_cherryview = 1, .gt = 1,
> .has_llc = false,
> 

I actually think they're accurate at this point, so you can just drop
the comment entirely.  A patch to do that would get my R-b.

Thanks!


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 08/20] i965/fs: Add emit_cs_terminate to emit CS_OPCODE_CS_TERMINATE

2015-04-24 Thread Kenneth Graunke
On Friday, April 24, 2015 04:33:00 PM Jordan Justen wrote:
> v2:
>  * Do more work at the visitor level. g0 is loaded and sent to the
>generator now.
> 
> Signed-off-by: Jordan Justen 
> ---
>  src/mesa/drivers/dri/i965/brw_fs.h   |  1 +
>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 19 +++
>  2 files changed, 20 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
> b/src/mesa/drivers/dri/i965/brw_fs.h
> index 4e17d44..b1e65cd 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -387,6 +387,7 @@ public:
>   bool use_2nd_half = false);
> void emit_fb_writes();
> void emit_urb_writes();
> +   void emit_cs_terminate();
>  
> void emit_shader_time_begin();
> void emit_shader_time_end();
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> index 11f38c2..ba8b811 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> @@ -4154,6 +4154,25 @@ fs_visitor::resolve_ud_negate(fs_reg *reg)
> *reg = temp;
>  }
>  
> +void
> +fs_visitor::emit_cs_terminate()
> +{
> +   assert(brw->gen >= 7);
> +
> +   /* We are getting the thread ID from the compute shader header */
> +   assert(stage == MESA_SHADER_COMPUTE);
> +
> +   /* Copy g0 for the message payload */

Perhaps add a comment like:

/* We can't directly send from g0, since sends with EOT have to use
 * g112-127.  So, copy it to a virtual register.  The register allocator
 * will make sure it uses the appropriate register range.
 */

I was going to suggest sending from g0, but I'm guessing this is why you
don't.  (I wonder if it's really necessary for compute...seems wise to
do for now though...)

Patches 1-9 are:
Reviewed-by: Kenneth Graunke 

> +   struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD);
> +   fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
> +   fs_inst *inst = emit(MOV(payload, g0));
> +   inst->force_writemask_all = true;
> +
> +   /* Send a message to the thread spawner to terminate the thread. */
> +   inst = emit(CS_OPCODE_CS_TERMINATE, reg_undef, payload);
> +   inst->eot = true;
> +}
> +
>  /**
>   * Resolve the result of a Gen4-5 CMP instruction to a proper boolean.
>   *
> 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 6/9] nir: Add an entirely C-based linked list implementation

2015-04-24 Thread Jason Ekstrand
On Apr 24, 2015 4:46 PM, "Rob Clark"  wrote:
>
> On Fri, Apr 24, 2015 at 7:32 PM, Jason Ekstrand 
wrote:
> > This commit adds a C-based linked list implementation for NIR.  Unlike
> > exec_list in glsl/list.h, there is no C++ API.  Also, this list is
based on
> > wl_list (from the Wayland project) which is, in turn, based on the
kernel
> > list.  As such, it should be fairly familiar to people who have done
> > anything in kernel space.
> >
> > Doesn't exec_list already have a C api?
> >
> > Yes, it does.  However, exec_list has C++ constructors for exec_list and
> > exec_node.  In the patches that follow, I use linked lists for use/def
sets
> > for registers and SSA values.  In order to do so, I have to be able to
> > place lists and links inside of unions.  Since exec_list and exec_node
have
> > constructors, doing so causes any C++ code that includes nir.h to die
in a
> > fire.  Therefore, we can't just use exec_list.
> >
> > What about simple_list?  Why re-create it?
> >
> > I thought about that too.  However, the simple_list is badly named and
the
> > API isn't that great.  Making it usable as a first-class datastructure
> > would have taken as much work as adding nir_list.  Also, simple_list
isn't
> > really a standard as it's only ever used in errors.c and the vc4 driver.
> >
> > Why a kernel list; why not keep the semantics of exec_list?
> >
> > The short version:  I like it better.  Also, while exec_list is
familiar to
> > people who have worked inside the mesa GLSL compiler, I think that the
> > kernel list will be more familiar to people in the open-source graphics
> > community in general.  For whatever it's worth, I explicitly designed it
> > with separate nir_list and nir_link structures so that we can switch
from
> > kernel list to exec_list semantics if we want to.
>
> jfwiw, I am in favor of kernel(ish) lists.. although (as mentioned on
> irc) maybe we just want to hoist gallium's u_double_list.h out into
> util so it can be used more widely.  (fwiw, I am already using
> u_double_list in freedreno)

I took a quick look through it.  While I like the nir_list api a little
better, it would work just as well.  I'm not going to quibble too much over
what gets used.  I prefer C99 iterators, it's a *lot* better than
simple_list.
--Jason

> > Why put this in NIR and not in util?
> >
> > At the moment, NIR is the only user.  I do expect that Eric may want to
use
> > it in vc4 over simple_list.  However, vc4 is already using NIR anyway,
so
> > it's not really that polluting.
> >
> > It has also been suggested by Ken that we just pull the C bits out of
> > exec_list and keep one underlying implementation for both C and C++ only
> > with different names.  While I think that this is definitely doable and
may
> > be the best long-term solution, I didn't want to do that refactoring
prior
> > to getting this series up-and-going and adding a list was easier.  I'm
ok
> > with doing that instead of adding a list.
> > ---
> >  src/glsl/Makefile.sources |   1 +
> >  src/glsl/nir/nir_list.h   | 183
++
> >  2 files changed, 184 insertions(+)
> >  create mode 100644 src/glsl/nir/nir_list.h
> >
> > diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
> > index c471eca..fa51dcb 100644
> > --- a/src/glsl/Makefile.sources
> > +++ b/src/glsl/Makefile.sources
> > @@ -28,6 +28,7 @@ NIR_FILES = \
> > nir/nir_from_ssa.c \
> > nir/nir_intrinsics.c \
> > nir/nir_intrinsics.h \
> > +   nir/nir_list.h \
> > nir/nir_live_variables.c \
> > nir/nir_lower_alu_to_scalar.c \
> > nir/nir_lower_atomics.c \
> > diff --git a/src/glsl/nir/nir_list.h b/src/glsl/nir/nir_list.h
> > new file mode 100644
> > index 000..330a660
> > --- /dev/null
> > +++ b/src/glsl/nir/nir_list.h
> > @@ -0,0 +1,183 @@
> > +/*
> > + * Copyright © 2015 Intel Corporation
> > + *
> > + * Permission is hereby granted, free of charge, to any person
obtaining a
> > + * copy of this software and associated documentation files (the
"Software"),
> > + * to deal in the Software without restriction, including without
limitation
> > + * the rights to use, copy, modify, merge, publish, distribute,
sublicense,
> > + * and/or sell copies of the Software, and to permit persons to whom
the
> > + * Software is furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice (including
the next
> > + * paragraph) shall be included in all copies or substantial portions
of the
> > + * Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING
> > + *

Re: [Mesa-dev] [PATCH] i965/fs: Strip trailing contant zeroes in sample messages

2015-04-24 Thread Neil Roberts
Kenneth Graunke  writes:

> I like this idea!
>
> We definitely need to skip this optimization on Gen4, since the Gen4/G45
> sampler infers the texturing opcode based on the message length.  But
> for Gen5+, it should be no problem.

Ah ok, yes, I will add this.

> Matt mentioned that we have to emit zero in some cases due to hardware
> bugs.  IIRC, we used to skip some parameters in the middle - i.e. if the
> message took "u, v, r, lod"...and we were using a 2D texture...we'd omit
> 'r', since it shouldn't matter.  But it did matter - and had to be
> zeroed.  I think skipping ones at the end and reducing mlen should be
> fine.

Ok, that sounds good.

> Why not do this for all texture messages, though?  Or for that matter, all
> messages?  inst->is_tex() or inst->mlen > 0 might make sense.

Yes, I think you're right. I was just trying to be a bit conservative so
that I can test the few cases that it hits. We probably don't have many
Piglit tests that would hit this optimisation. I guess it wouldn't be
too hard to write some though.

>
>> + fs_inst *load_payload = (fs_inst *) inst->prev;
>> +
>> + if (load_payload->is_head_sentinel() ||
>> + load_payload->opcode != SHADER_OPCODE_LOAD_PAYLOAD)
>> +continue;
>> +
>> + /* We don't want to remove the message header. Removing all of the
>> +  * parameters is avoided because it seems to cause a GPU hang but I
>> +  * can't find any documentation indicating that this is expected.
>> +  */
>> + while (inst->mlen > inst->header_present + dispatch_width / 8 &&
>> +load_payload->src[(inst->mlen - inst->header_present) /
>> +  (dispatch_width / 8) - 1].is_zero()) {
>> +inst->mlen -= dispatch_width / 8;
>> +progress = true;
>> + }
>
> Another idea...you could just create a new LOAD_PAYLOAD for what you
> want, and leave the old one in place just in case it's used (with the
> assumption that it's probably not, and dead code elimination will make
> it go away).  Just a suggestion.

I'm actually modifying the sample instruction not the LOAD_PAYLOAD
instruction so it should still be in place unmodified. Unless I've
misunderstood I don't think there's a problem here with accidentally
eliminating a payload that is later used.

Thanks for the review.

Regards,
- Neil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/fs: Strip trailing contant zeroes in sample messages

2015-04-24 Thread Matt Turner
On Fri, Apr 24, 2015 at 5:15 PM, Neil Roberts  wrote:
> Matt Turner  writes:
>
>>> +   foreach_block_and_inst(block, fs_inst, inst, cfg) {
>>> +  if ((inst->opcode == SHADER_OPCODE_TEX ||
>>> +   inst->opcode == SHADER_OPCODE_TXF) &&
>>> +  !inst->shadow_compare) {
>>> + fs_inst *load_payload = (fs_inst *) inst->prev;
>>> +
>>> + if (load_payload->is_head_sentinel() ||
>>> + load_payload->opcode != SHADER_OPCODE_LOAD_PAYLOAD)
>>> +continue;
>>
>> We can't guarantee that the load_payload isn't used by another texture
>> later in the program, and since you need to change the texture
>> operation's mlen, I think you need to check that the load_payload
>> isn't used after this texture operation.
>>
>> To do that, (1) add an ip variable and initialize it to -1, (2) add
>> ip++ as the first statement in the foreach_block_and_inst loop, (3)
>> add some code to this check similar to in
>> brw_fs_saturate_propagation.cpp using this->live_intervals.
>
> Hrm, I'm not modifying the LOAD_PAYLOAD instruction, only the
> SHADER_OPCODE_TEX/TXF instruction. If there is a later instruction that
> is using the LOAD_PAYLOAD, won't that end up making its own dependency
> on the MOV instructions so they won't get removed?

On Fri, Apr 24, 2015 at 5:15 PM, Neil Roberts  wrote:
> Matt Turner  writes:
>
>>> +   foreach_block_and_inst(block, fs_inst, inst, cfg) {
>>> +  if ((inst->opcode == SHADER_OPCODE_TEX ||
>>> +   inst->opcode == SHADER_OPCODE_TXF) &&
>>> +  !inst->shadow_compare) {
>>> + fs_inst *load_payload = (fs_inst *) inst->prev;
>>> +
>>> + if (load_payload->is_head_sentinel() ||
>>> + load_payload->opcode != SHADER_OPCODE_LOAD_PAYLOAD)
>>> +continue;
>>
>> We can't guarantee that the load_payload isn't used by another texture
>> later in the program, and since you need to change the texture
>> operation's mlen, I think you need to check that the load_payload
>> isn't used after this texture operation.
>>
>> To do that, (1) add an ip variable and initialize it to -1, (2) add
>> ip++ as the first statement in the foreach_block_and_inst loop, (3)
>> add some code to this check similar to in
>> brw_fs_saturate_propagation.cpp using this->live_intervals.
>
> Hrm, I'm not modifying the LOAD_PAYLOAD instruction, only the
> SHADER_OPCODE_TEX/TXF instruction. If there is a later instruction that
> is using the LOAD_PAYLOAD, won't that end up making its own dependency
> on the MOV instructions so they won't get removed?

Oh, you're right. I misread what the patch was doing. Indeed, I think
this should work fine.

>>> +   OPT(opt_zero_samples);
>>
>> I think you're probably right that this can be done after the
>> optimization loop. I guess it's possible that we might trim a texture
>> payload down and it'll then be the same as an existing payload and we
>> can then CSE them. I'd be interested to see if putting it inside the
>> optimization loop improves anything.
>
> Hrm, it might be worth trying. However, as I mentioned above, I'm not
> modifying the LOAD_PAYLOAD instruction so it probably won't hit the
> example you mentioned.

Right, okay. I don't mind if you want to try that later.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/fs: Strip trailing contant zeroes in sample messages

2015-04-24 Thread Neil Roberts
Matt Turner  writes:

>> +   foreach_block_and_inst(block, fs_inst, inst, cfg) {
>> +  if ((inst->opcode == SHADER_OPCODE_TEX ||
>> +   inst->opcode == SHADER_OPCODE_TXF) &&
>> +  !inst->shadow_compare) {
>> + fs_inst *load_payload = (fs_inst *) inst->prev;
>> +
>> + if (load_payload->is_head_sentinel() ||
>> + load_payload->opcode != SHADER_OPCODE_LOAD_PAYLOAD)
>> +continue;
>
> We can't guarantee that the load_payload isn't used by another texture
> later in the program, and since you need to change the texture
> operation's mlen, I think you need to check that the load_payload
> isn't used after this texture operation.
>
> To do that, (1) add an ip variable and initialize it to -1, (2) add
> ip++ as the first statement in the foreach_block_and_inst loop, (3)
> add some code to this check similar to in
> brw_fs_saturate_propagation.cpp using this->live_intervals.

Hrm, I'm not modifying the LOAD_PAYLOAD instruction, only the
SHADER_OPCODE_TEX/TXF instruction. If there is a later instruction that
is using the LOAD_PAYLOAD, won't that end up making its own dependency
on the MOV instructions so they won't get removed?

>> +   OPT(opt_zero_samples);
>
> I think you're probably right that this can be done after the
> optimization loop. I guess it's possible that we might trim a texture
> payload down and it'll then be the same as an existing payload and we
> can then CSE them. I'd be interested to see if putting it inside the
> optimization loop improves anything.

Hrm, it might be worth trying. However, as I mentioned above, I'm not
modifying the LOAD_PAYLOAD instruction so it probably won't hit the
example you mentioned.

Thanks for the review.

Regards,
- Neil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 15/22] i965/gen9: Use _mesa_meta_pbo_TexSubImage to write to YF/YS surfaces

2015-04-24 Thread Pohjolainen, Topi
On Fri, Apr 24, 2015 at 05:12:34PM -0700, Anuj Phogat wrote:
> On Fri, Apr 24, 2015 at 12:22 PM, Pohjolainen, Topi
>  wrote:
> > On Fri, Apr 17, 2015 at 04:51:36PM -0700, Anuj Phogat wrote:
> >> No other path currently supports uploading data to these surfaces.
> >>
> >> Signed-off-by: Anuj Phogat 
> >> ---
> >>  src/mesa/drivers/dri/i965/intel_tex_image.c| 24 
> >> ++--
> >>  src/mesa/drivers/dri/i965/intel_tex_subimage.c | 23 
> >> +--
> >>  2 files changed, 43 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c 
> >> b/src/mesa/drivers/dri/i965/intel_tex_image.c
> >> index 31cbabe..03db100 100644
> >> --- a/src/mesa/drivers/dri/i965/intel_tex_image.c
> >> +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
> >> @@ -93,8 +93,10 @@ intelTexImage(struct gl_context * ctx,
> >>const struct gl_pixelstore_attrib *unpack)
> >>  {
> >> struct intel_texture_image *intelImage = intel_texture_image(texImage);
> >> +   struct brw_context *brw = brw_context(ctx);
> >> bool ok;
> >> -
> >> +   bool create_pbo = false;
> >> +   uint32_t tr_mode = INTEL_MIPTREE_TRMODE_NONE;
> >> bool tex_busy = intelImage->mt && 
> >> drm_intel_bo_busy(intelImage->mt->bo);
> >>
> >> DBG("%s mesa_format %s target %s format %s type %s level %d 
> >> %dx%dx%d\n",
> >> @@ -111,15 +113,33 @@ intelTexImage(struct gl_context * ctx,
> >>
> >> assert(intelImage->mt);
> >>
> >> +   if (brw->gen >= 9) {
> >> +  tr_mode = intelImage->mt->tr_mode;
> >> +
> >> +  /* Set create_pbo = true for surfaces with 
> >> INTEL_MIPTREE_TRMODE_{YF/YS}.
> >> +   * _mesa_meta_pbo_TexSubImage() is the only working path to upload 
> >> data
> >> +   * to such surfaces.
> >> +   */
> >> +  create_pbo = tex_busy || (intelImage->mt &&
> >> +   intelImage->mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE);
> >> +   } else {
> >> +  create_pbo = tex_busy;
> >> +   }
> >> +
> >
> > What would you think about a helper?
> Yes, we can use a helper here.
> >
> > static bool need_to_create_pbo(const struct intel_texture_image *img)
> > {
> >if (!img->mt)
> >   return false;
> >
> >const bool tex_busy = drm_intel_bo_busy(img->mt->bo);
> >if (tex_busy || brw->gen < 9)
> >   return tex_busy;
> >
> >/* Set create_pbo = true for surfaces with INTEL_MIPTREE_TRMODE_{YF/YS}.
> > * _mesa_meta_pbo_TexSubImage() is the only working path to upload data
> > * to such surfaces.
> > */
> >return img->mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE;
> > }
> >
> >> ok = _mesa_meta_pbo_TexSubImage(ctx, dims, texImage, 0, 0, 0,
> >> texImage->Width, texImage->Height,
> >> texImage->Depth,
> >> format, type, pixels,
> >> false /*allocate_storage*/,
> >> -   tex_busy, unpack);
> >> +   create_pbo, unpack);
> >> if (ok)
> >>return;
> >>
> >> +   /* Currently there are no fallback paths to upload data to surfaces 
> >> with
> >> +* tr_mode != INTEL_MIPTREE_TRMODE_NONE.
> >> +*/
> >> +   assert(tr_mode == INTEL_MIPTREE_TRMODE_NONE);
> >
> > And I would put this assertion into _mesa_meta_pbo_TexSubImage() instead
> > of duplicating it for both callers. What do you think?
> >
> Meta functions are not supposed to use any Intel specific code. I'm anyway
> getting rid of these assertions in my later patches. So, I'll just keep them
> here.

Ah, of course, sorry for the noise.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 15/22] i965/gen9: Use _mesa_meta_pbo_TexSubImage to write to YF/YS surfaces

2015-04-24 Thread Anuj Phogat
On Fri, Apr 24, 2015 at 12:22 PM, Pohjolainen, Topi
 wrote:
> On Fri, Apr 17, 2015 at 04:51:36PM -0700, Anuj Phogat wrote:
>> No other path currently supports uploading data to these surfaces.
>>
>> Signed-off-by: Anuj Phogat 
>> ---
>>  src/mesa/drivers/dri/i965/intel_tex_image.c| 24 ++--
>>  src/mesa/drivers/dri/i965/intel_tex_subimage.c | 23 +--
>>  2 files changed, 43 insertions(+), 4 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c 
>> b/src/mesa/drivers/dri/i965/intel_tex_image.c
>> index 31cbabe..03db100 100644
>> --- a/src/mesa/drivers/dri/i965/intel_tex_image.c
>> +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
>> @@ -93,8 +93,10 @@ intelTexImage(struct gl_context * ctx,
>>const struct gl_pixelstore_attrib *unpack)
>>  {
>> struct intel_texture_image *intelImage = intel_texture_image(texImage);
>> +   struct brw_context *brw = brw_context(ctx);
>> bool ok;
>> -
>> +   bool create_pbo = false;
>> +   uint32_t tr_mode = INTEL_MIPTREE_TRMODE_NONE;
>> bool tex_busy = intelImage->mt && drm_intel_bo_busy(intelImage->mt->bo);
>>
>> DBG("%s mesa_format %s target %s format %s type %s level %d %dx%dx%d\n",
>> @@ -111,15 +113,33 @@ intelTexImage(struct gl_context * ctx,
>>
>> assert(intelImage->mt);
>>
>> +   if (brw->gen >= 9) {
>> +  tr_mode = intelImage->mt->tr_mode;
>> +
>> +  /* Set create_pbo = true for surfaces with 
>> INTEL_MIPTREE_TRMODE_{YF/YS}.
>> +   * _mesa_meta_pbo_TexSubImage() is the only working path to upload 
>> data
>> +   * to such surfaces.
>> +   */
>> +  create_pbo = tex_busy || (intelImage->mt &&
>> +   intelImage->mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE);
>> +   } else {
>> +  create_pbo = tex_busy;
>> +   }
>> +
>
> What would you think about a helper?
Yes, we can use a helper here.
>
> static bool need_to_create_pbo(const struct intel_texture_image *img)
> {
>if (!img->mt)
>   return false;
>
>const bool tex_busy = drm_intel_bo_busy(img->mt->bo);
>if (tex_busy || brw->gen < 9)
>   return tex_busy;
>
>/* Set create_pbo = true for surfaces with INTEL_MIPTREE_TRMODE_{YF/YS}.
> * _mesa_meta_pbo_TexSubImage() is the only working path to upload data
> * to such surfaces.
> */
>return img->mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE;
> }
>
>> ok = _mesa_meta_pbo_TexSubImage(ctx, dims, texImage, 0, 0, 0,
>> texImage->Width, texImage->Height,
>> texImage->Depth,
>> format, type, pixels,
>> false /*allocate_storage*/,
>> -   tex_busy, unpack);
>> +   create_pbo, unpack);
>> if (ok)
>>return;
>>
>> +   /* Currently there are no fallback paths to upload data to surfaces with
>> +* tr_mode != INTEL_MIPTREE_TRMODE_NONE.
>> +*/
>> +   assert(tr_mode == INTEL_MIPTREE_TRMODE_NONE);
>
> And I would put this assertion into _mesa_meta_pbo_TexSubImage() instead
> of duplicating it for both callers. What do you think?
>
Meta functions are not supposed to use any Intel specific code. I'm anyway
getting rid of these assertions in my later patches. So, I'll just keep them
here.

>> +
>> ok = intel_texsubimage_tiled_memcpy(ctx, dims, texImage,
>> 0, 0, 0, /*x,y,z offsets*/
>> texImage->Width,
>> diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c 
>> b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
>> index 909ff25..a7ad10e 100644
>> --- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c
>> +++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
>> @@ -200,8 +200,10 @@ intelTexSubImage(struct gl_context * ctx,
>>   const struct gl_pixelstore_attrib *packing)
>>  {
>> struct intel_texture_image *intelImage = intel_texture_image(texImage);
>> +   struct brw_context *brw = brw_context(ctx);
>> bool ok;
>> -
>> +   bool create_pbo = false;
>> +   uint32_t tr_mode = INTEL_MIPTREE_TRMODE_NONE;
>> bool tex_busy = intelImage->mt && drm_intel_bo_busy(intelImage->mt->bo);
>>
>> DBG("%s mesa_format %s target %s format %s type %s level %d %dx%dx%d\n",
>> @@ -210,13 +212,30 @@ intelTexSubImage(struct gl_context * ctx,
>> _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type),
>> texImage->Level, texImage->Width, texImage->Height, texImage->Depth);
>>
>> +   if (brw->gen >= 9) {
>> +  tr_mode = intelImage->mt->tr_mode;
>> +  /* Set create_pbo = true for surfaces with 
>> INTEL_MIPTREE_TRMODE_{YF/YS}.
>> +   * _mesa_meta_pbo_TexSubImage() is the only working path to upload 
>> data
>> +   * to such surfaces.
>> +   */
>> +  create_pbo = tex_busy || (intelImage->mt &&
>> +   intelImage->mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE

Re: [Mesa-dev] [RFC 6/9] nir: Add an entirely C-based linked list implementation

2015-04-24 Thread Rob Clark
On Fri, Apr 24, 2015 at 7:32 PM, Jason Ekstrand  wrote:
> This commit adds a C-based linked list implementation for NIR.  Unlike
> exec_list in glsl/list.h, there is no C++ API.  Also, this list is based on
> wl_list (from the Wayland project) which is, in turn, based on the kernel
> list.  As such, it should be fairly familiar to people who have done
> anything in kernel space.
>
> Doesn't exec_list already have a C api?
>
> Yes, it does.  However, exec_list has C++ constructors for exec_list and
> exec_node.  In the patches that follow, I use linked lists for use/def sets
> for registers and SSA values.  In order to do so, I have to be able to
> place lists and links inside of unions.  Since exec_list and exec_node have
> constructors, doing so causes any C++ code that includes nir.h to die in a
> fire.  Therefore, we can't just use exec_list.
>
> What about simple_list?  Why re-create it?
>
> I thought about that too.  However, the simple_list is badly named and the
> API isn't that great.  Making it usable as a first-class datastructure
> would have taken as much work as adding nir_list.  Also, simple_list isn't
> really a standard as it's only ever used in errors.c and the vc4 driver.
>
> Why a kernel list; why not keep the semantics of exec_list?
>
> The short version:  I like it better.  Also, while exec_list is familiar to
> people who have worked inside the mesa GLSL compiler, I think that the
> kernel list will be more familiar to people in the open-source graphics
> community in general.  For whatever it's worth, I explicitly designed it
> with separate nir_list and nir_link structures so that we can switch from
> kernel list to exec_list semantics if we want to.

jfwiw, I am in favor of kernel(ish) lists.. although (as mentioned on
irc) maybe we just want to hoist gallium's u_double_list.h out into
util so it can be used more widely.  (fwiw, I am already using
u_double_list in freedreno)

BR,
-R


> Why put this in NIR and not in util?
>
> At the moment, NIR is the only user.  I do expect that Eric may want to use
> it in vc4 over simple_list.  However, vc4 is already using NIR anyway, so
> it's not really that polluting.
>
> It has also been suggested by Ken that we just pull the C bits out of
> exec_list and keep one underlying implementation for both C and C++ only
> with different names.  While I think that this is definitely doable and may
> be the best long-term solution, I didn't want to do that refactoring prior
> to getting this series up-and-going and adding a list was easier.  I'm ok
> with doing that instead of adding a list.
> ---
>  src/glsl/Makefile.sources |   1 +
>  src/glsl/nir/nir_list.h   | 183 
> ++
>  2 files changed, 184 insertions(+)
>  create mode 100644 src/glsl/nir/nir_list.h
>
> diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
> index c471eca..fa51dcb 100644
> --- a/src/glsl/Makefile.sources
> +++ b/src/glsl/Makefile.sources
> @@ -28,6 +28,7 @@ NIR_FILES = \
> nir/nir_from_ssa.c \
> nir/nir_intrinsics.c \
> nir/nir_intrinsics.h \
> +   nir/nir_list.h \
> nir/nir_live_variables.c \
> nir/nir_lower_alu_to_scalar.c \
> nir/nir_lower_atomics.c \
> diff --git a/src/glsl/nir/nir_list.h b/src/glsl/nir/nir_list.h
> new file mode 100644
> index 0000000..330a660
> --- /dev/null
> +++ b/src/glsl/nir/nir_list.h
> @@ -0,0 +1,183 @@
> +/*
> + * Copyright © 2015 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
> DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *Jason Ekstrand (ja...@jlekstrand.net)
> + *
> + */
> +
> +#pragma once
> +
> +#ifndef _NIR_LIST_H_
> +#define _NIR_LIST_H_
> +
> +/** A simple linked list implementation.
> + *
> + * This linked list implementation is based on wl_list from the Wayland
> + * project which is, in turn, based on the kernel list.  As such

[Mesa-dev] [PATCH] mesa: the function name appears to have a gl prefix already

2015-04-24 Thread Ilia Mirkin
Currently we're producing errors like

User error: GL_INVALID_OPERATION in glglDeleteProgramsARB(invalid call)

And noop_warn appears to be called with the full function name. Don't
prepend a gl prefix.

Signed-off-by: Ilia Mirkin 
---
 src/mesa/main/context.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
index adf6497..6047cdf 100644
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -894,14 +894,14 @@ update_default_objects(struct gl_context *ctx)
  * If there's no current OpenGL context for the calling thread, we can
  * print a message to stderr.
  *
- * \param name  the name of the OpenGL function, without the "gl" prefix
+ * \param name  the name of the OpenGL function
  */
 static void
 nop_handler(const char *name)
 {
GET_CURRENT_CONTEXT(ctx);
if (ctx) {
-  _mesa_error(ctx, GL_INVALID_OPERATION, "gl%s(invalid call)", name);
+  _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid call)", name);
}
 #if defined(DEBUG)
else if (getenv("MESA_DEBUG") || getenv("LIBGL_DEBUG")) {
-- 
2.0.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: Upload atomic buffer state for compute shaders

2015-04-24 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
git://people.freedesktop.org/~jljusten/mesa i965-cs-atomic-counters-v1

 src/mesa/drivers/dri/i965/brw_context.h  |  2 +-
 src/mesa/drivers/dri/i965/brw_state.h|  1 +
 src/mesa/drivers/dri/i965/brw_state_upload.c |  2 ++
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 25 
 4 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index e679d2b..48e8076 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1437,7 +1437,7 @@ struct brw_context
 
int num_atoms[BRW_NUM_PIPELINES];
const struct brw_tracked_state render_atoms[57];
-   const struct brw_tracked_state compute_atoms[2];
+   const struct brw_tracked_state compute_atoms[3];
 
/* If (INTEL_DEBUG & DEBUG_BATCH) */
struct {
diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index 4b3184c..7e2c74b 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -84,6 +84,7 @@ extern const struct brw_tracked_state brw_gs_binding_table;
 extern const struct brw_tracked_state brw_vs_binding_table;
 extern const struct brw_tracked_state brw_wm_ubo_surfaces;
 extern const struct brw_tracked_state brw_wm_abo_surfaces;
+extern const struct brw_tracked_state brw_cs_abo_surfaces;
 extern const struct brw_tracked_state brw_wm_unit;
 extern const struct brw_tracked_state brw_interpolation_map;
 
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c 
b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 686a3da..84b0861 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -250,6 +250,7 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
 static const struct brw_tracked_state *gen7_compute_atoms[] =
 {
&brw_state_base_address,
+   &brw_cs_abo_surfaces,
&brw_cs_state,
 };
 
@@ -332,6 +333,7 @@ static const struct brw_tracked_state *gen8_render_atoms[] =
 static const struct brw_tracked_state *gen8_compute_atoms[] =
 {
&gen8_state_base_address,
+   &brw_cs_abo_surfaces,
&brw_cs_state,
 };
 
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 161d140..62b606c 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -986,6 +986,31 @@ const struct brw_tracked_state brw_wm_abo_surfaces = {
.emit = brw_upload_wm_abo_surfaces,
 };
 
+static void
+brw_upload_cs_abo_surfaces(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->ctx;
+   /* _NEW_PROGRAM */
+   struct gl_shader_program *prog =
+  ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
+
+   if (prog) {
+  /* CACHE_NEW_WM_PROG */
+  brw_upload_abo_surfaces(brw, prog, &brw->cs.base,
+  &brw->cs.prog_data->base);
+   }
+}
+
+const struct brw_tracked_state brw_cs_abo_surfaces = {
+   .dirty = {
+  .mesa = _NEW_PROGRAM,
+  .brw = BRW_NEW_COMPUTE_PROGRAM |
+ BRW_NEW_BATCH |
+ BRW_NEW_ATOMIC_BUFFER,
+   },
+   .emit = brw_upload_cs_abo_surfaces,
+};
+
 void
 gen4_init_vtable_surface_functions(struct brw_context *brw)
 {
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/7] i965/cs: Implement brw_emit_gpgpu_walker

2015-04-24 Thread Jordan Justen
Tested on Ivybridge, Haswell and Broadwell.

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_compute.c | 39 -
 src/mesa/drivers/dri/i965/brw_defines.h |  1 +
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_compute.c 
b/src/mesa/drivers/dri/i965/brw_compute.c
index baed701..06ef448 100644
--- a/src/mesa/drivers/dri/i965/brw_compute.c
+++ b/src/mesa/drivers/dri/i965/brw_compute.c
@@ -31,12 +31,49 @@
 #include "brw_draw.h"
 #include "brw_state.h"
 #include "intel_batchbuffer.h"
+#include "brw_defines.h"
 
 
 static void
 brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
 {
-   _mesa_problem(&brw->ctx, "TODO: implement brw_emit_gpgpu_walker");
+   const struct brw_cs_prog_data *prog_data = brw->cs.prog_data;
+
+   const unsigned simd_size = prog_data->simd_size;
+   unsigned group_size = prog_data->local_size[0] *
+  prog_data->local_size[1] * prog_data->local_size[2];
+   unsigned thread_width_max =
+  (group_size + simd_size - 1) / simd_size;
+
+   uint32_t right_mask = (1u << simd_size) - 1;
+   const unsigned right_non_aligned = group_size & (simd_size - 1);
+   if (right_non_aligned != 0)
+  right_mask >>= (simd_size - right_non_aligned);
+
+   uint32_t dwords = brw->gen < 8 ? 11 : 15;
+   BEGIN_BATCH(dwords);
+   OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2));
+   OUT_BATCH(0);
+   if (brw->gen >= 8) {
+  OUT_BATCH(0);
+  OUT_BATCH(0);
+   }
+   assert(thread_width_max <= brw->max_cs_threads);
+   OUT_BATCH(((simd_size == 8) ? 0 : 1) << 30 |
+ (thread_width_max - 1));
+   OUT_BATCH(0);
+   if (brw->gen >= 8)
+  OUT_BATCH(0);
+   OUT_BATCH(num_groups[0]);
+   OUT_BATCH(0);
+   if (brw->gen >= 8)
+  OUT_BATCH(0);
+   OUT_BATCH(num_groups[1]);
+   OUT_BATCH(0);
+   OUT_BATCH(num_groups[2]);
+   OUT_BATCH(right_mask);
+   OUT_BATCH(0xffffffff);
+   ADVANCE_BATCH();
 }
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 36f46af..cd25511 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -2451,5 +2451,6 @@ enum brw_wm_barycentric_interp_mode {
 
 #define MEDIA_VFE_STATE 0x7000
 #define MEDIA_INTERFACE_DESCRIPTOR_LOAD 0x7002
+#define GPGPU_WALKER0x7105
 
 #endif
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/7] i965/cs: Emit state base address

2015-04-24 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_context.h  | 2 +-
 src/mesa/drivers/dri/i965/brw_state_upload.c | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 7fd50f4..e93057f 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1435,7 +1435,7 @@ struct brw_context
 
int num_atoms[BRW_NUM_PIPELINES];
const struct brw_tracked_state render_atoms[57];
-   const struct brw_tracked_state compute_atoms[1];
+   const struct brw_tracked_state compute_atoms[2];
 
/* If (INTEL_DEBUG & DEBUG_BATCH) */
struct {
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c 
b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 7d0dc8f..15d6953 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -249,6 +249,7 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
 
 static const struct brw_tracked_state *gen7_compute_atoms[] =
 {
+   &brw_state_base_address,
&brw_cs_state,
 };
 
@@ -330,6 +331,7 @@ static const struct brw_tracked_state *gen8_render_atoms[] =
 
 static const struct brw_tracked_state *gen8_compute_atoms[] =
 {
+   &gen8_state_base_address,
&brw_cs_state,
 };
 
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/7] i965: Implement DispatchCompute() back-end

2015-04-24 Thread Jordan Justen
From: Paul Berry 

brw_emit_gpgpu_walker will be implemented in a subsequent patch.

Reviewed-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/Makefile.sources |   1 +
 src/mesa/drivers/dri/i965/brw_compute.c| 121 +
 src/mesa/drivers/dri/i965/brw_context.c|   1 +
 src/mesa/drivers/dri/i965/brw_context.h|   4 +
 4 files changed, 127 insertions(+)
 create mode 100644 src/mesa/drivers/dri/i965/brw_compute.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index cf5dba4..1ae93e1 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -17,6 +17,7 @@ i965_FILES = \
brw_clip_tri.c \
brw_clip_unfilled.c \
brw_clip_util.c \
+   brw_compute.c \
brw_context.c \
brw_context.h \
brw_cs.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_compute.c 
b/src/mesa/drivers/dri/i965/brw_compute.c
new file mode 100644
index 0000000..baed701
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_compute.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <sys/errno.h>
+
+#include "main/condrender.h"
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/state.h"
+#include "brw_context.h"
+#include "brw_draw.h"
+#include "brw_state.h"
+#include "intel_batchbuffer.h"
+
+
+static void
+brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
+{
+   _mesa_problem(&brw->ctx, "TODO: implement brw_emit_gpgpu_walker");
+}
+
+
+static void
+brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
+{
+   struct brw_context *brw = brw_context(ctx);
+   int estimated_buffer_space_needed;
+   bool fail_next = false;
+
+   if (!_mesa_check_conditional_render(ctx))
+  return;
+
+   if (ctx->NewState)
+  _mesa_update_state(ctx);
+
+   brw_validate_textures(brw);
+
+   const int sampler_state_size = 16; /* 16 bytes */
+   estimated_buffer_space_needed = 512; /* batchbuffer commands */
+   estimated_buffer_space_needed += (BRW_MAX_TEX_UNIT *
+ (sampler_state_size +
+  sizeof(struct 
gen5_sampler_default_color)));
+   estimated_buffer_space_needed += 1024; /* push constants */
+   estimated_buffer_space_needed += 512; /* misc. pad */
+
+   /* Flush the batch if it's approaching full, so that we don't wrap while
+* we've got validated state that needs to be in the same batch as the
+* primitives.
+*/
+   intel_batchbuffer_require_space(brw, estimated_buffer_space_needed,
+   RENDER_RING);
+   intel_batchbuffer_save_state(brw);
+
+ retry:
+   brw->no_batch_wrap = true;
+   brw_upload_compute_state(brw);
+
+   brw_emit_gpgpu_walker(brw, num_groups);
+
+   brw->no_batch_wrap = false;
+
+   if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
+  if (!fail_next) {
+ intel_batchbuffer_reset_to_saved(brw);
+ intel_batchbuffer_flush(brw);
+ fail_next = true;
+ goto retry;
+  } else {
+ if (intel_batchbuffer_flush(brw) == -ENOSPC) {
+static bool warned = false;
+
+if (!warned) {
+   fprintf(stderr, "i965: Single compute shader dispatch "
+   "exceeded available aperture space\n");
+   warned = true;
+}
+ }
+  }
+   }
+
+   /* Now that we know we haven't run out of aperture space, we can safely
+* reset the dirty bits.
+*/
+   brw_compute_state_finished(brw);
+
+   if (brw->always_flush_batch)
+  intel_batchbuffer_flush(brw);
+
+   brw_state_cache_check_size(brw);
+
+   /* Note: since compute shaders can't write to framebuffers, there's no need
+* to call brw_postdraw_set_buffers_need_resolve().
+*/
+}
+
+
+void
+brw_init

[Mesa-dev] [PATCH 7/7] i965/cs: Emit MEDIA_STATE_FLUSH after WALKER

2015-04-24 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_compute.c | 5 +
 src/mesa/drivers/dri/i965/brw_defines.h | 1 +
 2 files changed, 6 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_compute.c 
b/src/mesa/drivers/dri/i965/brw_compute.c
index 06ef448..d41d68a 100644
--- a/src/mesa/drivers/dri/i965/brw_compute.c
+++ b/src/mesa/drivers/dri/i965/brw_compute.c
@@ -74,6 +74,11 @@ brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint 
*num_groups)
OUT_BATCH(right_mask);
OUT_BATCH(0xffffffff);
ADVANCE_BATCH();
+
+   BEGIN_BATCH(2);
+   OUT_BATCH(MEDIA_STATE_FLUSH << 16 | (2 - 2));
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
 }
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index cd25511..4c88309 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -2451,6 +2451,7 @@ enum brw_wm_barycentric_interp_mode {
 
 #define MEDIA_VFE_STATE 0x7000
 #define MEDIA_INTERFACE_DESCRIPTOR_LOAD 0x7002
+#define MEDIA_STATE_FLUSH   0x7004
 #define GPGPU_WALKER0x7105
 
 #endif
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/7] mesa/cs: Add DispatchCompute() to driver function table.

2015-04-24 Thread Jordan Justen
From: Paul Berry 

Reviewed-by: Jordan Justen 
---
 src/mesa/main/dd.h | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 0c1a13f..a329d9c 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -1005,6 +1005,13 @@ struct dd_function_table {
 
void (*MemoryBarrier)(struct gl_context *ctx, GLbitfield barriers);
/** @} */
+
+   /**
+* \name GL_ARB_compute_shader interface
+*/
+   /*@{*/
+   void (*DispatchCompute)(struct gl_context *ctx, const GLuint *num_groups);
+   /*@}*/
 };
 
 
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/7] i965/state: Emit pipeline select when changing pipelines

2015-04-24 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_context.h  |  2 ++
 src/mesa/drivers/dri/i965/brw_misc_state.c   | 23 +--
 src/mesa/drivers/dri/i965/brw_state.h| 12 
 src/mesa/drivers/dri/i965/brw_state_upload.c |  5 +
 4 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index a1b835b..e679d2b 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1138,6 +1138,8 @@ struct brw_context
   struct brw_state_flags pipelines[BRW_NUM_PIPELINES];
} state;
 
+   enum brw_pipeline last_pipeline;
+
struct brw_cache cache;
 
/** IDs for meta stencil blit shader programs. */
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c 
b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 78a46cb..67a693b 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -854,6 +854,22 @@ const struct brw_tracked_state brw_line_stipple = {
 };
 
 
+void
+brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
+{
+   const bool is_965 = brw->gen == 4 && !brw->is_g4x;
+   const uint32_t _3DSTATE_PIPELINE_SELECT =
+  is_965 ? CMD_PIPELINE_SELECT_965 : CMD_PIPELINE_SELECT_GM45;
+
+   /* Select the pipeline */
+   BEGIN_BATCH(1);
+   OUT_BATCH(_3DSTATE_PIPELINE_SELECT << 16 |
+ (brw->gen >= 9 ? (3 << 8) : 0) |
+ (pipeline == BRW_COMPUTE_PIPELINE ? 2 : 0));
+   ADVANCE_BATCH();
+}
+
+
 /***
  * Misc invariant state packets
  */
@@ -863,12 +879,7 @@ brw_upload_invariant_state(struct brw_context *brw)
 {
const bool is_965 = brw->gen == 4 && !brw->is_g4x;
 
-   /* Select the 3D pipeline (as opposed to media) */
-   const uint32_t _3DSTATE_PIPELINE_SELECT =
-  is_965 ? CMD_PIPELINE_SELECT_965 : CMD_PIPELINE_SELECT_GM45;
-   BEGIN_BATCH(1);
-   OUT_BATCH(_3DSTATE_PIPELINE_SELECT << 16 | (brw->gen >= 9 ? (3 << 8) : 0));
-   ADVANCE_BATCH();
+   brw_select_pipeline(brw, BRW_RENDER_PIPELINE);
 
if (brw->gen < 6) {
   /* Disable depth offset clamping. */
diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index c3a86e6..4b3184c 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -175,6 +175,18 @@ void brw_upload_compute_state(struct brw_context *brw);
 void brw_compute_state_finished(struct brw_context *brw);
 void brw_init_state(struct brw_context *brw);
 void brw_destroy_state(struct brw_context *brw);
+void brw_emit_select_pipeline(struct brw_context *brw,
+  enum brw_pipeline pipeline);
+
+static inline void
+brw_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
+{
+   if (unlikely(brw->last_pipeline != pipeline)) {
+  assert(pipeline < BRW_NUM_PIPELINES);
+  brw_emit_select_pipeline(brw, pipeline);
+  brw->last_pipeline = pipeline;
+   }
+}
 
 /***
  * brw_state_cache.c
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c 
b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 15d6953..686a3da 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -406,6 +406,9 @@ void brw_init_state( struct brw_context *brw )
 {
struct gl_context *ctx = &brw->ctx;
 
+   /* Force the first brw_select_pipeline to emit pipeline select */
+   brw->last_pipeline = BRW_NUM_PIPELINES;
+
STATIC_ASSERT(ARRAY_SIZE(gen4_atoms) <= ARRAY_SIZE(brw->render_atoms));
STATIC_ASSERT(ARRAY_SIZE(gen6_atoms) <= ARRAY_SIZE(brw->render_atoms));
STATIC_ASSERT(ARRAY_SIZE(gen7_render_atoms) <=
@@ -656,6 +659,8 @@ brw_upload_pipeline_state(struct brw_context *brw,
static int dirty_count = 0;
struct brw_state_flags state = brw->state.pipelines[pipeline];
 
+   brw_select_pipeline(brw, pipeline);
+
if (0) {
   /* Always re-emit all state. */
   brw->NewGLState = ~0;
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/7] i965 DispatchCompute support

2015-04-24 Thread Jordan Justen
git://people.freedesktop.org/~jljusten/mesa i965-dispatch-compute-v1

Jordan Justen (4):
  i965/cs: Emit state base address
  i965/state: Emit pipeline select when changing pipelines
  i965/cs: Implement brw_emit_gpgpu_walker
  i965/cs: Emit MEDIA_STATE_FLUSH after WALKER

Paul Berry (3):
  mesa/cs: Add DispatchCompute() to driver function table.
  main/cs: Implement front end code for glDispatchCompute().
  i965: Implement DispatchCompute() back-end

 src/mesa/drivers/dri/i965/Makefile.sources   |   1 +
 src/mesa/drivers/dri/i965/brw_compute.c  | 163 +++
 src/mesa/drivers/dri/i965/brw_context.c  |   1 +
 src/mesa/drivers/dri/i965/brw_context.h  |   8 +-
 src/mesa/drivers/dri/i965/brw_defines.h  |   2 +
 src/mesa/drivers/dri/i965/brw_misc_state.c   |  23 +++-
 src/mesa/drivers/dri/i965/brw_state.h|  12 ++
 src/mesa/drivers/dri/i965/brw_state_upload.c |   7 ++
 src/mesa/main/compute.c  |  20 +++-
 src/mesa/main/dd.h   |   7 ++
 10 files changed, 236 insertions(+), 8 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/brw_compute.c

-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/7] main/cs: Implement front end code for glDispatchCompute().

2015-04-24 Thread Jordan Justen
From: Paul Berry 

Reviewed-by: Jordan Justen 
---
 src/mesa/main/compute.c | 20 +++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/compute.c b/src/mesa/main/compute.c
index 575..37a4ba7 100644
--- a/src/mesa/main/compute.c
+++ b/src/mesa/main/compute.c
@@ -31,9 +31,27 @@ _mesa_DispatchCompute(GLuint num_groups_x,
   GLuint num_groups_z)
 {
GET_CURRENT_CONTEXT(ctx);
+   int i;
+   struct gl_shader_program *prog;
+   const GLuint num_groups[3] = { num_groups_x, num_groups_y, num_groups_z };
 
if (ctx->Extensions.ARB_compute_shader) {
-  assert(!"TODO");
+  for (i = 0; i < 3; i++) {
+ if (num_groups[i] > ctx->Const.MaxComputeWorkGroupCount[i]) {
+_mesa_error(ctx, GL_INVALID_VALUE,
+"glDispatchCompute(num_groups_%c)", 'x' + i);
+return;
+ }
+  }
+  if (!_mesa_valid_to_render(ctx, "glDispatchCompute"))
+ return;
+  prog = ctx->Shader.CurrentProgram[MESA_SHADER_COMPUTE];
+  if (prog == NULL || prog->_LinkedShaders[MESA_SHADER_COMPUTE] == NULL) {
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "glDispatchCompute(no active compute shader)");
+ return;
+  }
+  ctx->Driver.DispatchCompute(ctx, num_groups);
} else {
   _mesa_error(ctx, GL_INVALID_OPERATION,
   "unsupported function (glDispatchCompute) called");
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 7/9] nir/nir: Use a linked list instead of a hash set for use/def sets

2015-04-24 Thread Jason Ekstrand
On Fri, Apr 24, 2015 at 4:32 PM, Jason Ekstrand  wrote:
> This commit switches us from the current setup of using hash sets for
> use/def sets to using linked lists.  Doing so should save us quite a bit of
> memory because we aren't carrying around 3 hash sets per register and 2 per
> SSA value.  It should also save us CPU time because adding/removing things
> from use/def sets is 4 pointer manipulations instead of a hash lookup.

I'm going to get some actual performance numbers for this, but it
looks like we can save about 10% on shader-db.  I should
have perf and memory numbers by some time on Monday.

> On the code complexity side of things, some things are now much easier and
> others are a bit harder.  One of the operations we perform constantly in
> optimization passes is to replace one source with another.  Due to the fact
> that an instruction can use the same SSA value multiple times, we had to
> iterate through the sources of the instruction and determine if the use we
> were replacing was the only one before removing it from the set of uses.
> With this patch, uses are per-source not per-instruction so we can just
> remove it safely.  On the other hand, trying to iterate over all of the
> instructions that use a given value is more difficult.  Fortunately, the
> two places we do that are the ffma peephole where it doesn't matter and GCM
> where we already gracefully handle duplicate visits to an instruction.
>
> Another aspect here is that using linked lists in this way can be tricky to
> get right.  With sets, things were quite forgiving and the worst that
> happened if you didn't properly remove a use was that it would get caught
> in the validator.  With linked lists, it can lead to linked list corruption
> which can be harder to track.  However, we do just as much validation of
> the linked lists as we did of the sets so the validator should still catch
> these problems.  While working on this series, the vast majority of the
> bugs I had to fix were caught by assertions.  I don't think the lists are
> going to be that much worse than the sets.
> ---
>  src/glsl/nir/nir.c  | 232 
> 
>  src/glsl/nir/nir.h  |  27 --
>  src/glsl/nir/nir_validate.c | 158 +++---
>  3 files changed, 182 insertions(+), 235 deletions(-)
>
> diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
> index b8f5dd4..283b861 100644
> --- a/src/glsl/nir/nir.c
> +++ b/src/glsl/nir/nir.c
> @@ -58,12 +58,9 @@ reg_create(void *mem_ctx, struct exec_list *list)
> nir_register *reg = ralloc(mem_ctx, nir_register);
>
> reg->parent_instr = NULL;
> -   reg->uses = _mesa_set_create(reg, _mesa_hash_pointer,
> -_mesa_key_pointer_equal);
> -   reg->defs = _mesa_set_create(reg, _mesa_hash_pointer,
> -_mesa_key_pointer_equal);
> -   reg->if_uses = _mesa_set_create(reg, _mesa_hash_pointer,
> -   _mesa_key_pointer_equal);
> +   nir_list_init(&reg->uses);
> +   nir_list_init(&reg->defs);
> +   nir_list_init(&reg->if_uses);
>
> reg->num_components = 0;
> reg->num_array_elems = 0;
> @@ -1070,11 +1067,14 @@ update_if_uses(nir_cf_node *node)
>
> nir_if *if_stmt = nir_cf_node_as_if(node);
>
> -   struct set *if_uses_set = if_stmt->condition.is_ssa ?
> - if_stmt->condition.ssa->if_uses :
> - if_stmt->condition.reg.reg->uses;
> -
> -   _mesa_set_add(if_uses_set, if_stmt);
> +   if_stmt->condition.parent_if = if_stmt;
> +   if (if_stmt->condition.is_ssa) {
> +  nir_list_push_tail(&if_stmt->condition.ssa->if_uses,
> + &if_stmt->condition.use_link);
> +   } else {
> +  nir_list_push_tail(&if_stmt->condition.reg.reg->if_uses,
> + &if_stmt->condition.use_link);
> +   }
>  }
>
>  void
> @@ -1227,16 +1227,7 @@ cleanup_cf_node(nir_cf_node *node)
>foreach_list_typed(nir_cf_node, child, node, &if_stmt->else_list)
>   cleanup_cf_node(child);
>
> -  struct set *if_uses;
> -  if (if_stmt->condition.is_ssa) {
> - if_uses = if_stmt->condition.ssa->if_uses;
> -  } else {
> - if_uses = if_stmt->condition.reg.reg->if_uses;
> -  }
> -
> -  struct set_entry *entry = _mesa_set_search(if_uses, if_stmt);
> -  assert(entry);
> -  _mesa_set_remove(if_uses, entry);
> +  nir_link_remove(&if_stmt->condition.use_link);
>break;
> }
>
> @@ -1293,9 +1284,10 @@ add_use_cb(nir_src *src, void *state)
>  {
> nir_instr *instr = state;
>
> -   struct set *uses_set = src->is_ssa ? src->ssa->uses : src->reg.reg->uses;
> -
> -   _mesa_set_add(uses_set, instr);
> +   src->parent_instr = instr;
> +   nir_link_init(&src->use_link);
> +   nir_list *uses_list = src->is_ssa ? &src->ssa->uses : &src->reg.reg->uses;
> +   nir_list_push_tail(uses_list, &src->use_link);
>
> return true;
>  }
> @@ -1320,

[Mesa-dev] [PATCH v2 10/20] i965/cs: Add generator support for CS_OPCODE_CS_TERMINATE

2015-04-24 Thread Jordan Justen
v2:
 * Don't rely on brw_eu* to generate the send instruction. We now
   generate the send here, and drop the "i965/cs: Add support for the
   SEND message that terminates a CS thread" brw_eu* patch.

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_fs.h |  1 +
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 35 ++
 2 files changed, 36 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index b1e65cd..8a71ac7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -556,6 +556,7 @@ private:
   GLuint nr);
void generate_fb_write(fs_inst *inst, struct brw_reg payload);
void generate_urb_write(fs_inst *inst, struct brw_reg payload);
+   void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);
void generate_blorp_fb_write(fs_inst *inst);
void generate_linterp(fs_inst *inst, struct brw_reg dst,
 struct brw_reg *src);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index b88dc8e..114f938 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -370,6 +370,37 @@ fs_generator::generate_urb_write(fs_inst *inst, struct 
brw_reg payload)
 }
 
 void
+fs_generator::generate_cs_terminate(fs_inst *inst, struct brw_reg payload)
+{
+   struct brw_inst *insn;
+
+   insn = brw_next_insn(p, BRW_OPCODE_SEND);
+
+   brw_set_dest(p, insn, brw_null_reg());
+   brw_set_src0(p, insn, payload);
+   brw_set_src1(p, insn, brw_imm_d(0));
+
+   /* Terminate a compute shader by sending a message to the thread spawner.
+*/
+   brw_inst_set_sfid(devinfo, insn, BRW_SFID_THREAD_SPAWNER);
+   brw_inst_set_mlen(devinfo, insn, 1);
+   brw_inst_set_rlen(devinfo, insn, 0);
+   brw_inst_set_eot(devinfo, insn, inst->eot);
+   brw_inst_set_header_present(devinfo, insn, false);
+
+   brw_inst_set_ts_opcode(devinfo, insn, 0); /* Dereference resource */
+   brw_inst_set_ts_request_type(devinfo, insn, 0); /* Root thread */
+
+   /* Note that even though the thread has a URB resource associated with it,
+* we set the "do not dereference URB" bit, because the URB resource is
+* managed by the fixed-function unit, so it will free it automatically.
+*/
+   brw_inst_set_ts_resource_select(devinfo, insn, 1); /* Do not dereference 
URB */
+
+   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
+}
+
+void
 fs_generator::generate_blorp_fb_write(fs_inst *inst)
 {
brw_fb_WRITE(p,
@@ -2073,6 +2104,10 @@ fs_generator::generate_code(const cfg_t *cfg, int 
dispatch_width)

GEN7_PIXEL_INTERPOLATOR_LOC_PER_SLOT_OFFSET);
  break;
 
+  case CS_OPCODE_CS_TERMINATE:
+generate_cs_terminate(inst, src[0]);
+break;
+
   default:
  unreachable("Unsupported opcode");
 
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 01/20] i965/cs: Add BRW_NEW_COMPUTE_PROGRAM state flag.

2015-04-24 Thread Jordan Justen
From: Paul Berry 

Also add code to brw_upload_state to set it when the compute program
changes.

Reviewed-by: Jordan Justen 
Reviewed-by: Kristian Høgsberg 
---
 src/mesa/drivers/dri/i965/brw_context.h  | 3 +++
 src/mesa/drivers/dri/i965/brw_state_upload.c | 6 ++
 2 files changed, 9 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index f79729b..a3a05cd 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -209,6 +209,7 @@ enum brw_state_id {
BRW_STATE_CLIP_VP,
BRW_STATE_SAMPLER_STATE_TABLE,
BRW_STATE_VS_ATTRIB_WORKAROUNDS,
+   BRW_STATE_COMPUTE_PROGRAM,
BRW_NUM_STATE_BITS
 };
 
@@ -288,6 +289,7 @@ enum brw_state_id {
 #define BRW_NEW_CLIP_VP (1ull << BRW_STATE_CLIP_VP)
 #define BRW_NEW_SAMPLER_STATE_TABLE (1ull << BRW_STATE_SAMPLER_STATE_TABLE)
 #define BRW_NEW_VS_ATTRIB_WORKAROUNDS   (1ull << 
BRW_STATE_VS_ATTRIB_WORKAROUNDS)
+#define BRW_NEW_COMPUTE_PROGRAM (1ull << BRW_STATE_COMPUTE_PROGRAM)
 
 struct brw_state_flags {
/** State update flags signalled by mesa internals */
@@ -1194,6 +1196,7 @@ struct brw_context
const struct gl_vertex_program *vertex_program;
const struct gl_geometry_program *geometry_program;
const struct gl_fragment_program *fragment_program;
+   const struct gl_compute_program *compute_program;
 
/**
 * Number of samples in ctx->DrawBuffer, updated by BRW_NEW_NUM_SAMPLES so
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c 
b/src/mesa/drivers/dri/i965/brw_state_upload.c
index ab316bf..435f759 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -580,6 +580,7 @@ static struct dirty_bit_map brw_bits[] = {
DEFINE_BIT(BRW_NEW_CLIP_VP),
DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
+   DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
{0, 0, 0}
 };
 
@@ -670,6 +671,11 @@ brw_upload_pipeline_state(struct brw_context *brw,
   }
}
 
+   if (brw->compute_program != ctx->ComputeProgram._Current) {
+  brw->compute_program = ctx->ComputeProgram._Current;
+  brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
+   }
+
if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
   brw->meta_in_progress = _mesa_meta_in_progress(ctx);
   brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 14/20] i965/cs: Add max_cs_threads

2015-04-24 Thread Jordan Justen
Add some values for gen7 & gen8. These are the number of threads in a
subslice.

Signed-off-by: Jordan Justen 
Cc: Ben Widawsky 
Cc: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_context.c |  1 +
 src/mesa/drivers/dri/i965/brw_context.h |  1 +
 src/mesa/drivers/dri/i965/brw_device_info.c | 12 +++-
 src/mesa/drivers/dri/i965/brw_device_info.h |  1 +
 4 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 25b50c7..7f8d430 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -835,6 +835,7 @@ brwCreateContext(gl_api api,
brw->max_ds_threads = devinfo->max_ds_threads;
brw->max_gs_threads = devinfo->max_gs_threads;
brw->max_wm_threads = devinfo->max_wm_threads;
+   brw->max_cs_threads = devinfo->max_cs_threads;
brw->urb.size = devinfo->urb.size;
brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 07847cc..56827d8 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1228,6 +1228,7 @@ struct brw_context
int max_ds_threads;
int max_gs_threads;
int max_wm_threads;
+   int max_cs_threads;
 
/* BRW_NEW_URB_ALLOCATIONS:
 */
diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c 
b/src/mesa/drivers/dri/i965/brw_device_info.c
index 928bf99..7999ba9 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.c
+++ b/src/mesa/drivers/dri/i965/brw_device_info.c
@@ -117,6 +117,7 @@ static const struct brw_device_info brw_device_info_ivb_gt1 
= {
.max_ds_threads = 36,
.max_gs_threads = 36,
.max_wm_threads = 48,
+   .max_cs_threads = 36,
.urb = {
   .size = 128,
   .min_vs_entries = 32,
@@ -135,6 +136,7 @@ static const struct brw_device_info brw_device_info_ivb_gt2 
= {
.max_ds_threads = 128,
.max_gs_threads = 128,
.max_wm_threads = 172,
+   .max_cs_threads = 64, /* Maybe 96? */
.urb = {
   .size = 256,
   .min_vs_entries = 32,
@@ -154,6 +156,7 @@ static const struct brw_device_info brw_device_info_byt = {
.max_ds_threads = 36,
.max_gs_threads = 36,
.max_wm_threads = 48,
+   .max_cs_threads = 32,
.urb = {
   .size = 128,
   .min_vs_entries = 32,
@@ -176,6 +179,7 @@ static const struct brw_device_info brw_device_info_hsw_gt1 
= {
.max_ds_threads = 70,
.max_gs_threads = 70,
.max_wm_threads = 102,
+   .max_cs_threads = 70,
.urb = {
   .size = 128,
   .min_vs_entries = 32,
@@ -193,6 +197,7 @@ static const struct brw_device_info brw_device_info_hsw_gt2 
= {
.max_ds_threads = 280,
.max_gs_threads = 256,
.max_wm_threads = 204,
+   .max_cs_threads = 70,
.urb = {
   .size = 256,
   .min_vs_entries = 64,
@@ -210,6 +215,7 @@ static const struct brw_device_info brw_device_info_hsw_gt3 
= {
.max_ds_threads = 280,
.max_gs_threads = 256,
.max_wm_threads = 408,
+   .max_cs_threads = 70,
.urb = {
   .size = 512,
   .min_vs_entries = 64,
@@ -231,10 +237,11 @@ static const struct brw_device_info 
brw_device_info_hsw_gt3 = {
.max_hs_threads = 504,   \
.max_ds_threads = 504,   \
.max_gs_threads = 504,   \
-   .max_wm_threads = 384\
+   .max_wm_threads = 384
 
 static const struct brw_device_info brw_device_info_bdw_gt1 = {
GEN8_FEATURES, .gt = 1,
+   .max_cs_threads = 42,
.urb = {
   .size = 192,
   .min_vs_entries = 64,
@@ -247,6 +254,7 @@ static const struct brw_device_info brw_device_info_bdw_gt1 
= {
 
 static const struct brw_device_info brw_device_info_bdw_gt2 = {
GEN8_FEATURES, .gt = 2,
+   .max_cs_threads = 56,
.urb = {
   .size = 384,
   .min_vs_entries = 64,
@@ -259,6 +267,7 @@ static const struct brw_device_info brw_device_info_bdw_gt2 
= {
 
 static const struct brw_device_info brw_device_info_bdw_gt3 = {
GEN8_FEATURES, .gt = 3,
+   .max_cs_threads = 56,
.urb = {
   .size = 384,
   .min_vs_entries = 64,
@@ -280,6 +289,7 @@ static const struct brw_device_info brw_device_info_chv = {
.max_ds_threads = 80,
.max_gs_threads = 80,
.max_wm_threads = 128,
+   .max_cs_threads = 28,
.urb = {
   .size = 192,
   .min_vs_entries = 34,
diff --git a/src/mesa/drivers/dri/i965/brw_device_info.h 
b/src/mesa/drivers/dri/i965/brw_device_info.h
index b921c2b..65c024c 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.h
+++ b/src/mesa/drivers/dri/i965/brw_device_info.h
@@ -71,6 +71,7 @@ struct brw_device_info
unsigned max_ds_threads;
unsigned max_gs_threads;
unsigned max_wm_threads;
+   unsigned max_cs_threads;
 
struct {
   unsigned size;
-- 
2.1.4

___
mesa-d

[Mesa-dev] [PATCH v2 20/20] i965/fs: Add CS shader time support

2015-04-24 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_context.h |  3 +++
 src/mesa/drivers/dri/i965/brw_cs.cpp|  8 
 src/mesa/drivers/dri/i965/brw_fs.cpp| 11 +++
 src/mesa/drivers/dri/i965/brw_program.c | 13 +++--
 4 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 44bedf0..7fd50f4 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -835,6 +835,9 @@ enum shader_time_shader_type {
ST_FS16,
ST_FS16_WRITTEN,
ST_FS16_RESET,
+   ST_CS,
+   ST_CS_WRITTEN,
+   ST_CS_RESET,
 };
 
 struct brw_vertex_buffer {
diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp 
b/src/mesa/drivers/dri/i965/brw_cs.cpp
index 3f378a1..ff87fc1 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cs.cpp
@@ -313,6 +313,14 @@ brw_upload_cs_state(struct brw_context *brw)
struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data;
struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
 
+   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
+  brw->vtbl.emit_buffer_surface_state(
+ brw, &stage_state->surf_offset[
+ prog_data->binding_table.shader_time_start],
+ brw->shader_time.bo, 0, BRW_SURFACEFORMAT_RAW,
+ brw->shader_time.bo->size, 1, true);
+   }
+
uint32_t *bind = (uint32_t*) brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
 
prog_data->binding_table.size_bytes,
 32, &stage_state->bind_bo_offset);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 70db754..c209874 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -758,6 +758,11 @@ fs_visitor::emit_shader_time_end()
  reset_type = ST_FS16_RESET;
   }
   break;
+   case MESA_SHADER_COMPUTE:
+  type = ST_CS;
+  written_type = ST_CS_WRITTEN;
+  reset_type = ST_CS_RESET;
+  break;
default:
   unreachable("fs_visitor::emit_shader_time_end missing code");
}
@@ -4091,6 +4096,9 @@ fs_visitor::run_cs()
 
setup_cs_payload();
 
+   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+  emit_shader_time_begin();
+
emit_nir_code();
 
if (failed)
@@ -4098,6 +4106,9 @@ fs_visitor::run_cs()
 
emit_cs_terminate();
 
+   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+  emit_shader_time_end();
+
calculate_cfg();
 
optimize();
diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
b/src/mesa/drivers/dri/i965/brw_program.c
index 81a0c19..e5c0d3c 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -323,7 +323,8 @@ get_written_and_reset(struct brw_context *brw, int i,
   uint64_t *written, uint64_t *reset)
 {
enum shader_time_shader_type type = brw->shader_time.types[i];
-   assert(type == ST_VS || type == ST_GS || type == ST_FS8 || type == ST_FS16);
+   assert(type == ST_VS || type == ST_GS || type == ST_FS8 ||
+  type == ST_FS16 || type == ST_CS);
 
/* Find where we recorded written and reset. */
int wi, ri;
@@ -363,7 +364,7 @@ brw_report_shader_time(struct brw_context *brw)
 
uint64_t scaled[brw->shader_time.num_entries];
uint64_t *sorted[brw->shader_time.num_entries];
-   uint64_t total_by_type[ST_FS16 + 1];
+   uint64_t total_by_type[ST_CS + 1];
memset(total_by_type, 0, sizeof(total_by_type));
double total = 0;
for (int i = 0; i < brw->shader_time.num_entries; i++) {
@@ -381,6 +382,8 @@ brw_report_shader_time(struct brw_context *brw)
   case ST_FS8_RESET:
   case ST_FS16_WRITTEN:
   case ST_FS16_RESET:
+  case ST_CS_WRITTEN:
+  case ST_CS_RESET:
  /* We'll handle these when along with the time. */
  scaled[i] = 0;
  continue;
@@ -389,6 +392,7 @@ brw_report_shader_time(struct brw_context *brw)
   case ST_GS:
   case ST_FS8:
   case ST_FS16:
+  case ST_CS:
  get_written_and_reset(brw, i, &written, &reset);
  break;
 
@@ -413,6 +417,7 @@ brw_report_shader_time(struct brw_context *brw)
   case ST_GS:
   case ST_FS8:
   case ST_FS16:
+  case ST_CS:
  total_by_type[type] += scaled[i];
  break;
   default:
@@ -455,6 +460,9 @@ brw_report_shader_time(struct brw_context *brw)
   case ST_FS16:
  stage = "fs16";
  break;
+  case ST_CS:
+ stage = "cs";
+ break;
   default:
  stage = "other";
  break;
@@ -469,6 +477,7 @@ brw_report_shader_time(struct brw_context *brw)
print_shader_time_line("total", "gs", 0, total_by_type[ST_GS], total);
print_shader_time_line("total", "fs8", 0, total_by_type[ST_FS8], total);
print_shader_time_line("total", "fs16", 0, total_by_type[ST_FS16], total);
+   print_shader_time_line("total", "cs", 0,

[Mesa-dev] [PATCH v2 08/20] i965/fs: Add emit_cs_terminate to emit CS_OPCODE_CS_TERMINATE

2015-04-24 Thread Jordan Justen
v2:
 * Do more work at the visitor level. g0 is loaded and sent to the
   generator now.

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_fs.h   |  1 +
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 19 +++
 2 files changed, 20 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index 4e17d44..b1e65cd 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -387,6 +387,7 @@ public:
  bool use_2nd_half = false);
void emit_fb_writes();
void emit_urb_writes();
+   void emit_cs_terminate();
 
void emit_shader_time_begin();
void emit_shader_time_end();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 11f38c2..ba8b811 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -4154,6 +4154,25 @@ fs_visitor::resolve_ud_negate(fs_reg *reg)
*reg = temp;
 }
 
+void
+fs_visitor::emit_cs_terminate()
+{
+   assert(brw->gen >= 7);
+
+   /* We are getting the thread ID from the compute shader header */
+   assert(stage == MESA_SHADER_COMPUTE);
+
+   /* Copy g0 for the message payload */
+   struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD);
+   fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
+   fs_inst *inst = emit(MOV(payload, g0));
+   inst->force_writemask_all = true;
+
+   /* Send a message to the thread spawner to terminate the thread. */
+   inst = emit(CS_OPCODE_CS_TERMINATE, reg_undef, payload);
+   inst->eot = true;
+}
+
 /**
  * Resolve the result of a Gen4-5 CMP instruction to a proper boolean.
  *
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 06/20] i965/cs: Add BRW_NEW_CS_PROG_DATA and BRW_CACHE_CS_PROG

2015-04-24 Thread Jordan Justen
Signed-off-by: Jordan Justen 
Reviewed-by: Kristian Høgsberg 
---
 src/mesa/drivers/dri/i965/brw_context.h  | 2 ++
 src/mesa/drivers/dri/i965/brw_state_dump.c   | 3 +++
 src/mesa/drivers/dri/i965/brw_state_upload.c | 1 +
 3 files changed, 6 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index a3a05cd..134040e 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -164,6 +164,7 @@ enum brw_cache_id {
BRW_CACHE_FF_GS_PROG,
BRW_CACHE_GS_PROG,
BRW_CACHE_CLIP_PROG,
+   BRW_CACHE_CS_PROG,
 
BRW_MAX_CACHE
 };
@@ -245,6 +246,7 @@ enum brw_state_id {
 #define BRW_NEW_FF_GS_PROG_DATA (1ull << BRW_CACHE_FF_GS_PROG)
 #define BRW_NEW_GS_PROG_DATA(1ull << BRW_CACHE_GS_PROG)
 #define BRW_NEW_CLIP_PROG_DATA  (1ull << BRW_CACHE_CLIP_PROG)
+#define BRW_NEW_CS_PROG_DATA(1ull << BRW_CACHE_CS_PROG)
 #define BRW_NEW_URB_FENCE   (1ull << BRW_STATE_URB_FENCE)
 #define BRW_NEW_FRAGMENT_PROGRAM(1ull << BRW_STATE_FRAGMENT_PROGRAM)
 #define BRW_NEW_GEOMETRY_PROGRAM(1ull << BRW_STATE_GEOMETRY_PROGRAM)
diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c 
b/src/mesa/drivers/dri/i965/brw_state_dump.c
index 5cf70eb..530f5a8 100644
--- a/src/mesa/drivers/dri/i965/brw_state_dump.c
+++ b/src/mesa/drivers/dri/i965/brw_state_dump.c
@@ -490,6 +490,9 @@ dump_prog_cache(struct brw_context *brw)
 case BRW_CACHE_FS_PROG:
name = "FS kernel";
break;
+ case BRW_CACHE_CS_PROG:
+name = "CS kernel";
+break;
 default:
name = "unknown";
break;
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c 
b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 435f759..5c5420d 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -541,6 +541,7 @@ static struct dirty_bit_map brw_bits[] = {
DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
+   DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
DEFINE_BIT(BRW_NEW_URB_FENCE),
DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 04/20] mesa/cs: Add compute support to update_program().

2015-04-24 Thread Jordan Justen
From: Paul Berry 

Reviewed-by: Jordan Justen 
Reviewed-by: Kristian Høgsberg 
---
 src/mesa/main/state.c | 21 +
 1 file changed, 21 insertions(+)

diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c
index cc84c61..99db37b 100644
--- a/src/mesa/main/state.c
+++ b/src/mesa/main/state.c
@@ -101,9 +101,12 @@ update_program(struct gl_context *ctx)
   ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
struct gl_shader_program *fsProg =
   ctx->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT];
+   const struct gl_shader_program *csProg =
+  ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
const struct gl_vertex_program *prevVP = ctx->VertexProgram._Current;
const struct gl_fragment_program *prevFP = ctx->FragmentProgram._Current;
const struct gl_geometry_program *prevGP = ctx->GeometryProgram._Current;
+   const struct gl_compute_program *prevCP = ctx->ComputeProgram._Current;
GLbitfield new_state = 0x0;
 
/*
@@ -199,6 +202,16 @@ update_program(struct gl_context *ctx)
   _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, NULL);
}
 
+   if (csProg && csProg->LinkStatus
+   && csProg->_LinkedShaders[MESA_SHADER_COMPUTE]) {
+  /* Use GLSL compute shader */
+  _mesa_reference_compprog(ctx, &ctx->ComputeProgram._Current,
+   
gl_compute_program(csProg->_LinkedShaders[MESA_SHADER_COMPUTE]->Program));
+   } else {
+  /* no compute program */
+  _mesa_reference_compprog(ctx, &ctx->ComputeProgram._Current, NULL);
+   }
+
/* Let the driver know what's happening:
 */
if (ctx->FragmentProgram._Current != prevFP) {
@@ -225,6 +238,14 @@ update_program(struct gl_context *ctx)
   }
}
 
+   if (ctx->ComputeProgram._Current != prevCP) {
+  new_state |= _NEW_PROGRAM;
+  if (ctx->Driver.BindProgram) {
+ ctx->Driver.BindProgram(ctx, GL_COMPUTE_PROGRAM_NV,
+ (struct gl_program *) 
ctx->ComputeProgram._Current);
+  }
+   }
+
return new_state;
 }
 
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 14/22] i965/gen9: Set vertical and horizontal surface alignments

2015-04-24 Thread Anuj Phogat
On Thu, Apr 23, 2015 at 6:05 PM, Pohjolainen, Topi
 wrote:
> On Fri, Apr 17, 2015 at 04:51:35PM -0700, Anuj Phogat wrote:
>> Patch sets the alignments for texture and renderbuffer surfaces.
>>
>> Signed-off-by: Anuj Phogat 
>> ---
>>  src/mesa/drivers/dri/i965/gen8_surface_state.c | 34 
>> +++---
>>  1 file changed, 30 insertions(+), 4 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c 
>> b/src/mesa/drivers/dri/i965/gen8_surface_state.c
>> index 1ec57c0..189f1db 100644
>> --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
>> +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
>> @@ -218,10 +218,23 @@ gen8_update_texture_surface(struct gl_context *ctx,
>>
>> surf[0] = surf_type << BRW_SURFACE_TYPE_SHIFT |
>>   tex_format << BRW_SURFACE_FORMAT_SHIFT |
>> - vertical_alignment(mt) |
>> - horizontal_alignment(mt) |
>>   tiling_mode;
>>
>> +   if (brw->gen < 9) {
>> + surf[0] |=  horizontal_alignment(mt) | vertical_alignment(mt);
>> +   }
>> +   /* Horizontal alignment is ignored when Tiled Resource Mode is not
>> +* TRMODE_NONE. Vertical alignment is ignored for 1D surfaces and when
>> +* Tiled Resource Mode is not TRMODE_NONE.
>> +*/
>> +   else if (tr_mode == GEN9_SURFACE_TRMODE_NONE) {
>> +  if (!gen9_use_linear_1d_layout(brw, mt))
>> + surf[0] |=  horizontal_alignment(mt);
>> +
>> +  if (surf_type != BRW_SURFACE_1D)
>> + surf[0] |=  vertical_alignment(mt);
>> +   }
>
> Wouldn't it be cleaner to patch both horizontal_alignment() and
> vertical_alignment() passing them the additional information as extra
> arguments?
>
Yes, it would be. Thanks for the suggestion. I'll send out a V3.
>> +
>> if (surf_type == BRW_SURFACE_CUBE) {
>>surf[0] |= BRW_SURFACE_CUBEFACE_ENABLES;
>> }
>> @@ -400,10 +413,23 @@ gen8_update_renderbuffer_surface(struct brw_context 
>> *brw,
>> surf[0] = (surf_type << BRW_SURFACE_TYPE_SHIFT) |
>>   (is_array ? GEN7_SURFACE_IS_ARRAY : 0) |
>>   (format << BRW_SURFACE_FORMAT_SHIFT) |
>> - vertical_alignment(mt) |
>> - horizontal_alignment(mt) |
>>   surface_tiling_mode(tiling);
>>
>> +   if (brw->gen < 9) {
>> + surf[0] |=  horizontal_alignment(mt) | vertical_alignment(mt);
>> +   }
>> +   /* Horizontal alignment is ignored when Tiled Resource Mode is not
>> +* TRMODE_NONE. Vertical alignment is ignored for 1D surfaces and when
>> +* Tiled Resource Mode is not TRMODE_NONE.
>> +*/
>> +   else if (tr_mode == GEN9_SURFACE_TRMODE_NONE) {
>> +  if (!gen9_use_linear_1d_layout(brw, mt))
>> + surf[0] |=  horizontal_alignment(mt);
>> +
>> +  if (surf_type != BRW_SURFACE_1D)
>> + surf[0] |=  vertical_alignment(mt);
>> +   }
>> +
>> surf[1] = SET_FIELD(mocs, GEN8_SURFACE_MOCS) | mt->qpitch >> 2;
>>
>> surf[2] = SET_FIELD(width - 1, GEN7_SURFACE_WIDTH) |
>> --
>> 2.3.4
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 11/20] i965/cs: Add brw_cs_prog_data, brw_cs_prog_key and brw_context::cs.

2015-04-24 Thread Jordan Justen
From: Paul Berry 

jordan.l.jus...@intel.com:
 * Added brw_cs_prog_key structure
 * Added brw_cs_prog_data::dispatch_grf_start_reg_16
 * Added brw_cs_prog_data::no_8
 * Added brw_cs_prog_data::local_size
 * Added brw_cs_prog_data::simd_size

Signed-off-by: Jordan Justen 
Reviewed-by: Kristian Høgsberg 
---
 src/mesa/drivers/dri/i965/brw_context.h | 17 
 src/mesa/drivers/dri/i965/brw_cs.h  | 46 +
 2 files changed, 63 insertions(+)
 create mode 100644 src/mesa/drivers/dri/i965/brw_cs.h

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 134040e..07847cc 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -148,6 +148,7 @@ struct brw_vs_prog_key;
 struct brw_vue_prog_key;
 struct brw_wm_prog_key;
 struct brw_wm_prog_data;
+struct brw_cs_prog_data;
 
 enum brw_pipeline {
BRW_RENDER_PIPELINE,
@@ -429,6 +430,18 @@ struct brw_wm_prog_data {
int urb_setup[VARYING_SLOT_MAX];
 };
 
+/* Note: brw_cs_prog_data_compare() must be updated when adding fields to this
+ * struct!
+ */
+struct brw_cs_prog_data {
+   struct brw_stage_prog_data base;
+
+   GLuint dispatch_grf_start_reg_16;
+   bool no_8;
+   unsigned local_size[3];
+   unsigned simd_size;
+};
+
 /**
  * Enum representing the i965-specific vertex results that don't correspond
  * exactly to any element of gl_varying_slot.  The values of this enum are
@@ -1361,6 +1374,10 @@ struct brw_context
   uint32_t fast_clear_op;
} wm;
 
+   struct {
+  struct brw_stage_state base;
+  struct brw_cs_prog_data *prog_data;
+   } cs;
 
struct {
   uint32_t state_offset;
diff --git a/src/mesa/drivers/dri/i965/brw_cs.h 
b/src/mesa/drivers/dri/i965/brw_cs.h
new file mode 100644
index 000..0e9e65a
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_cs.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef BRW_CS_H
+#define BRW_CS_H
+
+#include "brw_program.h"
+
+struct brw_cs_prog_key {
+   GLuint program_string_id:32;
+   struct brw_sampler_prog_key_data tex;
+};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void
+brw_upload_cs_prog(struct brw_context *brw);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* BRW_CS_H */
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 00/20] i965 basic CS program generation support

2015-04-24 Thread Jordan Justen
git://people.freedesktop.org/~jljusten/mesa i965-cs-prog-v2

These patches could use review:
 * [08/20] i965/fs: Add emit_cs_terminate to emit CS_OPCODE_CS_TERMINATE
 * [10/20] i965/cs: Add generator support for CS_OPCODE_CS_TERMINATE
 * [13/20] i965/fs: Support compute programs in fs_visitor
 * [14/20] i965/cs: Add max_cs_threads
 * [15/20] i965/cs: Emit compute shader code and upload programs
 * [16/20] i965: Add brw_setup_tex_for_precompile. Use in VS, GS & FS.
 * [17/20] i965/cs: Support CS program precompile
 * [18/20] i965: Remove comment about chv device numbers relating to hsw gt1
 * [19/20] i965/cs: Upload brw_cs_state
 * [20/20] i965/fs: Add CS shader time support

v2:
 * Incorporate feedback from v1
 * Dropped "i965/cs: Emit gen code for CS_OPCODE_CS_TERMINATE" and
   instead use "i965/fs: Add emit_cs_terminate to emit
   CS_OPCODE_CS_TERMINATE" and "i965/cs: Add generator support for
   CS_OPCODE_CS_TERMINATE"
 * Fixed/verified scratch memory
 * Implemented/verified shader time

Jordan Justen (14):
  i965/cs: Add BRW_NEW_CS_PROG_DATA and BRW_CACHE_CS_PROG
  i965/cs: Add CS_OPCODE_CS_TERMINATE
  i965/fs: Add emit_cs_terminate to emit CS_OPCODE_CS_TERMINATE
  i965/cs: Mark g0 as used by CS_OPCODE_CS_TERMINATE
  i965/cs: Add generator support for CS_OPCODE_CS_TERMINATE
  i965/cache: Add support for CS in program state cache
  i965/fs: Support compute programs in fs_visitor
  i965/cs: Add max_cs_threads
  i965/cs: Emit compute shader code and upload programs
  i965: Add brw_setup_tex_for_precompile. Use in VS, GS & FS.
  i965/cs: Support CS program precompile
  i965: Remove comment about chv device numbers relating to hsw gt1
  i965/cs: Upload brw_cs_state
  i965/fs: Add CS shader time support

Paul Berry (6):
  i965/cs: Add BRW_NEW_COMPUTE_PROGRAM state flag.
  mesa/cs: Add inline functions for dealing with compute shaders.
  mesa/cs: Update program.c for compute shaders.
  mesa/cs: Add compute support to update_program().
  i965: Add an INTEL_DEBUG=cs option.
  i965/cs: Add brw_cs_prog_data, brw_cs_prog_key and brw_context::cs.

 src/mesa/drivers/dri/i965/Makefile.sources|   1 +
 src/mesa/drivers/dri/i965/brw_context.c   |   3 +
 src/mesa/drivers/dri/i965/brw_context.h   |  33 ++
 src/mesa/drivers/dri/i965/brw_cs.cpp  | 388 ++
 src/mesa/drivers/dri/i965/brw_cs.h|  48 +++
 src/mesa/drivers/dri/i965/brw_defines.h   |   8 +
 src/mesa/drivers/dri/i965/brw_device_info.c   |  16 +-
 src/mesa/drivers/dri/i965/brw_device_info.h   |   1 +
 src/mesa/drivers/dri/i965/brw_fs.cpp  | 104 +-
 src/mesa/drivers/dri/i965/brw_fs.h|  15 +
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp|  35 ++
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp |   4 +
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp  |  42 +++
 src/mesa/drivers/dri/i965/brw_program.c   |  13 +-
 src/mesa/drivers/dri/i965/brw_shader.cpp  |   6 +
 src/mesa/drivers/dri/i965/brw_shader.h|   3 +
 src/mesa/drivers/dri/i965/brw_state.h |   1 +
 src/mesa/drivers/dri/i965/brw_state_cache.c   |   3 +
 src/mesa/drivers/dri/i965/brw_state_dump.c|   3 +
 src/mesa/drivers/dri/i965/brw_state_upload.c  |  12 +
 src/mesa/drivers/dri/i965/brw_vec4.cpp|  13 +-
 src/mesa/drivers/dri/i965/intel_debug.c   |   3 +-
 src/mesa/drivers/dri/i965/intel_debug.h   |   1 +
 src/mesa/main/state.c |  21 ++
 src/mesa/program/program.c|   3 +
 src/mesa/program/program.h|  22 ++
 26 files changed, 768 insertions(+), 34 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/brw_cs.cpp
 create mode 100644 src/mesa/drivers/dri/i965/brw_cs.h

-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 02/20] mesa/cs: Add inline functions for dealing with compute shaders.

2015-04-24 Thread Jordan Justen
From: Paul Berry 

Reviewed-by: Jordan Justen 
Reviewed-by: Kristian Høgsberg 
---
 src/mesa/program/program.h | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/src/mesa/program/program.h b/src/mesa/program/program.h
index 0b0d1ac..2d92ab2 100644
--- a/src/mesa/program/program.h
+++ b/src/mesa/program/program.h
@@ -138,6 +138,15 @@ _mesa_reference_geomprog(struct gl_context *ctx,
(struct gl_program *) prog);
 }
 
+static inline void
+_mesa_reference_compprog(struct gl_context *ctx,
+ struct gl_compute_program **ptr,
+ struct gl_compute_program *prog)
+{
+   _mesa_reference_program(ctx, (struct gl_program **) ptr,
+   (struct gl_program *) prog);
+}
+
 extern struct gl_program *
 _mesa_clone_program(struct gl_context *ctx, const struct gl_program *prog);
 
@@ -276,6 +285,19 @@ gl_geometry_program_const(const struct gl_program *prog)
 }
 
 
+static inline struct gl_compute_program *
+gl_compute_program(struct gl_program *prog)
+{
+   return (struct gl_compute_program *) prog;
+}
+
+static inline const struct gl_compute_program *
+gl_compute_program_const(const struct gl_program *prog)
+{
+   return (const struct gl_compute_program *) prog;
+}
+
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 05/20] i965: Add an INTEL_DEBUG=cs option.

2015-04-24 Thread Jordan Justen
From: Paul Berry 

At the moment it's not wired up to anything.  Later patches will hook
it up to the compute shader back-end.

Reviewed-by: Jordan Justen 
Reviewed-by: Kristian Høgsberg 
---
 src/mesa/drivers/dri/i965/intel_debug.c | 3 ++-
 src/mesa/drivers/dri/i965/intel_debug.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_debug.c 
b/src/mesa/drivers/dri/i965/intel_debug.c
index 19be464..9c8023c 100644
--- a/src/mesa/drivers/dri/i965/intel_debug.c
+++ b/src/mesa/drivers/dri/i965/intel_debug.c
@@ -70,6 +70,7 @@ static const struct dri_debug_control debug_control[] = {
{ "no8", DEBUG_NO8 },
{ "vec4vs",  DEBUG_VEC4VS },
{ "spill",   DEBUG_SPILL },
+   { "cs",  DEBUG_CS },
{ NULL,0 }
 };
 
@@ -80,7 +81,7 @@ intel_debug_flag_for_shader_stage(gl_shader_stage stage)
   [MESA_SHADER_VERTEX] = DEBUG_VS,
   [MESA_SHADER_GEOMETRY] = DEBUG_GS,
   [MESA_SHADER_FRAGMENT] = DEBUG_WM,
-  [MESA_SHADER_COMPUTE] = 0, /* no debug flag yet */
+  [MESA_SHADER_COMPUTE] = DEBUG_CS,
};
STATIC_ASSERT(MESA_SHADER_STAGES == 4);
return flags[stage];
diff --git a/src/mesa/drivers/dri/i965/intel_debug.h 
b/src/mesa/drivers/dri/i965/intel_debug.h
index e0e9cb7..f754be2 100644
--- a/src/mesa/drivers/dri/i965/intel_debug.h
+++ b/src/mesa/drivers/dri/i965/intel_debug.h
@@ -65,6 +65,7 @@ extern uint64_t INTEL_DEBUG;
 #define DEBUG_NO8 (1ull << 29)
 #define DEBUG_VEC4VS  (1ull << 30)
 #define DEBUG_SPILL   (1ull << 31)
+#define DEBUG_CS  (1ull << 32)
 
 #ifdef HAVE_ANDROID_PLATFORM
 #define LOG_TAG "INTEL-MESA"
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 17/20] i965/cs: Support CS program precompile

2015-04-24 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_context.h  |  6 ++
 src/mesa/drivers/dri/i965/brw_cs.cpp | 28 
 src/mesa/drivers/dri/i965/brw_shader.cpp |  4 
 src/mesa/drivers/dri/i965/brw_shader.h   |  3 +++
 4 files changed, 41 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 9e13c59..44bedf0 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1856,6 +1856,12 @@ brw_fragment_program_const(const struct 
gl_fragment_program *p)
return (const struct brw_fragment_program *) p;
 }
 
+static inline struct brw_compute_program *
+brw_compute_program(struct gl_compute_program *p)
+{
+   return (struct brw_compute_program *) p;
+}
+
 /**
  * Pre-gen6, the register file of the EUs was shared between threads,
  * and each thread used some subset allocated on a 16-register block
diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp 
b/src/mesa/drivers/dri/i965/brw_cs.cpp
index 648f0f0..ea74bf1 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cs.cpp
@@ -270,3 +270,31 @@ brw_upload_cs_prog(struct brw_context *brw)
}
brw->cs.base.prog_data = &brw->cs.prog_data->base;
 }
+
+
+extern "C" bool
+brw_cs_precompile(struct gl_context *ctx,
+  struct gl_shader_program *shader_prog,
+  struct gl_program *prog)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct brw_cs_prog_key key;
+
+   struct gl_compute_program *cp = (struct gl_compute_program *) prog;
+   struct brw_compute_program *bcp = brw_compute_program(cp);
+
+   memset(&key, 0, sizeof(key));
+   key.program_string_id = bcp->id;
+
+   brw_setup_tex_for_precompile(brw, &key.tex, prog);
+
+   uint32_t old_prog_offset = brw->cs.base.prog_offset;
+   struct brw_cs_prog_data *old_prog_data = brw->cs.prog_data;
+
+   bool success = brw_codegen_cs_prog(brw, shader_prog, bcp, &key);
+
+   brw->cs.base.prog_offset = old_prog_offset;
+   brw->cs.prog_data = old_prog_data;
+
+   return success;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index c89127a..dca8571 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -73,6 +73,7 @@ brw_shader_precompile(struct gl_context *ctx,
struct gl_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX];
struct gl_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
struct gl_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
+   struct gl_shader *cs = sh_prog->_LinkedShaders[MESA_SHADER_COMPUTE];
 
if (fs && !brw_fs_precompile(ctx, sh_prog, fs->Program))
   return false;
@@ -83,6 +84,9 @@ brw_shader_precompile(struct gl_context *ctx,
if (vs && !brw_vs_precompile(ctx, sh_prog, vs->Program))
   return false;
 
+   if (cs && !brw_cs_precompile(ctx, sh_prog, cs->Program))
+  return false;
+
return true;
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h 
b/src/mesa/drivers/dri/i965/brw_shader.h
index ac4e62a..ebce51d 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -287,6 +287,9 @@ bool brw_gs_precompile(struct gl_context *ctx,
 bool brw_fs_precompile(struct gl_context *ctx,
struct gl_shader_program *shader_prog,
struct gl_program *prog);
+bool brw_cs_precompile(struct gl_context *ctx,
+   struct gl_shader_program *shader_prog,
+   struct gl_program *prog);
 
 #ifdef __cplusplus
 }
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 09/20] i965/cs: Mark g0 as used by CS_OPCODE_CS_TERMINATE

2015-04-24 Thread Jordan Justen
Signed-off-by: Jordan Justen 
Reviewed-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 94e1a0a..582d099 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -404,6 +404,10 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,
  }
  break;
 
+  case CS_OPCODE_CS_TERMINATE:
+ payload_last_use_ip[0] = use_ip;
+ break;
+
   default:
  if (inst->eot) {
 /* We could omit this for the !inst->header_present case, except
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 03/20] mesa/cs: Update program.c for compute shaders.

2015-04-24 Thread Jordan Justen
From: Paul Berry 

Reviewed-by: Jordan Justen 
Reviewed-by: Kristian Høgsberg 
---
 src/mesa/program/program.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index 4f28e2a..fb61f4d 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -102,6 +102,8 @@ _mesa_init_program(struct gl_context *ctx)
_mesa_reference_geomprog(ctx, &ctx->GeometryProgram.Current,
 NULL);
 
+   _mesa_reference_compprog(ctx, &ctx->ComputeProgram.Current, NULL);
+
/* XXX probably move this stuff */
ctx->ATIFragmentShader.Enabled = GL_FALSE;
ctx->ATIFragmentShader.Current = ctx->Shared->DefaultFragmentShader;
@@ -121,6 +123,7 @@ _mesa_free_program_data(struct gl_context *ctx)
_mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, NULL);
_mesa_delete_shader_cache(ctx, ctx->FragmentProgram.Cache);
_mesa_reference_geomprog(ctx, &ctx->GeometryProgram.Current, NULL);
+   _mesa_reference_compprog(ctx, &ctx->ComputeProgram.Current, NULL);
 
/* XXX probably move this stuff */
if (ctx->ATIFragmentShader.Current) {
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 07/20] i965/cs: Add CS_OPCODE_CS_TERMINATE

2015-04-24 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_defines.h  | 5 +
 src/mesa/drivers/dri/i965/brw_shader.cpp | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index bd3218a..f6f8962 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1103,6 +1103,11 @@ enum opcode {
 *   and number of SO primitives needed.
 */
GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
+
+   /**
+* Terminate the compute shader.
+*/
+   CS_OPCODE_CS_TERMINATE,
 };
 
 enum brw_urb_write_flags {
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 79f0e1c..c89127a 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -605,6 +605,8 @@ brw_instruction_name(enum opcode op)
   return "gs_svb_set_dst_index";
case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
   return "gs_ff_sync_set_primitives";
+   case CS_OPCODE_CS_TERMINATE:
+  return "cs_terminate";
}
 
unreachable("not reached");
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 15/20] i965/cs: Emit compute shader code and upload programs

2015-04-24 Thread Jordan Justen
v2:
 * Don't bother checking for 'gen > 5' (krh)
 * Populate sampler data in key (krh)

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_context.h  |   1 +
 src/mesa/drivers/dri/i965/brw_cs.cpp | 224 +++
 src/mesa/drivers/dri/i965/brw_state_upload.c |   3 +
 3 files changed, 228 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 56827d8..9e13c59 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -148,6 +148,7 @@ struct brw_vs_prog_key;
 struct brw_vue_prog_key;
 struct brw_wm_prog_key;
 struct brw_wm_prog_data;
+struct brw_cs_prog_key;
 struct brw_cs_prog_data;
 
 enum brw_pipeline {
diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp 
b/src/mesa/drivers/dri/i965/brw_cs.cpp
index 8021147..648f0f0 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cs.cpp
@@ -22,8 +22,15 @@
  */
 
 
+#include "util/ralloc.h"
 #include "brw_context.h"
 #include "brw_cs.h"
+#include "brw_fs.h"
+#include "brw_eu.h"
+#include "brw_wm.h"
+#include "intel_mipmap_tree.h"
+#include "brw_state.h"
+#include "intel_batchbuffer.h"
 
 extern "C"
 bool
@@ -46,3 +53,220 @@ brw_cs_prog_data_compare(const void *in_a, const void *in_b)
 
return true;
 }
+
+
+static const unsigned *
+brw_cs_emit(struct brw_context *brw,
+void *mem_ctx,
+const struct brw_cs_prog_key *key,
+struct brw_cs_prog_data *prog_data,
+struct gl_compute_program *cp,
+struct gl_shader_program *prog,
+unsigned *final_assembly_size)
+{
+   bool start_busy = false;
+   double start_time = 0;
+
+   if (unlikely(brw->perf_debug)) {
+  start_busy = (brw->batch.last_bo &&
+drm_intel_bo_busy(brw->batch.last_bo));
+  start_time = get_time();
+   }
+
+   struct brw_shader *shader = NULL;
+   if (prog)
+  shader = (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE];
+
+   if (unlikely(INTEL_DEBUG & DEBUG_CS))
+  brw_dump_ir("compute", prog, &shader->base, &cp->Base);
+
+   /* Now the main event: Visit the shader IR and generate our CS IR for it.
+*/
+   fs_visitor v(brw, mem_ctx, key, prog_data, prog, cp, 8);
+   if (!v.run_cs()) {
+  if (prog) {
+ prog->LinkStatus = false;
+ ralloc_strcat(&prog->InfoLog, v.fail_msg);
+  }
+
+  _mesa_problem(NULL, "Failed to compile fragment shader: %s\n",
+v.fail_msg);
+
+  return NULL;
+   }
+
+   cfg_t *simd16_cfg = NULL;
+   fs_visitor v2(brw, mem_ctx, key, prog_data, prog, cp, 16);
+   if (likely(!(INTEL_DEBUG & DEBUG_NO16))) {
+  if (!v.simd16_unsupported) {
+ /* Try a SIMD16 compile */
+ v2.import_uniforms(&v);
+ if (!v2.run_cs()) {
+perf_debug("SIMD16 shader failed to compile, falling back to "
+   "SIMD8 at a 10-20%% performance cost: %s", v2.fail_msg);
+ } else {
+simd16_cfg = v2.cfg;
+ }
+  } else {
+ perf_debug("SIMD16 shader unsupported, falling back to "
+"SIMD8 at a 10-20%% performance cost: %s", v.no16_msg);
+  }
+   }
+
+   prog_data->local_size[0] = cp->LocalSize[0];
+   prog_data->local_size[1] = cp->LocalSize[1];
+   prog_data->local_size[2] = cp->LocalSize[2];
+
+   cfg_t *simd8_cfg;
+   int no_simd8 = (INTEL_DEBUG & DEBUG_NO8) || brw->no_simd8;
+   if (no_simd8 && simd16_cfg) {
+  simd8_cfg = NULL;
+  prog_data->no_8 = true;
+   } else {
+  simd8_cfg = v.cfg;
+  prog_data->no_8 = false;
+   }
+
+   fs_generator g(brw, mem_ctx, (void*) key, &prog_data->base, &cp->Base,
+  v.promoted_constants, v.runtime_check_aads_emit, "CS");
+   if (INTEL_DEBUG & DEBUG_CS) {
+  char *name = ralloc_asprintf(mem_ctx, "%s compute shader %d",
+   prog->Label ? prog->Label : "unnamed",
+   prog->Name);
+  g.enable_debug(name);
+   }
+   if (simd16_cfg) {
+  prog_data->simd_size = 16;
+  g.generate_code(simd16_cfg, 16);
+   } else if (simd8_cfg) {
+  prog_data->simd_size = 8;
+  g.generate_code(simd8_cfg, 8);
+   }
+
+   if (unlikely(brw->perf_debug) && shader) {
+  if (shader->compiled_once) {
+ _mesa_problem(&brw->ctx, "CS programs shouldn't need recompiles");
+  }
+  shader->compiled_once = true;
+
+  if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
+ perf_debug("CS compile took %.03f ms and stalled the GPU\n",
+(get_time() - start_time) * 1000);
+  }
+   }
+
+   return g.get_assembly(final_assembly_size);
+}
+
+static bool
+brw_codegen_cs_prog(struct brw_context *brw,
+struct gl_shader_program *prog,
+struct brw_compute_program *cp,
+struct brw_cs_prog_key *key)
+{
+   struct gl_context *ctx = &brw->ctx;
+ 

[Mesa-dev] [PATCH v2 19/20] i965/cs: Upload brw_cs_state

2015-04-24 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_cs.cpp | 80 
 src/mesa/drivers/dri/i965/brw_defines.h  |  3 ++
 src/mesa/drivers/dri/i965/brw_state.h|  1 +
 src/mesa/drivers/dri/i965/brw_state_upload.c |  2 +
 4 files changed, 86 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp 
b/src/mesa/drivers/dri/i965/brw_cs.cpp
index ea74bf1..3f378a1 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cs.cpp
@@ -298,3 +298,83 @@ brw_cs_precompile(struct gl_context *ctx,
 
return success;
 }
+
+
+static void
+brw_upload_cs_state(struct brw_context *brw)
+{
+   if (!brw->cs.prog_data)
+  return;
+
+   uint32_t offset;
+   uint32_t *desc = (uint32_t*) brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
+8 * 4, 64, &offset);
+   struct brw_stage_state *stage_state = &brw->cs.base;
+   struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data;
+   struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
+
+   uint32_t *bind = (uint32_t*) brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
+
prog_data->binding_table.size_bytes,
+32, &stage_state->bind_bo_offset);
+
+   uint32_t dwords = brw->gen < 8 ? 8 : 9;
+   BEGIN_BATCH(dwords);
+   OUT_BATCH(MEDIA_VFE_STATE << 16 | (dwords - 2));
+
+   if (prog_data->total_scratch) {
+  if (brw->gen >= 8)
+ OUT_RELOC64(stage_state->scratch_bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ ffs(prog_data->total_scratch) - 11);
+  else
+ OUT_RELOC(stage_state->scratch_bo,
+   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+   ffs(prog_data->total_scratch) - 11);
+   } else {
+  OUT_BATCH(0);
+  if (brw->gen >= 8)
+ OUT_BATCH(0);
+   }
+
+   OUT_BATCH(((brw->max_cs_threads - 1) << 16) |
+ (brw->gen >= 8 ? (2 << 8) : 0) |
+ (brw->gen >= 8 ? 0xc0 : 0xc4));
+
+   OUT_BATCH(0);
+   OUT_BATCH(brw->gen >= 8 ? (2 << 16) : 0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
+   memcpy(bind, stage_state->surf_offset,
+  prog_data->binding_table.size_bytes);
+
+   memset(desc, 0, 8 * 4);
+
+   int dw = 0;
+   desc[dw++] = brw->cs.base.prog_offset;
+   if (brw->gen >= 8)
+  dw++; /* Kernel Start Pointer High */
+   dw++;
+   dw++;
+   desc[dw++] = stage_state->bind_bo_offset;
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(MEDIA_INTERFACE_DESCRIPTOR_LOAD << 16 | (4 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(8 * 4);
+   OUT_BATCH(offset);
+   ADVANCE_BATCH();
+}
+
+
+extern "C"
+const struct brw_tracked_state brw_cs_state = {
+   .dirty = {
+  .mesa  = 0,
+  .brw   = BRW_NEW_COMPUTE_PROGRAM,
+   },
+   .emit = brw_upload_cs_state
+};
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index f6f8962..36f46af 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -2449,4 +2449,7 @@ enum brw_wm_barycentric_interp_mode {
 #define SKL_MOCS_WB 9
 #define SKL_MOCS_WT 5
 
+#define MEDIA_VFE_STATE 0x7000
+#define MEDIA_INTERFACE_DESCRIPTOR_LOAD 0x7002
+
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index cfa67b6..c3a86e6 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -93,6 +93,7 @@ extern const struct brw_tracked_state brw_drawing_rect;
 extern const struct brw_tracked_state brw_indices;
 extern const struct brw_tracked_state brw_vertices;
 extern const struct brw_tracked_state brw_index_buffer;
+extern const struct brw_tracked_state brw_cs_state;
 extern const struct brw_tracked_state gen6_binding_table_pointers;
 extern const struct brw_tracked_state gen6_blend_state;
 extern const struct brw_tracked_state gen6_cc_state_pointers;
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c 
b/src/mesa/drivers/dri/i965/brw_state_upload.c
index d086f39..7d0dc8f 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -249,6 +249,7 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
 
 static const struct brw_tracked_state *gen7_compute_atoms[] =
 {
+   &brw_cs_state,
 };
 
 static const struct brw_tracked_state *gen8_render_atoms[] =
@@ -329,6 +330,7 @@ static const struct brw_tracked_state *gen8_render_atoms[] =
 
 static const struct brw_tracked_state *gen8_compute_atoms[] =
 {
+   &brw_cs_state,
 };
 
 static void
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 13/20] i965/fs: Support compute programs in fs_visitor

2015-04-24 Thread Jordan Justen
v2:
 * Clean out some unneeded code copied from run_fs (krh)
 * Always use NIR
 * Split shader time out into a separate commit

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_context.c  |  2 +
 src/mesa/drivers/dri/i965/brw_fs.cpp | 61 ++--
 src/mesa/drivers/dri/i965/brw_fs.h   | 10 +
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 23 +++
 4 files changed, 93 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 9d90360..25b50c7 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -594,6 +594,8 @@ brw_initialize_context_constants(struct brw_context *brw)
if (brw_env_var_as_boolean("INTEL_USE_NIR", true))
   ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions = 
&nir_options;
 
+   ctx->Const.ShaderCompilerOptions[MESA_SHADER_COMPUTE].NirOptions = 
&nir_options;
+
/* ARB_viewport_array */
if (brw->gen >= 7 && ctx->API == API_OPENGL_CORE) {
   ctx->Const.MaxViewports = GEN7_NUM_VIEWPORTS;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 61ee056..24d3fa9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1719,9 +1719,15 @@ fs_visitor::assign_curb_setup()
if (dispatch_width == 8) {
   prog_data->dispatch_grf_start_reg = payload.num_regs;
} else {
-  assert(stage == MESA_SHADER_FRAGMENT);
-  brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
-  prog_data->dispatch_grf_start_reg_16 = payload.num_regs;
+  if (stage == MESA_SHADER_FRAGMENT) {
+ brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
+ prog_data->dispatch_grf_start_reg_16 = payload.num_regs;
+  } else if (stage == MESA_SHADER_COMPUTE) {
+ brw_cs_prog_data *prog_data = (brw_cs_prog_data*) this->prog_data;
+ prog_data->dispatch_grf_start_reg_16 = payload.num_regs;
+  } else {
+ unreachable("Unsupported shader type!");
+  }
}
 
prog_data->curb_read_length = ALIGN(stage_prog_data->nr_params, 8) / 8;
@@ -3726,6 +3732,14 @@ fs_visitor::setup_vs_payload()
 }
 
 void
+fs_visitor::setup_cs_payload()
+{
+   assert(brw->gen >= 7);
+
+   payload.num_regs = 1;
+}
+
+void
 fs_visitor::assign_binding_table_offsets()
 {
assert(stage == MESA_SHADER_FRAGMENT);
@@ -4065,6 +4079,47 @@ fs_visitor::run_fs()
return !failed;
 }
 
+bool
+fs_visitor::run_cs()
+{
+   assert(stage == MESA_SHADER_COMPUTE);
+   assert (shader);
+
+   sanity_param_count = prog->Parameters->NumParameters;
+
+   assign_common_binding_table_offsets(0);
+
+   setup_cs_payload();
+
+   emit_nir_code();
+
+   if (failed)
+  return false;
+
+   emit_cs_terminate();
+
+   calculate_cfg();
+
+   optimize();
+
+   assign_curb_setup();
+
+   fixup_3src_null_dest();
+   allocate_registers();
+
+   if (failed)
+  return false;
+
+   /* If any state parameters were appended, then ParameterValues could have
+* been realloced, in which case the driver uniform storage set up by
+* _mesa_associate_uniform_storage() would point to freed memory.  Make
+* sure that didn't happen.
+*/
+   assert(sanity_param_count == prog->Parameters->NumParameters);
+
+   return !failed;
+}
+
 const unsigned *
 brw_wm_fs_emit(struct brw_context *brw,
void *mem_ctx,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index 8a71ac7..d233260 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -89,6 +89,14 @@ public:
   struct gl_vertex_program *cp,
   unsigned dispatch_width);
 
+   fs_visitor(struct brw_context *brw,
+  void *mem_ctx,
+  const struct brw_cs_prog_key *key,
+  struct brw_cs_prog_data *prog_data,
+  struct gl_shader_program *shader_prog,
+  struct gl_compute_program *cp,
+  unsigned dispatch_width);
+
~fs_visitor();
void init();
 
@@ -189,12 +197,14 @@ public:
 
bool run_fs();
bool run_vs();
+   bool run_cs();
void optimize();
void allocate_registers();
void assign_binding_table_offsets();
void setup_payload_gen4();
void setup_payload_gen6();
void setup_vs_payload();
+   void setup_cs_payload();
void fixup_3src_null_dest();
void assign_curb_setup();
void calculate_urb_setup();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index ba8b811..74f2e52 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -39,6 +39,7 @@
 #include "brw_context.h"
 #include "brw_eu.h"
 #include "brw_wm.h"
+#include "brw_cs.h"
 #include "brw_vec4.h"
 #include "brw_fs.h"
 #include "main/uniforms.h"
@@ -4232,6 +4233,25 @@ fs_visitor::

[Mesa-dev] [PATCH v2 16/20] i965: Add brw_setup_tex_for_precompile. Use in VS, GS & FS.

2015-04-24 Thread Jordan Justen
Suggested-by: Kristian Høgsberg 
Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_fs.cpp   | 32 
 src/mesa/drivers/dri/i965/brw_fs.h |  3 +++
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 13 +
 3 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 24d3fa9..70db754 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -4252,18 +4252,7 @@ brw_fs_precompile(struct gl_context *ctx,
  BRW_FS_VARYING_INPUT_MASK) > 16)
   key.input_slots_valid = fp->Base.InputsRead | VARYING_BIT_POS;
 
-   const bool has_shader_channel_select = brw->is_haswell || brw->gen >= 8;
-   unsigned sampler_count = _mesa_fls(fp->Base.SamplersUsed);
-   for (unsigned i = 0; i < sampler_count; i++) {
-  if (!has_shader_channel_select && (fp->Base.ShadowSamplers & (1 << i))) {
- /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
- key.tex.swizzles[i] =
-MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
-  } else {
- /* Color sampler: assume no swizzling. */
- key.tex.swizzles[i] = SWIZZLE_XYZW;
-  }
-   }
+   brw_setup_tex_for_precompile(brw, &key.tex, &fp->Base);
 
if (fp->Base.InputsRead & VARYING_BIT_POS) {
   key.drawable_height = ctx->DrawBuffer->Height;
@@ -4290,3 +4279,22 @@ brw_fs_precompile(struct gl_context *ctx,
 
return success;
 }
+
+void
+brw_setup_tex_for_precompile(struct brw_context *brw,
+ struct brw_sampler_prog_key_data *tex,
+ struct gl_program *prog)
+{
+   const bool has_shader_channel_select = brw->is_haswell || brw->gen >= 8;
+   unsigned sampler_count = _mesa_fls(prog->SamplersUsed);
+   for (unsigned i = 0; i < sampler_count; i++) {
+  if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
+ /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
+ tex->swizzles[i] =
+MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
+  } else {
+ /* Color sampler: assume no swizzling. */
+ tex->swizzles[i] = SWIZZLE_XYZW;
+  }
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index d233260..ea20dca 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -672,3 +672,6 @@ private:
 
 bool brw_do_channel_expressions(struct exec_list *instructions);
 bool brw_do_vector_splitting(struct exec_list *instructions);
+void brw_setup_tex_for_precompile(struct brw_context *brw,
+  struct brw_sampler_prog_key_data *tex,
+  struct gl_program *prog);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 9398adc..aaa80c1 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1915,18 +1915,7 @@ brw_vue_setup_prog_key_for_precompile(struct gl_context 
*ctx,
struct brw_context *brw = brw_context(ctx);
key->program_string_id = id;
 
-   const bool has_shader_channel_select = brw->is_haswell || brw->gen >= 8;
-   unsigned sampler_count = _mesa_fls(prog->SamplersUsed);
-   for (unsigned i = 0; i < sampler_count; i++) {
-  if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
- /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
- key->tex.swizzles[i] =
-MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
-  } else {
- /* Color sampler: assume no swizzling. */
- key->tex.swizzles[i] = SWIZZLE_XYZW;
-  }
-   }
+   brw_setup_tex_for_precompile(brw, &key->tex, prog);
 }
 
 } /* extern "C" */
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 18/20] i965: Remove comment about chv device numbers relating to hsw gt1

2015-04-24 Thread Jordan Justen
Remove comment "These were copied from Haswell GT1, above.". Many of
these numbers have been modified by this point, so the HSW GT1
reference no longer seems helpful.

The comment "Thread counts and URB limits are placeholders, and may
not be accurate." is retained for now.

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_device_info.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c 
b/src/mesa/drivers/dri/i965/brw_device_info.c
index 7999ba9..8938384 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.c
+++ b/src/mesa/drivers/dri/i965/brw_device_info.c
@@ -278,9 +278,7 @@ static const struct brw_device_info brw_device_info_bdw_gt3 
= {
}
 };
 
-/* Thread counts and URB limits are placeholders, and may not be accurate.
- * These were copied from Haswell GT1, above.
- */
+/* Thread counts and URB limits are placeholders, and may not be accurate. */
 static const struct brw_device_info brw_device_info_chv = {
GEN8_FEATURES, .is_cherryview = 1, .gt = 1,
.has_llc = false,
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 12/20] i965/cache: Add support for CS in program state cache

2015-04-24 Thread Jordan Justen
Signed-off-by: Jordan Justen 
Reviewed-by: Kristian Høgsberg 
---
 src/mesa/drivers/dri/i965/Makefile.sources  |  1 +
 src/mesa/drivers/dri/i965/brw_cs.cpp| 48 +
 src/mesa/drivers/dri/i965/brw_cs.h  |  2 ++
 src/mesa/drivers/dri/i965/brw_state_cache.c |  3 ++
 4 files changed, 54 insertions(+)
 create mode 100644 src/mesa/drivers/dri/i965/brw_cs.cpp

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index 6d4659f..cf5dba4 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -19,6 +19,7 @@ i965_FILES = \
brw_clip_util.c \
brw_context.c \
brw_context.h \
+   brw_cs.cpp \
brw_cubemap_normalize.cpp \
brw_curbe.c \
brw_dead_control_flow.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp 
b/src/mesa/drivers/dri/i965/brw_cs.cpp
new file mode 100644
index 000..8021147
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_cs.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2014 - 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "brw_context.h"
+#include "brw_cs.h"
+
+extern "C"
+bool
+brw_cs_prog_data_compare(const void *in_a, const void *in_b)
+{
+   const struct brw_cs_prog_data *a =
+  (const struct brw_cs_prog_data *)in_a;
+   const struct brw_cs_prog_data *b =
+  (const struct brw_cs_prog_data *)in_b;
+
+   /* Compare the base structure. */
+   if (!brw_stage_prog_data_compare(&a->base, &b->base))
+  return false;
+
+   /* Compare the rest of the structure. */
+   const unsigned offset = sizeof(struct brw_stage_prog_data);
+   if (memcmp(((char *) a) + offset, ((char *) b) + offset,
+  sizeof(struct brw_cs_prog_data) - offset))
+  return false;
+
+   return true;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_cs.h 
b/src/mesa/drivers/dri/i965/brw_cs.h
index 0e9e65a..e2c9779 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.h
+++ b/src/mesa/drivers/dri/i965/brw_cs.h
@@ -36,6 +36,8 @@ struct brw_cs_prog_key {
 extern "C" {
 #endif
 
+bool brw_cs_prog_data_compare(const void *a, const void *b);
+
 void
 brw_upload_cs_prog(struct brw_context *brw);
 
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c 
b/src/mesa/drivers/dri/i965/brw_state_cache.c
index 606740e..157b33d 100644
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -50,6 +50,7 @@
 #include "brw_vs.h"
 #include "brw_wm.h"
 #include "brw_gs.h"
+#include "brw_cs.h"
 
 #define FILE_DEBUG_FLAG DEBUG_STATE
 
@@ -363,9 +364,11 @@ brw_init_caches(struct brw_context *brw)
cache->aux_compare[BRW_CACHE_VS_PROG] = brw_vs_prog_data_compare;
cache->aux_compare[BRW_CACHE_GS_PROG] = brw_gs_prog_data_compare;
cache->aux_compare[BRW_CACHE_FS_PROG] = brw_wm_prog_data_compare;
+   cache->aux_compare[BRW_CACHE_CS_PROG] = brw_cs_prog_data_compare;
cache->aux_free[BRW_CACHE_VS_PROG] = brw_stage_prog_data_free;
cache->aux_free[BRW_CACHE_GS_PROG] = brw_stage_prog_data_free;
cache->aux_free[BRW_CACHE_FS_PROG] = brw_stage_prog_data_free;
+   cache->aux_free[BRW_CACHE_CS_PROG] = brw_stage_prog_data_free;
 }
 
 static void
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 9/9] SQUASH: nir: Update various components for the new list-based use/def sets

2015-04-24 Thread Jason Ekstrand
---
 src/glsl/nir/nir_from_ssa.c | 11 +--
 src/glsl/nir/nir_lower_locals_to_regs.c | 14 ++
 src/glsl/nir/nir_lower_to_source_mods.c | 20 
 src/glsl/nir/nir_lower_vars_to_ssa.c|  3 ++-
 src/glsl/nir/nir_opt_gcm.c  | 14 ++
 src/glsl/nir/nir_opt_global_to_local.c  | 13 ++---
 src/glsl/nir/nir_opt_peephole_ffma.c|  9 -
 src/glsl/nir/nir_opt_peephole_select.c  | 10 --
 src/glsl/nir/nir_to_ssa.c   | 19 ++-
 9 files changed, 55 insertions(+), 58 deletions(-)

diff --git a/src/glsl/nir/nir_from_ssa.c b/src/glsl/nir/nir_from_ssa.c
index 5e7deca..436ecd6 100644
--- a/src/glsl/nir/nir_from_ssa.c
+++ b/src/glsl/nir/nir_from_ssa.c
@@ -345,6 +345,7 @@ isolate_phi_nodes_block(nir_block *block, void *void_state)
 
  nir_parallel_copy_entry *entry = rzalloc(state->dead_ctx,
   nir_parallel_copy_entry);
+ entry->src.parent_instr = &pcopy->instr;
  nir_ssa_dest_init(&pcopy->instr, &entry->dest,
phi->dest.ssa.num_components, src->src.ssa->name);
  exec_list_push_tail(&pcopy->entries, &entry->node);
@@ -358,6 +359,7 @@ isolate_phi_nodes_block(nir_block *block, void *void_state)
 
   nir_parallel_copy_entry *entry = rzalloc(state->dead_ctx,
nir_parallel_copy_entry);
+  entry->src.parent_instr = &block_pcopy->instr;
   nir_ssa_dest_init(&block_pcopy->instr, &entry->dest,
 phi->dest.ssa.num_components, phi->dest.ssa.name);
   exec_list_push_tail(&block_pcopy->entries, &entry->node);
@@ -503,7 +505,7 @@ rewrite_ssa_def(nir_ssa_def *def, void *void_state)
}
 
nir_ssa_def_rewrite_uses(def, nir_src_for_reg(reg), state->mem_ctx);
-   assert(def->uses->entries == 0 && def->if_uses->entries == 0);
+   assert(nir_list_is_empty(&def->uses) && nir_list_is_empty(&def->if_uses));
 
if (def->parent_instr->type == nir_instr_type_ssa_undef)
   return true;
@@ -515,12 +517,9 @@ rewrite_ssa_def(nir_ssa_def *def, void *void_state)
 */
nir_dest *dest = exec_node_data(nir_dest, def, ssa);
 
-   _mesa_set_destroy(dest->ssa.uses, NULL);
-   _mesa_set_destroy(dest->ssa.if_uses, NULL);
-
*dest = nir_dest_for_reg(reg);
-
-   _mesa_set_add(reg->defs, state->instr);
+   dest->reg.parent_instr = state->instr;
+   nir_list_push_tail(&reg->defs, &dest->reg.def_link);
 
return true;
 }
diff --git a/src/glsl/nir/nir_lower_locals_to_regs.c 
b/src/glsl/nir/nir_lower_locals_to_regs.c
index bc6a3d3..b1ba4c5 100644
--- a/src/glsl/nir/nir_lower_locals_to_regs.c
+++ b/src/glsl/nir/nir_lower_locals_to_regs.c
@@ -269,18 +269,16 @@ lower_locals_to_regs_block(nir_block *block, void 
*void_state)
 static nir_block *
 compute_reg_usedef_lca(nir_register *reg)
 {
-   struct set_entry *entry;
nir_block *lca = NULL;
 
-   set_foreach(reg->defs, entry)
-  lca = nir_dominance_lca(lca, ((nir_instr *)entry->key)->block);
+   nir_list_foreach(nir_dest, def_dest, reg.def_link, &reg->defs)
+  lca = nir_dominance_lca(lca, def_dest->reg.parent_instr->block);
 
-   set_foreach(reg->uses, entry)
-  lca = nir_dominance_lca(lca, ((nir_instr *)entry->key)->block);
+   nir_list_foreach(nir_src, use_src, use_link, &reg->uses)
+  lca = nir_dominance_lca(lca, use_src->parent_instr->block);
 
-   set_foreach(reg->if_uses, entry) {
-  nir_if *if_stmt = (nir_if *)entry->key;
-  nir_cf_node *prev_node = nir_cf_node_prev(&if_stmt->cf_node);
+   nir_list_foreach(nir_src, use_src, use_link, &reg->if_uses) {
+  nir_cf_node *prev_node = nir_cf_node_prev(&use_src->parent_if->cf_node);
   assert(prev_node->type == nir_cf_node_block);
   lca = nir_dominance_lca(lca, nir_cf_node_as_block(prev_node));
}
diff --git a/src/glsl/nir/nir_lower_to_source_mods.c 
b/src/glsl/nir/nir_lower_to_source_mods.c
index 7b4a0f6..33a2a27 100644
--- a/src/glsl/nir/nir_lower_to_source_mods.c
+++ b/src/glsl/nir/nir_lower_to_source_mods.c
@@ -88,8 +88,8 @@ nir_lower_to_source_mods_block(nir_block *block, void *state)
 alu->src[i].swizzle[j] = 
parent->src[0].swizzle[alu->src[i].swizzle[j]];
  }
 
- if (parent->dest.dest.ssa.uses->entries == 0 &&
- parent->dest.dest.ssa.if_uses->entries == 0)
+ if (nir_list_is_empty(&parent->dest.dest.ssa.uses) &&
+ nir_list_is_empty(&parent->dest.dest.ssa.if_uses))
 nir_instr_remove(&parent->instr);
   }
 
@@ -131,13 +131,13 @@ nir_lower_to_source_mods_block(nir_block *block, void 
*state)
   if (nir_op_infos[alu->op].output_type != nir_type_float)
  continue;
 
-  if (alu->dest.dest.ssa.if_uses->entries != 0)
+  if (!nir_list_is_empty(&alu->dest.dest.ssa.if_uses))
  continue;
 
   bool all_children_are_sat = true;
-  struct set_entry *entry;
-  set_foreach(alu->dest.dest.ssa.uses, entry) {
-  

[Mesa-dev] [RFC 7/9] nir/nir: Use a linked list instead of a hash set for use/def sets

2015-04-24 Thread Jason Ekstrand
This commit switches us from the current setup of using hash sets for
use/def sets to using linked lists.  Doing so should save us quite a bit of
memory because we aren't carrying around 3 hash sets per register and 2 per
SSA value.  It should also save us CPU time because adding/removing things
from use/def sets is 4 pointer manipulations instead of a hash lookup.

On the code complexity side of things, some things are now much easier and
others are a bit harder.  One of the operations we perform constantly in
optimization passes is to replace one source with another.  Due to the fact
that an instruction can use the same SSA value multiple times, we had to
iterate through the sources of the instruction and determine if the use we
were replacing was the only one before removing it from the set of uses.
With this patch, uses are per-source not per-instruction so we can just
remove it safely.  On the other hand, trying to iterate over all of the
instructions that use a given value is more difficult.  Fortunately, the
two places we do that are the ffma peephole where it doesn't matter and GCM
where we already gracefully handle duplicate visits to an instruction.

Another aspect here is that using linked lists in this way can be tricky to
get right.  With sets, things were quite forgiving and the worst that
happened if you didn't properly remove a use was that it would get caught
in the validator.  With linked lists, it can lead to linked list corruption
which can be harder to track.  However, we do just as much validation of
the linked lists as we did of the sets so the validator should still catch
these problems.  While working on this series, the vast majority of the
bugs I had to fix were caught by assertions.  I don't think the lists are
going to be that much worse than the sets.
---
 src/glsl/nir/nir.c  | 232 
 src/glsl/nir/nir.h  |  27 --
 src/glsl/nir/nir_validate.c | 158 +++---
 3 files changed, 182 insertions(+), 235 deletions(-)

diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index b8f5dd4..283b861 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -58,12 +58,9 @@ reg_create(void *mem_ctx, struct exec_list *list)
nir_register *reg = ralloc(mem_ctx, nir_register);
 
reg->parent_instr = NULL;
-   reg->uses = _mesa_set_create(reg, _mesa_hash_pointer,
-_mesa_key_pointer_equal);
-   reg->defs = _mesa_set_create(reg, _mesa_hash_pointer,
-_mesa_key_pointer_equal);
-   reg->if_uses = _mesa_set_create(reg, _mesa_hash_pointer,
-   _mesa_key_pointer_equal);
+   nir_list_init(&reg->uses);
+   nir_list_init(&reg->defs);
+   nir_list_init(&reg->if_uses);
 
reg->num_components = 0;
reg->num_array_elems = 0;
@@ -1070,11 +1067,14 @@ update_if_uses(nir_cf_node *node)
 
nir_if *if_stmt = nir_cf_node_as_if(node);
 
-   struct set *if_uses_set = if_stmt->condition.is_ssa ?
- if_stmt->condition.ssa->if_uses :
- if_stmt->condition.reg.reg->uses;
-
-   _mesa_set_add(if_uses_set, if_stmt);
+   if_stmt->condition.parent_if = if_stmt;
+   if (if_stmt->condition.is_ssa) {
+  nir_list_push_tail(&if_stmt->condition.ssa->if_uses,
+ &if_stmt->condition.use_link);
+   } else {
+  nir_list_push_tail(&if_stmt->condition.reg.reg->if_uses,
+ &if_stmt->condition.use_link);
+   }
 }
 
 void
@@ -1227,16 +1227,7 @@ cleanup_cf_node(nir_cf_node *node)
   foreach_list_typed(nir_cf_node, child, node, &if_stmt->else_list)
  cleanup_cf_node(child);
 
-  struct set *if_uses;
-  if (if_stmt->condition.is_ssa) {
- if_uses = if_stmt->condition.ssa->if_uses;
-  } else {
- if_uses = if_stmt->condition.reg.reg->if_uses;
-  }
-
-  struct set_entry *entry = _mesa_set_search(if_uses, if_stmt);
-  assert(entry);
-  _mesa_set_remove(if_uses, entry);
+  nir_link_remove(&if_stmt->condition.use_link);
   break;
}
 
@@ -1293,9 +1284,10 @@ add_use_cb(nir_src *src, void *state)
 {
nir_instr *instr = state;
 
-   struct set *uses_set = src->is_ssa ? src->ssa->uses : src->reg.reg->uses;
-
-   _mesa_set_add(uses_set, instr);
+   src->parent_instr = instr;
+   nir_link_init(&src->use_link);
+   nir_list *uses_list = src->is_ssa ? &src->ssa->uses : &src->reg.reg->uses;
+   nir_list_push_tail(uses_list, &src->use_link);
 
return true;
 }
@@ -1320,8 +1312,11 @@ add_reg_def_cb(nir_dest *dest, void *state)
 {
nir_instr *instr = state;
 
-   if (!dest->is_ssa)
-  _mesa_set_add(dest->reg.reg->defs, instr);
+   if (!dest->is_ssa) {
+  dest->reg.parent_instr = instr;
+  nir_link_init(&dest->reg.def_link);
+  nir_list_push_tail(&dest->reg.reg->defs, &dest->reg.def_link);
+   }
 
return true;
 }
@@ -1436,13 +1431,7 @@ nir_instr_insert_after_cf_list(struct exec_li

[Mesa-dev] [RFC 3/9] nir: Add and use initializer #defines for nir_src and nir_dest

2015-04-24 Thread Jason Ekstrand
---
 src/glsl/nir/glsl_to_nir.cpp   |  2 +-
 src/glsl/nir/nir.h | 13 +++--
 src/glsl/nir/nir_builder.h |  6 ++
 src/glsl/nir/nir_lower_samplers.cpp|  5 ++---
 src/glsl/nir/nir_lower_tex_projector.c |  5 ++---
 src/glsl/nir/nir_search.c  |  2 +-
 6 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index f6b8331..ed70508 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -824,7 +824,7 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir)
nir_dest *dest = get_instr_dest(this->result);
 
assert(dest->is_ssa);
-   nir_src src;
+   nir_src src = NIR_SRC_INIT;
src.is_ssa = true;
src.ssa = &dest->ssa;
 
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 98b0ec3..a174666 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -497,6 +497,8 @@ typedef struct nir_src {
bool is_ssa;
 } nir_src;
 
+#define NIR_SRC_INIT (nir_src) { { { NULL } } }
+
 typedef struct {
union {
   nir_reg_dest reg;
@@ -506,10 +508,12 @@ typedef struct {
bool is_ssa;
 } nir_dest;
 
+#define NIR_DEST_INIT (nir_dest) { { { NULL } } }
+
 static inline nir_src
 nir_src_for_ssa(nir_ssa_def *def)
 {
-   nir_src src;
+   nir_src src = NIR_SRC_INIT;
 
src.is_ssa = true;
src.ssa = def;
@@ -520,7 +524,7 @@ nir_src_for_ssa(nir_ssa_def *def)
 static inline nir_src
 nir_src_for_reg(nir_register *reg)
 {
-   nir_src src;
+   nir_src src = NIR_SRC_INIT;
 
src.is_ssa = false;
src.reg.reg = reg;
@@ -543,12 +547,9 @@ nir_src_get_parent_instr(const nir_src *src)
 static inline nir_dest
 nir_dest_for_reg(nir_register *reg)
 {
-   nir_dest dest;
+   nir_dest dest = NIR_DEST_INIT;
 
-   dest.is_ssa = false;
dest.reg.reg = reg;
-   dest.reg.indirect = NULL;
-   dest.reg.base_offset = 0;
 
return dest;
 }
diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h
index d1419ee..9223e83 100644
--- a/src/glsl/nir/nir_builder.h
+++ b/src/glsl/nir/nir_builder.h
@@ -231,8 +231,7 @@ static inline nir_ssa_def *
 nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4],
 unsigned num_components, bool use_fmov)
 {
-   nir_alu_src alu_src;
-   memset(&alu_src, 0, sizeof(alu_src));
+   nir_alu_src alu_src = { NIR_SRC_INIT };
alu_src.src = nir_src_for_ssa(src);
for (int i = 0; i < 4; i++)
   alu_src.swizzle[i] = swiz[i];
@@ -251,8 +250,7 @@ nir_ssa_for_src(nir_builder *build, nir_src src, int 
num_components)
if (src.is_ssa && src.ssa->num_components == num_components)
   return src.ssa;
 
-   nir_alu_src alu;
-   memset(&alu, 0, sizeof(alu));
+   nir_alu_src alu = { NIR_SRC_INIT };
alu.src = src;
for (int j = 0; j < 4; j++)
   alu.swizzle[j] = j;
diff --git a/src/glsl/nir/nir_lower_samplers.cpp 
b/src/glsl/nir/nir_lower_samplers.cpp
index cf8ab83..e9e152f 100644
--- a/src/glsl/nir/nir_lower_samplers.cpp
+++ b/src/glsl/nir/nir_lower_samplers.cpp
@@ -95,9 +95,8 @@ lower_sampler(nir_tex_instr *instr, const struct 
gl_shader_program *shader_progr
 
 instr->sampler_array_size = glsl_get_length(deref->type);
 
-nir_src empty;
-memset(&empty, 0, sizeof empty);
-nir_instr_rewrite_src(&instr->instr, &deref_array->indirect, 
empty);
+nir_instr_rewrite_src(&instr->instr, &deref_array->indirect,
+  NIR_SRC_INIT);
 
 if (deref_array->deref.child)
ralloc_strcat(&name, "[0]");
diff --git a/src/glsl/nir/nir_lower_tex_projector.c 
b/src/glsl/nir/nir_lower_tex_projector.c
index 6b0e9c3..6e2cc80 100644
--- a/src/glsl/nir/nir_lower_tex_projector.c
+++ b/src/glsl/nir/nir_lower_tex_projector.c
@@ -109,9 +109,8 @@ nir_lower_tex_projector_block(nir_block *block, void 
*void_state)
   /* Now move the later tex sources down the array so that the projector
* disappears.
*/
-  nir_src dead;
-  memset(&dead, 0, sizeof dead);
-  nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src, dead);
+  nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src,
+NIR_SRC_INIT);
   memmove(&tex->src[proj_index],
   &tex->src[proj_index + 1],
   (tex->num_srcs - proj_index) * sizeof(*tex->src));
diff --git a/src/glsl/nir/nir_search.c b/src/glsl/nir/nir_search.c
index 5ba0160..490b31f 100644
--- a/src/glsl/nir/nir_search.c
+++ b/src/glsl/nir/nir_search.c
@@ -276,7 +276,7 @@ construct_value(const nir_search_value *value, nir_alu_type 
type,
   const nir_search_variable *var = nir_search_value_as_variable(value);
   assert(state->variables_seen & (1 << var->variable));
 
-  nir_alu_src val;
+  nir_alu_src val = { NIR_SRC_INIT };
   nir_alu_src_copy(&val, &state->variables[var->variable], mem_ctx);
 
   assert(!var->is_constant);
-- 
2.3.6

___
mesa

[Mesa-dev] [RFC 0/9] Use linked lists for use/def sets in NIR

2015-04-24 Thread Jason Ekstrand
This patch series is an experiment that I ran this week to see what would
happen if we used a linked list instead of a hash set for use/def sets in
NIR.  The first 5 patches are cleanups that we probably want anyway.  The
6th adds a C-based linked list to NIR. The last 3, which need to be
squashed together, actually make the change to linked lists.  I'll let you
read the commit messages on 6 and 7 for the important details there.

Jason Ekstrand (9):
  nir/validate: Validate SSA def parent instructions
  nir: Modernize the out-of-SSA pass
  nir: Add and use initializer #defines for nir_src and nir_dest
  nir: Add a function for rewriting the condition of an if statement
  nir: Use nir_instr_rewrite_src in copy propagation
  nir: Add an entirely C-based linked list implementation
  nir/nir: Use a linked list instead of a hash set for use/def sets
  SQUASH: nir: Add a helper for moving a source and use it in texture
lowering
  SQUASH: nir: Update various components for the new list-based use/def
sets

 src/glsl/Makefile.sources   |   1 +
 src/glsl/nir/glsl_to_nir.cpp|   2 +-
 src/glsl/nir/nir.c  | 230 ++--
 src/glsl/nir/nir.h  |  40 --
 src/glsl/nir/nir_builder.h  |   6 +-
 src/glsl/nir/nir_from_ssa.c | 142 +---
 src/glsl/nir/nir_list.h | 183 +
 src/glsl/nir/nir_lower_locals_to_regs.c |  14 +-
 src/glsl/nir/nir_lower_samplers.cpp |  30 +++--
 src/glsl/nir/nir_lower_tex_projector.c  |  12 +-
 src/glsl/nir/nir_lower_to_source_mods.c |  20 +--
 src/glsl/nir/nir_lower_vars_to_ssa.c|   3 +-
 src/glsl/nir/nir_opt_copy_propagate.c   |  71 ++
 src/glsl/nir/nir_opt_gcm.c  |  14 +-
 src/glsl/nir/nir_opt_global_to_local.c  |  13 +-
 src/glsl/nir/nir_opt_peephole_ffma.c|   9 +-
 src/glsl/nir/nir_opt_peephole_select.c  |  10 +-
 src/glsl/nir/nir_search.c   |   2 +-
 src/glsl/nir/nir_to_ssa.c   |  19 +--
 src/glsl/nir/nir_validate.c | 160 +++---
 20 files changed, 518 insertions(+), 463 deletions(-)
 create mode 100644 src/glsl/nir/nir_list.h

-- 
2.3.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 2/9] nir: Modernize the out-of-SSA pass

2015-04-24 Thread Jason Ekstrand
The out-of-SSA pass was one of the first passes written when getting SSA
up-and-going (for obvious reasons).  As such, it came before a lot of the
nifty SSA-based helpers were introduced.  This commit modernizes it so that
we're no longer doing nearly as much manual banging on use/def sets.
---
 src/glsl/nir/nir_from_ssa.c | 139 
 1 file changed, 36 insertions(+), 103 deletions(-)

diff --git a/src/glsl/nir/nir_from_ssa.c b/src/glsl/nir/nir_from_ssa.c
index 184698a..5e7deca 100644
--- a/src/glsl/nir/nir_from_ssa.c
+++ b/src/glsl/nir/nir_from_ssa.c
@@ -37,7 +37,6 @@
 struct from_ssa_state {
void *mem_ctx;
void *dead_ctx;
-   struct hash_table *ssa_table;
struct hash_table *merge_node_table;
nir_instr *instr;
nir_function_impl *impl;
@@ -344,45 +343,31 @@ isolate_phi_nodes_block(nir_block *block, void 
*void_state)
 get_parallel_copy_at_end_of_block(src->pred);
  assert(pcopy);
 
- nir_parallel_copy_entry *entry = ralloc(state->dead_ctx,
- nir_parallel_copy_entry);
- exec_list_push_tail(&pcopy->entries, &entry->node);
-
- nir_src_copy(&entry->src, &src->src, state->dead_ctx);
- _mesa_set_add(src->src.ssa->uses, &pcopy->instr);
-
+ nir_parallel_copy_entry *entry = rzalloc(state->dead_ctx,
+  nir_parallel_copy_entry);
  nir_ssa_dest_init(&pcopy->instr, &entry->dest,
phi->dest.ssa.num_components, src->src.ssa->name);
+ exec_list_push_tail(&pcopy->entries, &entry->node);
 
- struct set_entry *use_entry =
-_mesa_set_search(src->src.ssa->uses, instr);
- if (use_entry)
-/* It is possible that a phi node can use the same source twice
- * but for different basic blocks.  If that happens, entry will
- * be NULL because we already deleted it.  This is safe
- * because, by the time the loop is done, we will have deleted
- * all of the sources of the phi from their respective use sets
- * and moved them to the parallel copy definitions.
- */
-_mesa_set_remove(src->src.ssa->uses, use_entry);
+ assert(src->src.is_ssa);
+ nir_instr_rewrite_src(&pcopy->instr, &entry->src, src->src);
 
- src->src.ssa = &entry->dest.ssa;
- _mesa_set_add(entry->dest.ssa.uses, instr);
+ nir_instr_rewrite_src(&phi->instr, &src->src,
+   nir_src_for_ssa(&entry->dest.ssa));
   }
 
-  nir_parallel_copy_entry *entry = ralloc(state->dead_ctx,
-  nir_parallel_copy_entry);
-  exec_list_push_tail(&block_pcopy->entries, &entry->node);
-
+  nir_parallel_copy_entry *entry = rzalloc(state->dead_ctx,
+   nir_parallel_copy_entry);
   nir_ssa_dest_init(&block_pcopy->instr, &entry->dest,
 phi->dest.ssa.num_components, phi->dest.ssa.name);
+  exec_list_push_tail(&block_pcopy->entries, &entry->node);
+
   nir_ssa_def_rewrite_uses(&phi->dest.ssa,
nir_src_for_ssa(&entry->dest.ssa),
state->mem_ctx);
 
-  entry->src.is_ssa = true;
-  entry->src.ssa = &phi->dest.ssa;
-  _mesa_set_add(phi->dest.ssa.uses, &block_pcopy->instr);
+  nir_instr_rewrite_src(&block_pcopy->instr, &entry->src,
+nir_src_for_ssa(&phi->dest.ssa));
}
 
return true;
@@ -472,12 +457,16 @@ agressive_coalesce_block(nir_block *block, void 
*void_state)
return true;
 }
 
-static nir_register *
-get_register_for_ssa_def(nir_ssa_def *def, struct from_ssa_state *state)
+static bool
+rewrite_ssa_def(nir_ssa_def *def, void *void_state)
 {
+   struct from_ssa_state *state = void_state;
+   nir_register *reg;
+
struct hash_entry *entry =
   _mesa_hash_table_search(state->merge_node_table, def);
if (entry) {
+  /* In this case, we're part of a phi web.  Use the web's register. */
   merge_node *node = (merge_node *)entry->data;
 
   /* If it doesn't have a register yet, create one.  Note that all of
@@ -491,20 +480,15 @@ get_register_for_ssa_def(nir_ssa_def *def, struct 
from_ssa_state *state)
  node->set->reg->num_array_elems = 0;
   }
 
-  return node->set->reg;
-   }
-
-   entry = _mesa_hash_table_search(state->ssa_table, def);
-   if (entry) {
-  return (nir_register *)entry->data;
+  reg = node->set->reg;
} else {
   /* We leave load_const SSA values alone.  They act as immediates to
* the backend.  If it got coalesced into a phi, that's ok.
*/
   if (def->parent_instr->type == nir_instr_type_load_const)
- return NULL;
+ return true;
 
-  nir_register *reg = nir_local_reg_create(state->impl);
+  reg = nir_loc

[Mesa-dev] [RFC 6/9] nir: Add an entirely C-based linked list implementation

2015-04-24 Thread Jason Ekstrand
This commit adds a C-based linked list implementation for NIR.  Unlike
exec_list in glsl/list.h, there is no C++ API.  Also, this list is based on
wl_list (from the Wayland project) which is, in turn, based on the kernel
list.  As such, it should be fairly familiar to people who have done
anything in kernel space.

Doesn't exec_list already have a C API?

Yes, it does.  However, exec_list has C++ constructors for exec_list and
exec_node.  In the patches that follow, I use linked lists for use/def sets
for registers and SSA values.  In order to do so, I have to be able to
place lists and links inside of unions.  Since exec_list and exec_node have
constructors, doing so causes any C++ code that includes nir.h to die in a
fire.  Therefore, we can't just use exec_list.

What about simple_list?  Why re-create it?

I thought about that too.  However, the simple_list is badly named and the
API isn't that great.  Making it usable as a first-class datastructure
would have taken as much work as adding nir_list.  Also, simple_list isn't
really a standard as it's only ever used in errors.c and the vc4 driver.

Why a kernel list; why not keep the semantics of exec_list?

The short version:  I like it better.  Also, while exec_list is familiar to
people who have worked inside the mesa GLSL compiler, I think that the
kernel list will be more familiar to people in the open-source graphics
community in general.  For whatever it's worth, I explicitly designed it
with separate nir_list and nir_link structures so that we can switch from
kernel list to exec_list semantics if we want to.

Why put this in NIR and not in util?

At the moment, NIR is the only user.  I do expect that Eric may want to use
it in vc4 over simple_list.  However, vc4 is already using NIR anyway, so
it's not really that polluting.

It has also been suggested by Ken that we just pull the C bits out of
exec_list and keep one underlying implementation for both C and C++ only
with different names.  While I think that this is definitely doable and may
be the best long-term solution, I didn't want to do that refactoring prior
to getting this series up-and-going and adding a list was easier.  I'm ok
with doing that instead of adding a list.
---
 src/glsl/Makefile.sources |   1 +
 src/glsl/nir/nir_list.h   | 183 ++
 2 files changed, 184 insertions(+)
 create mode 100644 src/glsl/nir/nir_list.h

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index c471eca..fa51dcb 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -28,6 +28,7 @@ NIR_FILES = \
nir/nir_from_ssa.c \
nir/nir_intrinsics.c \
nir/nir_intrinsics.h \
+   nir/nir_list.h \
nir/nir_live_variables.c \
nir/nir_lower_alu_to_scalar.c \
nir/nir_lower_atomics.c \
diff --git a/src/glsl/nir/nir_list.h b/src/glsl/nir/nir_list.h
new file mode 100644
index 000..330a660
--- /dev/null
+++ b/src/glsl/nir/nir_list.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *Jason Ekstrand (ja...@jlekstrand.net)
+ *
+ */
+
+#pragma once
+
+#ifndef _NIR_LIST_H_
+#define _NIR_LIST_H_
+
+/** A simple linked list implementation.
+ *
+ * This linked list implementation is based on wl_list from the Wayland
+ * project which is, in turn, based on the kernel list.  As such, it should
+ * be fairly familiar to anyone who has worked in kernel space.
+ */
+
+/* Required for exec_node_data */
+#include "../list.h"
+
+struct nir_link;
+
+/** \class nir_list
+ *
+ * \brief doubly-linked list
+ *
+ * The list head is of type nir_list and must be initialized using
+ * nir_list_init().  All entries in the list must be of the same type.  The
+ * item type must have a nir_link member which must be initialized to zero.
+ * To query if the list is empty in O(1), use nir_l

[Mesa-dev] [RFC 5/9] nir: Use nir_instr_rewrite_src in copy propagation

2015-04-24 Thread Jason Ekstrand
We were rolling our own rewrite_src variant in copy-propagation.  Let's
stop doing that and use the ones in core NIR.
---
 src/glsl/nir/nir_opt_copy_propagate.c | 71 +--
 1 file changed, 10 insertions(+), 61 deletions(-)

diff --git a/src/glsl/nir/nir_opt_copy_propagate.c 
b/src/glsl/nir/nir_opt_copy_propagate.c
index ee78e5a..71367d0 100644
--- a/src/glsl/nir/nir_opt_copy_propagate.c
+++ b/src/glsl/nir/nir_opt_copy_propagate.c
@@ -93,62 +93,6 @@ is_swizzleless_move(nir_alu_instr *instr)
}
 }
 
-typedef struct {
-   nir_ssa_def *def;
-   bool found;
-} search_def_state;
-
-static bool
-search_def(nir_src *src, void *_state)
-{
-   search_def_state *state = (search_def_state *) _state;
-
-   if (src->is_ssa && src->ssa == state->def)
-  state->found = true;
-
-   return true;
-}
-
-static void
-rewrite_src_instr(nir_src *src, nir_ssa_def *new_def, nir_instr *parent_instr)
-{
-   nir_ssa_def *old_def = src->ssa;
-
-   src->ssa = new_def;
-
-   /*
-* The instruction could still use the old definition in one of its other
-* sources, so only remove the instruction from the uses if there are no
-* more uses left.
-*/
-
-   search_def_state search_state;
-   search_state.def = old_def;
-   search_state.found = false;
-   nir_foreach_src(parent_instr, search_def, &search_state);
-   if (!search_state.found) {
-  struct set_entry *entry = _mesa_set_search(old_def->uses, parent_instr);
-  assert(entry);
-  _mesa_set_remove(old_def->uses, entry);
-   }
-
-   _mesa_set_add(new_def->uses, parent_instr);
-}
-
-static void
-rewrite_src_if(nir_if *if_stmt, nir_ssa_def *new_def)
-{
-   nir_ssa_def *old_def = if_stmt->condition.ssa;
-
-   if_stmt->condition.ssa = new_def;
-
-   struct set_entry *entry = _mesa_set_search(old_def->if_uses, if_stmt);
-   assert(entry);
-   _mesa_set_remove(old_def->if_uses, entry);
-
-   _mesa_set_add(new_def->if_uses, if_stmt);
-}
-
 static bool
 copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
 {
@@ -178,10 +122,14 @@ copy_prop_src(nir_src *src, nir_instr *parent_instr, 
nir_if *parent_if)
  return false;
}
 
-   if (parent_instr)
-  rewrite_src_instr(src, alu_instr->src[0].src.ssa, parent_instr);
-   else
-  rewrite_src_if(parent_if, alu_instr->src[0].src.ssa);
+   if (parent_instr) {
+  nir_instr_rewrite_src(parent_instr, src,
+nir_src_for_ssa(alu_instr->src[0].src.ssa));
+   } else {
+  assert(src == &parent_if->condition);
+  nir_if_rewrite_condition(parent_if,
+   nir_src_for_ssa(alu_instr->src[0].src.ssa));
+   }
 
return true;
 }
@@ -234,7 +182,8 @@ copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned 
index)
for (unsigned i = 0; i < 4; i++)
   src->swizzle[i] = new_swizzle[i];
 
-   rewrite_src_instr(&src->src, def, &parent_alu_instr->instr);
+   nir_instr_rewrite_src(&parent_alu_instr->instr, &src->src,
+ nir_src_for_ssa(def));
 
return true;
 }
-- 
2.3.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 8/9] SQUASH: nir: Add a helper for moving a source and use it in texture lowering

2015-04-24 Thread Jason Ekstrand
One of the side-effects of using a linked list for use/def sets is that you
can no longer simply copy them around.  There were a couple of places in
the texture lowering passes that realloced or used memmove on lists of
sources.  Instead, this commit adds a helper for moving a source and uses
it for these cases.
---
 src/glsl/nir/nir.c | 12 
 src/glsl/nir/nir.h |  1 +
 src/glsl/nir/nir_lower_samplers.cpp| 29 +++--
 src/glsl/nir/nir_lower_tex_projector.c |  7 ---
 4 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index 283b861..b8f768f 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -1872,6 +1872,18 @@ nir_instr_rewrite_src(nir_instr *instr, nir_src *src, 
nir_src new_src)
 }
 
 void
+nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
+{
+   assert(!src_is_valid(dest) || dest->parent_instr == dest_instr);
+
+   src_remove_all_uses(dest);
+   src_remove_all_uses(src);
+   *dest = *src;
+   *src = NIR_SRC_INIT;
+   src_add_all_uses(dest, dest_instr, NULL);
+}
+
+void
 nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src)
 {
nir_src *src = &if_stmt->condition;
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 364071d..a64b534 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1562,6 +1562,7 @@ bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb 
cb, void *state);
 nir_const_value *nir_src_as_const_value(nir_src src);
 bool nir_srcs_equal(nir_src src1, nir_src src2);
 void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src);
+void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src);
 void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src);
 
 void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
diff --git a/src/glsl/nir/nir_lower_samplers.cpp 
b/src/glsl/nir/nir_lower_samplers.cpp
index e9e152f..8fc5909 100644
--- a/src/glsl/nir/nir_lower_samplers.cpp
+++ b/src/glsl/nir/nir_lower_samplers.cpp
@@ -83,21 +83,30 @@ lower_sampler(nir_tex_instr *instr, const struct 
gl_shader_program *shader_progr
ralloc_asprintf_append(&name, "[%u]", deref_array->base_offset);
 break;
  case nir_deref_array_type_indirect: {
-instr->src = reralloc(instr, instr->src, nir_tex_src,
-  instr->num_srcs + 1);
-memset(&instr->src[instr->num_srcs], 0, sizeof *instr->src);
+/* First, we have to resize the array of texture sources */
+nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src,
+  instr->num_srcs + 1);
+
+for (unsigned i = 0; i < instr->num_srcs; i++) {
+   new_srcs[i].src_type = instr->src[i].src_type;
+   nir_instr_move_src(&instr->instr, &new_srcs[i].src,
+  &instr->src[i].src);
+}
+
+ralloc_free(instr->src);
+instr->src = new_srcs;
+
+/* Now we can go ahead and move the source over to being a
+ * first-class texture source.
+ */
 instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset;
 instr->num_srcs++;
-
-nir_instr_rewrite_src(&instr->instr,
-  &instr->src[instr->num_srcs - 1].src,
-  deref_array->indirect);
+nir_instr_move_src(&instr->instr,
+   &instr->src[instr->num_srcs - 1].src,
+   &deref_array->indirect);
 
 instr->sampler_array_size = glsl_get_length(deref->type);
 
-nir_instr_rewrite_src(&instr->instr, &deref_array->indirect,
-  NIR_SRC_INIT);
-
 if (deref_array->deref.child)
ralloc_strcat(&name, "[0]");
 break;
diff --git a/src/glsl/nir/nir_lower_tex_projector.c 
b/src/glsl/nir/nir_lower_tex_projector.c
index 6e2cc80..357131c 100644
--- a/src/glsl/nir/nir_lower_tex_projector.c
+++ b/src/glsl/nir/nir_lower_tex_projector.c
@@ -111,9 +111,10 @@ nir_lower_tex_projector_block(nir_block *block, void 
*void_state)
*/
   nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src,
 NIR_SRC_INIT);
-  memmove(&tex->src[proj_index],
-  &tex->src[proj_index + 1],
-  (tex->num_srcs - proj_index) * sizeof(*tex->src));
+  for (int i = proj_index + 1; i < tex->num_srcs; i++) {
+ tex->src[i-1].src_type = tex->src[i].src_type;
+ nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
+  }
   tex->num_srcs--;
}
 
-- 
2.3.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 1/9] nir/validate: Validate SSA def parent instructions

2015-04-24 Thread Jason Ekstrand
---
 src/glsl/nir/nir_validate.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c
index a7aa798..35a853d 100644
--- a/src/glsl/nir/nir_validate.c
+++ b/src/glsl/nir/nir_validate.c
@@ -236,6 +236,8 @@ validate_ssa_def(nir_ssa_def *def, validate_state *state)
assert(!BITSET_TEST(state->ssa_defs_found, def->index));
BITSET_SET(state->ssa_defs_found, def->index);
 
+   assert(def->parent_instr == state->instr);
+
assert(def->num_components <= 4);
 
ssa_def_validate_state *def_state = ralloc(state->ssa_defs,
-- 
2.3.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 4/9] nir: Add a function for rewriting the condition of an if statement

2015-04-24 Thread Jason Ekstrand
---
 src/glsl/nir/nir.c | 22 ++
 src/glsl/nir/nir.h |  1 +
 2 files changed, 23 insertions(+)

diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index 4cc074b..b8f5dd4 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -1895,6 +1895,28 @@ nir_instr_rewrite_src(nir_instr *instr, nir_src *src, 
nir_src new_src)
 }
 
 void
+nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src)
+{
+   for (nir_src *src = &if_stmt->condition; src;
+src = src->is_ssa ? NULL : src->reg.indirect) {
+  struct set *uses = src->is_ssa ? src->ssa->if_uses
+ : src->reg.reg->if_uses;
+  struct set_entry *entry = _mesa_set_search(uses, if_stmt);
+  assert(entry);
+  _mesa_set_remove(uses, entry);
+   }
+
+   if_stmt->condition = new_src;
+
+   for (nir_src *src = &if_stmt->condition; src;
+src = src->is_ssa ? NULL : src->reg.indirect) {
+  struct set *uses = src->is_ssa ? src->ssa->if_uses
+ : src->reg.reg->if_uses;
+  _mesa_set_add(uses, if_stmt);
+   }
+}
+
+void
 nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
  unsigned num_components, const char *name)
 {
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index a174666..aaf1c57 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1549,6 +1549,7 @@ bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb 
cb, void *state);
 nir_const_value *nir_src_as_const_value(nir_src src);
 bool nir_srcs_equal(nir_src src1, nir_src src2);
 void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src);
+void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src);
 
 void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
unsigned num_components, const char *name);
-- 
2.3.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] st/mesa: update stencil surface if it comes from texture

2015-04-24 Thread Ilia Mirkin
Now that ARB_texture_stencil8 is supported, this might happen.

Signed-off-by: Ilia Mirkin 
---
 src/mesa/state_tracker/st_atom_framebuffer.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_atom_framebuffer.c 
b/src/mesa/state_tracker/st_atom_framebuffer.c
index b195c55..ae883a2 100644
--- a/src/mesa/state_tracker/st_atom_framebuffer.c
+++ b/src/mesa/state_tracker/st_atom_framebuffer.c
@@ -134,7 +134,10 @@ update_framebuffer_state( struct st_context *st )
else {
   strb = st_renderbuffer(fb->Attachment[BUFFER_STENCIL].Renderbuffer);
   if (strb) {
- assert(strb->surface);
+ if (strb->is_rtt) {
+/* rendering to a GL texture, may have to update surface */
+st_update_renderbuffer_surface(st, strb);
+ }
  pipe_surface_reference(&framebuffer->zsbuf, strb->surface);
  update_framebuffer_size(framebuffer, strb->surface);
   }
-- 
2.0.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 13/22] i965: Use BRW_SURFACE_* in place of GL_TEXTURE_*

2015-04-24 Thread Anuj Phogat
On Thu, Apr 23, 2015 at 6:10 PM, Pohjolainen, Topi
 wrote:
> On Fri, Apr 17, 2015 at 04:51:34PM -0700, Anuj Phogat wrote:
>> Makes no functional changes in the code.
>>
>> Signed-off-by: Anuj Phogat 
>> Reviewed-by: Chris Forbes 
>> ---
>>  src/mesa/drivers/dri/i965/gen8_surface_state.c | 15 +--
>>  1 file changed, 9 insertions(+), 6 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c 
>> b/src/mesa/drivers/dri/i965/gen8_surface_state.c
>> index d2eceff..1ec57c0 100644
>> --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
>> +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
>> @@ -172,6 +172,7 @@ gen8_update_texture_surface(struct gl_context *ctx,
>> struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
>> struct intel_mipmap_tree *aux_mt = NULL;
>> uint32_t aux_mode = 0;
>> +   uint32_t surf_type;
>> mesa_format format = intelObj->_Format;
>> uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
>>
>> @@ -201,29 +202,31 @@ gen8_update_texture_surface(struct gl_context *ctx,
>>aux_mode = GEN8_SURFACE_AUX_MODE_MCS;
>> }
>>
>> +   surf_type = translate_tex_target(tObj->Target);
>> +
>
> I would rather just write:
>
>  const uint32_t surf_type = translate_tex_target(tObj->Target);
>
>> /* If this is a view with restricted NumLayers, then our effective depth
>>  * is not just the miptree depth.
>>  */
>> uint32_t effective_depth =
>> -  (tObj->Immutable && tObj->Target != GL_TEXTURE_3D) ? tObj->NumLayers
>> - : 
>> mt->logical_depth0;
>> +  (tObj->Immutable && surf_type != BRW_SURFACE_3D) ? tObj->NumLayers
>> +   : mt->logical_depth0;
>>
>> uint32_t tex_format = translate_tex_format(brw, format, 
>> sampler->sRGBDecode);
>>
>> uint32_t *surf = allocate_surface_state(brw, surf_offset);
>>
>> -   surf[0] = translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
>> +
>
> I guess you didn't mean to have extra newline here.
>
>> +   surf[0] = surf_type << BRW_SURFACE_TYPE_SHIFT |
>
> Let's use SET_FIELD(surf_type, BRW_SURFACE_TYPE) instead.
OK
>
> With those:
>
> Reviewed-by: Topi Pohjolainen 
>
> Having said that, would you mind rebasing this on top of my series that
> moves all the decision making common to all hardware platforms into one
> place?
>
No problem. I'll coordinate with you on this.

>>   tex_format << BRW_SURFACE_FORMAT_SHIFT |
>>   vertical_alignment(mt) |
>>   horizontal_alignment(mt) |
>>   tiling_mode;
>>
>> -   if (tObj->Target == GL_TEXTURE_CUBE_MAP ||
>> -   tObj->Target == GL_TEXTURE_CUBE_MAP_ARRAY) {
>> +   if (surf_type == BRW_SURFACE_CUBE) {
>>surf[0] |= BRW_SURFACE_CUBEFACE_ENABLES;
>> }
>>
>> -   if (mt->logical_depth0 > 1 && tObj->Target != GL_TEXTURE_3D)
>> +   if (mt->logical_depth0 > 1 && surf_type != BRW_SURFACE_3D)
>>surf[0] |= GEN8_SURFACE_IS_ARRAY;
>>
>> surf[1] = SET_FIELD(mocs_wb, GEN8_SURFACE_MOCS) | mt->qpitch >> 2;
>> --
>> 2.3.4
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/13] i965: Add typed surface access opcodes.

2015-04-24 Thread Francisco Jerez
"Pohjolainen, Topi"  writes:

> On Fri, Feb 27, 2015 at 05:34:55PM +0200, Francisco Jerez wrote:
>> ---
>>  src/mesa/drivers/dri/i965/brw_defines.h|   4 +
>>  src/mesa/drivers/dri/i965/brw_eu.h |  24 +++
>>  src/mesa/drivers/dri/i965/brw_eu_emit.c| 169 
>> +
>>  src/mesa/drivers/dri/i965/brw_fs.cpp   |  12 ++
>>  src/mesa/drivers/dri/i965/brw_fs_generator.cpp |  17 +++
>>  .../drivers/dri/i965/brw_schedule_instructions.cpp |   3 +
>>  src/mesa/drivers/dri/i965/brw_shader.cpp   |   8 +
>>  src/mesa/drivers/dri/i965/brw_vec4.cpp |   6 +
>>  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp   |  18 +++
>>  9 files changed, 261 insertions(+)
>> 
>> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
>> b/src/mesa/drivers/dri/i965/brw_defines.h
>> index e56f49c..cf07da9 100644
>> --- a/src/mesa/drivers/dri/i965/brw_defines.h
>> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
>> @@ -906,6 +906,10 @@ enum opcode {
>> SHADER_OPCODE_UNTYPED_SURFACE_READ,
>> SHADER_OPCODE_UNTYPED_SURFACE_WRITE,
>>  
>> +   SHADER_OPCODE_TYPED_ATOMIC,
>> +   SHADER_OPCODE_TYPED_SURFACE_READ,
>> +   SHADER_OPCODE_TYPED_SURFACE_WRITE,
>> +
>> SHADER_OPCODE_GEN4_SCRATCH_READ,
>> SHADER_OPCODE_GEN4_SCRATCH_WRITE,
>> SHADER_OPCODE_GEN7_SCRATCH_READ,
>> diff --git a/src/mesa/drivers/dri/i965/brw_eu.h 
>> b/src/mesa/drivers/dri/i965/brw_eu.h
>> index cad956b..ce9554b 100644
>> --- a/src/mesa/drivers/dri/i965/brw_eu.h
>> +++ b/src/mesa/drivers/dri/i965/brw_eu.h
>> @@ -421,6 +421,30 @@ brw_untyped_surface_write(struct brw_compile *p,
>>unsigned num_channels);
>>  
>>  void
>> +brw_typed_atomic(struct brw_compile *p,
>> + struct brw_reg dst,
>> + struct brw_reg payload,
>> + struct brw_reg surface,
>> + unsigned atomic_op,
>> + unsigned msg_length,
>> + bool response_expected);
>> +
>> +void
>> +brw_typed_surface_read(struct brw_compile *p,
>> +   struct brw_reg dst,
>> +   struct brw_reg payload,
>> +   struct brw_reg surface,
>> +   unsigned msg_length,
>> +   unsigned num_channels);
>> +
>> +void
>> +brw_typed_surface_write(struct brw_compile *p,
>> +struct brw_reg payload,
>> +struct brw_reg surface,
>> +unsigned msg_length,
>> +unsigned num_channels);
>> +
>> +void
>>  brw_pixel_interpolator_query(struct brw_compile *p,
>>   struct brw_reg dest,
>>   struct brw_reg mrf,
>> diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
>> b/src/mesa/drivers/dri/i965/brw_eu_emit.c
>> index f5b8fa9..74f1fc1 100644
>> --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
>> +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
>> @@ -2944,6 +2944,175 @@ brw_untyped_surface_write(struct brw_compile *p,
>>p, insn, num_channels);
>>  }
>>  
>> +static void
>> +brw_set_dp_typed_atomic_message(struct brw_compile *p,
>> +struct brw_inst *insn,
>> +unsigned atomic_op,
>> +bool response_expected)
>> +{
>> +   const struct brw_context *brw = p->brw;
>> +   unsigned msg_control =
>> +  atomic_op | /* Atomic Operation Type: BRW_AOP_* */
>> +  (response_expected ? 1 << 5 : 0); /* Return data expected */
>> +
>> +   if (brw->gen >= 8 || brw->is_haswell) {
>> +  if (brw_inst_access_mode(brw, p->current) == BRW_ALIGN_1) {
>> + if (brw_inst_qtr_control(brw, p->current) == GEN6_COMPRESSION_2Q)
>> +msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */
>> +
>> + brw_inst_set_dp_msg_type(brw, insn,
>> +  HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP);
>> +  } else {
>> + brw_inst_set_dp_msg_type(brw, insn,
>> +  
>> HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2);
>> +  }
>> +
>> +   } else {
>> +  brw_inst_set_dp_msg_type(brw, insn,
>> +   GEN7_DATAPORT_RC_TYPED_ATOMIC_OP);
>> +
>> +  if (brw_inst_qtr_control(brw, p->current) == GEN6_COMPRESSION_2Q)
>> + msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */
>> +   }
>> +
>> +   brw_inst_set_dp_msg_control(brw, insn, msg_control);
>> +}
>> +
>> +void
>> +brw_typed_atomic(struct brw_compile *p,
>> + struct brw_reg dst,
>> + struct brw_reg payload,
>> + struct brw_reg surface,
>> + unsigned atomic_op,
>> + unsigned msg_length,
>> + bool response_expected) {
>> +   const struct brw_context *brw = p->brw;
>> +   const unsigned sfid = (brw->gen >= 8 || brw->is_haswell ?
>> +

[Mesa-dev] [Bug 90167] [softpipe] piglit depthstencil-default_fb-drawpixels-32f_24_8_rev regression

2015-04-24 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=90167

Bug ID: 90167
   Summary: [softpipe] piglit
depthstencil-default_fb-drawpixels-32f_24_8_rev
regression
   Product: Mesa
   Version: git
  Hardware: x86-64 (AMD64)
OS: Linux (All)
Status: NEW
  Keywords: bisected, regression
  Severity: normal
  Priority: medium
 Component: Mesa core
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: v...@freedesktop.org
QA Contact: mesa-dev@lists.freedesktop.org
CC: airl...@freedesktop.org, bri...@vmware.com,
srol...@vmware.com

mesa: 13b2e6a520d1f8979fc4da1dd2c6811585b16203 (master 10.6.0-devel)

$ ./bin/fbo-depthstencil drawpixels default_fb 32F_24_8_REV -auto
Testing default_fb.
Testing 32F_24_8_REV.
Testing glDrawPixels(depthstencil).
Stencil at 0,0   Expected: 0x33   Observed: 0xff
Stencil at 1,0   Expected: 0x33   Observed: 0xff
Stencil at 2,0   Expected: 0x33   Observed: 0xff
Stencil at 3,0   Expected: 0x33   Observed: 0xff
Stencil at 4,0   Expected: 0x33   Observed: 0xff
Stencil at 5,0   Expected: 0x33   Observed: 0xff
Stencil at 6,0   Expected: 0x33   Observed: 0xff
Stencil at 7,0   Expected: 0x33   Observed: 0xff
Stencil at 8,0   Expected: 0x33   Observed: 0xff
Stencil at 9,0   Expected: 0x33   Observed: 0xff
Stencil at 10,0   Expected: 0x33   Observed: 0xff
Stencil at 11,0   Expected: 0x33   Observed: 0xff
Stencil at 12,0   Expected: 0x33   Observed: 0xff
Stencil at 13,0   Expected: 0x33   Observed: 0xff
Stencil at 14,0   Expected: 0x33   Observed: 0xff
Stencil at 15,0   Expected: 0x33   Observed: 0xff
Stencil at 16,0   Expected: 0x33   Observed: 0xff
Stencil at 17,0   Expected: 0x33   Observed: 0xff
Stencil at 18,0   Expected: 0x33   Observed: 0xff
...
Total failures: 14884
PIGLIT: {"result": "fail" }


8a41cd2407c1d219cba98add69609ce3a65b5435 is the first bad commit
commit 8a41cd2407c1d219cba98add69609ce3a65b5435
Author: Dave Airlie 
Date:   Wed Apr 22 17:13:06 2015 +1000

softpipe: fix stencil write to use an integer value

This fixes a number of regressions since
61393bdcdc3b63624bf6e9730444f5e9deeedfc8
u_tile: fix stencil texturing tests under softpipe

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89960
Reviewed-by: Brian Paul 
Reviewed-by: Roland Scheidegger 
Signed-off-by: Dave Airlie 

:040000 040000 c98019756adb79e5dd7b8b6f718cc61036c3a502
3a62a6bff9e5ea0584b4d93d0e1d81defa05cc6a M	src
bisect run success

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 12/22] i965: Rename use_linear_1d_layout() and make it global

2015-04-24 Thread Anuj Phogat
On Fri, Apr 24, 2015 at 2:39 PM, Pohjolainen, Topi
 wrote:
> On Fri, Apr 17, 2015 at 04:51:33PM -0700, Anuj Phogat wrote:
>> This function will be utilised in later patches.
>>
>> Signed-off-by: Anuj Phogat 
>> ---
>>  src/mesa/drivers/dri/i965/brw_context.h|  4 
>>  src/mesa/drivers/dri/i965/brw_tex_layout.c | 10 +-
>>  2 files changed, 9 insertions(+), 5 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
>> b/src/mesa/drivers/dri/i965/brw_context.h
>> index 6c168a3..3d0b35c 100644
>> --- a/src/mesa/drivers/dri/i965/brw_context.h
>> +++ b/src/mesa/drivers/dri/i965/brw_context.h
>> @@ -1938,6 +1938,10 @@ gen6_upload_push_constants(struct brw_context *brw,
>> struct brw_stage_state *stage_state,
>> enum aub_state_struct_type type);
>>
>> +bool
>> +gen9_use_linear_1d_layout(struct brw_context *brw,
>> +  struct intel_mipmap_tree *mt);
>
> I would use the opportunity to make both pointers constants - the function
> only uses them for reading. Anyway:
done
>
> Reviewed-by: Topi Pohjolainen 
>
>> +
>>  #ifdef __cplusplus
>>  }
>>  #endif
>> diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c 
>> b/src/mesa/drivers/dri/i965/brw_tex_layout.c
>> index 9342101..28927e9 100644
>> --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
>> +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
>> @@ -430,9 +430,9 @@ align_cube(struct intel_mipmap_tree *mt)
>>mt->total_height += 2;
>>  }
>>
>> -static bool
>> -use_linear_1d_layout(struct brw_context *brw,
>> - struct intel_mipmap_tree *mt)
>> +bool
>> +gen9_use_linear_1d_layout(struct brw_context *brw,
>> +  struct intel_mipmap_tree *mt)
>>  {
>> /* On Gen9+ the mipmap levels of a 1D surface are all laid out in a
>>  * horizontal line. This isn't done for depth/stencil buffers however
>> @@ -458,7 +458,7 @@ brw_miptree_layout_texture_array(struct brw_context *brw,
>>  {
>> int h0, h1;
>> unsigned height = mt->physical_height0;
>> -   bool layout_1d = use_linear_1d_layout(brw, mt);
>> +   bool layout_1d = gen9_use_linear_1d_layout(brw, mt);
>>
>> h0 = ALIGN(mt->physical_height0, mt->align_h);
>> h1 = ALIGN(minify(mt->physical_height0, 1), mt->align_h);
>> @@ -674,7 +674,7 @@ intel_miptree_total_width_height(struct brw_context *brw,
>>   break;
>>case INTEL_MSAA_LAYOUT_NONE:
>>case INTEL_MSAA_LAYOUT_IMS:
>> - if (use_linear_1d_layout(brw, mt))
>> + if (gen9_use_linear_1d_layout(brw, mt))
>>  gen9_miptree_layout_1d(mt);
>>   else
>>  brw_miptree_layout_2d(mt);
>> --
>> 2.3.4
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 16/16] st/nine: Rework texture data allocation

2015-04-24 Thread Ilia Mirkin
On Fri, Apr 24, 2015 at 6:13 PM, Axel Davy  wrote:
> Le 25/04/2015 00:02, Ilia Mirkin a écrit :
>>
>>
>> I'm having trouble understanding what you're changing here... perhaps
>> a better commit description could go a ways to clearing things up?
>> Were you allocating it a level at a time before? How did that work, I
>> don't see a per-level structure... I'm guessing there's a bit more
>> going on here.
>>
> Yes, we were allocating per level, whereas here we allocate once for all
> levels, and get the offset.
>
> The nine_format_get_p_offset are called once per texture creation for every
> level, after the pointers are cached in the surfaces associated to the
> levels.

So that'll be n^2 calls to the size function. Which is suboptimal.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Fill out the rest of brw_debug_recompile_sampler_key().

2015-04-24 Thread Pohjolainen, Topi
On Thu, Apr 23, 2015 at 11:24:11PM -0700, Kenneth Graunke wrote:
> This makes INTEL_DEBUG=perf report shader recompiles due to CMS vs.
> UMS/IMS differences and Sandybridge textureGather workarounds.
> 
> Previously, we just flagged them as "Something else".
> 
> Signed-off-by: Kenneth Graunke 

Reviewed-by: Topi Pohjolainen 

> ---
>  src/mesa/drivers/dri/i965/brw_wm.c | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_wm.c 
> b/src/mesa/drivers/dri/i965/brw_wm.c
> index 959f346..45a03bb 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm.c
> @@ -266,6 +266,14 @@ brw_debug_recompile_sampler_key(struct brw_context *brw,
>old_key->gl_clamp_mask[2], key->gl_clamp_mask[2]);
> found |= key_debug(brw, "gather channel quirk on any texture unit",
>old_key->gather_channel_quirk_mask, 
> key->gather_channel_quirk_mask);
> +   found |= key_debug(brw, "compressed multisample layout",
> +  old_key->compressed_multisample_layout_mask,
> +  key->compressed_multisample_layout_mask);
> +
> +   for (unsigned int i = 0; i < MAX_SAMPLERS; i++) {
> +  found |= key_debug(brw, "textureGather workarounds",
> + old_key->gen6_gather_wa[i], key->gen6_gather_wa[i]);
> +   }
>  
> return found;
>  }
> -- 
> 2.3.5
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/13] i965: Add typed surface access opcodes.

2015-04-24 Thread Pohjolainen, Topi
On Fri, Feb 27, 2015 at 05:34:55PM +0200, Francisco Jerez wrote:
> ---
>  src/mesa/drivers/dri/i965/brw_defines.h|   4 +
>  src/mesa/drivers/dri/i965/brw_eu.h |  24 +++
>  src/mesa/drivers/dri/i965/brw_eu_emit.c| 169 
> +
>  src/mesa/drivers/dri/i965/brw_fs.cpp   |  12 ++
>  src/mesa/drivers/dri/i965/brw_fs_generator.cpp |  17 +++
>  .../drivers/dri/i965/brw_schedule_instructions.cpp |   3 +
>  src/mesa/drivers/dri/i965/brw_shader.cpp   |   8 +
>  src/mesa/drivers/dri/i965/brw_vec4.cpp |   6 +
>  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp   |  18 +++
>  9 files changed, 261 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
> b/src/mesa/drivers/dri/i965/brw_defines.h
> index e56f49c..cf07da9 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -906,6 +906,10 @@ enum opcode {
> SHADER_OPCODE_UNTYPED_SURFACE_READ,
> SHADER_OPCODE_UNTYPED_SURFACE_WRITE,
>  
> +   SHADER_OPCODE_TYPED_ATOMIC,
> +   SHADER_OPCODE_TYPED_SURFACE_READ,
> +   SHADER_OPCODE_TYPED_SURFACE_WRITE,
> +
> SHADER_OPCODE_GEN4_SCRATCH_READ,
> SHADER_OPCODE_GEN4_SCRATCH_WRITE,
> SHADER_OPCODE_GEN7_SCRATCH_READ,
> diff --git a/src/mesa/drivers/dri/i965/brw_eu.h 
> b/src/mesa/drivers/dri/i965/brw_eu.h
> index cad956b..ce9554b 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu.h
> +++ b/src/mesa/drivers/dri/i965/brw_eu.h
> @@ -421,6 +421,30 @@ brw_untyped_surface_write(struct brw_compile *p,
>unsigned num_channels);
>  
>  void
> +brw_typed_atomic(struct brw_compile *p,
> + struct brw_reg dst,
> + struct brw_reg payload,
> + struct brw_reg surface,
> + unsigned atomic_op,
> + unsigned msg_length,
> + bool response_expected);
> +
> +void
> +brw_typed_surface_read(struct brw_compile *p,
> +   struct brw_reg dst,
> +   struct brw_reg payload,
> +   struct brw_reg surface,
> +   unsigned msg_length,
> +   unsigned num_channels);
> +
> +void
> +brw_typed_surface_write(struct brw_compile *p,
> +struct brw_reg payload,
> +struct brw_reg surface,
> +unsigned msg_length,
> +unsigned num_channels);
> +
> +void
>  brw_pixel_interpolator_query(struct brw_compile *p,
>   struct brw_reg dest,
>   struct brw_reg mrf,
> diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
> b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> index f5b8fa9..74f1fc1 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
> +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> @@ -2944,6 +2944,175 @@ brw_untyped_surface_write(struct brw_compile *p,
>p, insn, num_channels);
>  }
>  
> +static void
> +brw_set_dp_typed_atomic_message(struct brw_compile *p,
> +struct brw_inst *insn,
> +unsigned atomic_op,
> +bool response_expected)
> +{
> +   const struct brw_context *brw = p->brw;
> +   unsigned msg_control =
> +  atomic_op | /* Atomic Operation Type: BRW_AOP_* */
> +  (response_expected ? 1 << 5 : 0); /* Return data expected */
> +
> +   if (brw->gen >= 8 || brw->is_haswell) {
> +  if (brw_inst_access_mode(brw, p->current) == BRW_ALIGN_1) {
> + if (brw_inst_qtr_control(brw, p->current) == GEN6_COMPRESSION_2Q)
> +msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */
> +
> + brw_inst_set_dp_msg_type(brw, insn,
> +  HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP);
> +  } else {
> + brw_inst_set_dp_msg_type(brw, insn,
> +  
> HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2);
> +  }
> +
> +   } else {
> +  brw_inst_set_dp_msg_type(brw, insn,
> +   GEN7_DATAPORT_RC_TYPED_ATOMIC_OP);
> +
> +  if (brw_inst_qtr_control(brw, p->current) == GEN6_COMPRESSION_2Q)
> + msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */
> +   }
> +
> +   brw_inst_set_dp_msg_control(brw, insn, msg_control);
> +}
> +
> +void
> +brw_typed_atomic(struct brw_compile *p,
> + struct brw_reg dst,
> + struct brw_reg payload,
> + struct brw_reg surface,
> + unsigned atomic_op,
> + unsigned msg_length,
> + bool response_expected) {
> +   const struct brw_context *brw = p->brw;
> +   const unsigned sfid = (brw->gen >= 8 || brw->is_haswell ?
> +  HSW_SFID_DATAPORT_DATA_CACHE_1 :
> +  GEN6_SFID_DATAPORT_RENDER_CACHE);
> +   const bool align1 = (brw_inst_access_mod

Re: [Mesa-dev] [PATCH V2 06/22] i965/gen9: Set tiled resource mode for the miptree

2015-04-24 Thread Anuj Phogat
On Thu, Apr 23, 2015 at 4:51 PM, Pohjolainen, Topi
 wrote:
> On Fri, Apr 17, 2015 at 04:51:27PM -0700, Anuj Phogat wrote:
>> Signed-off-by: Anuj Phogat 
>> ---
>>  src/mesa/drivers/dri/i965/brw_tex_layout.c| 2 ++
>>  src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 6 ++
>>  2 files changed, 8 insertions(+)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c 
>> b/src/mesa/drivers/dri/i965/brw_tex_layout.c
>> index 68c6634..19ff5b8 100644
>> --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
>> +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
>> @@ -540,6 +540,8 @@ brw_miptree_layout(struct brw_context *brw,
>>  {
>> bool gen6_hiz_or_stencil = false;
>>
>> +   mt->tr_mode = INTEL_MIPTREE_TRMODE_NONE;
>> +
>> if (brw->gen == 6 && mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
>>const GLenum base_format = _mesa_get_format_base_format(mt->format);
>>gen6_hiz_or_stencil = _mesa_is_depth_or_stencil_format(base_format);
>> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 
>> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
>> index b03ffe7..2669817 100644
>> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
>> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
>> @@ -338,6 +338,7 @@ struct intel_mipmap_tree
>> uint32_t pitch; /**< pitch in bytes. */
>>
>> uint32_t tiling; /**< One of the I915_TILING_* flags */
>> +   uint32_t tr_mode; /**< One of the INTEL_MIPTREE_TRMODE_* flags */
>>
>> /* Effectively the key:
>>  */
>> @@ -503,6 +504,11 @@ enum intel_miptree_tiling_mode {
>> INTEL_MIPTREE_TILING_NONE,
>>  };
>>
>> +/* Tile resource modes */
>> +#define   INTEL_MIPTREE_TRMODE_NONE0
>> +#define   INTEL_MIPTREE_TRMODE_YF  1
>> +#define   INTEL_MIPTREE_TRMODE_YS  2
>> +
>
> I think we can use an enum here. We don't do that for tiling but those
> are readily shifted values to go into batches. These values here are
> instead used only by software to make runtime decisions. An enumeration
> gives us compiler warning support, and the debugger shows the
> human-readable value instead of just a number.
>
I agree. Will make the suggested change.

> With that:
>
> Reviewed-by: Topi Pohjolainen 
>
>>  bool
>>  intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw,
>> struct intel_mipmap_tree *mt);
>> --
>> 2.3.4
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 16/16] st/nine: Rework texture data allocation

2015-04-24 Thread Axel Davy

Le 25/04/2015 00:02, Ilia Mirkin a écrit :


I'm having trouble understanding what you're changing here... perhaps
a better commit description could go a ways to clearing things up?
Were you allocating it a level at a time before? How did that work, I
don't see a per-level structure... I'm guessing there's a bit more
going on here.

Yes, we were allocating per level, whereas now we allocate once for
all levels, and get the per-level offset.


nine_format_get_p_offset is called once per level at texture creation;
after that, the pointers are cached in the surfaces associated with
the levels.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Mesa 10.5.4

2015-04-24 Thread Emil Velikov
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1

Mesa 10.5.4 is now available. This release includes fixes for the mesa
state-tracker used by all the gallium drivers, a drirc workaround for
Second Life, plus i965 fixes. For the Android users out there, this release
includes many compilation fixes with more to come shortly.


Brian Paul (1):
  glsl: rewrite glsl_type::record_key_hash() to avoid buffer overflow

Dave Airlie (2):
  st/mesa: convert sub image for cube map arrays to 2d arrays for upload
  st/mesa: align cube map arrays layers

Emil Velikov (12):
  docs: Add 256 sums for the 10.5.3 release
  radeonsi: remove unused si_dump_key()
  android: use LOCAL_SHARED_LIBRARIES over TARGET_OUT_HEADERS
  android: add $(mesa_top)/src include to the whole of mesa
  android: egl: add libsync_cflags to the build
  android: dri/common: conditionally include drm_cflags/set __NOT_HAVE_DRM_H
  android: add HAVE__BUILTIN_* and HAVE_FUNC_ATTRIBUTE_* defines
  android: add $(mesa_top)/src/mesa/main to the includes list
  android: dri: link against libmesa_util
  android: mesa: fix the path of the SSE4_1 optimisations
  Update version to 10.5.4
  Add release notes for the 10.5.4 release

Ian Romanick (1):
  nir: Fix typo in "ushr by 0" algebraic replacement

Kenneth Graunke (2):
  i965: Fix software primitive restart with indirect draws.
  drirc: Add "Second Life" quirk (allow_glsl_extension_directive_midshader).

Kristian Høgsberg (1):
  i965: Rewrite ir_tex to ir_txl with lod 0 for vertex shaders

Marek Olšák (2):
  glsl_to_tgsi: fix out-of-bounds constant access and crash for uniforms
  glsl_to_tgsi: don't use a potentially-undefined immediate for 
ir_query_levels

Mathias Froehlich (1):
  i965: Flush batchbuffer containing the query on glQueryCounter.

Mauro Rossi (2):
  android: mesa: generate the format_{un,}pack.[ch] sources
  android: add inital NIR build


git tag: mesa-10.5.4

ftp://ftp.freedesktop.org/pub/mesa/10.5.4/mesa-10.5.4.tar.gz
MD5: 592d79c313812b2c8d27ca71a1cfd628  mesa-10.5.4.tar.gz
SHA1: 574aacf0f439a42f2faa53bce8314f76c62d614e  mesa-10.5.4.tar.gz
SHA256: e1089567fc7bf8d9b2d8badcc9f2fc3b758701c8c0ccfe7af1805549fea53f11  
mesa-10.5.4.tar.gz
PGP: ftp://ftp.freedesktop.org/pub/mesa/10.5.4/mesa-10.5.4.tar.gz.sig

ftp://ftp.freedesktop.org/pub/mesa/10.5.4/mesa-10.5.4.tar.xz
MD5: 26644437b6447fb3dbae50714a019797  mesa-10.5.4.tar.xz
SHA1: 3ccd3bab83032d225d9e28522358c8c231d33a0b  mesa-10.5.4.tar.xz
SHA256: b51e723f3a20d842c88a92d809435b229fc4744ca0dbec0317d9d4a3ac4c6803  
mesa-10.5.4.tar.xz
PGP: ftp://ftp.freedesktop.org/pub/mesa/10.5.4/mesa-10.5.4.tar.xz.sig

- --
- -Emil

-BEGIN PGP SIGNATURE-
Version: GnuPG v2

iQIcBAEBAgAGBQJVOsyCAAoJEO2uN7As60kNxh8P/2HjtRYu6GMy/w39Gw6MxfGe
x5U1AkJblBl4Uh/P0Uk6I7JTeMjvvmr1PVPT6Xzn1GcHVRPq3EjRCHMuQEYNxnmH
Pf1smxHWv0Y7WR8O5asG82PDz8ewL7LMh/P7FVJydyE2XPxF7mdf0fQxxIFY5YWD
/lcn1FvDKlEapLk/+RFErke5uXUV0foKuGa0IMI8739Z4BfBxo1E5GkARkOS6es8
bIc26ljgbmL6zTvUQC8NowosbQrkzHUeiZU3MU1Gl2HlyxoOee52fU5qUQM29K42
9cQkvUTLbaRianht4fggu9x//Sq8KQUVu5WH5YMfW29qxTl75i0J8XZQcssMISkU
HKs+Xz965dEvPXKLy2PtwLcCazfxIG1kQsaQTNVnin2xfRDdqjdH6e641IAiYJvw
nE2Bk4vpSrOAE1Um969d4zwd9KpWywY1hcx2CsoFlJgZDg51Msu4F31NwuSRDybk
H+Av+e6deM+8vPQ9Z/zPCGyDWg1NEDUUVp29tsPyrxuFebJLfmIU8krGP7OBDHkR
bXbkfgmST+NK8SVfyDg4IP7K1yyX/kwUf76QHrWPXiZzlnzOYMD1bND+8eKjMrmX
RtbCz0f170FGRadpWvvQEXgumxpFiKqqrj5H0Z18W9ZctL6rPifnHYwHP6jI7PWj
Fqur5cl8GuN6cUoRJ/5g
=w41w
-END PGP SIGNATURE-
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 16/16] st/nine: Rework texture data allocation

2015-04-24 Thread Ilia Mirkin
On Fri, Apr 24, 2015 at 4:09 PM, Axel Davy  wrote:
> Some applications assume the memory for multilevel
> textures is allocated per continuous blocks.
>
> This patch implements that behaviour.
>
> Signed-off-by: Axel Davy 
> ---
>  src/gallium/state_trackers/nine/cubetexture9.c | 42 --
>  src/gallium/state_trackers/nine/cubetexture9.h |  1 +
>  src/gallium/state_trackers/nine/nine_pipe.h| 34 ++
>  src/gallium/state_trackers/nine/surface9.c | 50 
> +-
>  src/gallium/state_trackers/nine/surface9.h |  1 -
>  src/gallium/state_trackers/nine/texture9.c | 18 --
>  src/gallium/state_trackers/nine/texture9.h |  1 +
>  7 files changed, 110 insertions(+), 37 deletions(-)
>
> diff --git a/src/gallium/state_trackers/nine/cubetexture9.c 
> b/src/gallium/state_trackers/nine/cubetexture9.c
> index d81cc70..34ef4ec 100644
> --- a/src/gallium/state_trackers/nine/cubetexture9.c
> +++ b/src/gallium/state_trackers/nine/cubetexture9.c
> @@ -40,8 +40,9 @@ NineCubeTexture9_ctor( struct NineCubeTexture9 *This,
>  struct pipe_resource *info = &This->base.base.info;
>  struct pipe_screen *screen = pParams->device->screen;
>  enum pipe_format pf;
> -unsigned i;
> +unsigned i, l, f, offset, face_size = 0;
>  D3DSURFACE_DESC sfdesc;
> +void *p;
>  HRESULT hr;
>
>  DBG("This=%p pParams=%p EdgeLength=%u Levels=%u Usage=%d "
> @@ -97,6 +98,14 @@ NineCubeTexture9_ctor( struct NineCubeTexture9 *This,
>  DBG("Application asked for Software Vertex Processing, "
>  "but this is unimplemented\n");
>
> +if (Pool != D3DPOOL_DEFAULT) {
> +face_size = nine_format_get_alloc_size(pf, EdgeLength, EdgeLength,
> +   info->last_level);
> +This->managed_buffer = MALLOC(6 * face_size);
> +if (!This->managed_buffer)
> +return E_OUTOFMEMORY;
> +}
> +
>  This->surfaces = CALLOC(6 * (info->last_level + 1), 
> sizeof(*This->surfaces));
>  if (!This->surfaces)
>  return E_OUTOFMEMORY;
> @@ -117,16 +126,26 @@ NineCubeTexture9_ctor( struct NineCubeTexture9 *This,
>  sfdesc.Pool = Pool;
>  sfdesc.MultiSampleType = D3DMULTISAMPLE_NONE;
>  sfdesc.MultiSampleQuality = 0;
> -for (i = 0; i < (info->last_level + 1) * 6; ++i) {
> -sfdesc.Width = sfdesc.Height = u_minify(EdgeLength, i / 6);
> -
> -hr = NineSurface9_new(This->base.base.base.device, NineUnknown(This),
> -  This->base.base.resource, NULL, 
> D3DRTYPE_CUBETEXTURE,
> -  i / 6, i % 6,
> -  &sfdesc, &This->surfaces[i]);
> -if (FAILED(hr))
> -return hr;
> +/* We allocate the memory for the surfaces as continous blocks.
> + * This is the expected behaviour, however we haven't tested for
> + * cube textures in which order the faces/levels should be in memory
> + */
> +for (f = 0; f < 6; f++) {
> +offset = f * face_size;
> +for (l = 0; l < info->last_level + 1; l++) {

Elsewhere you do <= info->last_level. Try to stay consistent...

> +sfdesc.Width = sfdesc.Height = u_minify(EdgeLength, l);
> +p = This->managed_buffer ? This->managed_buffer + offset +
> +nine_format_get_p_offset(pf, EdgeLength, EdgeLength, l) :
> +NULL;
> +
> +hr = NineSurface9_new(This->base.base.base.device, 
> NineUnknown(This),
> +  This->base.base.resource, p, 
> D3DRTYPE_CUBETEXTURE,
> +  l, f, &sfdesc, &This->surfaces[f + 6 * l]);
> +if (FAILED(hr))
> +return hr;
> +}
>  }
> +
>  for (i = 0; i < 6; ++i) /* width = 0 means empty, depth stays 1 */
>  This->dirty_rect[i].depth = 1;
>
> @@ -146,6 +165,9 @@ NineCubeTexture9_dtor( struct NineCubeTexture9 *This )
>  FREE(This->surfaces);
>  }
>
> +if (This->managed_buffer)
> +FREE(This->managed_buffer);
> +
>  NineBaseTexture9_dtor(&This->base);
>  }
>
> diff --git a/src/gallium/state_trackers/nine/cubetexture9.h 
> b/src/gallium/state_trackers/nine/cubetexture9.h
> index e8594d3..ee7e275 100644
> --- a/src/gallium/state_trackers/nine/cubetexture9.h
> +++ b/src/gallium/state_trackers/nine/cubetexture9.h
> @@ -31,6 +31,7 @@ struct NineCubeTexture9
>  struct NineBaseTexture9 base;
>  struct NineSurface9 **surfaces;
>  struct pipe_box dirty_rect[6]; /* covers all mip levels */
> +uint8_t *managed_buffer;
>  };
>  static INLINE struct NineCubeTexture9 *
>  NineCubeTexture9( void *data )
> diff --git a/src/gallium/state_trackers/nine/nine_pipe.h 
> b/src/gallium/state_trackers/nine/nine_pipe.h
> index b8e728e..20916b7 100644
> --- a/src/gallium/state_trackers/nine/nine_pipe.h
> +++ b/src/gallium/state_trackers/nine/nine_pipe.h
> @@ -673,4 +673,38 @@ 
> d3dtexturef

Re: [Mesa-dev] [PATCH 15/16] st/nine: Fix update_vertex_elements bad rebase

2015-04-24 Thread Ilia Mirkin
Reviewed-by: Ilia Mirkin 

On Fri, Apr 24, 2015 at 4:09 PM, Axel Davy  wrote:
> This code was supposed to be removed, but a rebase seems to have
> made it stay.
>
> Signed-off-by: Axel Davy 
> ---
>  src/gallium/state_trackers/nine/nine_state.c | 3 ---
>  1 file changed, 3 deletions(-)
>
> diff --git a/src/gallium/state_trackers/nine/nine_state.c 
> b/src/gallium/state_trackers/nine/nine_state.c
> index 6e01955..034e666 100644
> --- a/src/gallium/state_trackers/nine/nine_state.c
> +++ b/src/gallium/state_trackers/nine/nine_state.c
> @@ -225,9 +225,6 @@ update_vertex_elements(struct NineDevice9 *device)
>  memset(used_streams, 0, device->caps.MaxStreams);
>  vs = device->state.vs ? device->state.vs : device->ff.vs;
>
> -if (!vdecl) /* no inputs */
> -return;
> -
>  if (vdecl) {
>  for (n = 0; n < vs->num_inputs; ++n) {
>  DBG("looking up input %u (usage %u) from vdecl(%p)\n",
> --
> 2.1.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 14/16] st/nine: Add debug warning when application uses sw processing

2015-04-24 Thread Ilia Mirkin
Reviewed-by: Ilia Mirkin 

On Fri, Apr 24, 2015 at 4:09 PM, Axel Davy  wrote:
> Signed-off-by: Axel Davy 
> ---
>  src/gallium/state_trackers/nine/cubetexture9.c   |  3 +++
>  src/gallium/state_trackers/nine/device9.c|  5 +
>  src/gallium/state_trackers/nine/indexbuffer9.c   | 14 --
>  src/gallium/state_trackers/nine/texture9.c   |  5 +
>  src/gallium/state_trackers/nine/vertexbuffer9.c  |  4 +++-
>  src/gallium/state_trackers/nine/volumetexture9.c |  3 +++
>  6 files changed, 27 insertions(+), 7 deletions(-)
>
> diff --git a/src/gallium/state_trackers/nine/cubetexture9.c 
> b/src/gallium/state_trackers/nine/cubetexture9.c
> index e250315..d81cc70 100644
> --- a/src/gallium/state_trackers/nine/cubetexture9.c
> +++ b/src/gallium/state_trackers/nine/cubetexture9.c
> @@ -93,6 +93,9 @@ NineCubeTexture9_ctor( struct NineCubeTexture9 *This,
>  PIPE_BIND_TRANSFER_READ |
>  PIPE_BIND_TRANSFER_WRITE;
>  }
> +if (Usage & D3DUSAGE_SOFTWAREPROCESSING)
> +DBG("Application asked for Software Vertex Processing, "
> +"but this is unimplemented\n");
>
>  This->surfaces = CALLOC(6 * (info->last_level + 1), 
> sizeof(*This->surfaces));
>  if (!This->surfaces)
> diff --git a/src/gallium/state_trackers/nine/device9.c 
> b/src/gallium/state_trackers/nine/device9.c
> index 4ef02bb..924f755 100644
> --- a/src/gallium/state_trackers/nine/device9.c
> +++ b/src/gallium/state_trackers/nine/device9.c
> @@ -183,6 +183,11 @@ NineDevice9_ctor( struct NineDevice9 *This,
>  if (!(This->params.BehaviorFlags & D3DCREATE_FPU_PRESERVE))
>  nine_setup_fpu();
>
> +if (This->params.BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING)
> +DBG("Application asked full Software Vertex Processing. 
> Ignoring.\n");
> +if (This->params.BehaviorFlags & D3DCREATE_MIXED_VERTEXPROCESSING)
> +DBG("Application asked mixed Software Vertex Processing. 
> Ignoring.\n");
> +
>  This->pipe = This->screen->context_create(This->screen, NULL);
>  if (!This->pipe) { return E_OUTOFMEMORY; } /* guess */
>
> diff --git a/src/gallium/state_trackers/nine/indexbuffer9.c 
> b/src/gallium/state_trackers/nine/indexbuffer9.c
> index 48553fd..860313b 100644
> --- a/src/gallium/state_trackers/nine/indexbuffer9.c
> +++ b/src/gallium/state_trackers/nine/indexbuffer9.c
> @@ -63,12 +63,14 @@ NineIndexBuffer9_ctor( struct NineIndexBuffer9 *This,
>  if (pDesc->Pool == D3DPOOL_SYSTEMMEM)
>  info->usage = PIPE_USAGE_STAGING;
>
> -/* if (This->desc.Usage & D3DUSAGE_DONOTCLIP) { } */
> -/* if (This->desc.Usage & D3DUSAGE_NONSECURE) { } */
> -/* if (This->desc.Usage & D3DUSAGE_NPATCHES) { } */
> -/* if (This->desc.Usage & D3DUSAGE_POINTS) { } */
> -/* if (This->desc.Usage & D3DUSAGE_RTPATCHES) { } */
> -/* if (This->desc.Usage & D3DUSAGE_SOFTWAREPROCESSING) { } */
> +/* if (pDesc->Usage & D3DUSAGE_DONOTCLIP) { } */
> +/* if (pDesc->Usage & D3DUSAGE_NONSECURE) { } */
> +/* if (pDesc->Usage & D3DUSAGE_NPATCHES) { } */
> +/* if (pDesc->Usage & D3DUSAGE_POINTS) { } */
> +/* if (pDesc->Usage & D3DUSAGE_RTPATCHES) { } */
> +if (pDesc->Usage & D3DUSAGE_SOFTWAREPROCESSING)
> +DBG("Application asked for Software Vertex Processing, "
> +"but this is unimplemented\n");
>
>  info->height0 = 1;
>  info->depth0 = 1;
> diff --git a/src/gallium/state_trackers/nine/texture9.c 
> b/src/gallium/state_trackers/nine/texture9.c
> index 0dbdd37..78a632f 100644
> --- a/src/gallium/state_trackers/nine/texture9.c
> +++ b/src/gallium/state_trackers/nine/texture9.c
> @@ -125,6 +125,11 @@ NineTexture9_ctor( struct NineTexture9 *This,
>  PIPE_BIND_TRANSFER_READ |
>  PIPE_BIND_TRANSFER_WRITE;
>  }
> +
> +if (Usage & D3DUSAGE_SOFTWAREPROCESSING)
> +DBG("Application asked for Software Vertex Processing, "
> +"but this is unimplemented\n");
> +
>  if (pSharedHandle)
>  info->bind |= PIPE_BIND_SHARED;
>
> diff --git a/src/gallium/state_trackers/nine/vertexbuffer9.c 
> b/src/gallium/state_trackers/nine/vertexbuffer9.c
> index 11cc60f..8e2eaaf 100644
> --- a/src/gallium/state_trackers/nine/vertexbuffer9.c
> +++ b/src/gallium/state_trackers/nine/vertexbuffer9.c
> @@ -76,7 +76,9 @@ NineVertexBuffer9_ctor( struct NineVertexBuffer9 *This,
>  /* if (pDesc->Usage & D3DUSAGE_NPATCHES) { } */
>  /* if (pDesc->Usage & D3DUSAGE_POINTS) { } */
>  /* if (pDesc->Usage & D3DUSAGE_RTPATCHES) { } */
> -/* if (pDesc->Usage & D3DUSAGE_SOFTWAREPROCESSING) { } */
> +if (pDesc->Usage & D3DUSAGE_SOFTWAREPROCESSING)
> +DBG("Application asked for Software Vertex Processing, "
> +"but this is unimplemented\n");
>  /* if (pDesc->Usage & D3DUSAGE_TEXTAPI) { } */
>
>  info->height0 = 1;
> diff --git a/src/gallium/state_trackers/nine/volumetexture9.c 
> b/src/gallium/state_trackers/nine/volumetexture9.c
> index 6c0073f..08fdd

Re: [Mesa-dev] [PATCH 13/16] st/nine: Rework update_vertex_buffers

2015-04-24 Thread Ilia Mirkin
On Fri, Apr 24, 2015 at 4:09 PM, Axel Davy  wrote:
> Previous code was trying to optimise to call set_vertex_buffers on
> big packets, and thus avoids as many calls as possible.
>
> However in practice doing so won't be faster (drivers implement
> set_vertex_buffers by a loop over the buffers we want to bind)
>
> When we want to unbind a buffer, we were calling set_vertex_buffers
> on a buffer with vtxbuf->buffer = NULL. It works on some drivers,
> but not on all of them, because it isn't in Gallium spec.
> This patch fixes that.
>
> Signed-off-by: Axel Davy 
> ---
>  src/gallium/state_trackers/nine/nine_state.c | 15 ---
>  1 file changed, 4 insertions(+), 11 deletions(-)
>
> diff --git a/src/gallium/state_trackers/nine/nine_state.c 
> b/src/gallium/state_trackers/nine/nine_state.c
> index 27800c6..6e01955 100644
> --- a/src/gallium/state_trackers/nine/nine_state.c
> +++ b/src/gallium/state_trackers/nine/nine_state.c
> @@ -631,7 +631,6 @@ update_vertex_buffers(struct NineDevice9 *device)
>  uint32_t mask = state->changed.vtxbuf;
>  unsigned i;
>  unsigned start;
> -unsigned count = 0;
>
>  DBG("mask=%x\n", mask);
>
> @@ -650,18 +649,12 @@ update_vertex_buffers(struct NineDevice9 *device)
>
>  for (i = 0; mask; mask >>= 1, ++i) {

FWIW, not sure if it's faster, but you could do like

while (mask) {
  i = ffs(mask) - 1;
  ...
  mask &= ~(1 << i);
}

In practice, if all the bits tend to be consecutive starting at 0,
this will probably even be slower.

Either way, this is

Reviewed-by: Ilia Mirkin 

>  if (mask & 1) {
> -if (!count)
> -start = i;
> -++count;
> -} else {
> -if (count)
> -pipe->set_vertex_buffers(pipe, start, count,
> - &state->vtxbuf[start]);
> -count = 0;
> +if (state->vtxbuf[i].buffer)
> +pipe->set_vertex_buffers(pipe, i, 1, &state->vtxbuf[i]);
> +else
> +pipe->set_vertex_buffers(pipe, i, 1, NULL);
>  }
>  }
> -if (count)
> -pipe->set_vertex_buffers(pipe, start, count, &state->vtxbuf[start]);
>
>  state->changed.vtxbuf = 0;
>  }
> --
> 2.1.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/16] st/nine: Change x86 FPU Control word on device creation as on wined3d and windows

2015-04-24 Thread Henri Verbeet
On 24 April 2015 at 22:09, Axel Davy  wrote:
> +static void nine_setup_fpu(void)
> +{
> +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
> +WORD cw;
> +__asm__ volatile ("fnstcw %0" : "=m" (cw));
> +cw = (cw & ~0xf3f) | 0x3f;
> +__asm__ volatile ("fldcw %0" : : "m" (cw));
> +#else
> +WARN_ONCE("FPU setup not supported on non-x86 platforms\n");
> +#endif
> +}
> +
This is once again similar enough to the corresponding Wine source
that I feel the need to remind you, this time more strongly, that Wine
is licensed under LGPL 2.1+. ( For the curious, (warning, LGPL)
https://source.winehq.org/git/wine.git/blob/25f0606e84bef7d60ea5c681d19b368660cab8e3:/dlls/d3d9/device.c#l3604)
Besides, proper Gallium style would have been to use PIPE_CC_GCC and
PIPE_ARCH_X86/PIPE_ARCH_X86_64.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/16] st/nine: Fix computation of const_used_size

2015-04-24 Thread Ilia Mirkin
On Fri, Apr 24, 2015 at 4:09 PM, Axel Davy  wrote:
> From: Xavier Bouchoux 
>
> Was sometimes too large for PS.
>
> Reviewed-by: Axel Davy 
> Signed-off-by: Xavier Bouchoux 
> ---
>  src/gallium/state_trackers/nine/nine_shader.c | 8 +---
>  1 file changed, 5 insertions(+), 3 deletions(-)
>
> diff --git a/src/gallium/state_trackers/nine/nine_shader.c 
> b/src/gallium/state_trackers/nine/nine_shader.c
> index 1f1f7c8..7beb1ab 100644
> --- a/src/gallium/state_trackers/nine/nine_shader.c
> +++ b/src/gallium/state_trackers/nine/nine_shader.c
> @@ -3082,6 +3082,7 @@ nine_translate_shader(struct NineDevice9 *device, 
> struct nine_shader_info *info)
>  HRESULT hr = D3D_OK;
>  const unsigned processor = tgsi_processor_from_type(info->type);
>  unsigned s, slot_max;
> +unsigned max_const_f;
>
>  user_assert(processor != ~0, D3DERR_INVALIDCALL);
>
> @@ -3221,11 +3222,12 @@ nine_translate_shader(struct NineDevice9 *device, 
> struct nine_shader_info *info)
>  if (tx->indirect_const_access) /* vs only */
>  info->const_float_slots = device->max_vs_const_f;
>
> +max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f;
>  slot_max = info->const_bool_slots > 0 ?
> -   device->max_vs_const_f + NINE_MAX_CONST_I
> -   + info->const_bool_slots :
> +   max_const_f + NINE_MAX_CONST_I
> +   + (info->const_bool_slots+3)/4 :

aka DIV_ROUND_UP(info->const_bool_slots, 4) ?

With that fixed up,

Reviewed-by: Ilia Mirkin 

> info->const_int_slots > 0 ?
> -   device->max_vs_const_f + info->const_int_slots :
> +   max_const_f + info->const_int_slots :
> info->const_float_slots;
>  info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 
> 1 */
>
> --
> 2.1.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 12/22] i965: Rename use_linear_1d_layout() and make it global

2015-04-24 Thread Pohjolainen, Topi
On Fri, Apr 17, 2015 at 04:51:33PM -0700, Anuj Phogat wrote:
> This function will be utilised in later patches.
> 
> Signed-off-by: Anuj Phogat 
> ---
>  src/mesa/drivers/dri/i965/brw_context.h|  4 
>  src/mesa/drivers/dri/i965/brw_tex_layout.c | 10 +-
>  2 files changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index 6c168a3..3d0b35c 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -1938,6 +1938,10 @@ gen6_upload_push_constants(struct brw_context *brw,
> struct brw_stage_state *stage_state,
> enum aub_state_struct_type type);
>  
> +bool
> +gen9_use_linear_1d_layout(struct brw_context *brw,
> +  struct intel_mipmap_tree *mt);

I would use the opportunity to make both pointers constants - the function
only uses them for reading. Anyway:

Reviewed-by: Topi Pohjolainen 

> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c 
> b/src/mesa/drivers/dri/i965/brw_tex_layout.c
> index 9342101..28927e9 100644
> --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
> +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
> @@ -430,9 +430,9 @@ align_cube(struct intel_mipmap_tree *mt)
>mt->total_height += 2;
>  }
>  
> -static bool
> -use_linear_1d_layout(struct brw_context *brw,
> - struct intel_mipmap_tree *mt)
> +bool
> +gen9_use_linear_1d_layout(struct brw_context *brw,
> +  struct intel_mipmap_tree *mt)
>  {
> /* On Gen9+ the mipmap levels of a 1D surface are all laid out in a
>  * horizontal line. This isn't done for depth/stencil buffers however
> @@ -458,7 +458,7 @@ brw_miptree_layout_texture_array(struct brw_context *brw,
>  {
> int h0, h1;
> unsigned height = mt->physical_height0;
> -   bool layout_1d = use_linear_1d_layout(brw, mt);
> +   bool layout_1d = gen9_use_linear_1d_layout(brw, mt);
>  
> h0 = ALIGN(mt->physical_height0, mt->align_h);
> h1 = ALIGN(minify(mt->physical_height0, 1), mt->align_h);
> @@ -674,7 +674,7 @@ intel_miptree_total_width_height(struct brw_context *brw,
>   break;
>case INTEL_MSAA_LAYOUT_NONE:
>case INTEL_MSAA_LAYOUT_IMS:
> - if (use_linear_1d_layout(brw, mt))
> + if (gen9_use_linear_1d_layout(brw, mt))
>  gen9_miptree_layout_1d(mt);
>   else
>  brw_miptree_layout_2d(mt);
> -- 
> 2.3.4
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/16] st/nine: Ignore D3DUSAGE_QUERY_POSTPIXELSHADER_BLENDING when D3DUSAGE_RENDERTARGET is specified

2015-04-24 Thread Axel Davy
Oops, yep, I changed the commit message to make it shorter, but I made it
say the opposite of what it was supposed to...


Le 24/04/2015 23:32, Ilia Mirkin a écrit :

The diff does not agree with the commit description. Also please limit
commit descriptions to 72 chars.

On Fri, Apr 24, 2015 at 4:09 PM, Axel Davy  wrote:

From: Xavier Bouchoux 

This behaviour matches windows drivers.

Reviewed-by: Axel Davy 
Signed-off-by: Xavier Bouchoux 
---
  src/gallium/state_trackers/nine/adapter9.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/nine/adapter9.c 
b/src/gallium/state_trackers/nine/adapter9.c
index 1d75155..9d6d659 100644
--- a/src/gallium/state_trackers/nine/adapter9.c
+++ b/src/gallium/state_trackers/nine/adapter9.c
@@ -311,7 +311,8 @@ NineAdapter9_CheckDeviceFormat( struct NineAdapter9 *This,
  if (CheckFormat == D3DFMT_ATOC && RType == D3DRTYPE_SURFACE)
  return D3D_OK;

-if (Usage & D3DUSAGE_QUERY_POSTPIXELSHADER_BLENDING)
+if ((Usage & D3DUSAGE_QUERY_POSTPIXELSHADER_BLENDING) &&
+(Usage & D3DUSAGE_RENDERTARGET))
  bind |= PIPE_BIND_BLENDABLE;

  if (Usage & D3DUSAGE_DMAP) {
--
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 11/22] i965/gen9: Set tiled resource mode in surface state

2015-04-24 Thread Pohjolainen, Topi
On Fri, Apr 17, 2015 at 04:51:32PM -0700, Anuj Phogat wrote:
> This patch sets the tiled resource mode for texture and renderbuffer
> surfaces.
> 
> Signed-off-by: Anuj Phogat 
> ---
>  src/mesa/drivers/dri/i965/brw_defines.h|  6 ++
>  src/mesa/drivers/dri/i965/gen8_surface_state.c | 22 ++
>  2 files changed, 28 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
> b/src/mesa/drivers/dri/i965/brw_defines.h
> index da6ed5b..c62c09b 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -588,6 +588,12 @@
>  #define GEN7_SURFACE_MOCS_SHIFT 16
>  #define GEN7_SURFACE_MOCS_MASK  INTEL_MASK(19, 16)
>  
> +#define GEN9_SURFACE_TRMODE_SHIFT  18
> +#define GEN9_SURFACE_TRMODE_MASK   INTEL_MASK(19, 18)
> +#define GEN9_SURFACE_TRMODE_NONE   0
> +#define GEN9_SURFACE_TRMODE_TILEYF 1
> +#define GEN9_SURFACE_TRMODE_TILEYS 2
> +
>  /* Surface state DW6 */
>  #define GEN7_SURFACE_MCS_ENABLE (1 << 0)
>  #define GEN7_SURFACE_MCS_PITCH_SHIFT3
> diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c 
> b/src/mesa/drivers/dri/i965/gen8_surface_state.c
> index 7f82f53..d2eceff 100644
> --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
> @@ -56,6 +56,19 @@ swizzle_to_scs(unsigned swizzle)
>  }
>  
>  static uint32_t
> +surface_tiling_resource_mode(uint32_t tr_mode)
> +{
> +   switch (tr_mode) {
> +   case INTEL_MIPTREE_TRMODE_YF:
> +  return GEN9_SURFACE_TRMODE_TILEYF;
> +   case INTEL_MIPTREE_TRMODE_YS:
> +  return GEN9_SURFACE_TRMODE_TILEYS;
> +   default:
> +  return GEN9_SURFACE_TRMODE_NONE;
> +   }
> +}
> +
> +static uint32_t
>  surface_tiling_mode(uint32_t tiling)
>  {
> switch (tiling) {
> @@ -173,6 +186,8 @@ gen8_update_texture_surface(struct gl_context *ctx,
> }
>  
> unsigned tiling_mode, pitch;
> +   unsigned tr_mode = surface_tiling_resource_mode(mt->tr_mode);
> +
> if (format == MESA_FORMAT_S_UINT8) {
>tiling_mode = GEN8_SURFACE_TILING_W;
>pitch = 2 * mt->pitch;
> @@ -227,6 +242,9 @@ gen8_update_texture_surface(struct gl_context *ctx,
> GEN7_SURFACE_MIN_LOD) |
>   (intelObj->_MaxLevel - tObj->BaseLevel); /* mip count */
>  
> +   if (brw->gen >= 9)
> +  surf[5] |= SET_FIELD(tr_mode, GEN9_SURFACE_TRMODE);

I would just write (and the same further down):

 surf[5] |= SET_FIELD(surface_tiling_resource_mode(mt->tr_mode),
  GEN9_SURFACE_TRMODE);

Or at least declare 'tr_mode' as constant.

Up to you, and in any case:

Reviewed-by: Topi Pohjolainen 

> +
> if (aux_mt) {
>surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
>  SET_FIELD((aux_mt->pitch / 128) - 1, GEN8_SURFACE_AUX_PITCH) 
> |
> @@ -318,6 +336,7 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
> unsigned height = mt->logical_height0;
> unsigned pitch = mt->pitch;
> uint32_t tiling = mt->tiling;
> +   unsigned tr_mode = surface_tiling_resource_mode(mt->tr_mode);
> uint32_t format = 0;
> uint32_t surf_type;
> bool is_array = false;
> @@ -398,6 +417,9 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
>  
> surf[5] = irb->mt_level - irb->mt->first_level;
>  
> +   if (brw->gen >= 9)
> +  surf[5] |= SET_FIELD(tr_mode, GEN9_SURFACE_TRMODE);
> +
> if (aux_mt) {
>surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
>  SET_FIELD((aux_mt->pitch / 128) - 1, GEN8_SURFACE_AUX_PITCH) 
> |
> -- 
> 2.3.4
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/16] st/nine: Ignore D3DUSAGE_QUERY_POSTPIXELSHADER_BLENDING when D3DUSAGE_RENDERTARGET is specified

2015-04-24 Thread Ilia Mirkin
On Fri, Apr 24, 2015 at 5:32 PM, Ilia Mirkin  wrote:
> The diff does not agree with the commit description. Also please limit
> commit descriptions to 72 chars.

er, of course I meant commit *subjects*. That said, each line of the
commit description should also be wrapped at 72. (Because they're
indented by a tab when shown with 'git show' and so on.)

>
> On Fri, Apr 24, 2015 at 4:09 PM, Axel Davy  wrote:
>> From: Xavier Bouchoux 
>>
>> This behaviour matches windows drivers.
>>
>> Reviewed-by: Axel Davy 
>> Signed-off-by: Xavier Bouchoux 
>> ---
>>  src/gallium/state_trackers/nine/adapter9.c | 3 ++-
>>  1 file changed, 2 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/gallium/state_trackers/nine/adapter9.c 
>> b/src/gallium/state_trackers/nine/adapter9.c
>> index 1d75155..9d6d659 100644
>> --- a/src/gallium/state_trackers/nine/adapter9.c
>> +++ b/src/gallium/state_trackers/nine/adapter9.c
>> @@ -311,7 +311,8 @@ NineAdapter9_CheckDeviceFormat( struct NineAdapter9 
>> *This,
>>  if (CheckFormat == D3DFMT_ATOC && RType == D3DRTYPE_SURFACE)
>>  return D3D_OK;
>>
>> -if (Usage & D3DUSAGE_QUERY_POSTPIXELSHADER_BLENDING)
>> +if ((Usage & D3DUSAGE_QUERY_POSTPIXELSHADER_BLENDING) &&
>> +(Usage & D3DUSAGE_RENDERTARGET))
>>  bind |= PIPE_BIND_BLENDABLE;
>>
>>  if (Usage & D3DUSAGE_DMAP) {
>> --
>> 2.1.0
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/16] st/nine: Ignore D3DUSAGE_QUERY_POSTPIXELSHADER_BLENDING when D3DUSAGE_RENDERTARGET is specified

2015-04-24 Thread Ilia Mirkin
The diff does not agree with the commit description. Also please limit
commit descriptions to 72 chars.

On Fri, Apr 24, 2015 at 4:09 PM, Axel Davy  wrote:
> From: Xavier Bouchoux 
>
> This behaviour matches windows drivers.
>
> Reviewed-by: Axel Davy 
> Signed-off-by: Xavier Bouchoux 
> ---
>  src/gallium/state_trackers/nine/adapter9.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/state_trackers/nine/adapter9.c 
> b/src/gallium/state_trackers/nine/adapter9.c
> index 1d75155..9d6d659 100644
> --- a/src/gallium/state_trackers/nine/adapter9.c
> +++ b/src/gallium/state_trackers/nine/adapter9.c
> @@ -311,7 +311,8 @@ NineAdapter9_CheckDeviceFormat( struct NineAdapter9 *This,
>  if (CheckFormat == D3DFMT_ATOC && RType == D3DRTYPE_SURFACE)
>  return D3D_OK;
>
> -if (Usage & D3DUSAGE_QUERY_POSTPIXELSHADER_BLENDING)
> +if ((Usage & D3DUSAGE_QUERY_POSTPIXELSHADER_BLENDING) &&
> +(Usage & D3DUSAGE_RENDERTARGET))
>  bind |= PIPE_BIND_BLENDABLE;
>
>  if (Usage & D3DUSAGE_DMAP) {
> --
> 2.1.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/16] st/nine: Improve D3DQUERYTYPE_TIMESTAMP

2015-04-24 Thread Ilia Mirkin
That makes sense. Probably the FLUSH is supposed to imply something
else, which in turn just makes the timestamp query succeed...
something like

if (flush)
  pipe->flush()

or something. That's basically what telling it to wait does though, so
this is fine.

Reviewed-by: Ilia Mirkin 

On Fri, Apr 24, 2015 at 4:09 PM, Axel Davy  wrote:
> From: Xavier Bouchoux 
>
> Avoid blocking when retrieving D3DQUERYTYPE_TIMESTAMP result with
> NineQuery9_GetData(), when D3DGETDATA_FLUSH is not specified.
> This mimics Win behaviour and gives slightly better performance
> for some games.
>
> Reviewed-by: Axel Davy 
> Signed-off-by: Xavier Bouchoux 
> ---
>  src/gallium/state_trackers/nine/query9.c | 9 +++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/state_trackers/nine/query9.c 
> b/src/gallium/state_trackers/nine/query9.c
> index df49340..04f4aad 100644
> --- a/src/gallium/state_trackers/nine/query9.c
> +++ b/src/gallium/state_trackers/nine/query9.c
> @@ -227,8 +227,13 @@ NineQuery9_GetData( struct NineQuery9 *This,
>  wait_query_result = TRUE;
>  }
>
> -/* Wine tests: D3DQUERYTYPE_TIMESTAMP always succeeds */
> -wait_query_result |= This->type == D3DQUERYTYPE_TIMESTAMP;
> +/* The documention mentions no special case for D3DQUERYTYPE_TIMESTAMP.
> + * However Windows tests show that the query always succeeds when
> + * D3DGETDATA_FLUSH is specified. */
> +if (This->type == D3DQUERYTYPE_TIMESTAMP &&
> +(dwGetDataFlags & D3DGETDATA_FLUSH))
> +wait_query_result = TRUE;
> +
>
>  /* Note: We ignore dwGetDataFlags, because get_query_result will
>   * flush automatically if needed */
> --
> 2.1.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 18/22] i965/skl: Modify the conditions to use blitter on skl+

2015-04-24 Thread Pohjolainen, Topi
On Fri, Apr 17, 2015 at 04:51:39PM -0700, Anuj Phogat wrote:
> Conditions modified allow skl+ to use blitter:
>  - for all tiling formats
>  - to write data to YF/YS tiled surfaces
> 
> Signed-off-by: Anuj Phogat 
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 12 +---
>  1 file changed, 9 insertions(+), 3 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index e547544..2d218b4 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -2518,13 +2518,17 @@ use_intel_mipree_map_blit(struct brw_context *brw,
>  {
> if (brw->has_llc &&
>/* It's probably not worth swapping to the blit ring because of
> -   * all the overhead involved.
> +   * all the overhead involved. But, we must use blitter for the
> +   * surfaces with INTEL_MIPTREE_TRMODE_{YF,YS}.
> */
> -   !(mode & GL_MAP_WRITE_BIT) &&
> +   (!(mode & GL_MAP_WRITE_BIT) ||
> +mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) &&
> !mt->compressed &&
> (mt->tiling == I915_TILING_X ||
>  /* Prior to Sandybridge, the blitter can't handle Y tiling */
> -(brw->gen >= 6 && mt->tiling == I915_TILING_Y)) &&
> +(brw->gen >= 6 && mt->tiling == I915_TILING_Y) ||
> +/* XY_FAST_COPY_BLT on Skylake also supports YF/YS tiling */
> +brw->gen >= 9) &&
 
Indent these two lines by one space; the condition here is part of the
preceding OR.

With that:

Reviewed-by: Topi Pohjolainen 

> can_blit_slice(mt, level, slice))
>return true;
>  
> @@ -2591,6 +2595,8 @@ intel_miptree_map(struct brw_context *brw,
>intel_miptree_map_movntdqa(brw, mt, map, level, slice);
>  #endif
> } else {
> +  /* Yf/Ys tiled surfaces can't be mapped using GTT. */
> +  assert(mt->tr_mode == INTEL_MIPTREE_TRMODE_NONE);
>intel_miptree_map_gtt(brw, mt, map, level, slice);
> }
>  
> -- 
> 2.3.4
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 09/16] st/nine: Fix D3DQUERYTYPE_TIMESTAMPFREQ query

2015-04-24 Thread Ilia Mirkin
From 
https://msdn.microsoft.com/en-us/library/windows/desktop/bb172594%28v=vs.85%29.aspx

D3DQUERYTYPE_TIMESTAMPFREQ
This query result is TRUE if the values from D3DQUERYTYPE_TIMESTAMP
queries cannot be guaranteed to be continuous throughout the duration
of the D3DQUERYTYPE_TIMESTAMPDISJOINT query. Otherwise, the query
result is FALSE.

Seems like you should be returning 0 here? Seems like contrary to the
name, it has nothing to do with frequency...


On Fri, Apr 24, 2015 at 4:09 PM, Axel Davy  wrote:
> From: Xavier Bouchoux 
>
> D3DQUERYTYPE_TIMESTAMPFREQ is supposed to give the frequency
> at which the clock of D3DQUERYTYPE_TIMESTAMP runs.
>
> PIPE_QUERY_TIMESTAMP returns a value in ns, thus the corresponding
> frequency is 1000000000.
> PIPE_QUERY_TIMESTAMP_DISJOINT returns the frequency at which
> PIPE_QUERY_TIMESTAMP value is updated. It isn't always
> 1000000000.
>
> Reviewed-by: Axel Davy 
> Signed-off-by: Xavier Bouchoux 
> ---
>  src/gallium/state_trackers/nine/query9.c | 10 +-
>  1 file changed, 9 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/state_trackers/nine/query9.c 
> b/src/gallium/state_trackers/nine/query9.c
> index 466b4ba..df49340 100644
> --- a/src/gallium/state_trackers/nine/query9.c
> +++ b/src/gallium/state_trackers/nine/query9.c
> @@ -254,7 +254,15 @@ NineQuery9_GetData( struct NineQuery9 *This,
>  nresult.b = presult.timestamp_disjoint.disjoint;
>  break;
>  case D3DQUERYTYPE_TIMESTAMPFREQ:
> -nresult.u64 = presult.timestamp_disjoint.frequency;
> +/* Applications use it to convert the TIMESTAMP value to time.
> +   AMD drivers on win seem to return the actual hardware clock
> +   resolution and corresponding values in TIMESTAMP.
> +   However, this behaviour is not easy to replicate here.
> +   So instead we do what wine and opengl do, and use
> +   nanoseconds TIMESTAMPs.
> +   (Which is also the unit used by PIPE_QUERY_TIMESTAMP.)
> +*/
> +nresult.u64 = 10;
>  break;
>  case D3DQUERYTYPE_VERTEXSTATS:
>  nresult.vertexstats.NumRenderedTriangles =
> --
> 2.1.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/16] st/nine: Do not advertise D3DDEVCAPS_TEXTURESYSTEMMEMORY

2015-04-24 Thread Ilia Mirkin
Reviewed-by: Ilia Mirkin 

On Fri, Apr 24, 2015 at 4:09 PM, Axel Davy  wrote:
> No major vendor advertises it, and we weren't supporting it.
>
> Signed-off-by: Axel Davy 
> ---
>  src/gallium/state_trackers/nine/adapter9.c |  2 +-
>  src/gallium/state_trackers/nine/device9.c  | 10 ++
>  2 files changed, 3 insertions(+), 9 deletions(-)
>
> diff --git a/src/gallium/state_trackers/nine/adapter9.c 
> b/src/gallium/state_trackers/nine/adapter9.c
> index 069cc03..1d75155 100644
> --- a/src/gallium/state_trackers/nine/adapter9.c
> +++ b/src/gallium/state_trackers/nine/adapter9.c
> @@ -545,7 +545,7 @@ NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This,
>   /*D3DDEVCAPS_RTPATCHHANDLEZERO |*/
>   /*D3DDEVCAPS_SEPARATETEXTUREMEMORIES |*/
>   /*D3DDEVCAPS_TEXTURENONLOCALVIDMEM |*/
> - D3DDEVCAPS_TEXTURESYSTEMMEMORY |
> + /* D3DDEVCAPS_TEXTURESYSTEMMEMORY |*/
>   D3DDEVCAPS_TEXTUREVIDEOMEMORY |
>   D3DDEVCAPS_TLVERTEXSYSTEMMEMORY |
>   D3DDEVCAPS_TLVERTEXVIDEOMEMORY;
> diff --git a/src/gallium/state_trackers/nine/device9.c 
> b/src/gallium/state_trackers/nine/device9.c
> index 9ca1bb9..1a776a7 100644
> --- a/src/gallium/state_trackers/nine/device9.c
> +++ b/src/gallium/state_trackers/nine/device9.c
> @@ -2406,14 +2406,8 @@ NineDevice9_SetTexture( struct NineDevice9 *This,
>  Stage == D3DDMAPSAMPLER ||
>  (Stage >= D3DVERTEXTEXTURESAMPLER0 &&
>   Stage <= D3DVERTEXTEXTURESAMPLER3), D3DERR_INVALIDCALL);
> -user_assert(!tex || tex->base.pool != D3DPOOL_SCRATCH, 
> D3DERR_INVALIDCALL);
> -
> -if (unlikely(tex && tex->base.pool == D3DPOOL_SYSTEMMEM)) {
> -/* TODO: Currently not implemented. Better return error
> - * with message telling what's wrong */
> -ERR("This=%p D3DPOOL_SYSTEMMEM not implemented for SetTexture\n", 
> This);
> -user_assert(tex->base.pool != D3DPOOL_SYSTEMMEM, D3DERR_INVALIDCALL);
> -}
> +user_assert(!tex || (tex->base.pool != D3DPOOL_SCRATCH &&
> +tex->base.pool != D3DPOOL_SYSTEMMEM), D3DERR_INVALIDCALL);
>
>  if (Stage >= D3DDMAPSAMPLER)
>  Stage = Stage - D3DDMAPSAMPLER + NINE_MAX_SAMPLERS_PS;
> --
> 2.1.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/16] st/nine: Fix comment in update_viewport

2015-04-24 Thread Ilia Mirkin
Reviewed-by: Ilia Mirkin 

On Fri, Apr 24, 2015 at 4:09 PM, Axel Davy  wrote:
> Signed-off-by: Axel Davy 
> ---
>  src/gallium/state_trackers/nine/nine_state.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/src/gallium/state_trackers/nine/nine_state.c 
> b/src/gallium/state_trackers/nine/nine_state.c
> index 6c7eab3..27800c6 100644
> --- a/src/gallium/state_trackers/nine/nine_state.c
> +++ b/src/gallium/state_trackers/nine/nine_state.c
> @@ -138,10 +138,9 @@ update_viewport(struct NineDevice9 *device)
>  const D3DVIEWPORT9 *vport = &device->state.viewport;
>  struct pipe_viewport_state pvport;
>
> -/* XXX:
> - * I hope D3D clip coordinates are still
> +/* D3D coordinates are:
>   * -1 .. +1 for X,Y and
> - *  0 .. +1 for Z (use pipe_rasterizer_state.clip_halfz)
> + *  0 .. +1 for Z (we use pipe_rasterizer_state.clip_halfz)
>   */
>  pvport.scale[0] = (float)vport->Width * 0.5f;
>  pvport.scale[1] = (float)vport->Height * -0.5f;
> --
> 2.1.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/16] st/nine: Workaround barycentrics issue on some cards

2015-04-24 Thread Ilia Mirkin
On Fri, Apr 24, 2015 at 4:09 PM, Axel Davy  wrote:
> Signed-off-by: Axel Davy 
> ---
>  src/gallium/state_trackers/nine/device9.c|  4 +++-
>  src/gallium/state_trackers/nine/device9.h|  4 
>  src/gallium/state_trackers/nine/nine_state.c | 24 
>  3 files changed, 31 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/state_trackers/nine/device9.c 
> b/src/gallium/state_trackers/nine/device9.c
> index 43eb7e6..9ca1bb9 100644
> --- a/src/gallium/state_trackers/nine/device9.c
> +++ b/src/gallium/state_trackers/nine/device9.c
> @@ -310,8 +310,10 @@ NineDevice9_ctor( struct NineDevice9 *This,
>  return E_OUTOFMEMORY;
>
>  if (strstr(pScreen->get_name(pScreen), "AMD") ||
> -strstr(pScreen->get_name(pScreen), "ATI"))
> +strstr(pScreen->get_name(pScreen), "ATI")) {
>  This->prefer_user_constbuf = TRUE;
> +This->driver_bugs.buggy_barycentrics = TRUE;
> +}
>
>  tmpl.target = PIPE_BUFFER;
>  tmpl.format = PIPE_FORMAT_R8_UNORM;
> diff --git a/src/gallium/state_trackers/nine/device9.h 
> b/src/gallium/state_trackers/nine/device9.h
> index f412088..d662f83 100644
> --- a/src/gallium/state_trackers/nine/device9.h
> +++ b/src/gallium/state_trackers/nine/device9.h
> @@ -118,6 +118,10 @@ struct NineDevice9
>  boolean ps_integer;
>  } driver_caps;
>
> +struct {
> +boolean buggy_barycentrics;

Might I suggest either describing the bug or the solution to the bug? e.g.

boolean shift_viewport;

Perhaps there's another different buggy barycentric problem which is
solved in an entirely different way.

Either way,

Acked-by: Ilia Mirkin 

(I don't sufficiently understand the problem to verify that your
solution is correct.)

BTW, have you thought about FBO flipping? Do you have to translate in
the other direction in that case? Or I guess the scaling takes care of
that...

> +} driver_bugs;
> +
>  struct u_upload_mgr *upload;
>
>  struct nine_range_pool range_pool;
> diff --git a/src/gallium/state_trackers/nine/nine_state.c 
> b/src/gallium/state_trackers/nine/nine_state.c
> index 495cc86..6c7eab3 100644
> --- a/src/gallium/state_trackers/nine/nine_state.c
> +++ b/src/gallium/state_trackers/nine/nine_state.c
> @@ -150,6 +150,30 @@ update_viewport(struct NineDevice9 *device)
>  pvport.translate[1] = (float)vport->Height * 0.5f + (float)vport->Y;
>  pvport.translate[2] = vport->MinZ;
>
> +/* We found R600 and SI cards have some imprecision
> + * on the barycentric coordinates used for interpolation.
> + * Some shaders rely on having something precise.
> + * We found that the proprietary driver has the imprecision issue,
> + * except when the render target width and height are powers of two.
> + * It is using some sort of workaround for these cases
> + * which covers likely all the cases the applications rely
> + * on something precise.
> + * We haven't found the workaround, but it seems like it's better
> + * for applications if the imprecision is biased towards infinity
> + * instead of -infinity (which is what measured). So shift slightly
> + * the viewport: not enough to change rasterization result (in particular
> + * for multisampling), but enough to make the imprecision biased
> + * towards infinity. We do this shift only if render target width and
> + * height are powers of two.
> + * Solves 'red shadows' bug on UE3 games.
> + */
> +if (device->driver_bugs.buggy_barycentrics &&
> +((vport->Width & (vport->Width-1)) == 0) &&
> +((vport->Height & (vport->Height-1)) == 0)) {
> +pvport.translate[0] -= 1.0f / 128.0f;
> +pvport.translate[1] -= 1.0f / 128.0f;
> +}
> +
>  pipe->set_viewport_states(pipe, 0, 1, &pvport);
>  }
>
> --
> 2.1.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 04/16] st/nine: Clear struct pipe_blit_info before use.

2015-04-24 Thread Ilia Mirkin
On Fri, Apr 24, 2015 at 4:09 PM, Axel Davy  wrote:
> From: Xavier Bouchoux 
>
> render_condition_enable was uninitialized.
>
> Reviewed-by: Axel Davy 
> Signed-off-by: Xavier Bouchoux 

I assume d3d9 doesn't have conditional rendering? If it does, you
should double-check whether StretchRect and ResolveZ should be
affected. (But I don't see any render_condition use so it's probably
fine.)

Reviewed-by: Ilia Mirkin 

> ---
>  src/gallium/state_trackers/nine/device9.c| 2 ++
>  src/gallium/state_trackers/nine/swapchain9.c | 2 ++
>  2 files changed, 4 insertions(+)
>
> diff --git a/src/gallium/state_trackers/nine/device9.c 
> b/src/gallium/state_trackers/nine/device9.c
> index 1430ca5..43eb7e6 100644
> --- a/src/gallium/state_trackers/nine/device9.c
> +++ b/src/gallium/state_trackers/nine/device9.c
> @@ -1342,6 +1342,7 @@ NineDevice9_StretchRect( struct NineDevice9 *This,
>  (pSourceRect->left <= pSourceRect->right &&
>   pSourceRect->top <= pSourceRect->bottom), 
> D3DERR_INVALIDCALL);
>
> +memset(&blit, 0, sizeof(blit));
>  blit.dst.resource = dst_res;
>  blit.dst.level = dst->level;
>  blit.dst.box.z = dst->layer;
> @@ -2130,6 +2131,7 @@ NineDevice9_ResolveZ( struct NineDevice9 *This )
>  desc = util_format_description(dst->format);
>  user_assert(desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS, 
> D3DERR_INVALIDCALL);
>
> +memset(&blit, 0, sizeof(blit));
>  blit.src.resource = src;
>  blit.src.level = 0;
>  blit.src.format = src->format;
> diff --git a/src/gallium/state_trackers/nine/swapchain9.c 
> b/src/gallium/state_trackers/nine/swapchain9.c
> index 296d6dc..f91b3be 100644
> --- a/src/gallium/state_trackers/nine/swapchain9.c
> +++ b/src/gallium/state_trackers/nine/swapchain9.c
> @@ -554,6 +554,7 @@ handle_draw_cursor_and_hud( struct NineSwapChain9 *This, 
> struct pipe_resource *r
>  struct pipe_blit_info blit;
>
>  if (device->cursor.software && device->cursor.visible && 
> device->cursor.w) {
> +memset(&blit, 0, sizeof(blit));
>  blit.src.resource = device->cursor.image;
>  blit.src.level = 0;
>  blit.src.format = device->cursor.image->format;
> @@ -677,6 +678,7 @@ present( struct NineSwapChain9 *This,
>  handle_draw_cursor_and_hud(This, resource);
>
>  if (This->present_buffers) {
> +memset(&blit, 0, sizeof(blit));
>  blit.src.resource = resource;
>  blit.src.level = 0;
>  blit.src.format = resource->format;
> --
> 2.1.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/16] st/nine: NineDevice9_Clear skip fastpath for bigger depth-buffers

2015-04-24 Thread Axel Davy

Le 24/04/2015 23:10, Ilia Mirkin a écrit :

What if the depth buffer is smaller than the color RT? I'd assume that
the fb would be reduced in that case as well... or does
This->state.fb.width/height only represent the min of the color RT's
sizes?



It is forbidden by the spec to have a depth buffer smaller than the RT,
or to have several RTs of different sizes.

Only cases allowed are:
. D3DFMT_NULL RT + depth buffer -> size of the depth buffer
. 1 RT + depth buffer -> size of the RT. depth buffer must be >=
. more than 1 RT + depth buffer -> the other RTs must match the size of 
the first one. depth buffer must be >=


If depth buffer is smaller, we don't bind it. Same for RTs that don't 
match main RT size.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/16] st/nine: Handle special LIT case

2015-04-24 Thread Ilia Mirkin
On Fri, Apr 24, 2015 at 5:08 PM, Axel Davy  wrote:
> Le 24/04/2015 23:04, Ilia Mirkin a écrit :
>>
>> On Fri, Apr 24, 2015 at 4:09 PM, Axel Davy  wrote:
>>>
>>> Signed-off-by: Axel Davy 
>>> ---
>>>   src/gallium/state_trackers/nine/nine_shader.c | 19 ++-
>>>   1 file changed, 18 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/src/gallium/state_trackers/nine/nine_shader.c
>>> b/src/gallium/state_trackers/nine/nine_shader.c
>>> index 2ba625e..0fd3d37 100644
>>> --- a/src/gallium/state_trackers/nine/nine_shader.c
>>> +++ b/src/gallium/state_trackers/nine/nine_shader.c
>>> @@ -2040,6 +2040,23 @@ DECL_SPECIAL(LOG)
>>>   return D3D_OK;
>>>   }
>>>
>>> +DECL_SPECIAL(LIT)
>>> +{
>>> +struct ureg_program *ureg = tx->ureg;
>>> +struct ureg_dst tmp = tx_scratch(tx);
>>> +struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
>>> +struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
>>> +ureg_LIT(ureg, tmp, src);
>>> +/* d3d9 LIT is the same than gallium LIT. One difference is that
>>> d3d9
>>> + * states that dst.z is 0 when src.y <= 0. Gallium definition can
>>> assign
>>> + * it 0^0 if src.w=0, which value is driver dependent. */
>>> +ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
>>> + ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
>>> + ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
>>> +ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW),
>>> ureg_src(tmp));
>>
>> Why use a separate temp? Couldn't you just
>>
>> ureg_LIT(ureg, dst, src);
>> ureg_CMP(ureg, writemask(dst, Z), -src.y, ureg_src(dst), ureg_imm1f(0.0))
>> ?
>>
> Isn't there a restriction in the case dst is an output register of the
> shader ? Reading from it would be forbidden.
> Also likely it is easier to optimiser for compilers with the temp version.

Ah yes. Usually that doesn't come in st/mesa, but I guess it can
happen with d3d9. OK.

Reviewed-by: Ilia Mirkin 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/16] st/nine: NineDevice9_Clear skip fastpath for bigger depth-buffers

2015-04-24 Thread Ilia Mirkin
What if the depth buffer is smaller than the color RT? I'd assume that
the fb would be reduced in that case as well... or does
This->state.fb.width/height only represent the min of the color RT's
sizes?

On Fri, Apr 24, 2015 at 4:09 PM, Axel Davy  wrote:
> From: Patrick Rudolph 
>
> This adds an additional check to make sure the bound depth-buffer doesn't
> exceed the rendertarget size when clearing depth and color buffer at once.
> D3D9 clears only a rectangle with the same dimensions as the viewport, leaving
> other parts of the depth-buffer intact.
>
> This fixes failing WINE test visual.c:depth_buffer_test()
>
> Signed-off-by: Patrick Rudolph 
> Signed-off-by: Axel Davy 
> ---
>  src/gallium/state_trackers/nine/device9.c | 17 +
>  1 file changed, 13 insertions(+), 4 deletions(-)
>
> diff --git a/src/gallium/state_trackers/nine/device9.c 
> b/src/gallium/state_trackers/nine/device9.c
> index 78e148b..1430ca5 100644
> --- a/src/gallium/state_trackers/nine/device9.c
> +++ b/src/gallium/state_trackers/nine/device9.c
> @@ -1756,12 +1756,21 @@ NineDevice9_Clear( struct NineDevice9 *This,
>  rt_mask |= 1 << i;
>  }
>
> +/* fast path, clears everything at once */
>  if (!Count &&
>  (!(bufs & PIPE_CLEAR_COLOR) || (rt_mask == This->state.rt_mask)) &&
> -rect.x1 == 0 && rect.x2 >= This->state.fb.width &&
> -rect.y1 == 0 && rect.y2 >= This->state.fb.height) {
> -/* fast path, clears everything at once */
> -DBG("fast path\n");
> +rect.x1 == 0 && rect.y1 == 0 &&
> +/* Case we clear only render target. Check clear region vs rt. */
> +((!(bufs & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) &&
> + rect.x2 >= This->state.fb.width &&
> + rect.y2 >= This->state.fb.height) ||
> +/* Case we clear depth buffer (and eventually rt too).
> + * depth buffer size is always >= rt size. Compare to clear region */
> +((bufs & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) &&
> + This->state.fb.zsbuf != NULL &&
> + rect.x2 >= zsbuf_surf->desc.Width &&
> + rect.y2 >= zsbuf_surf->desc.Height))) {
> +DBG("Clear fast path\n");
>  pipe->clear(pipe, bufs, &rgba, Z, Stencil);
>  return D3D_OK;
>  }
> --
> 2.1.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/16] st/nine: Handle special LIT case

2015-04-24 Thread Axel Davy

Le 24/04/2015 23:04, Ilia Mirkin a écrit :

On Fri, Apr 24, 2015 at 4:09 PM, Axel Davy  wrote:

Signed-off-by: Axel Davy 
---
  src/gallium/state_trackers/nine/nine_shader.c | 19 ++-
  1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/nine/nine_shader.c 
b/src/gallium/state_trackers/nine/nine_shader.c
index 2ba625e..0fd3d37 100644
--- a/src/gallium/state_trackers/nine/nine_shader.c
+++ b/src/gallium/state_trackers/nine/nine_shader.c
@@ -2040,6 +2040,23 @@ DECL_SPECIAL(LOG)
  return D3D_OK;
  }

+DECL_SPECIAL(LIT)
+{
+struct ureg_program *ureg = tx->ureg;
+struct ureg_dst tmp = tx_scratch(tx);
+struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
+ureg_LIT(ureg, tmp, src);
+/* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
+ * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
+ * it 0^0 if src.w=0, which value is driver dependent. */
+ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
+ ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
+ ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
+ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));

Why use a separate temp? Couldn't you just

ureg_LIT(ureg, dst, src);
ureg_CMP(ureg, writemask(dst, Z), -src.y, ureg_src(dst), ureg_imm1f(0.0)) ?

Isn't there a restriction in the case dst is an output register of the 
shader ? Reading from it would be forbidden.

Also, the temp version is likely easier for compilers to optimise.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 02/16] st/nine: Fix wrong assert in nine_shader

2015-04-24 Thread Ilia Mirkin
Reviewed-by: Ilia Mirkin 

On Fri, Apr 24, 2015 at 4:09 PM, Axel Davy  wrote:
> The sampler src index was wrong for texldl and texldd
>
> Signed-off-by: Axel Davy 
> ---
>  src/gallium/state_trackers/nine/nine_shader.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/src/gallium/state_trackers/nine/nine_shader.c 
> b/src/gallium/state_trackers/nine/nine_shader.c
> index 0fd3d37..1f1f7c8 100644
> --- a/src/gallium/state_trackers/nine/nine_shader.c
> +++ b/src/gallium/state_trackers/nine/nine_shader.c
> @@ -2493,8 +2493,8 @@ DECL_SPECIAL(TEXLDD)
>  tx_src_param(tx, &tx->insn.src[2]),
>  tx_src_param(tx, &tx->insn.src[3])
>  };
> -assert(tx->insn.src[3].idx >= 0 &&
> -   tx->insn.src[3].idx < Elements(tx->sampler_targets));
> +assert(tx->insn.src[1].idx >= 0 &&
> +   tx->insn.src[1].idx < Elements(tx->sampler_targets));
>  target = tx->sampler_targets[tx->insn.src[1].idx];
>
>  ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
> @@ -2509,8 +2509,8 @@ DECL_SPECIAL(TEXLDL)
> tx_src_param(tx, &tx->insn.src[0]),
> tx_src_param(tx, &tx->insn.src[1])
>  };
> -assert(tx->insn.src[3].idx >= 0 &&
> -   tx->insn.src[3].idx < Elements(tx->sampler_targets));
> +assert(tx->insn.src[1].idx >= 0 &&
> +   tx->insn.src[1].idx < Elements(tx->sampler_targets));
>  target = tx->sampler_targets[tx->insn.src[1].idx];
>
>  ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
> --
> 2.1.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/16] st/nine: Handle special LIT case

2015-04-24 Thread Ilia Mirkin
On Fri, Apr 24, 2015 at 4:09 PM, Axel Davy  wrote:
> Signed-off-by: Axel Davy 
> ---
>  src/gallium/state_trackers/nine/nine_shader.c | 19 ++-
>  1 file changed, 18 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/state_trackers/nine/nine_shader.c 
> b/src/gallium/state_trackers/nine/nine_shader.c
> index 2ba625e..0fd3d37 100644
> --- a/src/gallium/state_trackers/nine/nine_shader.c
> +++ b/src/gallium/state_trackers/nine/nine_shader.c
> @@ -2040,6 +2040,23 @@ DECL_SPECIAL(LOG)
>  return D3D_OK;
>  }
>
> +DECL_SPECIAL(LIT)
> +{
> +struct ureg_program *ureg = tx->ureg;
> +struct ureg_dst tmp = tx_scratch(tx);
> +struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
> +struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
> +ureg_LIT(ureg, tmp, src);
> +/* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
> + * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
> + * it 0^0 if src.w=0, which value is driver dependent. */
> +ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
> + ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
> + ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
> +ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));

Why use a separate temp? Couldn't you just

ureg_LIT(ureg, dst, src);
ureg_CMP(ureg, writemask(dst, Z), -src.y, ureg_src(dst), ureg_imm1f(0.0)) ?

> +return D3D_OK;
> +}
> +
>  DECL_SPECIAL(NRM)
>  {
>  struct ureg_program *ureg = tx->ureg;
> @@ -2543,7 +2560,7 @@ struct sm1_op_info inst_table[] =
>  _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
>  _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
>  _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 
> 15 */
> -_OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL), /* 16 */
> +_OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 
> 16 */
>  _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
>  _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
>  _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
> --
> 2.1.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/7] i965: ensure execution of fragment shader when fragment shader has atomic buffer access

2015-04-24 Thread Rogovin, Kevin
One more comment, that I neglected to add: there are other checks for 
_CurrentFragmentProgram to be non-NULL, indeed function 
brw_upload_wm_abo_surface() [file brw_wm_surface_state.c], also 
has a check for it being non-NULL. That function is the emit for
the atom brw_wm_abo_surfaces which is present in both gen7_atoms
and gen8_atoms.

I would argue that _CurrentFragmentProgram can be NULL, given
that other places check for it and that without the check piglit gets
about 30 more crashes.

Sorry for not posting this in the first reply.

 -Kevin

-Original Message-
From: Rogovin, Kevin 
Sent: Friday, April 24, 2015 11:22 PM
To: 'Kenneth Graunke'; mesa-dev@lists.freedesktop.org
Cc: mesa-...@freedesktop.org; curroje...@riseup.net
Subject: RE: [Mesa-dev] [PATCH 6/7] i965: ensure execution of fragment shader 
when fragment shader has atomic buffer access



> Checking brw->ctx.Shader._CurrentFragmentProgram != NULL is unnecessary.
> There is always a valid pixel shader.  (If the application is using 
> fixed-function, we supply a fragment shader for them.)  Please drop that 
> check.

Without this check (in the Gen7 function/code), about 30 crashes are induced on 
piglit tests for Gen7; the tests are all using GL fixed function pipeline. I 
have not run piglit without this check on Gen8 though.

> I thought that UAVs were essentially for Images...I'm not clear why this is 
> needed.  Perhaps Curro can confirm one way or another.

The essential reason is to guarantee that the pixel shader gets invoked by Gen 
even when all render target surfaces are NULL surfaces. There are other flags 
one can use, but the UAV seems (to me) the most natural.
 
-Kevin
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/16] st/nine: Change x86 FPU Control word on device creation as on wined3d and windows

2015-04-24 Thread Ilia Mirkin
Can you use the fe* class of functions for this? Not entirely sure
what this is setting, but you should be able to do it with
fegetenv/fesetenv, or the more specialized functions.

On Fri, Apr 24, 2015 at 4:09 PM, Axel Davy  wrote:
> From: Tiziano Bacocco 
>
> Signed-off-by: Tiziano Bacocco 
> ---
>  src/gallium/state_trackers/nine/device9.c | 15 +++
>  1 file changed, 15 insertions(+)
>
> diff --git a/src/gallium/state_trackers/nine/device9.c 
> b/src/gallium/state_trackers/nine/device9.c
> index 1a776a7..4ef02bb 100644
> --- a/src/gallium/state_trackers/nine/device9.c
> +++ b/src/gallium/state_trackers/nine/device9.c
> @@ -53,6 +53,18 @@
>
>  #define DBG_CHANNEL DBG_DEVICE
>
> +static void nine_setup_fpu(void)
> +{
> +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
> +WORD cw;
> +__asm__ volatile ("fnstcw %0" : "=m" (cw));
> +cw = (cw & ~0xf3f) | 0x3f;
> +__asm__ volatile ("fldcw %0" : : "m" (cw));
> +#else
> +WARN_ONCE("FPU setup not supported on non-x86 platforms\n");
> +#endif
> +}
> +
>  static void
>  NineDevice9_SetDefaultState( struct NineDevice9 *This, boolean is_reset )
>  {
> @@ -168,6 +180,9 @@ NineDevice9_ctor( struct NineDevice9 *This,
>  IDirect3D9_AddRef(This->d3d9);
>  ID3DPresentGroup_AddRef(This->present);
>
> +if (!(This->params.BehaviorFlags & D3DCREATE_FPU_PRESERVE))
> +nine_setup_fpu();
> +
>  This->pipe = This->screen->context_create(This->screen, NULL);
>  if (!This->pipe) { return E_OUTOFMEMORY; } /* guess */
>
> --
> 2.1.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/4] clover: this serie remove util/compat.*

2015-04-24 Thread Tom Stellard
On Fri, Apr 24, 2015 at 12:59:53PM +0200, EdB wrote:
> Since clover should compile use -std=c++11,
> compat classes are no longer neccessary
> 

Thank you for working on this, it is a nice improvement.  I will try to
review/test these early next week.

Thanks,
Tom

> EdB (4):
>   clover: remove compat class that matche std one
>   clover: remove compat::string
>   clover: make module::symbol::name a string
>   clover: remove util/compat
> 
>  src/gallium/state_trackers/clover/Makefile.sources |   2 -
>  src/gallium/state_trackers/clover/api/program.cpp  |  19 +-
>  .../state_trackers/clover/core/compiler.hpp|  14 +-
>  src/gallium/state_trackers/clover/core/error.hpp   |  10 +-
>  src/gallium/state_trackers/clover/core/kernel.cpp  |   2 +-
>  src/gallium/state_trackers/clover/core/module.cpp  |  56 ++-
>  src/gallium/state_trackers/clover/core/module.hpp  |  23 +-
>  src/gallium/state_trackers/clover/core/program.cpp |   4 +-
>  src/gallium/state_trackers/clover/core/program.hpp |   2 +-
>  .../state_trackers/clover/llvm/invocation.cpp  |  42 +-
>  .../state_trackers/clover/tgsi/compiler.cpp|  12 +-
>  src/gallium/state_trackers/clover/util/compat.cpp  |  38 --
>  src/gallium/state_trackers/clover/util/compat.hpp  | 444 
> -
>  13 files changed, 105 insertions(+), 563 deletions(-)
>  delete mode 100644 src/gallium/state_trackers/clover/util/compat.cpp
>  delete mode 100644 src/gallium/state_trackers/clover/util/compat.hpp
> 
> -- 
> 2.3.6
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/16] st/nine: NineDevice9_Clear skip fastpath for bigger depth-buffers

2015-04-24 Thread Axel Davy
From: Patrick Rudolph 

This adds an additional check to make sure the bound depth-buffer doesn't
exceed the rendertarget size when clearing depth and color buffer at once.
D3D9 clears only a rectangle with the same dimensions as the viewport, leaving
other parts of the depth-buffer intact.

This fixes failing WINE test visual.c:depth_buffer_test()

Signed-off-by: Patrick Rudolph 
Signed-off-by: Axel Davy 
---
 src/gallium/state_trackers/nine/device9.c | 17 +
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/gallium/state_trackers/nine/device9.c 
b/src/gallium/state_trackers/nine/device9.c
index 78e148b..1430ca5 100644
--- a/src/gallium/state_trackers/nine/device9.c
+++ b/src/gallium/state_trackers/nine/device9.c
@@ -1756,12 +1756,21 @@ NineDevice9_Clear( struct NineDevice9 *This,
 rt_mask |= 1 << i;
 }
 
+/* fast path, clears everything at once */
 if (!Count &&
 (!(bufs & PIPE_CLEAR_COLOR) || (rt_mask == This->state.rt_mask)) &&
-rect.x1 == 0 && rect.x2 >= This->state.fb.width &&
-rect.y1 == 0 && rect.y2 >= This->state.fb.height) {
-/* fast path, clears everything at once */
-DBG("fast path\n");
+rect.x1 == 0 && rect.y1 == 0 &&
+/* Case we clear only render target. Check clear region vs rt. */
+((!(bufs & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) &&
+ rect.x2 >= This->state.fb.width &&
+ rect.y2 >= This->state.fb.height) ||
+/* Case we clear depth buffer (and eventually rt too).
+ * depth buffer size is always >= rt size. Compare to clear region */
+((bufs & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) &&
+ This->state.fb.zsbuf != NULL &&
+ rect.x2 >= zsbuf_surf->desc.Width &&
+ rect.y2 >= zsbuf_surf->desc.Height))) {
+DBG("Clear fast path\n");
 pipe->clear(pipe, bufs, &rgba, Z, Stencil);
 return D3D_OK;
 }
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 16/16] st/nine: Rework texture data allocation

2015-04-24 Thread Axel Davy
Some applications assume the memory for multilevel
textures is allocated in contiguous blocks.

This patch implements that behaviour.

Signed-off-by: Axel Davy 
---
 src/gallium/state_trackers/nine/cubetexture9.c | 42 --
 src/gallium/state_trackers/nine/cubetexture9.h |  1 +
 src/gallium/state_trackers/nine/nine_pipe.h| 34 ++
 src/gallium/state_trackers/nine/surface9.c | 50 +-
 src/gallium/state_trackers/nine/surface9.h |  1 -
 src/gallium/state_trackers/nine/texture9.c | 18 --
 src/gallium/state_trackers/nine/texture9.h |  1 +
 7 files changed, 110 insertions(+), 37 deletions(-)

diff --git a/src/gallium/state_trackers/nine/cubetexture9.c 
b/src/gallium/state_trackers/nine/cubetexture9.c
index d81cc70..34ef4ec 100644
--- a/src/gallium/state_trackers/nine/cubetexture9.c
+++ b/src/gallium/state_trackers/nine/cubetexture9.c
@@ -40,8 +40,9 @@ NineCubeTexture9_ctor( struct NineCubeTexture9 *This,
 struct pipe_resource *info = &This->base.base.info;
 struct pipe_screen *screen = pParams->device->screen;
 enum pipe_format pf;
-unsigned i;
+unsigned i, l, f, offset, face_size = 0;
 D3DSURFACE_DESC sfdesc;
+void *p;
 HRESULT hr;
 
 DBG("This=%p pParams=%p EdgeLength=%u Levels=%u Usage=%d "
@@ -97,6 +98,14 @@ NineCubeTexture9_ctor( struct NineCubeTexture9 *This,
 DBG("Application asked for Software Vertex Processing, "
 "but this is unimplemented\n");
 
+if (Pool != D3DPOOL_DEFAULT) {
+face_size = nine_format_get_alloc_size(pf, EdgeLength, EdgeLength,
+   info->last_level);
+This->managed_buffer = MALLOC(6 * face_size);
+if (!This->managed_buffer)
+return E_OUTOFMEMORY;
+}
+
 This->surfaces = CALLOC(6 * (info->last_level + 1), 
sizeof(*This->surfaces));
 if (!This->surfaces)
 return E_OUTOFMEMORY;
@@ -117,16 +126,26 @@ NineCubeTexture9_ctor( struct NineCubeTexture9 *This,
 sfdesc.Pool = Pool;
 sfdesc.MultiSampleType = D3DMULTISAMPLE_NONE;
 sfdesc.MultiSampleQuality = 0;
-for (i = 0; i < (info->last_level + 1) * 6; ++i) {
-sfdesc.Width = sfdesc.Height = u_minify(EdgeLength, i / 6);
-
-hr = NineSurface9_new(This->base.base.base.device, NineUnknown(This),
-  This->base.base.resource, NULL, 
D3DRTYPE_CUBETEXTURE,
-  i / 6, i % 6,
-  &sfdesc, &This->surfaces[i]);
-if (FAILED(hr))
-return hr;
+/* We allocate the memory for the surfaces as continous blocks.
+ * This is the expected behaviour, however we haven't tested for
+ * cube textures in which order the faces/levels should be in memory
+ */
+for (f = 0; f < 6; f++) {
+offset = f * face_size;
+for (l = 0; l < info->last_level + 1; l++) {
+sfdesc.Width = sfdesc.Height = u_minify(EdgeLength, l);
+p = This->managed_buffer ? This->managed_buffer + offset +
+nine_format_get_p_offset(pf, EdgeLength, EdgeLength, l) :
+NULL;
+
+hr = NineSurface9_new(This->base.base.base.device, 
NineUnknown(This),
+  This->base.base.resource, p, 
D3DRTYPE_CUBETEXTURE,
+  l, f, &sfdesc, &This->surfaces[f + 6 * l]);
+if (FAILED(hr))
+return hr;
+}
 }
+
 for (i = 0; i < 6; ++i) /* width = 0 means empty, depth stays 1 */
 This->dirty_rect[i].depth = 1;
 
@@ -146,6 +165,9 @@ NineCubeTexture9_dtor( struct NineCubeTexture9 *This )
 FREE(This->surfaces);
 }
 
+if (This->managed_buffer)
+FREE(This->managed_buffer);
+
 NineBaseTexture9_dtor(&This->base);
 }
 
diff --git a/src/gallium/state_trackers/nine/cubetexture9.h 
b/src/gallium/state_trackers/nine/cubetexture9.h
index e8594d3..ee7e275 100644
--- a/src/gallium/state_trackers/nine/cubetexture9.h
+++ b/src/gallium/state_trackers/nine/cubetexture9.h
@@ -31,6 +31,7 @@ struct NineCubeTexture9
 struct NineBaseTexture9 base;
 struct NineSurface9 **surfaces;
 struct pipe_box dirty_rect[6]; /* covers all mip levels */
+uint8_t *managed_buffer;
 };
 static INLINE struct NineCubeTexture9 *
 NineCubeTexture9( void *data )
diff --git a/src/gallium/state_trackers/nine/nine_pipe.h 
b/src/gallium/state_trackers/nine/nine_pipe.h
index b8e728e..20916b7 100644
--- a/src/gallium/state_trackers/nine/nine_pipe.h
+++ b/src/gallium/state_trackers/nine/nine_pipe.h
@@ -673,4 +673,38 @@ 
d3dtexturefiltertype_to_pipe_tex_mipfilter(D3DTEXTUREFILTERTYPE filter)
 }
 }
 
+static INLINE unsigned nine_format_get_stride(enum pipe_format format,
+  unsigned width)
+{
+unsigned stride = util_format_get_stride(format, width);
+
+return align(stride, 4);
+}
+
+static INLINE unsigned nine_format_

[Mesa-dev] [PATCH 14/16] st/nine: Add debug warning when application uses sw processing

2015-04-24 Thread Axel Davy
Signed-off-by: Axel Davy 
---
 src/gallium/state_trackers/nine/cubetexture9.c   |  3 +++
 src/gallium/state_trackers/nine/device9.c|  5 +
 src/gallium/state_trackers/nine/indexbuffer9.c   | 14 --
 src/gallium/state_trackers/nine/texture9.c   |  5 +
 src/gallium/state_trackers/nine/vertexbuffer9.c  |  4 +++-
 src/gallium/state_trackers/nine/volumetexture9.c |  3 +++
 6 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/src/gallium/state_trackers/nine/cubetexture9.c 
b/src/gallium/state_trackers/nine/cubetexture9.c
index e250315..d81cc70 100644
--- a/src/gallium/state_trackers/nine/cubetexture9.c
+++ b/src/gallium/state_trackers/nine/cubetexture9.c
@@ -93,6 +93,9 @@ NineCubeTexture9_ctor( struct NineCubeTexture9 *This,
 PIPE_BIND_TRANSFER_READ |
 PIPE_BIND_TRANSFER_WRITE;
 }
+if (Usage & D3DUSAGE_SOFTWAREPROCESSING)
+DBG("Application asked for Software Vertex Processing, "
+"but this is unimplemented\n");
 
 This->surfaces = CALLOC(6 * (info->last_level + 1), 
sizeof(*This->surfaces));
 if (!This->surfaces)
diff --git a/src/gallium/state_trackers/nine/device9.c 
b/src/gallium/state_trackers/nine/device9.c
index 4ef02bb..924f755 100644
--- a/src/gallium/state_trackers/nine/device9.c
+++ b/src/gallium/state_trackers/nine/device9.c
@@ -183,6 +183,11 @@ NineDevice9_ctor( struct NineDevice9 *This,
 if (!(This->params.BehaviorFlags & D3DCREATE_FPU_PRESERVE))
 nine_setup_fpu();
 
+if (This->params.BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING)
+DBG("Application asked full Software Vertex Processing. Ignoring.\n");
+if (This->params.BehaviorFlags & D3DCREATE_MIXED_VERTEXPROCESSING)
+DBG("Application asked mixed Software Vertex Processing. Ignoring.\n");
+
 This->pipe = This->screen->context_create(This->screen, NULL);
 if (!This->pipe) { return E_OUTOFMEMORY; } /* guess */
 
diff --git a/src/gallium/state_trackers/nine/indexbuffer9.c 
b/src/gallium/state_trackers/nine/indexbuffer9.c
index 48553fd..860313b 100644
--- a/src/gallium/state_trackers/nine/indexbuffer9.c
+++ b/src/gallium/state_trackers/nine/indexbuffer9.c
@@ -63,12 +63,14 @@ NineIndexBuffer9_ctor( struct NineIndexBuffer9 *This,
 if (pDesc->Pool == D3DPOOL_SYSTEMMEM)
 info->usage = PIPE_USAGE_STAGING;
 
-/* if (This->desc.Usage & D3DUSAGE_DONOTCLIP) { } */
-/* if (This->desc.Usage & D3DUSAGE_NONSECURE) { } */
-/* if (This->desc.Usage & D3DUSAGE_NPATCHES) { } */
-/* if (This->desc.Usage & D3DUSAGE_POINTS) { } */
-/* if (This->desc.Usage & D3DUSAGE_RTPATCHES) { } */
-/* if (This->desc.Usage & D3DUSAGE_SOFTWAREPROCESSING) { } */
+/* if (pDesc->Usage & D3DUSAGE_DONOTCLIP) { } */
+/* if (pDesc->Usage & D3DUSAGE_NONSECURE) { } */
+/* if (pDesc->Usage & D3DUSAGE_NPATCHES) { } */
+/* if (pDesc->Usage & D3DUSAGE_POINTS) { } */
+/* if (pDesc->Usage & D3DUSAGE_RTPATCHES) { } */
+if (pDesc->Usage & D3DUSAGE_SOFTWAREPROCESSING)
+DBG("Application asked for Software Vertex Processing, "
+"but this is unimplemented\n");
 
 info->height0 = 1;
 info->depth0 = 1;
diff --git a/src/gallium/state_trackers/nine/texture9.c 
b/src/gallium/state_trackers/nine/texture9.c
index 0dbdd37..78a632f 100644
--- a/src/gallium/state_trackers/nine/texture9.c
+++ b/src/gallium/state_trackers/nine/texture9.c
@@ -125,6 +125,11 @@ NineTexture9_ctor( struct NineTexture9 *This,
 PIPE_BIND_TRANSFER_READ |
 PIPE_BIND_TRANSFER_WRITE;
 }
+
+if (Usage & D3DUSAGE_SOFTWAREPROCESSING)
+DBG("Application asked for Software Vertex Processing, "
+"but this is unimplemented\n");
+
 if (pSharedHandle)
 info->bind |= PIPE_BIND_SHARED;
 
diff --git a/src/gallium/state_trackers/nine/vertexbuffer9.c 
b/src/gallium/state_trackers/nine/vertexbuffer9.c
index 11cc60f..8e2eaaf 100644
--- a/src/gallium/state_trackers/nine/vertexbuffer9.c
+++ b/src/gallium/state_trackers/nine/vertexbuffer9.c
@@ -76,7 +76,9 @@ NineVertexBuffer9_ctor( struct NineVertexBuffer9 *This,
 /* if (pDesc->Usage & D3DUSAGE_NPATCHES) { } */
 /* if (pDesc->Usage & D3DUSAGE_POINTS) { } */
 /* if (pDesc->Usage & D3DUSAGE_RTPATCHES) { } */
-/* if (pDesc->Usage & D3DUSAGE_SOFTWAREPROCESSING) { } */
+if (pDesc->Usage & D3DUSAGE_SOFTWAREPROCESSING)
+DBG("Application asked for Software Vertex Processing, "
+"but this is unimplemented\n");
 /* if (pDesc->Usage & D3DUSAGE_TEXTAPI) { } */
 
 info->height0 = 1;
diff --git a/src/gallium/state_trackers/nine/volumetexture9.c 
b/src/gallium/state_trackers/nine/volumetexture9.c
index 6c0073f..08fdd3d 100644
--- a/src/gallium/state_trackers/nine/volumetexture9.c
+++ b/src/gallium/state_trackers/nine/volumetexture9.c
@@ -90,6 +90,9 @@ NineVolumeTexture9_ctor( struct NineVolumeTexture9 *This,
 PIPE_BIND_TRANSFER_READ |
 PIPE_BIND_TRANSFER_WRITE;
 

[Mesa-dev] [PATCH 01/11] st/nine: Encapsulate variables for MANAGED resource

2015-04-24 Thread Axel Davy
Signed-off-by: Axel Davy 
---
 src/gallium/state_trackers/nine/basetexture9.c   | 82 
 src/gallium/state_trackers/nine/basetexture9.h   | 19 +++---
 src/gallium/state_trackers/nine/cubetexture9.c   |  2 +-
 src/gallium/state_trackers/nine/device9.c|  2 +-
 src/gallium/state_trackers/nine/nine_state.c |  2 +-
 src/gallium/state_trackers/nine/stateblock9.c|  2 +-
 src/gallium/state_trackers/nine/surface9.c   |  2 +-
 src/gallium/state_trackers/nine/texture9.c   |  2 +-
 src/gallium/state_trackers/nine/volume9.c|  2 +-
 src/gallium/state_trackers/nine/volumetexture9.c |  2 +-
 10 files changed, 60 insertions(+), 57 deletions(-)

diff --git a/src/gallium/state_trackers/nine/basetexture9.c 
b/src/gallium/state_trackers/nine/basetexture9.c
index f2ca35b..75a305f 100644
--- a/src/gallium/state_trackers/nine/basetexture9.c
+++ b/src/gallium/state_trackers/nine/basetexture9.c
@@ -67,8 +67,8 @@ NineBaseTexture9_ctor( struct NineBaseTexture9 *This,
 This->pipe = pParams->device->pipe;
 This->mipfilter = (Usage & D3DUSAGE_AUTOGENMIPMAP) ?
 D3DTEXF_LINEAR : D3DTEXF_NONE;
-This->lod = 0;
-This->lod_resident = -1;
+This->managed.lod = 0;
+This->managed.lod_resident = -1;
 /* When a depth buffer is sampled, it is for shadow mapping, except for
  * D3DFMT_INTZ, D3DFMT_DF16 and D3DFMT_DF24.
  * In addition D3DFMT_INTZ can be used for both texturing and depth 
buffering
@@ -103,15 +103,15 @@ DWORD WINAPI
 NineBaseTexture9_SetLOD( struct NineBaseTexture9 *This,
  DWORD LODNew )
 {
-DWORD old = This->lod;
+DWORD old = This->managed.lod;
 
 DBG("This=%p LODNew=%d\n", This, LODNew);
 
 user_assert(This->base.pool == D3DPOOL_MANAGED, 0);
 
-This->lod = MIN2(LODNew, This->base.info.last_level);
+This->managed.lod = MIN2(LODNew, This->base.info.last_level);
 
-if (This->lod != old && This->bind_count && LIST_IS_EMPTY(&This->list))
+if (This->managed.lod != old && This->bind_count && 
LIST_IS_EMPTY(&This->list))
list_add(&This->list, &This->base.base.device->update_textures);
 
 return old;
@@ -122,7 +122,7 @@ NineBaseTexture9_GetLOD( struct NineBaseTexture9 *This )
 {
 DBG("This=%p\n", This);
 
-return This->lod;
+return This->managed.lod;
 }
 
 DWORD WINAPI
@@ -165,7 +165,7 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This )
 unsigned last_level = This->base.info.last_level;
 unsigned l;
 
-DBG("This=%p dirty=%i type=%s\n", This, This->dirty,
+DBG("This=%p dirty=%i type=%s\n", This, This->managed.dirty,
 nine_D3DRTYPE_to_str(This->base.type));
 
 assert(This->base.pool == D3DPOOL_MANAGED);
@@ -173,10 +173,10 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 
*This )
 if (This->base.usage & D3DUSAGE_AUTOGENMIPMAP)
 last_level = 0; /* TODO: What if level 0 is not resident ? */
 
-if (This->lod_resident != This->lod) {
+if (This->managed.lod_resident != This->managed.lod) {
 struct pipe_resource *res;
 
-DBG("updating LOD from %u to %u ...\n", This->lod_resident, This->lod);
+DBG("updating LOD from %u to %u ...\n", This->managed.lod_resident, 
This->managed.lod);
 
 pipe_sampler_view_reference(&This->view[0], NULL);
 pipe_sampler_view_reference(&This->view[1], NULL);
@@ -192,13 +192,13 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 
*This )
 state->changed.group |= NINE_STATE_TEXTURE;
 }
 
-hr = NineBaseTexture9_CreatePipeResource(This, This->lod_resident != 
-1);
+hr = NineBaseTexture9_CreatePipeResource(This, 
This->managed.lod_resident != -1);
 if (FAILED(hr))
 return hr;
 res = This->base.resource;
 
-if (This->lod_resident == -1) /* no levels were resident */
-This->lod_resident = This->base.info.last_level + 1;
+if (This->managed.lod_resident == -1) /* no levels were resident */
+This->managed.lod_resident = This->base.info.last_level + 1;
 
 if (This->base.type == D3DRTYPE_TEXTURE) {
 struct NineTexture9 *tex = NineTexture9(This);
@@ -207,15 +207,15 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 
*This )
 /* Mark uninitialized levels as dirty. */
 box.x = box.y = box.z = 0;
 box.depth = 1;
-for (l = This->lod; l < This->lod_resident; ++l) {
+for (l = This->managed.lod; l < This->managed.lod_resident; ++l) {
 box.width = u_minify(This->base.info.width0, l);
 box.height = u_minify(This->base.info.height0, l);
 NineSurface9_AddDirtyRect(tex->surfaces[l], &box);
 }
-for (l = 0; l < This->lod; ++l)
+for (l = 0; l < This->managed.lod; ++l)
 NineSurface9_SetResource(tex->surfaces[l], NULL, -1);
 for (; l <= This->base.info.last_level; ++l)
-

[Mesa-dev] [PATCH 03/11] st/nine: Simplify Surface9 Managed resources implementation

2015-04-24 Thread Axel Davy
Remove the Surface9 code for dirty rects, used only for Managed
resources. Instead convey the information to the parent texture.

According to documentation, this seems to be the expected behaviour,
and if documentation is wrong there, that's not a problem since it can
only lead to more texture updates in corner cases.

Signed-off-by: Axel Davy 
---
 src/gallium/state_trackers/nine/basetexture9.c |  15 ++--
 src/gallium/state_trackers/nine/nine_pipe.h|  11 ---
 src/gallium/state_trackers/nine/surface9.c | 108 ++---
 src/gallium/state_trackers/nine/surface9.h |  12 +--
 4 files changed, 53 insertions(+), 93 deletions(-)

diff --git a/src/gallium/state_trackers/nine/basetexture9.c 
b/src/gallium/state_trackers/nine/basetexture9.c
index 75a305f..9b7976c 100644
--- a/src/gallium/state_trackers/nine/basetexture9.c
+++ b/src/gallium/state_trackers/nine/basetexture9.c
@@ -279,16 +279,17 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 
*This )
 tex->dirty_rect.x, tex->dirty_rect.y,
 tex->dirty_rect.width, tex->dirty_rect.height);
 
+/* Note: for l < This->managed.lod, the resource is
+ * non-existing, and thus will be entirely re-uploaded
+ * if This->managed.lod changes */
 if (tex->dirty_rect.width) {
-for (l = 0; l <= last_level; ++l) {
+for (l = This->managed.lod; l <= last_level; ++l) {
 u_box_minify_2d(&box, &tex->dirty_rect, l);
-NineSurface9_AddDirtyRect(tex->surfaces[l], &box);
+NineSurface9_UploadSelf(tex->surfaces[l], &box);
 }
 memset(&tex->dirty_rect, 0, sizeof(tex->dirty_rect));
 tex->dirty_rect.depth = 1;
 }
-for (l = This->managed.lod; l <= last_level; ++l)
-NineSurface9_UploadSelf(tex->surfaces[l]);
 } else
 if (This->base.type == D3DRTYPE_CUBETEXTURE) {
 struct NineCubeTexture9 *tex = NineCubeTexture9(This);
@@ -303,15 +304,13 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 
*This )
 tex->dirty_rect[z].width, tex->dirty_rect[z].height);
 
 if (tex->dirty_rect[z].width) {
-for (l = 0; l <= last_level; ++l) {
+for (l = This->managed.lod; l <= last_level; ++l) {
 u_box_minify_2d(&box, &tex->dirty_rect[z], l);
-NineSurface9_AddDirtyRect(tex->surfaces[l * 6 + z], &box);
+NineSurface9_UploadSelf(tex->surfaces[l * 6 + z], &box);
 }
 memset(&tex->dirty_rect[z], 0, sizeof(tex->dirty_rect[z]));
 tex->dirty_rect[z].depth = 1;
 }
-for (l = This->managed.lod; l <= last_level; ++l)
-NineSurface9_UploadSelf(tex->surfaces[l * 6 + z]);
 }
 } else
 if (This->base.type == D3DRTYPE_VOLUMETEXTURE) {
diff --git a/src/gallium/state_trackers/nine/nine_pipe.h 
b/src/gallium/state_trackers/nine/nine_pipe.h
index 20916b7..d9c643c 100644
--- a/src/gallium/state_trackers/nine/nine_pipe.h
+++ b/src/gallium/state_trackers/nine/nine_pipe.h
@@ -108,17 +108,6 @@ rect_to_pipe_box_flip(struct pipe_box *dst, const RECT 
*src)
 }
 
 static INLINE void
-nine_u_rect_to_pipe_box(struct pipe_box *dst, const struct u_rect *rect, int z)
-{
-dst->x = rect->x0;
-dst->y = rect->y0;
-dst->z = z;
-dst->width = rect->x1 - rect->x0;
-dst->height = rect->y1 - rect->y0;
-dst->depth = 1;
-}
-
-static INLINE void
 rect_to_pipe_box_xy_only(struct pipe_box *dst, const RECT *src)
 {
 user_warn(src->left > src->right || src->top > src->bottom);
diff --git a/src/gallium/state_trackers/nine/surface9.c 
b/src/gallium/state_trackers/nine/surface9.c
index e934941..2706efa 100644
--- a/src/gallium/state_trackers/nine/surface9.c
+++ b/src/gallium/state_trackers/nine/surface9.c
@@ -22,7 +22,11 @@
 
 #include "surface9.h"
 #include "device9.h"
-#include "basetexture9.h" /* for marking dirty */
+
+/* for marking dirty */
+#include "basetexture9.h"
+#include "texture9.h"
+#include "cubetexture9.h"
 
 #include "nine_helpers.h"
 #include "nine_pipe.h"
@@ -145,7 +149,6 @@ NineSurface9_dtor( struct NineSurface9 *This )
 {
 if (This->transfer)
 NineSurface9_UnlockRect(This);
-NineSurface9_ClearDirtyRects(This);
 
 pipe_surface_reference(&This->surface[0], NULL);
 pipe_surface_reference(&This->surface[1], NULL);
@@ -256,55 +259,38 @@ NineSurface9_GetDesc( struct NineSurface9 *This,
 return D3D_OK;
 }
 
-/* Wine just keeps a single directy rect and expands it to cover all
- * the dirty rects ever added.
- * We'll keep 2, and expand the one that fits better, just for fun.
- */
+/* Add the dirty rects to the source texture */
 INLINE void
 NineSurface9_AddDirtyRect( struct NineSurface9 *This,
const struct pipe_box *box )
 {
-float area[2];
-struct u_rect rect, cover_a, cover_b;
+RECT dirty_rect;
 
 DBG("

[Mesa-dev] [PATCH 04/11] st/nine: Bound the dirty regions to resource size

2015-04-24 Thread Axel Davy
Signed-off-by: Axel Davy 
---
 src/gallium/state_trackers/nine/cubetexture9.c   | 4 
 src/gallium/state_trackers/nine/texture9.c   | 3 +++
 src/gallium/state_trackers/nine/volumetexture9.c | 9 +
 3 files changed, 16 insertions(+)

diff --git a/src/gallium/state_trackers/nine/cubetexture9.c 
b/src/gallium/state_trackers/nine/cubetexture9.c
index d501d29..9c99b7c 100644
--- a/src/gallium/state_trackers/nine/cubetexture9.c
+++ b/src/gallium/state_trackers/nine/cubetexture9.c
@@ -272,6 +272,10 @@ NineCubeTexture9_AddDirtyRect( struct NineCubeTexture9 
*This,
 rect_to_pipe_box_clamp(&box, pDirtyRect);
 u_box_union_2d(&This->dirty_rect[FaceType], 
&This->dirty_rect[FaceType],
&box);
+(void) u_box_clip_2d(&This->dirty_rect[FaceType],
+ &This->dirty_rect[FaceType],
+ This->base.base.info.width0,
+ This->base.base.info.height0);
 }
 return D3D_OK;
 }
diff --git a/src/gallium/state_trackers/nine/texture9.c 
b/src/gallium/state_trackers/nine/texture9.c
index bee8095..e0aec3c 100644
--- a/src/gallium/state_trackers/nine/texture9.c
+++ b/src/gallium/state_trackers/nine/texture9.c
@@ -301,6 +301,9 @@ NineTexture9_AddDirtyRect( struct NineTexture9 *This,
 struct pipe_box box;
 rect_to_pipe_box_clamp(&box, pDirtyRect);
 u_box_union_2d(&This->dirty_rect, &This->dirty_rect, &box);
+(void) u_box_clip_2d(&This->dirty_rect, &This->dirty_rect,
+ This->base.base.info.width0,
+ This->base.base.info.height0);
 }
 return D3D_OK;
 }
diff --git a/src/gallium/state_trackers/nine/volumetexture9.c 
b/src/gallium/state_trackers/nine/volumetexture9.c
index b566173..8f1de13 100644
--- a/src/gallium/state_trackers/nine/volumetexture9.c
+++ b/src/gallium/state_trackers/nine/volumetexture9.c
@@ -223,6 +223,15 @@ NineVolumeTexture9_AddDirtyBox( struct NineVolumeTexture9 
*This,
 struct pipe_box box;
 d3dbox_to_pipe_box(&box, pDirtyBox);
 u_box_union_3d(&This->dirty_box, &This->dirty_box, &box);
+This->dirty_box.x = MAX2(This->dirty_box.x, 0);
+This->dirty_box.y = MAX2(This->dirty_box.y, 0);
+This->dirty_box.z = MAX2(This->dirty_box.z, 0);
+This->dirty_box.width = MIN2(This->dirty_box.width,
+ This->base.base.info.width0 - 
This->dirty_box.x);
+This->dirty_box.height = MIN2(This->dirty_box.height,
+ This->base.base.info.height0 - 
This->dirty_box.y);
+This->dirty_box.depth = MIN2(This->dirty_box.depth,
+ This->base.base.info.depth0 - 
This->dirty_box.z);
 }
 return D3D_OK;
 }
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/11] st/nine: Rewrite Managed texture uploads

2015-04-24 Thread Axel Davy
That part of the code was quite obscure.
This new implementation tries to make it clearer
by separating the different parts, and commenting more.

Signed-off-by: Axel Davy 
---
 src/gallium/state_trackers/nine/basetexture9.c | 221 ++---
 1 file changed, 125 insertions(+), 96 deletions(-)

diff --git a/src/gallium/state_trackers/nine/basetexture9.c 
b/src/gallium/state_trackers/nine/basetexture9.c
index 9b7976c..330827a 100644
--- a/src/gallium/state_trackers/nine/basetexture9.c
+++ b/src/gallium/state_trackers/nine/basetexture9.c
@@ -163,7 +163,8 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This )
 {
 HRESULT hr;
 unsigned last_level = This->base.info.last_level;
-unsigned l;
+unsigned l, min_level_dirty = This->managed.lod;
+BOOL update_lod;
 
 DBG("This=%p dirty=%i type=%s\n", This, This->managed.dirty,
 nine_D3DRTYPE_to_str(This->base.type));
@@ -173,7 +174,14 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This 
)
 if (This->base.usage & D3DUSAGE_AUTOGENMIPMAP)
 last_level = 0; /* TODO: What if level 0 is not resident ? */
 
-if (This->managed.lod_resident != This->managed.lod) {
+update_lod = This->managed.lod_resident != This->managed.lod;
+if (!update_lod && !This->managed.dirty)
+return D3D_OK;
+
+/* Allocate a new resource with the correct number of levels,
+ * Mark states for update, and tell the nine surfaces/volumes
+ * their new resource. */
+if (update_lod) {
 struct pipe_resource *res;
 
 DBG("updating LOD from %u to %u ...\n", This->managed.lod_resident, 
This->managed.lod);
@@ -192,148 +200,169 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 
*This )
 state->changed.group |= NINE_STATE_TEXTURE;
 }
 
+/* Allocate a new resource */
 hr = NineBaseTexture9_CreatePipeResource(This, 
This->managed.lod_resident != -1);
 if (FAILED(hr))
 return hr;
 res = This->base.resource;
 
-if (This->managed.lod_resident == -1) /* no levels were resident */
+if (This->managed.lod_resident == -1) {/* no levels were resident */
+This->managed.dirty = FALSE; /* We are going to upload everything. 
*/
 This->managed.lod_resident = This->base.info.last_level + 1;
+}
+
+if (This->base.type == D3DRTYPE_TEXTURE) {
+struct NineTexture9 *tex = NineTexture9(This);
+
+/* last content (if apply) has been copied to the new resource.
+ * Note: We cannot render to surfaces of managed textures.
+ * Note2: the level argument passed is to get the level offset
+ * right when the texture is uploaded (the texture first level
+ * corresponds to This->managed.lod).
+ * Note3: We don't care about the value passed for the surfaces
+ * before This->managed.lod, negative with this implementation. */
+for (l = 0; l <= This->base.info.last_level; ++l)
+NineSurface9_SetResource(tex->surfaces[l], res, l - 
This->managed.lod);
+} else
+if (This->base.type == D3DRTYPE_CUBETEXTURE) {
+struct NineCubeTexture9 *tex = NineCubeTexture9(This);
+unsigned z;
+
+for (l = 0; l <= This->base.info.last_level; ++l) {
+for (z = 0; z < 6; ++z)
+NineSurface9_SetResource(tex->surfaces[l * 6 + z],
+ res, l - This->managed.lod);
+}
+} else
+if (This->base.type == D3DRTYPE_VOLUMETEXTURE) {
+struct NineVolumeTexture9 *tex = NineVolumeTexture9(This);
+
+for (l = 0; l <= This->base.info.last_level; ++l)
+NineVolume9_SetResource(tex->volumes[l], res, l - 
This->managed.lod);
+} else {
+assert(!"invalid texture type");
+}
+
+/* We are going to fully upload the new levels,
+ * no need to update dirty parts of the texture for these */
+min_level_dirty = MAX2(This->managed.lod, This->managed.lod_resident);
+}
+
+/* Update dirty parts of the texture */
+if (This->managed.dirty) {
+if (This->base.type == D3DRTYPE_TEXTURE) {
+struct NineTexture9 *tex = NineTexture9(This);
+struct pipe_box box;
+box.z = 0;
+box.depth = 1;
+
+DBG("TEXTURE: dirty rect=(%u,%u) (%ux%u)\n",
+tex->dirty_rect.x, tex->dirty_rect.y,
+tex->dirty_rect.width, tex->dirty_rect.height);
+
+/* Note: for l < min_level_dirty, the resource is
+ * either non-existing (and thus will be entirely re-uploaded
+ * if the lod changes) or going to have a full upload */
+if (tex->dirty_rect.width) {
+for (l = min_level_dirty; l <= last_level; ++l) {
+u_box_minify_2d(&box, &tex->dirty_rect, l);

  1   2   >