[Mesa-dev] [PATCH v2] i965/fs: Add remove_extra_rounding_modes optimization

2017-08-29 Thread Alejandro Piñeiro
Although from SPIR-V point of view, rounding modes are attached to the
operation/destination, on i965 it is a status, so we don't need to
explicitly set the rounding mode if the one we want is already set.

v2: Use a single SHADER_OPCODE_RND_MODE opcode taking an immediate
with the rounding mode (Curro)

Signed-off-by: Jose Maria Casanova Crespo 
Signed-off-by: Alejandro Piñeiro opcode == SHADER_OPCODE_RND_MODE) {
+ assert(inst->src[0].file == BRW_IMMEDIATE_VALUE);
+
+ current_mode = (brw_rnd_mode) inst->src[0].d;
+
+ if (current_mode == prev_mode) {
+inst->remove(block);
+progress = true;
+ } else {
+prev_mode = current_mode;
+ }
+  }
+   }
+
+   if (progress)
+  invalidate_live_intervals();
+
+   return progress;
+}
+
+
 static void
 clear_deps_for_inst_src(fs_inst *inst, bool *deps, int first_grf, int grf_len)
 {
@@ -5748,6 +5788,7 @@ fs_visitor::optimize()
int pass_num = 0;
 
OPT(opt_drop_redundant_mov_to_flags);
+   OPT(remove_extra_rounding_modes);
 
do {
   progress = false;
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index f1ba193de7e..b9476e69edb 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -150,6 +150,7 @@ public:
bool eliminate_find_live_channel();
bool dead_code_eliminate();
bool remove_duplicate_mrf_writes();
+   bool remove_extra_rounding_modes();
 
bool opt_sampler_eot();
bool virtual_grf_interferes(int a, int b);
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] i965/fs: Enable rounding mode on f2f16 ops

2017-08-29 Thread Alejandro Piñeiro
By default we don't set the rounding mode. We only set
round-to-near-even or round-to-zero mode if explicitly set from nir.

v2: Use a single SHADER_OPCODE_RND_MODE opcode taking an immediate
with the rounding mode (Curro)

Signed-off-by: Jose Maria Casanova Crespo 
Signed-off-by: Alejandro Piñeiro 
---
 src/intel/compiler/brw_fs_nir.cpp | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index a356327c690..7b06cb139d6 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -717,6 +717,14 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, 
nir_alu_instr *instr)
   inst->saturate = instr->dest.saturate;
   break;
 
+   case nir_op_f2f16_rtne:
+   case nir_op_f2f16_rtz:
+  if (instr->op == nir_op_f2f16_rtz)
+ bld.emit(SHADER_OPCODE_RND_MODE, bld.null_reg_ud(), 
brw_imm_d(BRW_RND_MODE_RTZ));
+  else if (instr->op == nir_op_f2f16_rtne)
+ bld.emit(SHADER_OPCODE_RND_MODE, bld.null_reg_ud(), 
brw_imm_d(BRW_RND_MODE_RTNE));
+  /* fallthrough */
+
case nir_op_f2f16:
   /* In theory, it would be better to use BRW_OPCODE_F32TO16. Depending
* on the HW gen, it is a special hw opcode or just a MOV, and
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] i965/fs: Define new shader opcode to set rounding modes

2017-08-29 Thread Alejandro Piñeiro
Although it is possible to emit them directly as AND/OR on brw_fs_nir,
having a specific opcode makes it easier to remove duplicate settings
later.

v2: (Curro)
  - Set thread control to 'switch' when using the control register
  - Use a single SHADER_OPCODE_RND_MODE opcode taking an immediate
with the rounding mode.
  - Avoid magic numbers setting rounding mode field at control register.

Signed-off-by:  Alejandro Piñeiro 
Signed-off-by:  Jose Maria Casanova Crespo 
---
 src/intel/compiler/brw_eu.h |  3 +++
 src/intel/compiler/brw_eu_defines.h | 17 +
 src/intel/compiler/brw_eu_emit.c| 34 +
 src/intel/compiler/brw_fs_generator.cpp |  5 +
 src/intel/compiler/brw_shader.cpp   |  4 
 5 files changed, 63 insertions(+)

diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
index 8e597b212a6..106bf03530d 100644
--- a/src/intel/compiler/brw_eu.h
+++ b/src/intel/compiler/brw_eu.h
@@ -500,6 +500,9 @@ brw_broadcast(struct brw_codegen *p,
   struct brw_reg src,
   struct brw_reg idx);
 
+void
+brw_rounding_mode(struct brw_codegen *p,
+  enum brw_rnd_mode mode);
 /***
  * brw_eu_util.c:
  */
diff --git a/src/intel/compiler/brw_eu_defines.h 
b/src/intel/compiler/brw_eu_defines.h
index da482b73c58..91d88fe8952 100644
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -388,6 +388,9 @@ enum opcode {
SHADER_OPCODE_TYPED_SURFACE_WRITE,
SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
 
+
+   SHADER_OPCODE_RND_MODE,
+
SHADER_OPCODE_MEMORY_FENCE,
 
SHADER_OPCODE_GEN4_SCRATCH_READ,
@@ -1214,4 +1217,18 @@ enum brw_message_target {
 /* R0 */
 # define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT 27
 
+/* CR0.0[5:4] Floating-Point Rounding Modes
+ *  Skylake PRM, Volume 7 Part 1, "Control Register", page 756
+ */
+
+#define BRW_CR0_RND_MODE_MASK 0x30
+#define BRW_CR0_RND_MODE_SHIFT4
+
+enum PACKED brw_rnd_mode {
+   BRW_RND_MODE_RTNE = 0,  /* Round to Nearest or Even */
+   BRW_RND_MODE_RU = 1,/* Round Up, toward +inf */
+   BRW_RND_MODE_RD = 2,/* Round Down, toward -inf */
+   BRW_RND_MODE_RTZ = 3/* Round Toward Zero */
+};
+
 #endif /* BRW_EU_DEFINES_H */
diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index 8c952e7da26..12164653e47 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -3530,3 +3530,37 @@ brw_WAIT(struct brw_codegen *p)
brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
 }
+
+/**
+ * Changes the floating point rounding mode updating the control register
+ * field defined at cr0.0[5-6] bits. This function supports the changes to
+ * RTNE (00), RU (01), RD (10) and RTZ (11) rounding using bitwise operations.
+ * Only RTNE and RTZ rounding are enabled at nir.
+ */
+
+void
+brw_rounding_mode(struct brw_codegen *p,
+  enum brw_rnd_mode mode)
+{
+   const unsigned bits  = mode << BRW_CR0_RND_MODE_SHIFT;
+
+   if (bits != BRW_CR0_RND_MODE_MASK) {
+  brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
+   brw_imm_ud(~BRW_CR0_RND_MODE_MASK));
+
+  /* From the Skylake PRM, Volume 7, page 760:
+   *  "Implementation Restriction on Register Access: When the control
+   *   register is used as an explicit source and/or destination, hardware
+   *   does not ensure execution pipeline coherency. Software must set the
+   *   thread control field to ‘switch’ for an instruction that uses
+   *   control register as an explicit operand."
+   */
+  brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
+}
+
+   if (bits) {
+  brw_inst *inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
+  brw_imm_ud(bits));
+  brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
+   }
+}
diff --git a/src/intel/compiler/brw_fs_generator.cpp 
b/src/intel/compiler/brw_fs_generator.cpp
index afaec5c9497..ff9880ebfe8 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -2144,6 +2144,11 @@ fs_generator::generate_code(const cfg_t *cfg, int 
dispatch_width)
  brw_DIM(p, dst, retype(src[0], BRW_REGISTER_TYPE_F));
  break;
 
+  case SHADER_OPCODE_RND_MODE:
+ assert(src[0].file == BRW_IMMEDIATE_VALUE);
+ brw_rounding_mode(p, (brw_rnd_mode) src[0].d);
+ break;
+
   default:
  unreachable("Unsupported opcode");
 
diff --git a/src/intel/compiler/brw_shader.cpp 
b/src/intel/compiler/brw_shader.cpp
index c62b8ba6140..19dd960be3a 100644
--- a/src/intel/compiler/brw_shader.cpp
+++ b/src/intel/compiler/brw_shader.cpp
@@ -486,6 +486,9 @@ brw_instruction_name(const struct gen_device_info *devinfo, 
enum opc

[Mesa-dev] [Bug 98691] egl.h:55: error: redefinition of typedef ‘EGLDisplay’

2017-08-29 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=98691

Vinson Lee  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #2 from Vinson Lee  ---
commit 4e97084591c206338af4425c33edb833de348816
Author: Matt Turner 
Date:   Thu Jul 6 18:40:53 2017 -0700

egl: Fix inclusion of egl.h+mesa_glinterop.h

Previously clang would warn about redefinition of typedef EGLDisplay. Avoid
this by adding preprocessor guards to mesa_glinterop.h and including it
after EGL.h is indirectly included.

Reviewed-by: Jordan Justen 

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] egl/dri2: only destroy created objects

2017-08-29 Thread Tapani Pälli



On 08/30/2017 06:50 AM, Tapani Pälli wrote:


On 08/29/2017 12:51 PM, Michael Olbrich wrote:

dri2_display_destroy may be called by dri2_initialize_wayland_drm() if
initialization fails. In this case, these objects may not be initialized.


Same thing can happen with dri2_initialize_wayland_swrast, would be good 
to fix both at one go.


and that is actually what the patch does, both call dri2_display_destroy 
the same way :)


Reviewed-by: Tapani Pälli 



Signed-off-by: Michael Olbrich 
---
  src/egl/drivers/dri2/egl_dri2.c | 9 ++---
  1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/egl/drivers/dri2/egl_dri2.c 
b/src/egl/drivers/dri2/egl_dri2.c

index a197e0456fd2..d86c6af225fc 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -944,9 +944,12 @@ dri2_display_destroy(_EGLDisplay *disp)
    zwp_linux_dmabuf_v1_destroy(dri2_dpy->wl_dmabuf);
    if (dri2_dpy->wl_shm)
    wl_shm_destroy(dri2_dpy->wl_shm);
-  wl_registry_destroy(dri2_dpy->wl_registry);
-  wl_event_queue_destroy(dri2_dpy->wl_queue);
-  wl_proxy_wrapper_destroy(dri2_dpy->wl_dpy_wrapper);
+  if (dri2_dpy->wl_registry)
+ wl_registry_destroy(dri2_dpy->wl_registry);
+  if (dri2_dpy->wl_queue)
+ wl_event_queue_destroy(dri2_dpy->wl_queue);
+  if (dri2_dpy->wl_dpy_wrapper)
+ wl_proxy_wrapper_destroy(dri2_dpy->wl_dpy_wrapper);
    u_vector_finish(&dri2_dpy->wl_modifiers.argb);
    u_vector_finish(&dri2_dpy->wl_modifiers.xrgb);
    u_vector_finish(&dri2_dpy->wl_modifiers.rgb565);


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] egl/dri2: only destroy created objects

2017-08-29 Thread Tapani Pälli


On 08/29/2017 12:51 PM, Michael Olbrich wrote:

dri2_display_destroy may be called by dri2_initialize_wayland_drm() if
initialization fails. In this case, these objects may not be initialized.


Same thing can happen with dri2_initialize_wayland_swrast, would be good 
to fix both at one go.



Signed-off-by: Michael Olbrich 
---
  src/egl/drivers/dri2/egl_dri2.c | 9 ++---
  1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index a197e0456fd2..d86c6af225fc 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -944,9 +944,12 @@ dri2_display_destroy(_EGLDisplay *disp)
zwp_linux_dmabuf_v1_destroy(dri2_dpy->wl_dmabuf);
if (dri2_dpy->wl_shm)
wl_shm_destroy(dri2_dpy->wl_shm);
-  wl_registry_destroy(dri2_dpy->wl_registry);
-  wl_event_queue_destroy(dri2_dpy->wl_queue);
-  wl_proxy_wrapper_destroy(dri2_dpy->wl_dpy_wrapper);
+  if (dri2_dpy->wl_registry)
+ wl_registry_destroy(dri2_dpy->wl_registry);
+  if (dri2_dpy->wl_queue)
+ wl_event_queue_destroy(dri2_dpy->wl_queue);
+  if (dri2_dpy->wl_dpy_wrapper)
+ wl_proxy_wrapper_destroy(dri2_dpy->wl_dpy_wrapper);
u_vector_finish(&dri2_dpy->wl_modifiers.argb);
u_vector_finish(&dri2_dpy->wl_modifiers.xrgb);
u_vector_finish(&dri2_dpy->wl_modifiers.rgb565);


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 101709] [llvmpipe] piglit gl-1.0-scissor-offscreen regression

2017-08-29 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=101709

Brian Paul  changed:

   What|Removed |Added

 Resolution|--- |FIXED
 Status|NEW |RESOLVED

--- Comment #4 from Brian Paul  ---
Fixed w/ commit 88cdf16871a0f1cd8ec3844072051ee38e945600

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] llvmpipe: initialize llvmpipe->dirty with LP_NEW_SCISSOR

2017-08-29 Thread Brian Paul

On 08/29/2017 06:17 PM, Roland Scheidegger wrote:

Looks good to me, thanks.

Reviewed-by: Roland Scheidegger 

Albeit I'm not quite sure why it is never set here?


This particular test sets a 0 x 0 scissor region.  All the 
pipe_scissor_state members are zero and when the state tracker does a 
memcmp() with its copy it sees no difference and skips the call to 
pipe->set_scissor_state().


-Brian



This bug looks similar in nature to the uninitialized fb (see
8bfe451ed30918244618608871423b2a72cf9767) and I thought that was
impossible to hit with gl state tracker...

Roland

Am 28.08.2017 um 22:20 schrieb Brian Paul:

If llvmpipe_set_scissor_states() is never called, we still need to be sure
that derived scissor/clip state is updated.  As of commit 743ad599a97d09b1
that function might not be called.

Fixes regressed Piglit gl-1.0-scissor-offscreen -fbo -auto test.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101709
---
  src/gallium/drivers/llvmpipe/lp_context.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/src/gallium/drivers/llvmpipe/lp_context.c 
b/src/gallium/drivers/llvmpipe/lp_context.c
index 599a9f1..613d60f 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -227,6 +227,12 @@ llvmpipe_create_context(struct pipe_screen *screen, void 
*priv,

 lp_reset_counters();

+   /* If llvmpipe_set_scissor_states() is never called, we still need to
+* make sure that derived scissor state is computed.
+* See https://bugs.freedesktop.org/show_bug.cgi?id=101709
+*/
+   llvmpipe->dirty |= LP_NEW_SCISSOR;
+
 return &llvmpipe->pipe;

   fail:





___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH v2 1/2] gallivm: correct channel shift logic on big endian

2017-08-29 Thread Matt Turner
On Tue, Aug 29, 2017 at 1:13 PM, Roland Scheidegger  wrote:
> That said, I already reviewed this:
> https://lists.freedesktop.org/archives/mesa-dev/2017-August/167515.html

Since I don't think Ben has commit access, it seems that you should be
the one to commit it as well.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] radeonsi: ensure cache flushes happen before SET_PREDICATION packets

2017-08-29 Thread Marek Olšák
For the series:

Reviewed-by: Marek Olšák 

Marek

On Fri, Aug 25, 2017 at 4:40 PM, Nicolai Hähnle  wrote:
> From: Nicolai Hähnle 
>
> The data is read when the render_cond_atom is emitted, so we must
> delay emitting the atom until after the flush.
>
> Fixes: 0fe0320dc074 ("radeonsi: use optimal packet order when doing a 
> pipeline sync")
> ---
>  src/gallium/drivers/radeon/r600_pipe_common.h |  3 ++-
>  src/gallium/drivers/radeon/r600_query.c   |  9 ++---
>  src/gallium/drivers/radeonsi/si_state_draw.c  | 15 ++-
>  3 files changed, 18 insertions(+), 9 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
> b/src/gallium/drivers/radeon/r600_pipe_common.h
> index dca56734cd7..f78e38b65af 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -54,21 +54,22 @@ struct u_log_context;
>  #define R600_RESOURCE_FLAG_TRANSFER(PIPE_RESOURCE_FLAG_DRV_PRIV 
> << 0)
>  #define R600_RESOURCE_FLAG_FLUSHED_DEPTH   (PIPE_RESOURCE_FLAG_DRV_PRIV 
> << 1)
>  #define R600_RESOURCE_FLAG_FORCE_TILING
> (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
>  #define R600_RESOURCE_FLAG_DISABLE_DCC (PIPE_RESOURCE_FLAG_DRV_PRIV 
> << 3)
>  #define R600_RESOURCE_FLAG_UNMAPPABLE  (PIPE_RESOURCE_FLAG_DRV_PRIV 
> << 4)
>
>  #define R600_CONTEXT_STREAMOUT_FLUSH   (1u << 0)
>  /* Pipeline & streamout query controls. */
>  #define R600_CONTEXT_START_PIPELINE_STATS  (1u << 1)
>  #define R600_CONTEXT_STOP_PIPELINE_STATS   (1u << 2)
> -#define R600_CONTEXT_PRIVATE_FLAG  (1u << 3)
> +#define R600_CONTEXT_FLUSH_FOR_RENDER_COND (1u << 3)
> +#define R600_CONTEXT_PRIVATE_FLAG  (1u << 4)
>
>  /* special primitive types */
>  #define R600_PRIM_RECTANGLE_LIST   PIPE_PRIM_MAX
>
>  #define R600_NOT_QUERY 0x
>
>  /* Debug flags. */
>  /* logging and features */
>  #define DBG_TEX(1 << 0)
>  #define DBG_NIR(1 << 1)
> diff --git a/src/gallium/drivers/radeon/r600_query.c 
> b/src/gallium/drivers/radeon/r600_query.c
> index f937612bc1f..03ff1018a71 100644
> --- a/src/gallium/drivers/radeon/r600_query.c
> +++ b/src/gallium/drivers/radeon/r600_query.c
> @@ -1828,25 +1828,28 @@ static void r600_render_condition(struct pipe_context 
> *ctx,
>  * from launching the compute grid.
>  */
> rctx->render_cond = NULL;
>
> ctx->get_query_result_resource(
> ctx, query, true, PIPE_QUERY_TYPE_U64, 0,
> &rquery->workaround_buf->b.b, 
> rquery->workaround_offset);
>
> /* Settings this in the render cond atom is too late,
>  * so set it here. */
> -   rctx->flags |= rctx->screen->barrier_flags.L2_to_cp;
> -
> -   atom->num_dw = 5;
> +   rctx->flags |= rctx->screen->barrier_flags.L2_to_cp |
> +  R600_CONTEXT_FLUSH_FOR_RENDER_COND;
>
> rctx->render_cond_force_off = old_force_off;
> +   }
> +
> +   if (needs_workaround) {
> +   atom->num_dw = 5;
> } else {
> for (qbuf = &rquery->buffer; qbuf; qbuf = 
> qbuf->previous)
> atom->num_dw += (qbuf->results_end / 
> rquery->result_size) * 5;
>
> if (rquery->b.type == 
> PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
> atom->num_dw *= R600_MAX_STREAMS;
> }
> }
>
> rctx->render_cond = query;
> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
> b/src/gallium/drivers/radeonsi/si_state_draw.c
> index 1d8be49a480..81751d2186e 100644
> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> @@ -1385,34 +1385,39 @@ void si_draw_vbo(struct pipe_context *ctx, const 
> struct pipe_draw_info *info)
>   SI_CONTEXT_PS_PARTIAL_FLUSH |
>   SI_CONTEXT_CS_PARTIAL_FLUSH))) {
> /* If we have to wait for idle, set all states first, so that 
> all
>  * SET packets are processed in parallel with previous draw 
> calls.
>  * Then upload descriptors, set shader pointers, and draw, and
>  * prefetch at the end. This ensures that the time the CUs
>  * are idle is very short. (there are only SET_SH packets 
> between
>  * the wait and the draw)
>  */
> struct r600_atom *shader_pointers = 
> &sctx->shader_pointers.atom;
> +   unsigned masked_atoms = 1u << shader_pointers->id;
>
> -   /* Emit all states except shader pointers

[Mesa-dev] [PATCH] egl/dri2: only destroy created objects

2017-08-29 Thread Michael Olbrich
dri2_display_destroy may be called by dri2_initialize_wayland_drm() if
initialization fails. In this case, these objects may not be initialized.

Signed-off-by: Michael Olbrich 
---
 src/egl/drivers/dri2/egl_dri2.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index a197e0456fd2..d86c6af225fc 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -944,9 +944,12 @@ dri2_display_destroy(_EGLDisplay *disp)
   zwp_linux_dmabuf_v1_destroy(dri2_dpy->wl_dmabuf);
   if (dri2_dpy->wl_shm)
   wl_shm_destroy(dri2_dpy->wl_shm);
-  wl_registry_destroy(dri2_dpy->wl_registry);
-  wl_event_queue_destroy(dri2_dpy->wl_queue);
-  wl_proxy_wrapper_destroy(dri2_dpy->wl_dpy_wrapper);
+  if (dri2_dpy->wl_registry)
+ wl_registry_destroy(dri2_dpy->wl_registry);
+  if (dri2_dpy->wl_queue)
+ wl_event_queue_destroy(dri2_dpy->wl_queue);
+  if (dri2_dpy->wl_dpy_wrapper)
+ wl_proxy_wrapper_destroy(dri2_dpy->wl_dpy_wrapper);
   u_vector_finish(&dri2_dpy->wl_modifiers.argb);
   u_vector_finish(&dri2_dpy->wl_modifiers.xrgb);
   u_vector_finish(&dri2_dpy->wl_modifiers.rgb565);
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] V2 radeonsi use STD430 packing of UBOs by default

2017-08-29 Thread Marek Olšák
I have to conclude that I don't see a way to use LOAD with CONSTBUF
and keep the same performance as before. It looks like there are some
deficiencies in our compiler stack that are unfixable in Mesa alone.

Marek

On Wed, Aug 30, 2017 at 2:11 AM, Marek Olšák  wrote:
> Related IRC discussion:
>
> 00:01 < mareko> arsenm: what are the chances I can convince you to
> allow me to set mayLoad = 0 on s_buffer_load_dword? :) the instruction
> always reads from read-only memory with Mesa
> 00:02 < mareko> apparently, readnone doesn't get through
> 00:02 < arsenm> mareko: you should get the same effect by having
> invariant on the MMO
> 00:03 < mareko> arsenm: and how would I set invariant on SI.load.const?
> 00:04 < arsenm> mareko: we create MMOs for a few other intrinsics
> already, it should be the same
> 00:05 < mareko> if only I had time to play with LLVM
> 00:05 < arsenm> mareko: it looks like that is already done so it might
> be a more specific problem
> 00:05 < arsenm> that rematerializable scalar loads patch is probably
> OK now though
> 00:07 < arsenm> https://reviews.llvm.org/D11621
>
> Marek
>
>
> On Wed, Aug 30, 2017 at 1:58 AM, Marek Olšák  wrote:
>> Interesting. It may be that glsl_to_tgsi uses copy propagation to fold
>> those CONST loads into operands, which puts them next to their uses in LLVM.
>>
>> I guess LLVM doesn't understand that s_buffer_load_dword loads from
>> immutable dereferenceable memory. It would benefit from mayLoad = 0 in
>> this case I think.
>>
>> Marek
>>
>> On Thu, Aug 24, 2017 at 11:48 AM, Timothy Arceri  
>> wrote:
>>>
>>>
>>> On 24/08/17 18:12, Nicolai Hähnle wrote:

 On 24.08.2017 09:45, Timothy Arceri wrote:
>
>
>
> On 22/08/17 22:14, Timothy Arceri wrote:
>>
>> I'm a little unsure what to do with this now. Below is my shader-db
>> results, the majority of negative changes are from Natural Selection
>> 2.
>>
>> I looked at some dumps of the worst Natural Selection 2 shaders and
>> it seems to just be scheduling differences causing the regressions.
>>
>> I tested with sisched but that just made things even worse.
>>
>> Obviously we should be aiming to improve the schedulare, but since
>> this regresses things and I have no evidence of it helping anything
>> it makes the case for adding it pretty weak.
>>
>> Thoughts??
>>
>> PERCENTAGE DELTASShaders SGPRs VGPRs SpillSGPR  MaxWaves
>> 
>>   All affected57972.92 3.05 %5.04 %   -2.94
>>   ---
>>   Total  722870.28 %0.34 %0.33 %  -0.21 %
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>
>
> As far as I can tell this is because after this chnage we end up with
> large sections of consecutive loads. Any thoughts on avoid this?


 Odd. Do you see the same change in TGSI?

 This is one of those things that ideally LLVM would be smart about, but
 unfortunately it isn't really.
>>>
>>>
>>> Yeah I assume it's very doable since SSA makes this stuff reasonably easy to
>>> deal with. However I'm not really sure where to begin, or how welcome a pass
>>> to do this sorting would be. We have a similar pass in nir for moving
>>> comparisons to where they are first used.
>>>
>>> The TGSI is introduces an extra temp to store the value of the LOAD, this is
>>> probably what triggers the difference in LLVM.
>>>
>>> eg.
>>>
>>>  LOAD TEMP[61], UBO[2], IMM[2].
>>>  LOAD TEMP[62], UBO[2], IMM[1].
>>>  LOAD TEMP[63], UBO[2], IMM[1].
>>>  LOAD TEMP[64], UBO[2], IMM[2].
>>>  DP4 TEMP[65].x, TEMP[60], TEMP[61]
>>>  DP4 TEMP[66].x, TEMP[60], TEMP[62]
>>>  MOV TEMP[65].y, TEMP[66].
>>>  DP4 TEMP[67].x, TEMP[60], TEMP[63]
>>>  MOV TEMP[65].z, TEMP[67].
>>>  DP4 TEMP[68].x, TEMP[60], TEMP[64]
>>>  MOV TEMP[69].w, TEMP[68].
>>>  MOV TEMP[69].xyz, TEMP[65].xyzx
>>>  LOAD TEMP[70], UBO[1], IMM[6].
>>>  LOAD TEMP[71], UBO[1], IMM[6].
>>>  DP4 TEMP[72].x, TEMP[69], TEMP[70]
>>>  DP4 TEMP[73].x, TEMP[69], TEMP[71]
>>>  LOAD TEMP[74], UBO[1], IMM[6].
>>>  LOAD TEMP[75], UBO[1], IMM[7].
>>>  LOAD TEMP[76], UBO[1], IMM[7].
>>>  LOAD TEMP[77], UBO[1], IMM[7].
>>>  DP4 TEMP[78].x, TEMP[69], TEMP[74]
>>>  DP4 TEMP[79].x, TEMP[69], TEMP[75]
>>>  MOV TEMP[78].y, TEMP[79].
>>>  DP4 TEMP[80].x, TEMP[69], TEMP[76]
>>>  MOV TEMP[78].z, TEMP[80].
>>>  DP4 TEMP[81].x, TEMP[69], TEMP[77]
>>>  MOV TEMP[78].w, TEMP[81].
>>>
>>> vs
>>>
>>>  DP4 TEMP[63].x, TEMP[62], CONST[2][0]
>>>  DP4 TEMP[64].x, TEMP[62], CONST[2][1]
>>>  MOV TEMP[63].y, TEMP[64].
>>>  DP4 TEMP[65].x, TEMP[62], CONST[2][2]
>>>  MOV TEMP[63].z, TEMP[65].xxx

Re: [Mesa-dev] [PATCH] llvmpipe: initialize llvmpipe->dirty with LP_NEW_SCISSOR

2017-08-29 Thread Roland Scheidegger
Looks good to me, thanks.

Reviewed-by: Roland Scheidegger 

Albeit I'm not quite sure why it is never set here?
This bug looks similar in nature to the uninitialized fb (see
8bfe451ed30918244618608871423b2a72cf9767) and I thought that was
impossible to hit with gl state tracker...

Roland

Am 28.08.2017 um 22:20 schrieb Brian Paul:
> If llvmpipe_set_scissor_states() is never called, we still need to be sure
> that derived scissor/clip state is updated.  As of commit 743ad599a97d09b1
> that function might not be called.
> 
> Fixes regressed Piglit gl-1.0-scissor-offscreen -fbo -auto test.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101709
> ---
>  src/gallium/drivers/llvmpipe/lp_context.c | 6 ++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/src/gallium/drivers/llvmpipe/lp_context.c 
> b/src/gallium/drivers/llvmpipe/lp_context.c
> index 599a9f1..613d60f 100644
> --- a/src/gallium/drivers/llvmpipe/lp_context.c
> +++ b/src/gallium/drivers/llvmpipe/lp_context.c
> @@ -227,6 +227,12 @@ llvmpipe_create_context(struct pipe_screen *screen, void 
> *priv,
>  
> lp_reset_counters();
>  
> +   /* If llvmpipe_set_scissor_states() is never called, we still need to
> +* make sure that derived scissor state is computed.
> +* See https://bugs.freedesktop.org/show_bug.cgi?id=101709
> +*/
> +   llvmpipe->dirty |= LP_NEW_SCISSOR;
> +
> return &llvmpipe->pipe;
>  
>   fail:
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] V2 radeonsi use STD430 packing of UBOs by default

2017-08-29 Thread Marek Olšák
Related IRC discussion:

00:01 < mareko> arsenm: what are the chances I can convince you to
allow me to set mayLoad = 0 on s_buffer_load_dword? :) the instruction
always reads from read-only memory with Mesa
00:02 < mareko> apparently, readnone doesn't get through
00:02 < arsenm> mareko: you should get the same effect by having
invariant on the MMO
00:03 < mareko> arsenm: and how would I set invariant on SI.load.const?
00:04 < arsenm> mareko: we create MMOs for a few other intrinsics
already, it should be the same
00:05 < mareko> if only I had time to play with LLVM
00:05 < arsenm> mareko: it looks like that is already done so it might
be a more specific problem
00:05 < arsenm> that rematerializable scalar loads patch is probably
OK now though
00:07 < arsenm> https://reviews.llvm.org/D11621

Marek


On Wed, Aug 30, 2017 at 1:58 AM, Marek Olšák  wrote:
> Interesting. It may be that glsl_to_tgsi uses copy propagation to fold
> those CONST loads into operands, which puts them next to their uses in LLVM.
>
> I guess LLVM doesn't understand that s_buffer_load_dword loads from
> immutable dereferenceable memory. It would benefit from mayLoad = 0 in
> this case I think.
>
> Marek
>
> On Thu, Aug 24, 2017 at 11:48 AM, Timothy Arceri  
> wrote:
>>
>>
>> On 24/08/17 18:12, Nicolai Hähnle wrote:
>>>
>>> On 24.08.2017 09:45, Timothy Arceri wrote:



 On 22/08/17 22:14, Timothy Arceri wrote:
>
> I'm a little unsure what to do with this now. Below is my shader-db
> results, the majority of negative changes are from Natural Selection
> 2.
>
> I looked at some dumps of the worst Natural Selection 2 shaders and
> it seems to just be scheduling differences causing the regressions.
>
> I tested with sisched but that just made things even worse.
>
> Obviously we should be aiming to improve the schedulare, but since
> this regresses things and I have no evidence of it helping anything
> it makes the case for adding it pretty weak.
>
> Thoughts??
>
> PERCENTAGE DELTASShaders SGPRs VGPRs SpillSGPR  MaxWaves
> 
>   All affected57972.92 3.05 %5.04 %   -2.94
>   ---
>   Total  722870.28 %0.34 %0.33 %  -0.21 %
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>


 As far as I can tell this is because after this chnage we end up with
 large sections of consecutive loads. Any thoughts on avoid this?
>>>
>>>
>>> Odd. Do you see the same change in TGSI?
>>>
>>> This is one of those things that ideally LLVM would be smart about, but
>>> unfortunately it isn't really.
>>
>>
>> Yeah I assume it's very doable since SSA makes this stuff reasonably easy to
>> deal with. However I'm not really sure where to begin, or how welcome a pass
>> to do this sorting would be. We have a similar pass in nir for moving
>> comparisons to where they are first used.
>>
>> The TGSI is introduces an extra temp to store the value of the LOAD, this is
>> probably what triggers the difference in LLVM.
>>
>> eg.
>>
>>  LOAD TEMP[61], UBO[2], IMM[2].
>>  LOAD TEMP[62], UBO[2], IMM[1].
>>  LOAD TEMP[63], UBO[2], IMM[1].
>>  LOAD TEMP[64], UBO[2], IMM[2].
>>  DP4 TEMP[65].x, TEMP[60], TEMP[61]
>>  DP4 TEMP[66].x, TEMP[60], TEMP[62]
>>  MOV TEMP[65].y, TEMP[66].
>>  DP4 TEMP[67].x, TEMP[60], TEMP[63]
>>  MOV TEMP[65].z, TEMP[67].
>>  DP4 TEMP[68].x, TEMP[60], TEMP[64]
>>  MOV TEMP[69].w, TEMP[68].
>>  MOV TEMP[69].xyz, TEMP[65].xyzx
>>  LOAD TEMP[70], UBO[1], IMM[6].
>>  LOAD TEMP[71], UBO[1], IMM[6].
>>  DP4 TEMP[72].x, TEMP[69], TEMP[70]
>>  DP4 TEMP[73].x, TEMP[69], TEMP[71]
>>  LOAD TEMP[74], UBO[1], IMM[6].
>>  LOAD TEMP[75], UBO[1], IMM[7].
>>  LOAD TEMP[76], UBO[1], IMM[7].
>>  LOAD TEMP[77], UBO[1], IMM[7].
>>  DP4 TEMP[78].x, TEMP[69], TEMP[74]
>>  DP4 TEMP[79].x, TEMP[69], TEMP[75]
>>  MOV TEMP[78].y, TEMP[79].
>>  DP4 TEMP[80].x, TEMP[69], TEMP[76]
>>  MOV TEMP[78].z, TEMP[80].
>>  DP4 TEMP[81].x, TEMP[69], TEMP[77]
>>  MOV TEMP[78].w, TEMP[81].
>>
>> vs
>>
>>  DP4 TEMP[63].x, TEMP[62], CONST[2][0]
>>  DP4 TEMP[64].x, TEMP[62], CONST[2][1]
>>  MOV TEMP[63].y, TEMP[64].
>>  DP4 TEMP[65].x, TEMP[62], CONST[2][2]
>>  MOV TEMP[63].z, TEMP[65].
>>  DP4 TEMP[66].x, TEMP[62], CONST[2][3]
>>  MOV TEMP[67].w, TEMP[66].
>>  MOV TEMP[67].xyz, TEMP[63].xyzx
>>  DP4 TEMP[68].x, TEMP[67], CONST[1][14]
>>  DP4 TEMP[69].x, TEMP[67], CONST[1][15]
>>  DP4 TEMP[70].x, TEMP[67], CONST[1][8]
>>  DP4 TEMP[71].x, TEMP[67], CONST[1][9]
>>  MOV TEMP[70].y, TEMP[71].
>>  DP4 TEMP[72].x, TEMP[67], CONST[1][10]
>>  MOV TEMP[70].z, TEMP[72].
>>  DP4 TEMP[73].

Re: [Mesa-dev] V2 radeonsi use STD430 packing of UBOs by default

2017-08-29 Thread Marek Olšák
Interesting. It may be that glsl_to_tgsi uses copy propagation to fold
those CONST loads into operands, which puts them next to their uses in LLVM.

I guess LLVM doesn't understand that s_buffer_load_dword loads from
immutable dereferenceable memory. It would benefit from mayLoad = 0 in
this case I think.

Marek

On Thu, Aug 24, 2017 at 11:48 AM, Timothy Arceri  wrote:
>
>
> On 24/08/17 18:12, Nicolai Hähnle wrote:
>>
>> On 24.08.2017 09:45, Timothy Arceri wrote:
>>>
>>>
>>>
>>> On 22/08/17 22:14, Timothy Arceri wrote:

 I'm a little unsure what to do with this now. Below is my shader-db
 results, the majority of negative changes are from Natural Selection
 2.

 I looked at some dumps of the worst Natural Selection 2 shaders and
 it seems to just be scheduling differences causing the regressions.

 I tested with sisched but that just made things even worse.

 Obviously we should be aiming to improve the schedulare, but since
 this regresses things and I have no evidence of it helping anything
 it makes the case for adding it pretty weak.

 Thoughts??

 PERCENTAGE DELTASShaders SGPRs VGPRs SpillSGPR  MaxWaves
 
   All affected57972.92 3.05 %5.04 %   -2.94
   ---
   Total  722870.28 %0.34 %0.33 %  -0.21 %

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 https://lists.freedesktop.org/mailman/listinfo/mesa-dev

>>>
>>>
>>> As far as I can tell this is because after this chnage we end up with
>>> large sections of consecutive loads. Any thoughts on avoid this?
>>
>>
>> Odd. Do you see the same change in TGSI?
>>
>> This is one of those things that ideally LLVM would be smart about, but
>> unfortunately it isn't really.
>
>
> Yeah I assume it's very doable since SSA makes this stuff reasonably easy to
> deal with. However I'm not really sure where to begin, or how welcome a pass
> to do this sorting would be. We have a similar pass in nir for moving
> comparisons to where they are first used.
>
> The TGSI is introduces an extra temp to store the value of the LOAD, this is
> probably what triggers the difference in LLVM.
>
> eg.
>
>  LOAD TEMP[61], UBO[2], IMM[2].
>  LOAD TEMP[62], UBO[2], IMM[1].
>  LOAD TEMP[63], UBO[2], IMM[1].
>  LOAD TEMP[64], UBO[2], IMM[2].
>  DP4 TEMP[65].x, TEMP[60], TEMP[61]
>  DP4 TEMP[66].x, TEMP[60], TEMP[62]
>  MOV TEMP[65].y, TEMP[66].
>  DP4 TEMP[67].x, TEMP[60], TEMP[63]
>  MOV TEMP[65].z, TEMP[67].
>  DP4 TEMP[68].x, TEMP[60], TEMP[64]
>  MOV TEMP[69].w, TEMP[68].
>  MOV TEMP[69].xyz, TEMP[65].xyzx
>  LOAD TEMP[70], UBO[1], IMM[6].
>  LOAD TEMP[71], UBO[1], IMM[6].
>  DP4 TEMP[72].x, TEMP[69], TEMP[70]
>  DP4 TEMP[73].x, TEMP[69], TEMP[71]
>  LOAD TEMP[74], UBO[1], IMM[6].
>  LOAD TEMP[75], UBO[1], IMM[7].
>  LOAD TEMP[76], UBO[1], IMM[7].
>  LOAD TEMP[77], UBO[1], IMM[7].
>  DP4 TEMP[78].x, TEMP[69], TEMP[74]
>  DP4 TEMP[79].x, TEMP[69], TEMP[75]
>  MOV TEMP[78].y, TEMP[79].
>  DP4 TEMP[80].x, TEMP[69], TEMP[76]
>  MOV TEMP[78].z, TEMP[80].
>  DP4 TEMP[81].x, TEMP[69], TEMP[77]
>  MOV TEMP[78].w, TEMP[81].
>
> vs
>
>  DP4 TEMP[63].x, TEMP[62], CONST[2][0]
>  DP4 TEMP[64].x, TEMP[62], CONST[2][1]
>  MOV TEMP[63].y, TEMP[64].
>  DP4 TEMP[65].x, TEMP[62], CONST[2][2]
>  MOV TEMP[63].z, TEMP[65].
>  DP4 TEMP[66].x, TEMP[62], CONST[2][3]
>  MOV TEMP[67].w, TEMP[66].
>  MOV TEMP[67].xyz, TEMP[63].xyzx
>  DP4 TEMP[68].x, TEMP[67], CONST[1][14]
>  DP4 TEMP[69].x, TEMP[67], CONST[1][15]
>  DP4 TEMP[70].x, TEMP[67], CONST[1][8]
>  DP4 TEMP[71].x, TEMP[67], CONST[1][9]
>  MOV TEMP[70].y, TEMP[71].
>  DP4 TEMP[72].x, TEMP[67], CONST[1][10]
>  MOV TEMP[70].z, TEMP[72].
>  DP4 TEMP[73].x, TEMP[67], CONST[1][11]
>  MOV TEMP[70].w, TEMP[73].
>  MOV TEMP[74].xyw, TEMP[70].xyxw
>
>>
>> Cheers,
>> Nicolai
>>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/7] radeonsi: make use of LOAD for UBOs

2017-08-29 Thread Timothy Arceri

On 30/08/17 09:39, Marek Olšák wrote:

Did you check shader-db for differences in code generation?


Yes, for more details and discussion please see the cover letter.

Thanks,
Tim



Marek

On Tue, Aug 22, 2017 at 2:14 PM, Timothy Arceri  wrote:

v2: always set can_speculate and allow_smem to true
---
  src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 31 +++
  1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index f8c99ff7e7..83cd8cd938 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -66,32 +66,36 @@ static LLVMValueRef get_buffer_size(
   LLVMConstInt(ctx->i32, 0x3FFF, 0), "");

 size = LLVMBuildUDiv(builder, size, stride, "");
 }

 return size;
  }

  static LLVMValueRef
  shader_buffer_fetch_rsrc(struct si_shader_context *ctx,
-const struct tgsi_full_src_register *reg)
+const struct tgsi_full_src_register *reg,
+bool ubo)
  {
 LLVMValueRef index;

 if (!reg->Register.Indirect) {
 index = LLVMConstInt(ctx->i32, reg->Register.Index, false);
 } else {
 index = si_get_indirect_index(ctx, ®->Indirect,
   reg->Register.Index);
 }

-   return ctx->abi.load_ssbo(&ctx->abi, index, false);
+   if (ubo)
+   return ctx->abi.load_ubo(&ctx->abi, index);
+   else
+   return ctx->abi.load_ssbo(&ctx->abi, index, false);
  }

  static bool tgsi_is_array_sampler(unsigned target)
  {
 return target == TGSI_TEXTURE_1D_ARRAY ||
target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
target == TGSI_TEXTURE_2D_ARRAY ||
target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
target == TGSI_TEXTURE_CUBE_ARRAY ||
target == TGSI_TEXTURE_SHADOWCUBE_ARRAY ||
@@ -356,26 +360,28 @@ static void load_fetch_args(
 struct lp_build_emit_data * emit_data)
  {
 struct si_shader_context *ctx = si_shader_context(bld_base);
 struct gallivm_state *gallivm = &ctx->gallivm;
 const struct tgsi_full_instruction * inst = emit_data->inst;
 unsigned target = inst->Memory.Texture;
 LLVMValueRef rsrc;

 emit_data->dst_type = ctx->v4f32;

-   if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
+   if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
+  inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) {
 LLVMBuilderRef builder = gallivm->builder;
 LLVMValueRef offset;
 LLVMValueRef tmp;

-   rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0]);
+   bool ubo = inst->Src[0].Register.File == TGSI_FILE_CONSTBUF;
+   rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], ubo);

 tmp = lp_build_emit_fetch(bld_base, inst, 1, 0);
 offset = LLVMBuildBitCast(builder, tmp, ctx->i32, "");

 buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
offset, false, false);
 } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE ||
tgsi_is_bindless_image_file(inst->Src[0].Register.File)) {
 LLVMValueRef coords;

@@ -407,21 +413,21 @@ static unsigned get_load_intr_attribs(bool can_speculate)

  static unsigned get_store_intr_attribs(bool writeonly_memory)
  {
 return writeonly_memory && HAVE_LLVM >= 0x0400 ?
   LP_FUNC_ATTR_INACCESSIBLE_MEM_ONLY :
   LP_FUNC_ATTR_WRITEONLY;
  }

  static void load_emit_buffer(struct si_shader_context *ctx,
  struct lp_build_emit_data *emit_data,
-bool can_speculate)
+bool can_speculate, bool allow_smem)
  {
 const struct tgsi_full_instruction *inst = emit_data->inst;
 uint writemask = inst->Dst[0].Register.WriteMask;
 uint count = util_last_bit(writemask);
 LLVMValueRef *args = emit_data->args;

 /* Don't use SMEM for shader buffer loads, because LLVM doesn't
  * select SMEM for SI.load.const with a non-constant offset, and
  * constant offsets practically don't exist with shader buffers.
  *
@@ -434,21 +440,21 @@ static void load_emit_buffer(struct si_shader_context 
*ctx,
  *   After that, si_memory_barrier should invalidate sL1 for shader
  *   buffers.
  */

 assert(LLVMConstIntGetZExtValue(args[1]) == 0); /* vindex */
 emit_data->output[emit_data->chan] =
 ac_build_buffer_load(&ctx->ac, args[0], count, NULL,
 

Re: [Mesa-dev] [PATCH 7/7] radeonsi: enable STD430 packing of UBOs by default

2017-08-29 Thread Marek Olšák
On Tue, Aug 22, 2017 at 2:14 PM, Timothy Arceri  wrote:
> Before this change we were defaulting to STD140 which is slightly
> less efficient at packing arrays.
> ---
>  src/gallium/drivers/radeonsi/si_pipe.c | 2 +-
>  src/mesa/state_tracker/st_context.c| 3 +++
>  2 files changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
> b/src/gallium/drivers/radeonsi/si_pipe.c
> index 1d1db02c76..b41a6f5ce7 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -522,20 +522,21 @@ static int si_get_param(struct pipe_screen* pscreen, 
> enum pipe_cap param)
> case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
> case PIPE_CAP_DOUBLES:
> case PIPE_CAP_TGSI_TEX_TXF_LZ:
> case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
> case PIPE_CAP_BINDLESS_TEXTURE:
> case PIPE_CAP_QUERY_TIMESTAMP:
> case PIPE_CAP_QUERY_TIME_ELAPSED:
> case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
> case PIPE_CAP_QUERY_SO_OVERFLOW:
> case PIPE_CAP_MEMOBJ:
> +   case PIPE_CAP_LOAD_CONSTBUF:
> return 1;
>
> case PIPE_CAP_INT64:
> case PIPE_CAP_INT64_DIVMOD:
> case PIPE_CAP_TGSI_CLOCK:
> case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
> case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
> return 1;
>
> case PIPE_CAP_TGSI_VOTE:
> @@ -611,21 +612,20 @@ static int si_get_param(struct pipe_screen* pscreen, 
> enum pipe_cap param)
> case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
> case PIPE_CAP_VERTEXID_NOBASE:
> case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
> case PIPE_CAP_MAX_WINDOW_RECTANGLES:
> case PIPE_CAP_NATIVE_FENCE_FD:
> case PIPE_CAP_TGSI_FS_FBFETCH:
> case PIPE_CAP_TGSI_MUL_ZERO_WINS:
> case PIPE_CAP_UMA:
> case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
> case PIPE_CAP_POST_DEPTH_COVERAGE:
> -   case PIPE_CAP_LOAD_CONSTBUF:
> return 0;
>
> case PIPE_CAP_QUERY_BUFFER_OBJECT:
> return si_have_tgsi_compute(sscreen);
>
> case PIPE_CAP_DRAW_PARAMETERS:
> case PIPE_CAP_MULTI_DRAW_INDIRECT:
> case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
> return sscreen->has_draw_indirect_multi;
>
> diff --git a/src/mesa/state_tracker/st_context.c 
> b/src/mesa/state_tracker/st_context.c
> index ef2e73e741..bd990fc34a 100644
> --- a/src/mesa/state_tracker/st_context.c
> +++ b/src/mesa/state_tracker/st_context.c
> @@ -367,20 +367,23 @@ st_create_context_priv( struct gl_context *ctx, struct 
> pipe_context *pipe,
>
> /* Need these flags:
>  */
> ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
>
> ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
>
> if (no_error)
>ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR;
>
> +   ctx->Const.UseSTD430AsDefaultPacking =
> +  screen->get_param(screen, PIPE_CAP_LOAD_CONSTBUF);
> +

This st/mesa change should be a separate patch. Also, ctx->Const is
typically initialized in st_extensions.c and CAPs are also checked
mostly there. If you split this patch into two, you can add my Rb to
both IF you initialize ctx->Const... in st_extensions.c.

Other than that, for patches 1-3, 5-6:

Reviewed-by: Marek Olšák 

I commented on patch 4.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/7] radeonsi: make use of LOAD for UBOs

2017-08-29 Thread Marek Olšák
Did you check shader-db for differences in code generation?

Marek

On Tue, Aug 22, 2017 at 2:14 PM, Timothy Arceri  wrote:
> v2: always set can_speculate and allow_smem to true
> ---
>  src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 31 
> +++
>  1 file changed, 21 insertions(+), 10 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c 
> b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> index f8c99ff7e7..83cd8cd938 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
> @@ -66,32 +66,36 @@ static LLVMValueRef get_buffer_size(
>   LLVMConstInt(ctx->i32, 0x3FFF, 0), "");
>
> size = LLVMBuildUDiv(builder, size, stride, "");
> }
>
> return size;
>  }
>
>  static LLVMValueRef
>  shader_buffer_fetch_rsrc(struct si_shader_context *ctx,
> -const struct tgsi_full_src_register *reg)
> +const struct tgsi_full_src_register *reg,
> +bool ubo)
>  {
> LLVMValueRef index;
>
> if (!reg->Register.Indirect) {
> index = LLVMConstInt(ctx->i32, reg->Register.Index, false);
> } else {
> index = si_get_indirect_index(ctx, ®->Indirect,
>   reg->Register.Index);
> }
>
> -   return ctx->abi.load_ssbo(&ctx->abi, index, false);
> +   if (ubo)
> +   return ctx->abi.load_ubo(&ctx->abi, index);
> +   else
> +   return ctx->abi.load_ssbo(&ctx->abi, index, false);
>  }
>
>  static bool tgsi_is_array_sampler(unsigned target)
>  {
> return target == TGSI_TEXTURE_1D_ARRAY ||
>target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
>target == TGSI_TEXTURE_2D_ARRAY ||
>target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
>target == TGSI_TEXTURE_CUBE_ARRAY ||
>target == TGSI_TEXTURE_SHADOWCUBE_ARRAY ||
> @@ -356,26 +360,28 @@ static void load_fetch_args(
> struct lp_build_emit_data * emit_data)
>  {
> struct si_shader_context *ctx = si_shader_context(bld_base);
> struct gallivm_state *gallivm = &ctx->gallivm;
> const struct tgsi_full_instruction * inst = emit_data->inst;
> unsigned target = inst->Memory.Texture;
> LLVMValueRef rsrc;
>
> emit_data->dst_type = ctx->v4f32;
>
> -   if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
> +   if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
> +  inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) {
> LLVMBuilderRef builder = gallivm->builder;
> LLVMValueRef offset;
> LLVMValueRef tmp;
>
> -   rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0]);
> +   bool ubo = inst->Src[0].Register.File == TGSI_FILE_CONSTBUF;
> +   rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], ubo);
>
> tmp = lp_build_emit_fetch(bld_base, inst, 1, 0);
> offset = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
>
> buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
>offset, false, false);
> } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE ||
>tgsi_is_bindless_image_file(inst->Src[0].Register.File)) {
> LLVMValueRef coords;
>
> @@ -407,21 +413,21 @@ static unsigned get_load_intr_attribs(bool 
> can_speculate)
>
>  static unsigned get_store_intr_attribs(bool writeonly_memory)
>  {
> return writeonly_memory && HAVE_LLVM >= 0x0400 ?
>   LP_FUNC_ATTR_INACCESSIBLE_MEM_ONLY :
>   LP_FUNC_ATTR_WRITEONLY;
>  }
>
>  static void load_emit_buffer(struct si_shader_context *ctx,
>  struct lp_build_emit_data *emit_data,
> -bool can_speculate)
> +bool can_speculate, bool allow_smem)
>  {
> const struct tgsi_full_instruction *inst = emit_data->inst;
> uint writemask = inst->Dst[0].Register.WriteMask;
> uint count = util_last_bit(writemask);
> LLVMValueRef *args = emit_data->args;
>
> /* Don't use SMEM for shader buffer loads, because LLVM doesn't
>  * select SMEM for SI.load.const with a non-constant offset, and
>  * constant offsets practically don't exist with shader buffers.
>  *
> @@ -434,21 +440,21 @@ static void load_emit_buffer(struct si_shader_context 
> *ctx,
>  *   After that, si_memory_barrier should invalidate sL1 for 
> shader
>  *   buffers.
>  */
>
> assert(LLVMConstIntGetZExtValue(args[1]) == 0); /* vindex */
> emit_data->output[emit_data->chan] =
> ac_build_buffer_load(&ctx->ac, args[0

Re: [Mesa-dev] [PATCH 5/8] glcpp: Avoid unnecessary call to strlen

2017-08-29 Thread Timothy Arceri

On 30/08/17 05:56, Thomas Helland wrote:

Length of the token was already calculated by flex and stored in yyleng,
no need to implicitly call strlen() via linear_strdup().
---
  src/compiler/glsl/glcpp/glcpp-lex.l | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/glcpp/glcpp-lex.l 
b/src/compiler/glsl/glcpp/glcpp-lex.l
index 381b97364a..93e5cb3845 100644
--- a/src/compiler/glsl/glcpp/glcpp-lex.l
+++ b/src/compiler/glsl/glcpp/glcpp-lex.l
@@ -101,7 +101,8 @@ void glcpp_set_column (int  column_no , yyscan_t yyscanner);
  #define RETURN_STRING_TOKEN(token)\
do {\
if (! parser->skipping) {\
-   yylval->str = linear_strdup(yyextra->linalloc, yytext); 
  \
+   yylval->str = linear_alloc_child(yyextra->linalloc, 
yyleng + 1);  \
+   memcpy(yylval->str, yytext, yyleng + 1);\
RETURN_TOKEN_NEVER_SKIP (token);\
}   \
} while(0)



Copy and pasted comments from the last time this was sent:

Seems reasonable. This pattern is also happening in glsl_lexer.ll maybe 
we should do this there also?


It might also make sense to add a comment here to say why we don't use 
linear_strdup(), so the optimisation doesn't get removed in future.


With that:

Reviewed-by: Timothy Arceri 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/5] intel/isl: Set MOCS based on view usage

2017-08-29 Thread Jason Ekstrand

On August 29, 2017 3:00:51 PM Kenneth Graunke  wrote:


On Tuesday, August 1, 2017 3:48:29 PM PDT Jason Ekstrand wrote:

This little series changes things around so that, instead of passing MOCS
values into ISL, ISL knows how to set them itself.  This allows us to
centralize some of the decisions about how MOCS gets set for surfaces and
hopefully, if we ever do anything crazy in the future, we can share it
between GL and Vulkan.  Unfortunately, surfaces are not the only places
where MOCS is used.  It also shows up in vertex buffers, index buffers, and
streamout buffers.  However those are always set to the platform equivalent
of I915_MOCS_CACHED (and that's not all that liable to change) so they're
not particularly interesting.

If people like this approach, I'd like to Cc it to stable for 17.2 because
it has the side-effect of making Vulkan MOCS a bit more sane.

Jason Ekstrand (5):
  intel/isl: Set MOCS based on usage for surface states
  intel/blorp: Delete the MOCS plumbing
  i965: Stop passing MOCS information into ISL
  anv: Stop passing MOCS information into ISL
  intel/isl: Get rid of the mocs fields in fill/emit_info

 src/intel/blorp/blorp.h  |  6 ---
 src/intel/blorp/blorp_genX_exec.h| 37 +++--
 src/intel/isl/isl.h  | 22 --
 src/intel/isl/isl_emit_depth_stencil.c   | 12 +++---
 src/intel/isl/isl_genX_mocs.h| 53 
 src/intel/isl/isl_surface_state.c|  9 ++--
 src/intel/vulkan/anv_blorp.c |  3 --
 src/intel/vulkan/anv_device.c|  1 -
 src/intel/vulkan/anv_image.c | 12 ++
 src/intel/vulkan/anv_private.h   |  2 -
 src/intel/vulkan/genX_cmd_buffer.c   | 13 ++
 src/intel/vulkan/genX_state.c|  3 --
 src/mesa/drivers/dri/i965/brw_blorp.c| 15 ---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 26 ++--
 14 files changed, 101 insertions(+), 113 deletions(-)
 create mode 100644 src/intel/isl/isl_genX_mocs.h




I guess this is okay.  I'm not crazy about it, mostly because the logic is
still spread out in a bunch of places.  If there was a vertex buffer usage
bit and we could handle that there, I think that would be nice, even if it
doesn't otherwise seem relevant to ISL.  *shrug*


Yeah, vertex buffers make this a bit awkward.  I wish there were a better 
way than scattering things around more.  I'm honestly not sure if this 
series improves the scattering or not.



Having ISL do this itself rather than passing in values makes sense.

Reviewed-by: Kenneth Graunke 



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radv: Disable multilayer & multilevel DCC.

2017-08-29 Thread Bas Nieuwenhuizen
The current DCC init routine doesn't account for initializing a
single layer or level. Multilayer seems hard for small textures on
pre-GFX9 as tre metadata for the layers can be interleaved. For
GFX9 multilevel textures are a problem for similar reasons.

So just disable this for now, until we handle the texture modes
correctly.

Fixes: f4e499ec791 "radv: add initial non-conformant radv vulkan driver"
---
 src/amd/vulkan/radv_image.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index eb8ceef1db1..4b1bbefeecb 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -118,6 +118,7 @@ radv_init_surface(struct radv_device *device,
   VK_IMAGE_USAGE_STORAGE_BIT)) ||
(pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) ||
 (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) ||
+pCreateInfo->mipLevels > 1 || pCreateInfo->arrayLayers > 1 ||
 device->physical_device->rad_info.chip_class < VI ||
 create_info->scanout || (device->debug_flags & RADV_DEBUG_NO_DCC) 
||
 !radv_is_colorbuffer_format_supported(pCreateInfo->format, 
&blendable))
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/6] i965: Use BLORP for buffer object stall avoidance blits instead of BLT.

2017-08-29 Thread Jason Ekstrand

On August 29, 2017 2:29:25 PM Kenneth Graunke  wrote:


Improves performance of GFXBench4 tests at 1024x768 on a Kabylake GT2:
- Manhattan 3.1 by 1.32134% +/- 0.322734% (n=8).
- Car Chase by 1.25607% +/- 0.291262% (n=5).


Woohoo!  That's not bad.  Apart from my little niggle on patch 1, 
everything looks good to me.


Reviewed-by: Jason Ekstrand 


---
 src/mesa/drivers/dri/i965/intel_buffer_objects.c | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c 
b/src/mesa/drivers/dri/i965/intel_buffer_objects.c

index 01443c2b7fc..49e68bd7392 100644
--- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
@@ -37,7 +37,7 @@
 #include "x86/common_x86_asm.h"

 #include "brw_context.h"
-#include "intel_blit.h"
+#include "brw_blorp.h"
 #include "intel_buffer_objects.h"
 #include "intel_batchbuffer.h"
 #include "intel_tiled_memcpy.h"
@@ -294,9 +294,9 @@ brw_buffer_subdata(struct gl_context *ctx,

  brw_bo_subdata(temp_bo, 0, size, data);

- intel_emit_linear_blit(brw,
-intel_obj->buffer, offset,
+ brw_blorp_copy_buffers(brw,
 temp_bo, 0,
+intel_obj->buffer, offset,
 size);
  brw_emit_mi_flush(brw);

@@ -533,11 +533,11 @@ brw_flush_mapped_buffer_range(struct gl_context *ctx,
 * another blit of that area and the complete newer data will land the
 * second time.
 */
-   intel_emit_linear_blit(brw,
-  intel_obj->buffer,
-  obj->Mappings[index].Offset + offset,
+   brw_blorp_copy_buffers(brw,
   intel_obj->range_map_bo[index],
   intel_obj->map_extra[index] + offset,
+  intel_obj->buffer,
+  obj->Mappings[index].Offset + offset,
   length);
mark_buffer_gpu_usage(intel_obj,
  obj->Mappings[index].Offset + offset,
@@ -565,10 +565,10 @@ brw_unmap_buffer(struct gl_context *ctx,
   brw_bo_unmap(intel_obj->range_map_bo[index]);

   if (!(obj->Mappings[index].AccessFlags & GL_MAP_FLUSH_EXPLICIT_BIT)) {
- intel_emit_linear_blit(brw,
-intel_obj->buffer, 
obj->Mappings[index].Offset,

+ brw_blorp_copy_buffers(brw,
 intel_obj->range_map_bo[index],
 intel_obj->map_extra[index],
+intel_obj->buffer, 
obj->Mappings[index].Offset,

 obj->Mappings[index].Length);
  mark_buffer_gpu_usage(intel_obj, obj->Mappings[index].Offset,
obj->Mappings[index].Length);
@@ -646,9 +646,9 @@ brw_copy_buffer_subdata(struct gl_context *ctx,
dst_bo = intel_bufferobj_buffer(brw, intel_dst, write_offset, size, true);
src_bo = intel_bufferobj_buffer(brw, intel_src, read_offset, size, false);

-   intel_emit_linear_blit(brw,
-  dst_bo, write_offset,
-  src_bo, read_offset, size);
+   brw_blorp_copy_buffers(brw,
+  src_bo, read_offset,
+  dst_bo, write_offset, size);

/* Since we've emitted some blits to buffers that will (likely) be used
 * in rendering operations in other cache domains in this batch, emit a
--
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/6] blorp: Turn anv_CmdCopyBuffer into a blorp_buffer_copy() helper.

2017-08-29 Thread Jason Ekstrand

On August 29, 2017 3:14:08 PM Chris Wilson  wrote:


Quoting Kenneth Graunke (2017-08-29 22:28:38)

Anvil already had code to copy between two buffer objects in the most
efficient way possible, using large bpp copies, then smaller bpp copies.

This patch moves that logic into BLORP as blorp_buffer_copy(), so we
can reuse it in i965 as well.


Just some food for thought...

The render access pattern is terrible for wide linear buffers, as the
pixel block ends up being scattered across many pages, thrashing the TLB.
But if instead of telling it to copy as wide as possible, you just
limited it to a stride of 4096/4 (if memory serves 4 is the largest
pixel block height) then each pixel block is located inside just one
page (and hopefully cluster across the eu, further reducing the stride
may help if the spread ends up taller than wider).


Yes, some benchmarking would have to be done to truly substantiate Ken's 
claim.  I think you may be right that the access patterns aren't great.  If 
we hit the case where you do a one pixel tall blit, that's also not good.  
I think there is a lot of time that could be sink into making this more 
efficient.



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/6] blorp: Turn anv_CmdCopyBuffer into a blorp_buffer_copy() helper.

2017-08-29 Thread Jason Ekstrand

On August 29, 2017 2:29:26 PM Kenneth Graunke  wrote:


Anvil already had code to copy between two buffer objects in the most
efficient way possible, using large bpp copies, then smaller bpp copies.

This patch moves that logic into BLORP as blorp_buffer_copy(), so we
can reuse it in i965 as well.
---
 src/intel/blorp/blorp.h  |   6 ++
 src/intel/blorp/blorp_blit.c | 128 +++
 src/intel/vulkan/anv_blorp.c | 104 +++
 3 files changed, 141 insertions(+), 97 deletions(-)

diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
index 4d9a44b0926..951a97f5daa 100644
--- a/src/intel/blorp/blorp.h
+++ b/src/intel/blorp/blorp.h
@@ -132,6 +132,12 @@ blorp_copy(struct blorp_batch *batch,
uint32_t dst_x, uint32_t dst_y,
uint32_t src_width, uint32_t src_height);

+void
+blorp_buffer_copy(struct blorp_batch *batch,
+  void *src_bo, uint64_t src_offset,
+  void *dst_bo, uint64_t dst_offset,
+  uint64_t size);
+
 void
 blorp_fast_clear(struct blorp_batch *batch,
  const struct blorp_surf *surf, enum isl_format format,
diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
index 35008cbbb0a..998fd9b6d39 100644
--- a/src/intel/blorp/blorp_blit.c
+++ b/src/intel/blorp/blorp_blit.c
@@ -2513,3 +2513,131 @@ blorp_copy(struct blorp_batch *batch,

do_blorp_blit(batch, ¶ms, &wm_prog_key, &coords);
 }
+
+static enum isl_format
+isl_format_for_size(unsigned size_B)
+{
+   switch (size_B) {
+   case 1:  return ISL_FORMAT_R8_UINT;
+   case 2:  return ISL_FORMAT_R8G8_UINT;
+   case 4:  return ISL_FORMAT_R8G8B8A8_UINT;
+   case 8:  return ISL_FORMAT_R16G16B16A16_UINT;
+   case 16: return ISL_FORMAT_R32G32B32A32_UINT;
+   default:
+  unreachable("Not a power-of-two format size");
+   }
+}
+
+/**
+ * Returns the greatest common divisor of a and b that is a power of two.
+ */
+static uint64_t
+gcd_pow2_u64(uint64_t a, uint64_t b)
+{
+   assert(a > 0 || b > 0);
+
+   unsigned a_log2 = ffsll(a) - 1;
+   unsigned b_log2 = ffsll(b) - 1;
+
+   /* If either a or b is 0, then a_log2 or b_log2 till be UINT_MAX in which
+* case, the MIN2() will take the other one.  If both are 0 then we will
+* hit the assert above.
+*/
+   return 1 << MIN2(a_log2, b_log2);
+}
+
+static void
+do_buffer_copy(struct blorp_batch *batch,
+   void *src_bo, uint64_t src_offset,
+   void *dst_bo, uint64_t dst_offset,
+   int width, int height, int block_size)
+{
+   /* The actual format we pick doesn't matter as blorp will throw it away.
+* The only thing that actually matters is the size.
+*/
+   enum isl_format format = isl_format_for_size(block_size);
+
+   UNUSED bool ok;
+   struct isl_surf surf;
+   ok = isl_surf_init(batch->blorp->isl_dev, &surf,
+  .dim = ISL_SURF_DIM_2D,
+  .format = format,
+  .width = width,
+  .height = height,
+  .depth = 1,
+  .levels = 1,
+  .array_len = 1,
+  .samples = 1,
+  .row_pitch = width * block_size,
+  .usage = ISL_SURF_USAGE_TEXTURE_BIT |
+   ISL_SURF_USAGE_RENDER_TARGET_BIT,
+  .tiling_flags = ISL_TILING_LINEAR_BIT);
+   assert(ok);
+
+   struct blorp_surf src_blorp_surf = {
+  .surf = &surf,
+  .addr = {
+ .buffer = src_bo,
+ .offset = src_offset,
+  },
+   };
+
+   struct blorp_surf dst_blorp_surf = {
+  .surf = &surf,
+  .addr = {
+ .buffer = dst_bo,
+ .offset = dst_offset,
+  },
+   };
+
+   blorp_copy(batch, &src_blorp_surf, 0, 0, &dst_blorp_surf, 0, 0,
+  0, 0, 0, 0, width, height);
+}
+
+/* This is maximum possible width/height our HW can handle */
+#define MAX_SURFACE_DIM (1ull << 14)
+
+void
+blorp_buffer_copy(struct blorp_batch *batch,
+  void *src_bo, uint64_t src_offset,
+  void *dst_bo, uint64_t dst_offset,


I think I have a mild preference for making this take a pair of 
blorp_address structs instead of void pointers and offsets.  Of course, if 
that's a pain for some reason, this is fine.



+  uint64_t size)
+{
+   uint64_t copy_size = size;
+
+   /* First, we compute the biggest format that can be used with the
+* given offsets and size.
+*/
+   int bs = 16;
+   bs = gcd_pow2_u64(bs, src_offset);
+   bs = gcd_pow2_u64(bs, dst_offset);
+   bs = gcd_pow2_u64(bs, size);
+
+   /* First, we make a bunch of max-sized copies */
+   uint64_t max_copy_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;
+   while (copy_size >= max_copy_size) {
+  do_buffer_copy(batch, src_bo, src_offset, dst_bo, dst_offset,
+ MAX_SURFACE_DIM, MAX_SURFACE_DIM, bs);
+  copy_size -= max_copy_size;
+  

[Mesa-dev] [PATCH 2/2] radeonsi: eliminate PS color outputs when colormask kills them

2017-08-29 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_state.c | 4 
 src/gallium/drivers/radeonsi/si_state.h | 1 +
 src/gallium/drivers/radeonsi/si_state_shaders.c | 1 +
 3 files changed, 6 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 41b08f8..5ee8bb9 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -434,20 +434,22 @@ static void *si_create_blend_state_mode(struct 
pipe_context *ctx,
   S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) |
   S_028B70_ALPHA_TO_MASK_OFFSET0(2) |
   S_028B70_ALPHA_TO_MASK_OFFSET1(2) |
   S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
   S_028B70_ALPHA_TO_MASK_OFFSET3(2));
 
if (state->alpha_to_coverage)
blend->need_src_alpha_4bit |= 0xf;
 
blend->cb_target_mask = 0;
+   blend->cb_target_enabled_4bit = 0;
+
for (int i = 0; i < 8; i++) {
/* state->rt entries > 0 only written if independent blending */
const int j = state->independent_blend_enable ? i : 0;
 
unsigned eqRGB = state->rt[j].rgb_func;
unsigned srcRGB = state->rt[j].rgb_src_factor;
unsigned dstRGB = state->rt[j].rgb_dst_factor;
unsigned eqA = state->rt[j].alpha_func;
unsigned srcA = state->rt[j].alpha_src_factor;
unsigned dstA = state->rt[j].alpha_dst_factor;
@@ -475,20 +477,22 @@ static void *si_create_blend_state_mode(struct 
pipe_context *ctx,
if (blend->dual_src_blend &&
(eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX ||
 eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) {
assert(!"Unsupported equation for dual source 
blending");
si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, 
blend_cntl);
continue;
}
 
/* cb_render_state will disable unused ones */
blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 
* i);
+   if (state->rt[j].colormask)
+   blend->cb_target_enabled_4bit |= 0xf << (4 * i);
 
if (!state->rt[j].colormask || !state->rt[j].blend_enable) {
si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, 
blend_cntl);
continue;
}
 
/* Blending optimizations for RB+.
 * These transformations don't change the behavior.
 *
 * First, get rid of DST in the blend factors:
diff --git a/src/gallium/drivers/radeonsi/si_state.h 
b/src/gallium/drivers/radeonsi/si_state.h
index 26c7b4c..7b7d96c 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -48,20 +48,21 @@ struct si_shader_selector;
 
 struct si_state_blend {
struct si_pm4_state pm4;
uint32_tcb_target_mask;
boolalpha_to_coverage;
boolalpha_to_one;
booldual_src_blend;
/* Set 0xf or 0x0 (4 bits) per render target if the following is
 * true. ANDed with spi_shader_col_format.
 */
+   unsignedcb_target_enabled_4bit;
unsignedblend_enable_4bit;
unsignedneed_src_alpha_4bit;
 };
 
 struct si_state_rasterizer {
struct si_pm4_state pm4;
/* poly offset states for 16-bit, 24-bit, and 32-bit zbuffers */
struct si_pm4_state *pm4_poly_offset;
unsignedpa_sc_line_stipple;
unsignedpa_cl_clip_cntl;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 71d7987..061b3d2 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1348,20 +1348,21 @@ static inline void si_shader_selector_key(struct 
pipe_context *ctx,
 */
key->part.ps.epilog.spi_shader_col_format =
(blend->blend_enable_4bit & 
blend->need_src_alpha_4bit &
 
sctx->framebuffer.spi_shader_col_format_blend_alpha) |
(blend->blend_enable_4bit & 
~blend->need_src_alpha_4bit &
 sctx->framebuffer.spi_shader_col_format_blend) 
|
(~blend->blend_enable_4bit & 
blend->need_src_alpha_4bit &
 sctx->framebuffer.spi_shader_col_format_alpha) 
|
(~blend->blend_enable_4bit & 
~blend->need_src_alpha_4bit &
 sctx->

[Mesa-dev] [PATCH 1/2] gallium/radeon: sort DBG shader flags according to pipe_shader_type

2017-08-29 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/r600/r600_pipe.c  |  2 +-
 src/gallium/drivers/radeon/r600_pipe_common.c | 20 ++--
 src/gallium/drivers/radeon/r600_pipe_common.h | 25 -
 src/gallium/drivers/radeonsi/si_pipe.c|  5 ++---
 4 files changed, 17 insertions(+), 35 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 023f1b4..9844add 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -659,21 +659,21 @@ struct pipe_screen *r600_screen_create(struct 
radeon_winsys *ws,
if (rscreen->b.info.chip_class >= EVERGREEN) {
rscreen->b.b.is_format_supported = 
evergreen_is_format_supported;
} else {
rscreen->b.b.is_format_supported = r600_is_format_supported;
}
 
rscreen->b.debug_flags |= debug_get_flags_option("R600_DEBUG", 
r600_debug_options, 0);
if (debug_get_bool_option("R600_DEBUG_COMPUTE", FALSE))
rscreen->b.debug_flags |= DBG_COMPUTE;
if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE))
-   rscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | 
DBG_CS | DBG_TCS | DBG_TES;
+   rscreen->b.debug_flags |= DBG_ALL_SHADERS | DBG_FS;
if (!debug_get_bool_option("R600_HYPERZ", TRUE))
rscreen->b.debug_flags |= DBG_NO_HYPERZ;
 
if (rscreen->b.family == CHIP_UNKNOWN) {
fprintf(stderr, "r600: Unknown chipset 0x%04X\n", 
rscreen->b.info.pci_id);
FREE(rscreen);
return NULL;
}
 
/* Figure out streamout kernel support. */
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 7c12565..6b61901 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -892,22 +892,21 @@ static const char *r600_get_family_name(const struct 
r600_common_screen *rscreen
case CHIP_STONEY: return "AMD STONEY";
case CHIP_VEGA10: return "AMD VEGA10";
case CHIP_RAVEN: return "AMD RAVEN";
default: return "AMD unknown";
}
 }
 
 static void r600_disk_cache_create(struct r600_common_screen *rscreen)
 {
/* Don't use the cache if shader dumping is enabled. */
-   if (rscreen->debug_flags &
-   (DBG_FS | DBG_VS | DBG_TCS | DBG_TES | DBG_GS | DBG_PS | DBG_CS))
+   if (rscreen->debug_flags & DBG_ALL_SHADERS)
return;
 
uint32_t mesa_timestamp;
if (disk_cache_get_function_timestamp(r600_disk_cache_create,
  &mesa_timestamp)) {
char *timestamp_str;
int res = -1;
if (rscreen->chip_class < SI) {
res = asprintf(×tamp_str, "%u",mesa_timestamp);
}
@@ -1528,36 +1527,21 @@ void r600_destroy_common_screen(struct 
r600_common_screen *rscreen)
slab_destroy_parent(&rscreen->pool_transfers);
 
disk_cache_destroy(rscreen->disk_shader_cache);
rscreen->ws->destroy(rscreen->ws);
FREE(rscreen);
 }
 
 bool r600_can_dump_shader(struct r600_common_screen *rscreen,
  unsigned processor)
 {
-   switch (processor) {
-   case PIPE_SHADER_VERTEX:
-   return (rscreen->debug_flags & DBG_VS) != 0;
-   case PIPE_SHADER_TESS_CTRL:
-   return (rscreen->debug_flags & DBG_TCS) != 0;
-   case PIPE_SHADER_TESS_EVAL:
-   return (rscreen->debug_flags & DBG_TES) != 0;
-   case PIPE_SHADER_GEOMETRY:
-   return (rscreen->debug_flags & DBG_GS) != 0;
-   case PIPE_SHADER_FRAGMENT:
-   return (rscreen->debug_flags & DBG_PS) != 0;
-   case PIPE_SHADER_COMPUTE:
-   return (rscreen->debug_flags & DBG_CS) != 0;
-   default:
-   return false;
-   }
+   return rscreen->debug_flags && (1 << processor);
 }
 
 bool r600_extra_shader_checks(struct r600_common_screen *rscreen, unsigned 
processor)
 {
return (rscreen->debug_flags & DBG_CHECK_IR) ||
   r600_can_dump_shader(rscreen, processor);
 }
 
 void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct 
pipe_resource *dst,
  uint64_t offset, uint64_t size, unsigned value)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 59886ec..b0cd502 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -62,34 +62,33 @@ struct u_log_context;
 #define R600_CONTEXT_START_PIPELINE_STATS  (1u << 1)
 #define R600_CONTEXT_STOP_PIPELINE_STATS   (1u << 2)
 #define R600_CONTEXT_PRIVATE_FLAG  (1u << 3)
 
 /* special primitive types */
 #define R600_PRIM_RECTANGLE_LIST   PIPE_PRIM_MAX
 
 

Re: [Mesa-dev] [PATCH 1/7] gallium: add CONSTBUF type to tgsi_file_type

2017-08-29 Thread Marek Olšák
I'd like CONSTBUF to stay. I think CONST is already too overloaded.

If we start treating CONST[0] and CONST[0][0] differently, things are
just going to get more ugly. If the second dimension is unspecified,
it's implied to be 0. TGSI doesn't allow specifying the second
dimension index and omit the first dimenstion, which means it can't
say that CONST[0] is constbuf 0 without changing existing conventions.

Marek


On Wed, Aug 23, 2017 at 5:15 PM, Roland Scheidegger  wrote:
> Am 23.08.2017 um 17:03 schrieb Nicolai Hähnle:
>> On 23.08.2017 16:03, Roland Scheidegger wrote:
>>> Am 23.08.2017 um 15:49 schrieb Ilia Mirkin:
 On Wed, Aug 23, 2017 at 9:20 AM, Nicolai Hähnle 
 wrote:
> On 22.08.2017 16:56, Ilia Mirkin wrote:
>>
>> On Tue, Aug 22, 2017 at 10:51 AM, Roland Scheidegger
>> 
>> wrote:
>>>
>>> I am probably missing something here, but why do you need a new
>>> register
>>> file? Since you couldn't use LOAD with TGSI_FILE_CONSTANT before,
>>> can't
>>> you just allow LOAD with TGSI_FILE_CONSTANT and achieve the same
>>> thing?
>>> Or do you need to know how it's going to be accessed in advance?
>>
>>
>> With bindless, LOAD can take a CONST I believe [which contains the
>> value of the bindless id]. I think it's nice to keep those concepts
>> separate... having CONST sometimes mean the value and other times mean
>> the address is a bit weird. This way CONSTBUF[0] is the address of the
>> 0th constbuf.
>
>
> I'm still not quite convinced. The levels of indirection should
> clarify the
> meaning, shouldn't they?
>
> You get
>
>LOAD dst, CONST[0][0], IMM[0]
>
> when loading from offset IMM[0] of a bindless buffer whose handle is
> at the
> beginning of the buffer CONST[0].
>
> You get
>
>LOAD dst, CONST[0], IMM[0]
>
> when loading from offset IMM[0] of non-bindless buffer 0.
>
> Is there ever really a situation where the two could be confused?

 I always considered CONST[0] == CONST[0][0]. Technically they're not,
 since once has the second dimension in the TGSI encoding while the
 other doesn't. But practically,

 MOV TEMP[0], CONST[0]

 and

 MOV TEMP[0], CONST[0][0]

 are in every way identical. Currently st/mesa will just use CONST[0]
 everywhere, never adding the 2nd dimension.
>>> Maybe it would be worth the effort to fix this?
>>
>> Would be nice. One thing that makes this a bit awkward is that older
>> drivers just don't support two-dimensional CONST at all -- see
>> PIPE_SHADER_CAP_MAX_CONST_BUFFERS. Giving them a shader that loads
>> CONST[0][n] is going to fail.
> I suppose it wouldn't be too difficult to make them just accept this
> (basically ignoring the buffer index).
> But anyway, I don't know if it's worth the hassle, I just brought it up
> because if it's a problem going forward, it should be possible to change
> it. (Albeit we definitely have code relying on these 1d constants too...)
>
> Roland
>
>
>>
>> Basically, changing this is a backward-compatible change to state
>> trackers, which would have to promise not to produce one-dimensional
>> CONST for the usual, vec4-based constant fetching.
>>
>> On the other hand, maybe we're over-complicating this. The only
>> instruction that is really affected is LOAD. And for LOAD, there
>> shouldn't be a compatibility problem. Hmm...
>>
>> Cheers,
>> Nicolai
>>
>>>
>>> Roland
>>>
>>>
>>>   As such, I don't think we
 should start having behavioural differences for those on some
 instructions.

>>>
>>
>>
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/6] blorp: Turn anv_CmdCopyBuffer into a blorp_buffer_copy() helper.

2017-08-29 Thread Chris Wilson
Quoting Kenneth Graunke (2017-08-29 22:28:38)
> Anvil already had code to copy between two buffer objects in the most
> efficient way possible, using large bpp copies, then smaller bpp copies.
> 
> This patch moves that logic into BLORP as blorp_buffer_copy(), so we
> can reuse it in i965 as well.

Just some food for thought...

The render access pattern is terrible for wide linear buffers, as the
pixel block ends up being scattered across many pages, thrashing the TLB.
But if instead of telling it to copy as wide as possible, you just
limited it to a stride of 4096/4 (if memory serves 4 is the largest
pixel block height) then each pixel block is located inside just one
page (and hopefully cluster across the eu, further reducing the stride
may help if the spread ends up taller than wider).
-Chris
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/5] intel/isl: Set MOCS based on usage for surface states

2017-08-29 Thread Kenneth Graunke
On Tuesday, August 1, 2017 3:48:30 PM PDT Jason Ekstrand wrote:
> This makes ISL now ignore the MOCS data provided by the caller and just
> set it based on surface usage.
> ---
>  src/intel/isl/isl_emit_depth_stencil.c | 12 
>  src/intel/isl/isl_genX_mocs.h  | 53 
> ++
>  src/intel/isl/isl_surface_state.c  |  9 +++---
>  3 files changed, 65 insertions(+), 9 deletions(-)
>  create mode 100644 src/intel/isl/isl_genX_mocs.h
> 
> diff --git a/src/intel/isl/isl_emit_depth_stencil.c 
> b/src/intel/isl/isl_emit_depth_stencil.c
> index 0d541fd..212ed88 100644
> --- a/src/intel/isl/isl_emit_depth_stencil.c
> +++ b/src/intel/isl/isl_emit_depth_stencil.c
> @@ -23,6 +23,9 @@
>  
>  #include 
>  
> +#include "isl_priv.h"
> +#include "isl_genX_mocs.h"
> +
>  #define __gen_address_type uint64_t
>  #define __gen_user_data void
>  
> @@ -35,8 +38,6 @@ __gen_combine_address(void *data, void *loc, uint64_t addr, 
> uint32_t delta)
>  #include "genxml/gen_macros.h"
>  #include "genxml/genX_pack.h"
>  
> -#include "isl_priv.h"
> -
>  #define __PASTE2(x, y) x ## y
>  #define __PASTE(x, y) __PASTE2(x, y)
>  #define isl_genX(x) __PASTE(isl_, genX(x))
> @@ -96,7 +97,7 @@ isl_genX(emit_depth_stencil_hiz_s)(const struct isl_device 
> *dev, void *batch,
>  #endif
>db.SurfaceBaseAddress = info->depth_address;
>  #if GEN_GEN >= 6
> -  db.DepthBufferMOCS = info->mocs;
> +  db.DepthBufferMOCS = get_mocs_for_usage(ISL_SURF_USAGE_DEPTH_BIT);
>  #endif
>  
>  #if GEN_GEN <= 6
> @@ -140,7 +141,7 @@ isl_genX(emit_depth_stencil_hiz_s)(const struct 
> isl_device *dev, void *batch,
>  #endif
>sb.SurfaceBaseAddress = info->stencil_address;
>  #if GEN_GEN >= 6
> -  sb.StencilBufferMOCS = info->mocs;
> +  sb.StencilBufferMOCS = get_mocs_for_usage(ISL_SURF_USAGE_STENCIL_BIT);
>  #endif
>sb.SurfacePitch = info->stencil_surf->row_pitch - 1;
>  #if GEN_GEN >= 8
> @@ -163,7 +164,8 @@ isl_genX(emit_depth_stencil_hiz_s)(const struct 
> isl_device *dev, void *batch,
>db.HierarchicalDepthBufferEnable = true;
>  
>hiz.SurfaceBaseAddress = info->hiz_address;
> -  hiz.HierarchicalDepthBufferMOCS = info->mocs;
> +  hiz.HierarchicalDepthBufferMOCS =
> + get_mocs_for_usage(ISL_SURF_USAGE_HIZ_BIT);
>hiz.SurfacePitch = info->hiz_surf->row_pitch - 1;
>  #if GEN_GEN >= 8
>/* From the SKL PRM Vol2a:
> diff --git a/src/intel/isl/isl_genX_mocs.h b/src/intel/isl/isl_genX_mocs.h
> new file mode 100644
> index 000..158ebec
> --- /dev/null
> +++ b/src/intel/isl/isl_genX_mocs.h
> @@ -0,0 +1,53 @@
> +/*
> + * Copyright 2017 Intel Corporation
> + *
> + *  Permission is hereby granted, free of charge, to any person obtaining a
> + *  copy of this software and associated documentation files (the 
> "Software"),
> + *  to deal in the Software without restriction, including without limitation
> + *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + *  and/or sell copies of the Software, and to permit persons to whom the
> + *  Software is furnished to do so, subject to the following conditions:
> + *
> + *  The above copyright notice and this permission notice (including the next
> + *  paragraph) shall be included in all copies or substantial portions of the
> + *  Software.
> + *
> + *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 
> OR
> + *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 
> OTHER
> + *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
> DEALINGS
> + *  IN THE SOFTWARE.
> + */
> +
> +#include 
> +
> +#include "isl_priv.h"
> +
> +#include "genxml/gen_macros.h"
> +
> +#if GEN_GEN >= 6
> +static uint32_t
> +get_mocs_for_usage(isl_surf_usage_flags_t usage)
> +{
> +#if GEN_GEN == 6
> +   return 0; /* PTE */
> +#elif GEN_GEN == 7
> +   return 1; /* Cache in L3$, LLC and eLLC use PTE */

Using a semi-colon would be clearer:

/* Cache in L3; LLC and eLLC use PTE */

otherwise it might be possible to read it as "Cache in L3, LLC, and eLLC ..."

signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/5] intel/isl: Set MOCS based on view usage

2017-08-29 Thread Kenneth Graunke
On Tuesday, August 1, 2017 3:48:29 PM PDT Jason Ekstrand wrote:
> This little series changes things around so that, instead of passing MOCS
> values into ISL, ISL knows how to set them itself.  This allows us to
> centralize some of the decisions about how MOCS gets set for surfaces and
> hopefully, if we ever do anything crazy in the future, we can share it
> between GL and Vulkan.  Unfortunately, surfaces are not the only places
> where MOCS is used.  It also shows up in vertex buffers, index buffers, and
> streamout buffers.  However those are always set to the platform equivalent
> of I915_MOCS_CACHED (and that's not all that liable to change) so they're
> not particularly interesting.
> 
> If people like this approach, I'd like to Cc it to stable for 17.2 because
> it has the side-effect of making Vulkan MOCS a bit more sane.
> 
> Jason Ekstrand (5):
>   intel/isl: Set MOCS based on usage for surface states
>   intel/blorp: Delete the MOCS plumbing
>   i965: Stop passing MOCS information into ISL
>   anv: Stop passing MOCS information into ISL
>   intel/isl: Get rid of the mocs fields in fill/emit_info
> 
>  src/intel/blorp/blorp.h  |  6 ---
>  src/intel/blorp/blorp_genX_exec.h| 37 +++--
>  src/intel/isl/isl.h  | 22 --
>  src/intel/isl/isl_emit_depth_stencil.c   | 12 +++---
>  src/intel/isl/isl_genX_mocs.h| 53 
> 
>  src/intel/isl/isl_surface_state.c|  9 ++--
>  src/intel/vulkan/anv_blorp.c |  3 --
>  src/intel/vulkan/anv_device.c|  1 -
>  src/intel/vulkan/anv_image.c | 12 ++
>  src/intel/vulkan/anv_private.h   |  2 -
>  src/intel/vulkan/genX_cmd_buffer.c   | 13 ++
>  src/intel/vulkan/genX_state.c|  3 --
>  src/mesa/drivers/dri/i965/brw_blorp.c| 15 ---
>  src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 26 ++--
>  14 files changed, 101 insertions(+), 113 deletions(-)
>  create mode 100644 src/intel/isl/isl_genX_mocs.h
> 
> 

I guess this is okay.  I'm not crazy about it, mostly because the logic is
still spread out in a bunch of places.  If there was a vertex buffer usage
bit and we could handle that there, I think that would be nice, even if it
doesn't otherwise seem relevant to ISL.  *shrug*

Having ISL do this itself rather than passing in values makes sense.

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/6] blorp: Turn anv_CmdCopyBuffer into a blorp_buffer_copy() helper.

2017-08-29 Thread Kenneth Graunke
Anvil already had code to copy between two buffer objects in the most
efficient way possible, using large bpp copies, then smaller bpp copies.

This patch moves that logic into BLORP as blorp_buffer_copy(), so we
can reuse it in i965 as well.
---
 src/intel/blorp/blorp.h  |   6 ++
 src/intel/blorp/blorp_blit.c | 128 +++
 src/intel/vulkan/anv_blorp.c | 104 +++
 3 files changed, 141 insertions(+), 97 deletions(-)

diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
index 4d9a44b0926..951a97f5daa 100644
--- a/src/intel/blorp/blorp.h
+++ b/src/intel/blorp/blorp.h
@@ -132,6 +132,12 @@ blorp_copy(struct blorp_batch *batch,
uint32_t dst_x, uint32_t dst_y,
uint32_t src_width, uint32_t src_height);
 
+void
+blorp_buffer_copy(struct blorp_batch *batch,
+  void *src_bo, uint64_t src_offset,
+  void *dst_bo, uint64_t dst_offset,
+  uint64_t size);
+
 void
 blorp_fast_clear(struct blorp_batch *batch,
  const struct blorp_surf *surf, enum isl_format format,
diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
index 35008cbbb0a..998fd9b6d39 100644
--- a/src/intel/blorp/blorp_blit.c
+++ b/src/intel/blorp/blorp_blit.c
@@ -2513,3 +2513,131 @@ blorp_copy(struct blorp_batch *batch,
 
do_blorp_blit(batch, ¶ms, &wm_prog_key, &coords);
 }
+
+static enum isl_format
+isl_format_for_size(unsigned size_B)
+{
+   switch (size_B) {
+   case 1:  return ISL_FORMAT_R8_UINT;
+   case 2:  return ISL_FORMAT_R8G8_UINT;
+   case 4:  return ISL_FORMAT_R8G8B8A8_UINT;
+   case 8:  return ISL_FORMAT_R16G16B16A16_UINT;
+   case 16: return ISL_FORMAT_R32G32B32A32_UINT;
+   default:
+  unreachable("Not a power-of-two format size");
+   }
+}
+
+/**
+ * Returns the greatest common divisor of a and b that is a power of two.
+ */
+static uint64_t
+gcd_pow2_u64(uint64_t a, uint64_t b)
+{
+   assert(a > 0 || b > 0);
+
+   unsigned a_log2 = ffsll(a) - 1;
+   unsigned b_log2 = ffsll(b) - 1;
+
+   /* If either a or b is 0, then a_log2 or b_log2 till be UINT_MAX in which
+* case, the MIN2() will take the other one.  If both are 0 then we will
+* hit the assert above.
+*/
+   return 1 << MIN2(a_log2, b_log2);
+}
+
+static void
+do_buffer_copy(struct blorp_batch *batch,
+   void *src_bo, uint64_t src_offset,
+   void *dst_bo, uint64_t dst_offset,
+   int width, int height, int block_size)
+{
+   /* The actual format we pick doesn't matter as blorp will throw it away.
+* The only thing that actually matters is the size.
+*/
+   enum isl_format format = isl_format_for_size(block_size);
+
+   UNUSED bool ok;
+   struct isl_surf surf;
+   ok = isl_surf_init(batch->blorp->isl_dev, &surf,
+  .dim = ISL_SURF_DIM_2D,
+  .format = format,
+  .width = width,
+  .height = height,
+  .depth = 1,
+  .levels = 1,
+  .array_len = 1,
+  .samples = 1,
+  .row_pitch = width * block_size,
+  .usage = ISL_SURF_USAGE_TEXTURE_BIT |
+   ISL_SURF_USAGE_RENDER_TARGET_BIT,
+  .tiling_flags = ISL_TILING_LINEAR_BIT);
+   assert(ok);
+
+   struct blorp_surf src_blorp_surf = {
+  .surf = &surf,
+  .addr = {
+ .buffer = src_bo,
+ .offset = src_offset,
+  },
+   };
+
+   struct blorp_surf dst_blorp_surf = {
+  .surf = &surf,
+  .addr = {
+ .buffer = dst_bo,
+ .offset = dst_offset,
+  },
+   };
+
+   blorp_copy(batch, &src_blorp_surf, 0, 0, &dst_blorp_surf, 0, 0,
+  0, 0, 0, 0, width, height);
+}
+
+/* This is maximum possible width/height our HW can handle */
+#define MAX_SURFACE_DIM (1ull << 14)
+
+void
+blorp_buffer_copy(struct blorp_batch *batch,
+  void *src_bo, uint64_t src_offset,
+  void *dst_bo, uint64_t dst_offset,
+  uint64_t size)
+{
+   uint64_t copy_size = size;
+
+   /* First, we compute the biggest format that can be used with the
+* given offsets and size.
+*/
+   int bs = 16;
+   bs = gcd_pow2_u64(bs, src_offset);
+   bs = gcd_pow2_u64(bs, dst_offset);
+   bs = gcd_pow2_u64(bs, size);
+
+   /* First, we make a bunch of max-sized copies */
+   uint64_t max_copy_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;
+   while (copy_size >= max_copy_size) {
+  do_buffer_copy(batch, src_bo, src_offset, dst_bo, dst_offset,
+ MAX_SURFACE_DIM, MAX_SURFACE_DIM, bs);
+  copy_size -= max_copy_size;
+  src_offset += max_copy_size;
+  dst_offset += max_copy_size;
+   }
+
+   /* Now make a max-width copy */
+   uint64_t height = copy_size / (MAX_SURFACE_DIM * bs);
+   assert(height < MAX_SURFACE_DIM);
+   if (height != 0) {
+  uint64_t re

[Mesa-dev] [PATCH 4/6] i965: Add PIPE_CONTRTOL_DATA_CACHE flush to brw_emit_mi_flush().

2017-08-29 Thread Kenneth Graunke
Although we're phasing out brw_emit_mi_flush(), we still use it in some
places in order to "flush everything".  In a number of those places, we
write data to a buffer that we may then bind as an image surface, SSBO,
or atomic buffer.  Those usages require us to flush the data cache.
---
 src/mesa/drivers/dri/i965/brw_pipe_control.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c 
b/src/mesa/drivers/dri/i965/brw_pipe_control.c
index 2a84fb8864e..53b00564b21 100644
--- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
+++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
@@ -435,6 +435,7 @@ brw_emit_mi_flush(struct brw_context *brw)
   if (brw->gen >= 6) {
  flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
   PIPE_CONTROL_CONST_CACHE_INVALIDATE |
+  PIPE_CONTROL_DATA_CACHE_FLUSH |
   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
   PIPE_CONTROL_VF_CACHE_INVALIDATE |
   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/6] i965: Always flush caches after blitting to a GL buffer object.

2017-08-29 Thread Kenneth Graunke
When we blit data into a buffer object, we may need to invalidate any
caches that might contain stale data, so the new data becomes visible.

For example, if the buffer object is bound as a vertex buffer, we need
to invalidate the vertex fetch cache.

While this flushing was missing, it usually happened implicitly for
non-obvious reasons: we're usually on the render ring, and calling
intel_emit_linear_blit() would require switching to the BLT ring,
causing an implicit flush.  This likely provoked the kernel to do
PIPE_CONTROLs on our behalf.  Although, Gen4-5 wouldn't have this
behavior.  At any rate, we should do it ourselves.
---
 src/mesa/drivers/dri/i965/intel_buffer_objects.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c 
b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
index dd6fca12d0b..01443c2b7fc 100644
--- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
@@ -298,6 +298,7 @@ brw_buffer_subdata(struct gl_context *ctx,
 intel_obj->buffer, offset,
 temp_bo, 0,
 size);
+ brw_emit_mi_flush(brw);
 
  brw_bo_unreference(temp_bo);
  mark_buffer_valid_data(intel_obj, offset, size);
@@ -541,6 +542,7 @@ brw_flush_mapped_buffer_range(struct gl_context *ctx,
mark_buffer_gpu_usage(intel_obj,
  obj->Mappings[index].Offset + offset,
  length);
+   brw_emit_mi_flush(brw);
 }
 
 
@@ -570,6 +572,7 @@ brw_unmap_buffer(struct gl_context *ctx,
 obj->Mappings[index].Length);
  mark_buffer_gpu_usage(intel_obj, obj->Mappings[index].Offset,
obj->Mappings[index].Length);
+ brw_emit_mi_flush(brw);
   }
 
   /* Since we've emitted some blits to buffers that will (likely) be used
@@ -577,7 +580,6 @@ brw_unmap_buffer(struct gl_context *ctx,
* flush.  Once again, we wish for a domain tracker in libdrm to cover
* usage inside of a batchbuffer.
*/
-  brw_emit_mi_flush(brw);
 
   brw_bo_unreference(intel_obj->range_map_bo[index]);
   intel_obj->range_map_bo[index] = NULL;
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/6] blorp: Make blorp_buffer_copy work on Gen4-6.

2017-08-29 Thread Kenneth Graunke
Gen4-6 can only handle surfaces up to 8192.  Only Gen7+ can do 16384.
---
 src/intel/blorp/blorp_blit.c | 19 ++-
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
index 998fd9b6d39..d3b51a18161 100644
--- a/src/intel/blorp/blorp_blit.c
+++ b/src/intel/blorp/blorp_blit.c
@@ -2594,17 +2594,18 @@ do_buffer_copy(struct blorp_batch *batch,
   0, 0, 0, 0, width, height);
 }
 
-/* This is maximum possible width/height our HW can handle */
-#define MAX_SURFACE_DIM (1ull << 14)
-
 void
 blorp_buffer_copy(struct blorp_batch *batch,
   void *src_bo, uint64_t src_offset,
   void *dst_bo, uint64_t dst_offset,
   uint64_t size)
 {
+   const struct gen_device_info *devinfo = batch->blorp->isl_dev->info;
uint64_t copy_size = size;
 
+   /* This is maximum possible width/height our HW can handle */
+   uint64_t max_surface_dim = 1 << (devinfo->gen >= 7 ? 14 : 13);
+
/* First, we compute the biggest format that can be used with the
 * given offsets and size.
 */
@@ -2614,22 +2615,22 @@ blorp_buffer_copy(struct blorp_batch *batch,
bs = gcd_pow2_u64(bs, size);
 
/* First, we make a bunch of max-sized copies */
-   uint64_t max_copy_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;
+   uint64_t max_copy_size = max_surface_dim * max_surface_dim * bs;
while (copy_size >= max_copy_size) {
   do_buffer_copy(batch, src_bo, src_offset, dst_bo, dst_offset,
- MAX_SURFACE_DIM, MAX_SURFACE_DIM, bs);
+ max_surface_dim, max_surface_dim, bs);
   copy_size -= max_copy_size;
   src_offset += max_copy_size;
   dst_offset += max_copy_size;
}
 
/* Now make a max-width copy */
-   uint64_t height = copy_size / (MAX_SURFACE_DIM * bs);
-   assert(height < MAX_SURFACE_DIM);
+   uint64_t height = copy_size / (max_surface_dim * bs);
+   assert(height < max_surface_dim);
if (height != 0) {
-  uint64_t rect_copy_size = height * MAX_SURFACE_DIM * bs;
+  uint64_t rect_copy_size = height * max_surface_dim * bs;
   do_buffer_copy(batch, src_bo, src_offset, dst_bo, dst_offset,
- MAX_SURFACE_DIM, height, bs);
+ max_surface_dim, height, bs);
   copy_size -= rect_copy_size;
   src_offset += rect_copy_size;
   dst_offset += rect_copy_size;
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/6] i965: Use BLORP for buffer object stall avoidance blits instead of BLT.

2017-08-29 Thread Kenneth Graunke
Improves performance of GFXBench4 tests at 1024x768 on a Kabylake GT2:
- Manhattan 3.1 by 1.32134% +/- 0.322734% (n=8).
- Car Chase by 1.25607% +/- 0.291262% (n=5).
---
 src/mesa/drivers/dri/i965/intel_buffer_objects.c | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c 
b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
index 01443c2b7fc..49e68bd7392 100644
--- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
@@ -37,7 +37,7 @@
 #include "x86/common_x86_asm.h"
 
 #include "brw_context.h"
-#include "intel_blit.h"
+#include "brw_blorp.h"
 #include "intel_buffer_objects.h"
 #include "intel_batchbuffer.h"
 #include "intel_tiled_memcpy.h"
@@ -294,9 +294,9 @@ brw_buffer_subdata(struct gl_context *ctx,
 
  brw_bo_subdata(temp_bo, 0, size, data);
 
- intel_emit_linear_blit(brw,
-intel_obj->buffer, offset,
+ brw_blorp_copy_buffers(brw,
 temp_bo, 0,
+intel_obj->buffer, offset,
 size);
  brw_emit_mi_flush(brw);
 
@@ -533,11 +533,11 @@ brw_flush_mapped_buffer_range(struct gl_context *ctx,
 * another blit of that area and the complete newer data will land the
 * second time.
 */
-   intel_emit_linear_blit(brw,
-  intel_obj->buffer,
-  obj->Mappings[index].Offset + offset,
+   brw_blorp_copy_buffers(brw,
   intel_obj->range_map_bo[index],
   intel_obj->map_extra[index] + offset,
+  intel_obj->buffer,
+  obj->Mappings[index].Offset + offset,
   length);
mark_buffer_gpu_usage(intel_obj,
  obj->Mappings[index].Offset + offset,
@@ -565,10 +565,10 @@ brw_unmap_buffer(struct gl_context *ctx,
   brw_bo_unmap(intel_obj->range_map_bo[index]);
 
   if (!(obj->Mappings[index].AccessFlags & GL_MAP_FLUSH_EXPLICIT_BIT)) {
- intel_emit_linear_blit(brw,
-intel_obj->buffer, obj->Mappings[index].Offset,
+ brw_blorp_copy_buffers(brw,
 intel_obj->range_map_bo[index],
 intel_obj->map_extra[index],
+intel_obj->buffer, obj->Mappings[index].Offset,
 obj->Mappings[index].Length);
  mark_buffer_gpu_usage(intel_obj, obj->Mappings[index].Offset,
obj->Mappings[index].Length);
@@ -646,9 +646,9 @@ brw_copy_buffer_subdata(struct gl_context *ctx,
dst_bo = intel_bufferobj_buffer(brw, intel_dst, write_offset, size, true);
src_bo = intel_bufferobj_buffer(brw, intel_src, read_offset, size, false);
 
-   intel_emit_linear_blit(brw,
-  dst_bo, write_offset,
-  src_bo, read_offset, size);
+   brw_blorp_copy_buffers(brw,
+  src_bo, read_offset,
+  dst_bo, write_offset, size);
 
/* Since we've emitted some blits to buffers that will (likely) be used
 * in rendering operations in other cache domains in this batch, emit a
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/6] i965: Add a brw_blorp_copy_buffers() command.

2017-08-29 Thread Kenneth Graunke
This exposes the new blorp_copy_buffer() functionality to i965.
It should be a drop-in replacement for intel_emit_linear_blit()
(other than the arguments being backwards, for consistency with BLORP).
---
 src/mesa/drivers/dri/i965/brw_blorp.c | 18 ++
 src/mesa/drivers/dri/i965/brw_blorp.h |  8 
 2 files changed, 26 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
b/src/mesa/drivers/dri/i965/brw_blorp.c
index 3ea396d726e..3d8203619f5 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -428,6 +428,24 @@ brw_blorp_copy_miptrees(struct brw_context *brw,
   dst_aux_usage);
 }
 
+void
+brw_blorp_copy_buffers(struct brw_context *brw,
+   struct brw_bo *src_bo,
+   unsigned src_offset,
+   struct brw_bo *dst_bo,
+   unsigned dst_offset,
+   unsigned size)
+{
+   DBG("%s %d bytes from %p[%d] to %p[%d]",
+   __func__, size, src_bo, src_offset, dst_bo, dst_offset);
+
+   struct blorp_batch batch;
+   blorp_batch_init(&brw->blorp, &batch, brw, 0);
+   blorp_buffer_copy(&batch, src_bo, src_offset, dst_bo, dst_offset, size);
+   blorp_batch_finish(&batch);
+}
+
+
 static struct intel_mipmap_tree *
 find_miptree(GLbitfield buffer_bit, struct intel_renderbuffer *irb)
 {
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h 
b/src/mesa/drivers/dri/i965/brw_blorp.h
index c65a68a53d3..cf781ec53cb 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h
@@ -59,6 +59,14 @@ brw_blorp_copy_miptrees(struct brw_context *brw,
 unsigned dst_x, unsigned dst_y,
 unsigned src_width, unsigned src_height);
 
+void
+brw_blorp_copy_buffers(struct brw_context *brw,
+   struct brw_bo *src_bo,
+   unsigned src_offset,
+   struct brw_bo *dst_bo,
+   unsigned dst_offset,
+   unsigned size);
+
 void
 brw_blorp_clear_color(struct brw_context *brw, struct gl_framebuffer *fb,
   GLbitfield mask, bool partial_clear, bool encode_srgb);
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv: don't assert on empty hash table

2017-08-29 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Sun, Aug 27, 2017, at 23:44, Grazvydas Ignotas wrote:
> Currently if table_size is 0, it's falling through to:
> 
> unreachable("hash table should never be full");
> 
> But table_size can be 0 when RADV_DEBUG=nocache is set, or when the
> table allocation fails (which is not considered an error).
> 
> Fixes: f4e499ec791 "radv: add initial non-conformant radv vulkan driver"
> Signed-off-by: Grazvydas Ignotas 
> ---
>  src/amd/vulkan/radv_pipeline_cache.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/src/amd/vulkan/radv_pipeline_cache.c
> b/src/amd/vulkan/radv_pipeline_cache.c
> index 99a614d..beed35b 100644
> --- a/src/amd/vulkan/radv_pipeline_cache.c
> +++ b/src/amd/vulkan/radv_pipeline_cache.c
> @@ -116,10 +116,13 @@ radv_pipeline_cache_search_unlocked(struct
> radv_pipeline_cache *cache,
>   const unsigned char *sha1)
>  {
>   const uint32_t mask = cache->table_size - 1;
>   const uint32_t start = (*(uint32_t *) sha1);
>  
> +   if (cache->table_size == 0)
> +   return NULL;
> +
>   for (uint32_t i = 0; i < cache->table_size; i++) {
>   const uint32_t index = (start + i) & mask;
>   struct cache_entry *entry = cache->hash_table[index];
>  
>   if (!entry)
> -- 
> 2.7.4
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] winsys/amdgpu: add BO to the global list only when RADEON_ALL_BOS is set

2017-08-29 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Tue, Aug 29, 2017 at 4:24 PM, Samuel Pitoiset
 wrote:
> Only useful when that debug option is enabled.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 20 
>  src/gallium/winsys/amdgpu/drm/amdgpu_cs.c |  4 +---
>  src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c |  3 +++
>  src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h |  1 +
>  4 files changed, 17 insertions(+), 11 deletions(-)
>
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c 
> b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
> index 97bbe235a4..1323be8356 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
> @@ -167,10 +167,12 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf)
>
> assert(bo->bo && "must not be called for slab entries");
>
> -   mtx_lock(&bo->ws->global_bo_list_lock);
> -   LIST_DEL(&bo->u.real.global_list_item);
> -   bo->ws->num_buffers--;
> -   mtx_unlock(&bo->ws->global_bo_list_lock);
> +   if (bo->ws->debug_all_bos) {
> +  mtx_lock(&bo->ws->global_bo_list_lock);
> +  LIST_DEL(&bo->u.real.global_list_item);
> +  bo->ws->num_buffers--;
> +  mtx_unlock(&bo->ws->global_bo_list_lock);
> +   }
>
> amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
> amdgpu_va_range_free(bo->u.real.va_handle);
> @@ -360,10 +362,12 @@ static void amdgpu_add_buffer_to_global_list(struct 
> amdgpu_winsys_bo *bo)
>
> assert(bo->bo);
>
> -   mtx_lock(&ws->global_bo_list_lock);
> -   LIST_ADDTAIL(&bo->u.real.global_list_item, &ws->global_bo_list);
> -   ws->num_buffers++;
> -   mtx_unlock(&ws->global_bo_list_lock);
> +   if (ws->debug_all_bos) {
> +  mtx_lock(&ws->global_bo_list_lock);
> +  LIST_ADDTAIL(&bo->u.real.global_list_item, &ws->global_bo_list);
> +  ws->num_buffers++;
> +  mtx_unlock(&ws->global_bo_list_lock);
> +   }
>  }
>
>  static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 
> b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> index 9cadfc4298..5ddde8e794 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> @@ -903,8 +903,6 @@ static unsigned amdgpu_cs_get_buffer_list(struct 
> radeon_winsys_cs *rcs,
>  return cs->num_real_buffers;
>  }
>
> -DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", false)
> -
>  static void amdgpu_add_fence_dependency(struct amdgpu_cs *acs,
>  struct amdgpu_cs_buffer *buffer)
>  {
> @@ -1097,7 +1095,7 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
> /* Create the buffer list.
>  * Use a buffer list containing all allocated buffers if requested.
>  */
> -   if (debug_get_option_all_bos()) {
> +   if (ws->debug_all_bos) {
>struct amdgpu_winsys_bo *bo;
>amdgpu_bo_handle *handles;
>unsigned num = 0;
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c 
> b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
> index a8b5e5b41a..5e0c1fd8d8 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
> @@ -50,6 +50,8 @@
>  static struct util_hash_table *dev_tab = NULL;
>  static mtx_t dev_tab_mutex = _MTX_INITIALIZER_NP;
>
> +DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", false)
> +
>  /* Helper function to do the ioctls needed for setup and init. */
>  static bool do_winsys_init(struct amdgpu_winsys *ws, int fd)
>  {
> @@ -70,6 +72,7 @@ static bool do_winsys_init(struct amdgpu_winsys *ws, int fd)
> }
>
> ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != 
> NULL;
> +   ws->debug_all_bos = debug_get_option_all_bos();
>
> return true;
>
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h 
> b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
> index 7aca612f45..de54470ede 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
> @@ -79,6 +79,7 @@ struct amdgpu_winsys {
> ADDR_HANDLE addrlib;
>
> bool check_vm;
> +   bool debug_all_bos;
>
> /* List of all allocated buffers */
> mtx_t global_bo_list_lock;
> --
> 2.14.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] winsys/amdgpu: set AMDGPU_GEM_CREATE_LOCAL if possible

2017-08-29 Thread Marek Olšák
On Tue, Aug 29, 2017 at 4:47 PM, Christian König
 wrote:
> From: Christian König 
>
> When the kernel supports it set the local flag and
> stop adding those BOs to the BO list.
>
> Can probably be optimized much more.
>
> Signed-off-by: Christian König 
> ---
>  src/gallium/winsys/amdgpu/drm/amdgpu_bo.c |  8 
>  src/gallium/winsys/amdgpu/drm/amdgpu_bo.h |  2 ++
>  src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 22 +-
>  3 files changed, 27 insertions(+), 5 deletions(-)
>
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c 
> b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
> index 08348ed..1238d58 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
> @@ -38,6 +38,10 @@
>  #include 
>  #include 
>
> +#ifndef AMDGPU_GEM_CREATE_LOCAL
> +#define AMDGPU_GEM_CREATE_LOCAL (1 << 6)
> +#endif
> +
>  /* Set to 1 for verbose output showing committed sparse buffer ranges. */
>  #define DEBUG_SPARSE_COMMITS 0
>
> @@ -402,6 +406,9 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct 
> amdgpu_winsys *ws,
>request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
> if (flags & RADEON_FLAG_GTT_WC)
>request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
> +   if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
> +   ws->info.drm_minor >= 20)
> +  request.flags |= AMDGPU_GEM_CREATE_LOCAL;
>
> r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
> if (r) {
> @@ -435,6 +442,7 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct 
> amdgpu_winsys *ws,
> bo->u.real.va_handle = va_handle;
> bo->initial_domain = initial_domain;
> bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
> +   bo->is_local = !!(request.flags & AMDGPU_GEM_CREATE_LOCAL);

I think that "bool" doesn't need "!!" and automatically converts the
value to true/false, but I'm not 100% sure. Anyway, the series is:

Reviewed-by: Marek Olšák 

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeonsi: update dirty_level_mask before dispatching

2017-08-29 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Tue, Aug 29, 2017 at 5:37 PM, Samuel Pitoiset
 wrote:
> This fixes a rendering issue with Hitman when bindless textures
> are enabled.
>
> Fixes: 2263610827 ("radeonsi: flush DB caches only when transitioning from DB 
> to texturing")
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/drivers/radeon/r600_pipe_common.h | 1 +
>  src/gallium/drivers/radeonsi/si_compute.c | 5 +
>  2 files changed, 6 insertions(+)
>
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
> b/src/gallium/drivers/radeon/r600_pipe_common.h
> index 59886e..d76d4a1384 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -569,6 +569,7 @@ struct r600_common_context {
> unsignedgpu_reset_counter;
> unsignedlast_dirty_tex_counter;
> unsignedlast_compressed_colortex_counter;
> +   unsignedlast_num_draw_calls;
>
> struct threaded_context *tc;
> struct u_suballocator   *allocator_zeroed_memory;
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
> b/src/gallium/drivers/radeonsi/si_compute.c
> index 3ebd22c3c1..ca334949d7 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -782,6 +782,11 @@ static void si_launch_grid(
> program->shader.compilation_failed)
> return;
>
> +   if (sctx->b.last_num_draw_calls != sctx->b.num_draw_calls) {
> +   si_update_fb_dirtiness_after_rendering(sctx);
> +   sctx->b.last_num_draw_calls = sctx->b.num_draw_calls;
> +   }
> +
> si_decompress_compute_textures(sctx);
>
> /* Add buffer sizes for memory checking in need_cs_space. */
> --
> 2.14.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH v2 1/2] gallivm: correct channel shift logic on big endian

2017-08-29 Thread Roland Scheidegger
Am 29.08.2017 um 18:51 schrieb Emil Velikov:
> On 23 August 2017 at 21:32, Ben Crocker  wrote:
>> From: Ray Strode 
>>
>> lp_build_fetch_rgba_soa fetches a texel from a texture.
>> Part of that process involves first gathering the element
>> together from memory into a packed format, and then breaking
>> out the individual color channels into separate, parallel
>> arrays.
>>
>> The code fails to account for endianess when reading the packed
>> values.
>>
>> This commit attempts to correct the problem by reversing the order
>> the packed values are read on big endian systems.
>>
>> Bugzilla: 
>> https://urldefense.proofpoint.com/v2/url?u=https-3A__bugs.freedesktop.org_show-5Fbug.cgi-3Fid-3D100613&d=DwIBaQ&c=uilaK90D4TOVoH58JNXRgQ&r=_QIjpv-UJ77xEQY8fIYoQtr5qv8wKrPJc7v7_-CYAb0&m=-owfKrLkZG_-D07ad7TcBmuZ0jASKVGMIE-e-E9DLr0&s=zMHob1rOLWVOOgtwdGeaQ_qRwgTRdsKq5wrlSVRyK9o&e=
>>  
>> Cc: "17.2" "17.1" 
>>
>> ---
>>  src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 8 +++-
>>  1 file changed, 7 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c 
>> b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
>> index 98eb694..22c19b1 100644
>> --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
>> +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
>> @@ -650,7 +650,13 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
>>   for (i = 0; i < format_desc->nr_channels; i++) {
>>  struct util_format_channel_description chan_desc = 
>> format_desc->channel[i];
>>  unsigned blockbits = type.width;
>> -unsigned vec_nr = chan_desc.shift / type.width;
>> +unsigned vec_nr;
>> +
>> +#ifdef PIPE_ARCH_BIG_ENDIAN
>> +vec_nr = (format_desc->block.bits - (chan_desc.shift + 
>> chan_desc.size)) / type.width;
>> +#else
>> +vec_nr = chan_desc.shift / type.width;
>> +#endif
>>  chan_desc.shift %= type.width;
>>
> Roland, I think you're one of the more knowledgeable people for things 
> gallivm.
> Can you please review this patch?

Well I'm the one who regularly breaks the BE stuff :-).
That said, I already reviewed this:
https://lists.freedesktop.org/archives/mesa-dev/2017-August/167515.html

Roland


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 20/47] i965/fs: Define new shader opcodes to set rounding modes

2017-08-29 Thread Chema Casanova


On 29/08/17 21:18, Francisco Jerez wrote:
> Chema Casanova  writes:
> 
>> El 25/08/17 a las 20:09, Francisco Jerez escribió:
>>> Alejandro Piñeiro  writes:
>>>
 Although it is possible to emit them directly as AND/OR on brw_fs_nir,
 having specific opcodes makes it easier to remove duplicate settings
 later.

 Signed-off-by:  Alejandro Piñeiro 
 Signed-off-by:  Jose Maria Casanova Crespo 
 ---
  src/intel/compiler/brw_eu.h |  3 +++
  src/intel/compiler/brw_eu_defines.h |  9 +
  src/intel/compiler/brw_eu_emit.c| 19 +++
  src/intel/compiler/brw_fs_generator.cpp |  8 
  src/intel/compiler/brw_shader.cpp   |  5 +
  5 files changed, 44 insertions(+)

 diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
 index a3a9c63239d..0a7f8020398 100644
 --- a/src/intel/compiler/brw_eu.h
 +++ b/src/intel/compiler/brw_eu.h
 @@ -500,6 +500,9 @@ brw_broadcast(struct brw_codegen *p,
struct brw_reg src,
struct brw_reg idx);
  
 +void
 +brw_rounding_mode(struct brw_codegen *p,
 +  enum brw_rnd_mode mode);
  /***
   * brw_eu_util.c:
   */
 diff --git a/src/intel/compiler/brw_eu_defines.h 
 b/src/intel/compiler/brw_eu_defines.h
 index 1af835d47ed..50435df2fcf 100644
 --- a/src/intel/compiler/brw_eu_defines.h
 +++ b/src/intel/compiler/brw_eu_defines.h
 @@ -388,6 +388,9 @@ enum opcode {
 SHADER_OPCODE_TYPED_SURFACE_WRITE,
 SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
  
 +   SHADER_OPCODE_RND_MODE_RTE,
 +   SHADER_OPCODE_RND_MODE_RTZ,
 +
>>> We don't need an opcode for each possible rounding mode (there's also RU
>>> and RD).  How about you add a single SHADER_OPCODE_RND_MODE opcode
>>> taking an immediate with the right rounding mode?
>> I like the proposal. It is better having a unique opcode for setting the
>> rounding mode. Changed for v2 of this patch.
>>> Also, you should be marking the rounding mode opcodes as
>>> has_side_effects(), because otherwise you're giving the scheduler the
>>> freedom of moving your rounding mode update instruction past the
>>> instruction you wanted it to have an effect on...
>> Well pointed, we already realized that it was missing while debugging
>> the latency problem of the control register. It was hiding the problem
>> re-scheduling the cr0 modification to the beginning of the shader.
 SHADER_OPCODE_MEMORY_FENCE,
  
 SHADER_OPCODE_GEN4_SCRATCH_READ,
 @@ -1233,4 +1236,10 @@ enum brw_message_target {
  /* R0 */
  # define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT27
  
 +enum PACKED brw_rnd_mode {
 +   BRW_RND_MODE_UNSPECIFIED,
 +   BRW_RND_MODE_RTE,
 +   BRW_RND_MODE_RTZ,
>>> Since you're introducing a back-end-specific rounding mode enum already,
>>> why not use the hardware values right away so you avoid hard-coding
>>> magic constants below.
>> At the end we removed  MODE_UNSPECIFIED as it isn't really needed and we
>> include Round Up and Down in the enum assignation based using the PRM
>> values. Also renamed RTE for RTNE to maintain coherence with nir
>> conversion modifiers using the same acronym as PRM.
>>
>> +enum PACKED brw_rnd_mode {
>> +   BRW_RND_MODE_RTNE = 0,  /* Round to Nearest or Even */
>> +   BRW_RND_MODE_RU = 1,/* Round Up, toward +inf */
>> +   BRW_RND_MODE_RD = 2,/* Round Down, toward -inf */
>> +   BRW_RND_MODE_RTZ = 3/* Round Toward Zero */
>> +};
>>
>> I have a doubt about how to avoid the magic constants to close the v2 of
>> this patch. One approach would be using the same code structure and
>> taking advantage using the codification of rounding field. This way we
>> just formula and expect that the C compiler optimizer to guess that the
>> immediate value is a constant.
>>
> 
> Hm, I'm not particularly worried about the C++ compiler behavior, it's
> most likely immaterial.
> 
>> switch (mode) {
>> case BRW_RND_MODE_RTZ:
>> -  inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
>> brw_imm_ud(0x0030u));
>> +  inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
>> brw_imm_ud(((unsigned int) mode << 4)));
>>break;
>> -   case BRW_RND_MODE_RTE:
>> -  inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
>> brw_imm_ud(0xffcfu));
>> +   case BRW_RND_MODE_RTNE:
>> +  inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
>> brw_imm_ud(((unsigned int) mode << 4) | ~0x0030u));
>>break;
>> default:
>>
> 
> I don't think this is particularly advantageous since you still need to
> special-case every possible rounding mode.
> 
>> Another approach could be to implement a general solution for all
>> supported rounding modes by the hw including Round Up and Down using
>> bitwise operations.
>>
>> /**
>>  * Changes the 

[Mesa-dev] [PATCH 8/8] fix test makefile

2017-08-29 Thread Thomas Helland
---
 src/util/tests/string_buffer/Makefile.am | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/util/tests/string_buffer/Makefile.am 
b/src/util/tests/string_buffer/Makefile.am
index 60039a90d2..fdf285fc5d 100644
--- a/src/util/tests/string_buffer/Makefile.am
+++ b/src/util/tests/string_buffer/Makefile.am
@@ -23,12 +23,16 @@ AM_CPPFLAGS = \
-I$(top_srcdir)/src/util \
$(DEFINES)
 
-LDADD = \
+string_buffer_test_SOURCES = \
+   append_and_print.cpp
+
+string_buffer_test_CFLAGS = \
+   -I$(top_srcdir)/include \
+   -I$(top_srcdir)/src/util \
+   $(DEFINES) \
+   $(PTHREAD_CFLAGS)
+
+string_buffer_test_LDADD = \
$(top_builddir)/src/util/libmesautil.la \
$(PTHREAD_LIBS) \
$(DLOPEN_LIBS)
-
-TESTS = append_and_print \
-   $()
-
-check_PROGRAMS = $(TESTS)
-- 
2.13.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/8] glcpp: Use string_buffer for line continuation removal

2017-08-29 Thread Thomas Helland
Migrate removal of line continuations to string_buffer. Before this
it used ralloc_strncat() to append strings, which internally
each time calculates strlen() of its argument. Its argument is
entire shader, so it multiple time scans the whole shader text.

Signed-off-by: Vladislav Egorov 

V2: Adapt to different API of string buffer (Thomas Helland)
---
 src/compiler/glsl/glcpp/pp.c | 25 ++---
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/src/compiler/glsl/glcpp/pp.c b/src/compiler/glsl/glcpp/pp.c
index 40e0f033d2..c3827e439f 100644
--- a/src/compiler/glsl/glcpp/pp.c
+++ b/src/compiler/glsl/glcpp/pp.c
@@ -97,17 +97,25 @@ skip_newline (const char *str)
return ret;
 }
 
+/* Initial output buffer size, 4096 minus ralloc() overhead. It was selected
+ * to minimize total amount of allocated memory during shader-db run.
+ */
+#define INITIAL_PP_OUTPUT_BUF_SIZE 4048
+
 /* Remove any line continuation characters in the shader, (whether in
  * preprocessing directives or in GLSL code).
  */
 static char *
 remove_line_continuations(glcpp_parser_t *ctx, const char *shader)
 {
-   char *clean = ralloc_strdup(ctx, "");
+   struct _mesa_string_buffer *sb =
+   _mesa_string_buffer_create(ctx, INITIAL_PP_OUTPUT_BUF_SIZE);
+
const char *backslash, *newline, *search_start;
 const char *cr, *lf;
 char newline_separator[3];
int collapsed_newlines = 0;
+   int separator_len;
 
backslash = strchr(shader, '\\');
 
@@ -153,6 +161,7 @@ remove_line_continuations(glcpp_parser_t *ctx, const char 
*shader)
newline_separator[0] = '\n';
newline_separator[1] = '\r';
}
+   separator_len = strlen(newline_separator);
 
while (true) {
/* If we have previously collapsed any line-continuations,
@@ -172,10 +181,12 @@ remove_line_continuations(glcpp_parser_t *ctx, const char 
*shader)
if (newline &&
(backslash == NULL || newline < backslash))
{
-   ralloc_strncat(&clean, shader,
-  newline - shader + 1);
+   _mesa_string_buffer_append_len(sb, shader,
+  newline - shader 
+ 1);
while (collapsed_newlines) {
-   ralloc_strcat(&clean, 
newline_separator);
+   _mesa_string_buffer_append_len(sb,
+  
newline_separator,
+  
separator_len);
collapsed_newlines--;
}
shader = skip_newline (newline);
@@ -196,7 +207,7 @@ remove_line_continuations(glcpp_parser_t *ctx, const char 
*shader)
if (backslash[1] == '\r' || backslash[1] == '\n')
{
collapsed_newlines++;
-   ralloc_strncat(&clean, shader, backslash - shader);
+   _mesa_string_buffer_append_len(sb, shader, backslash - 
shader);
shader = skip_newline (backslash + 1);
search_start = shader;
}
@@ -204,9 +215,9 @@ remove_line_continuations(glcpp_parser_t *ctx, const char 
*shader)
backslash = strchr(search_start, '\\');
}
 
-   ralloc_strcat(&clean, shader);
+   _mesa_string_buffer_append(sb, shader);
 
-   return clean;
+   return sb->buf;
 }
 
 int
-- 
2.13.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/8] glcpp: Use Bloom filter before identifier search

2017-08-29 Thread Thomas Helland
From: Vladislav Egorov 

Absolute majority of identifiers in shaders are not macro references.
Many shaders don't use preprocessing macros at all. Almost all
queries to parser->defines hash-table will be unsuccessful. Note
that all predefined macros start either with "GL_" or with "__".
Moreover, even user-defined macros are usually use upper case, while
ordinary identifiers use lower case. It's easy to detect macros just
looking on the first two characters of their name. So it makes sense
to make a quick identifier lookup in a Bloom filter before making
relatively expensive hash-table search call.

Many schemes of Bloom are possible, but it seems that two mini 64 bit
Bloom tables -- one for the 1st identifier's char, one for the second
identifier's char -- with one trivial hash-function (6 lowest bits of
the char) work very well. Considering that Latin-1 alphabetic chars
and _ occupy positions from 65 to 122, every character will have its
bit position in 64-bit word, distinguishing lower and upper case.

Some games do things like "#define float4 float", this scheme would
not help here, it barely improves heavy users of preprocessor like
Talos Principle, but in general it works pretty well. In my shader-db
it eliminates 90%+ hash table queries during preprocessing stage.
One 64-bit Bloom table working on only 1st or only 2nd char will
eliminate ~70-80% queries.

For simplicity use the same scheme for both 32-bit and 64-bit archs.
32-bit archs still have pretty efficients ways to do 64-bit shifts.
---
 src/compiler/glsl/glcpp/glcpp-parse.y | 24 ++--
 src/compiler/glsl/glcpp/glcpp.h   | 10 ++
 2 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y 
b/src/compiler/glsl/glcpp/glcpp-parse.y
index debf12bef8..19a6df2015 100644
--- a/src/compiler/glsl/glcpp/glcpp-parse.y
+++ b/src/compiler/glsl/glcpp/glcpp-parse.y
@@ -1359,6 +1359,11 @@ glcpp_parser_create(const struct gl_extensions 
*extension_list,
glcpp_lex_init_extra (parser, &parser->scanner);
parser->defines = _mesa_hash_table_create(NULL, _mesa_key_hash_string,
  _mesa_key_string_equal);
+   parser->defines_bloom0 = 0;
+   parser->defines_bloom1 = 0;
+   GLCPP_DEFINES_BLOOM_PUT(parser, "__LINE__");
+   GLCPP_DEFINES_BLOOM_PUT(parser, "__FILE__");
+
parser->linalloc = linear_alloc_parent(parser, 0);
parser->active = NULL;
parser->lexing_directive = 0;
@@ -1845,6 +1850,9 @@ _glcpp_parser_expand_node(glcpp_parser_t *parser, 
token_node_t *node,
*last = node;
identifier = token->value.str;
 
+   if (!GLCPP_DEFINES_BLOOM_GET(parser, identifier))
+  return NULL;
+
/* Special handling for __LINE__ and __FILE__, (not through
 * the hash table). */
if (*identifier == '_') {
@@ -2131,6 +2139,7 @@ _define_object_macro(glcpp_parser_t *parser, YYLTYPE *loc,
   glcpp_error (loc, parser, "Redefinition of macro %s\n",  identifier);
}
 
+   GLCPP_DEFINES_BLOOM_PUT(parser, identifier);
_mesa_hash_table_insert (parser->defines, identifier, macro);
 }
 
@@ -2166,6 +2175,7 @@ _define_function_macro(glcpp_parser_t *parser, YYLTYPE 
*loc,
   glcpp_error (loc, parser, "Redefinition of macro %s\n", identifier);
}
 
+   GLCPP_DEFINES_BLOOM_PUT(parser, identifier);
_mesa_hash_table_insert(parser->defines, identifier, macro);
 }
 
@@ -2212,12 +,14 @@ glcpp_parser_lex(YYSTYPE *yylval, YYLTYPE *yylloc, 
glcpp_parser_t *parser)
ret == ENDIF || ret == HASH_TOKEN) {
  parser->in_control_line = 1;
   } else if (ret == IDENTIFIER) {
- struct hash_entry *entry = _mesa_hash_table_search(parser->defines,
-yylval->str);
- macro_t *macro = entry ? entry->data : NULL;
- if (macro && macro->is_function) {
-parser->newline_as_space = 1;
-parser->paren_count = 0;
+ if (GLCPP_DEFINES_BLOOM_GET(parser, yylval->str)) {
+struct hash_entry *entry = _mesa_hash_table_search(parser->defines,
+   yylval->str);
+macro_t *macro = entry ? entry->data : NULL;
+if (macro && macro->is_function) {
+   parser->newline_as_space = 1;
+   parser->paren_count = 0;
+}
  }
   }
 
diff --git a/src/compiler/glsl/glcpp/glcpp.h b/src/compiler/glsl/glcpp/glcpp.h
index 9d997cd924..b2f6f7a2a3 100644
--- a/src/compiler/glsl/glcpp/glcpp.h
+++ b/src/compiler/glsl/glcpp/glcpp.h
@@ -37,6 +37,14 @@
 
 #define yyscan_t void*
 
+#define GLCPP_DEFINES_BLOOM_GET(parser, identifier) \
+   (parser)->defines_bloom0) >> ((identifier)[0] & 63)) & 1) & \
+parser)->defines_bloom1) >> ((identifier)[1] & 63)) & 1))
+
+#define GLCPP_DEFINES_BLOOM_PUT(parser, identifier) \
+   (parser)->defines_bloom0 |= (uint64_t)1 << ((identifier)[0] & 63); \
+   (parser)->defin

[Mesa-dev] [PATCH 2/8] util: Add tests for the string buffer

2017-08-29 Thread Thomas Helland
More tests could probably be added, but this should cover
concatenation, resizing, clearing, formatted printing,
and checking the length, so it should be quite complete.

V2: Address review feedback from Timothy, plus fixes
   - Use a large enough char array
   - Actually test the formatted appending
   - Test that clear function resets string length
---
 configure.ac|  2 +
 src/util/Makefile.am|  3 +-
 src/util/tests/string_buffer/Makefile.am| 34 +
 src/util/tests/string_buffer/append_and_print.c | 99 +
 4 files changed, 137 insertions(+), 1 deletion(-)
 create mode 100644 src/util/tests/string_buffer/Makefile.am
 create mode 100644 src/util/tests/string_buffer/append_and_print.c

diff --git a/configure.ac b/configure.ac
index 53d52f6d52..a0d7a67956 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2933,9 +2933,11 @@ AC_CONFIG_FILES([Makefile
  src/mesa/main/tests/Makefile
  src/util/Makefile
  src/util/tests/hash_table/Makefile
+ src/util/tests/string_buffer/Makefile
  src/util/xmlpool/Makefile
  src/vulkan/Makefile])
 
+
 AC_OUTPUT
 
 # Fix up dependencies in *.Plo files, where we changed the extension of a
diff --git a/src/util/Makefile.am b/src/util/Makefile.am
index 4512dc99d5..7000202a36 100644
--- a/src/util/Makefile.am
+++ b/src/util/Makefile.am
@@ -19,7 +19,8 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
 
-SUBDIRS = xmlpool . tests/hash_table
+SUBDIRS = . tests/hash_table \
+  . tests/string_buffer
 
 include Makefile.sources
 
diff --git a/src/util/tests/string_buffer/Makefile.am 
b/src/util/tests/string_buffer/Makefile.am
new file mode 100644
index 00..60039a90d2
--- /dev/null
+++ b/src/util/tests/string_buffer/Makefile.am
@@ -0,0 +1,34 @@
+# Copyright © 2009 Intel Corporation
+#
+#  Permission is hereby granted, free of charge, to any person obtaining a
+#  copy of this software and associated documentation files (the "Software"),
+#  to deal in the Software without restriction, including without limitation
+#  on the rights to use, copy, modify, merge, publish, distribute, sub
+#  license, and/or sell copies of the Software, and to permit persons to whom
+#  the Software is furnished to do so, subject to the following conditions:
+#
+#  The above copyright notice and this permission notice (including the next
+#  paragraph) shall be included in all copies or substantial portions of the
+#  Software.
+#
+#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+#  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+#  FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+#  ADAM JACKSON BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+#  IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+#  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+AM_CPPFLAGS = \
+   -I$(top_srcdir)/include \
+   -I$(top_srcdir)/src/util \
+   $(DEFINES)
+
+LDADD = \
+   $(top_builddir)/src/util/libmesautil.la \
+   $(PTHREAD_LIBS) \
+   $(DLOPEN_LIBS)
+
+TESTS = append_and_print \
+   $()
+
+check_PROGRAMS = $(TESTS)
diff --git a/src/util/tests/string_buffer/append_and_print.c 
b/src/util/tests/string_buffer/append_and_print.c
new file mode 100644
index 00..71b857892f
--- /dev/null
+++ b/src/util/tests/string_buffer/append_and_print.c
@@ -0,0 +1,99 @@
+/*
+ * Copyright © 2017 Thomas Helland
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include "string_buffer.h"
+
+int
+main(int argc, char **argv)
+{
+   (void) argc;
+   (void) argv;
+
+   struct _mesa_string_buffer *buf;

[Mesa-dev] [PATCH 3/8] glsl: Change the parser to use the string buffer

2017-08-29 Thread Thomas Helland
V2: Pointed out by Timothy
   - Fix pp.c reralloc size issue and comment

V3 - Use vprintf instead of printf where we should
   - Fixes failing make-check tests
---
 src/compiler/glsl/glcpp/glcpp-parse.y | 195 ++
 src/compiler/glsl/glcpp/glcpp.h   |   8 +-
 src/compiler/glsl/glcpp/pp.c  |  39 +++
 3 files changed, 78 insertions(+), 164 deletions(-)

diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y 
b/src/compiler/glsl/glcpp/glcpp-parse.y
index 898a26044f..debf12bef8 100644
--- a/src/compiler/glsl/glcpp/glcpp-parse.y
+++ b/src/compiler/glsl/glcpp/glcpp-parse.y
@@ -209,12 +209,7 @@ line:
 |  SPACE control_line
 |  text_line {
_glcpp_parser_print_expanded_token_list (parser, $1);
-   const char *newline_str = "\n";
-   size_t size = strlen(newline_str);
-
-   ralloc_str_append(&parser->output, newline_str,
- parser->output_length, size);
-   parser->output_length += size;
+   _mesa_string_buffer_append_char(parser->output, '\n');
}
 |  expanded_line
 ;
@@ -233,20 +228,16 @@ expanded_line:
 |  LINE_EXPANDED integer_constant NEWLINE {
parser->has_new_line_number = 1;
parser->new_line_number = $2;
-   ralloc_asprintf_rewrite_tail (&parser->output,
- &parser->output_length,
- "#line %" PRIiMAX "\n",
- $2);
+   _mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX 
"\n", $2);
}
 |  LINE_EXPANDED integer_constant integer_constant NEWLINE {
parser->has_new_line_number = 1;
parser->new_line_number = $2;
parser->has_new_source_number = 1;
parser->new_source_number = $3;
-   ralloc_asprintf_rewrite_tail (&parser->output,
- &parser->output_length,
- "#line %" PRIiMAX " %" PRIiMAX 
"\n",
- $2, $3);
+   _mesa_string_buffer_printf(parser->output,
+  "#line %" PRIiMAX " %" PRIiMAX "\n",
+   $2, $3);
}
 ;
 
@@ -264,12 +255,7 @@ define:
 
 control_line:
control_line_success {
-   const char *newline_str = "\n";
-   size_t size = strlen(newline_str);
-
-   ralloc_str_append(&parser->output, newline_str,
- parser->output_length, size);
-   parser->output_length += size;
+   _mesa_string_buffer_append_char(parser->output, '\n');
}
 |  control_line_error
 |  HASH_TOKEN LINE pp_tokens NEWLINE {
@@ -459,7 +445,7 @@ control_line_success:
glcpp_parser_resolve_implicit_version(parser);
}
 |  HASH_TOKEN PRAGMA NEWLINE {
-   ralloc_asprintf_rewrite_tail (&parser->output, 
&parser->output_length, "#%s", $2);
+   _mesa_string_buffer_printf(parser->output, "#%s", $2);
}
 ;
 
@@ -1137,133 +1123,61 @@ _token_list_equal_ignoring_space(token_list_t *a, 
token_list_t *b)
 }
 
 static void
-_token_print(char **out, size_t *len, token_t *token)
+_token_print(struct _mesa_string_buffer *out, token_t *token)
 {
if (token->type < 256) {
-  size_t size = sizeof(char);
-
-  ralloc_str_append(out, (char *) &token->type, *len, size);
-  *len += size;
+  _mesa_string_buffer_printf(out,"%c", token->type);
   return;
}
 
switch (token->type) {
case INTEGER:
-  ralloc_asprintf_rewrite_tail (out, len, "%" PRIiMAX, token->value.ival);
+  _mesa_string_buffer_printf(out, "%" PRIiMAX, token->value.ival);
   break;
case IDENTIFIER:
case INTEGER_STRING:
-   case OTHER: {
-  size_t size = strlen(token->value.str);
-
-  ralloc_str_append(out, token->value.str, *len, size);
-  *len += size;
+   case OTHER:
+  _mesa_string_buffer_append(out, token->value.str);
   break;
-   }
-   case SPACE: {
-  const char *token_str = " ";
-  size_t size = strlen(token_str);
-
-  ralloc_str_append(out, token_str, *len, size);
-  *len += size;
+   case SPACE:
+  _mesa_string_buffer_append(out, " ");
   break;
-   }
-   case LEFT_SHIFT: {
-  const char *token_str = "<<";
-  size_t size = strlen(token_str);
-
-  ralloc_str_append(out, token_str, *len, size);
-  *len += size;
+   case LEFT_SHIFT:
+  _mesa_string_buffer_append(out, "<<");
   break;
-   }
-   case RIGHT_SHIFT: {
-  const char *token_str = ">>";
-  size_t size = strlen(token_str);
-
-  ralloc_str_append(out, token_str, *len, size);
-  *len += size;
+   case RIGHT_SHIFT:
+  _mesa_string_buffer_append(out, ">>");
   break;

[Mesa-dev] [PATCH 7/8] port to gtest

2017-08-29 Thread Thomas Helland
---
 src/util/tests/string_buffer/append_and_print.c   |  99 --
 src/util/tests/string_buffer/append_and_print.cpp | 221 ++
 2 files changed, 221 insertions(+), 99 deletions(-)
 delete mode 100644 src/util/tests/string_buffer/append_and_print.c
 create mode 100644 src/util/tests/string_buffer/append_and_print.cpp

diff --git a/src/util/tests/string_buffer/append_and_print.c 
b/src/util/tests/string_buffer/append_and_print.c
deleted file mode 100644
index 71b857892f..00
--- a/src/util/tests/string_buffer/append_and_print.c
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright © 2017 Thomas Helland
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#include 
-#include 
-#include 
-#include 
-#include "string_buffer.h"
-
-int
-main(int argc, char **argv)
-{
-   (void) argc;
-   (void) argv;
-
-   struct _mesa_string_buffer *buf;
-   char *str1 = "test1";
-   char *str2 = "test2";
-   char *str3 = "test1test2";
-   char str4[80];
-   char str5[40];
-
-   buf = _mesa_string_buffer_create(NULL, 6);
-
-   /* The string terminator needs one byte, so there should be five left */
-   assert(_mesa_string_buffer_space_left(buf) == 5);
-
-   _mesa_string_buffer_append(buf, str1);
-
-   /* The string should now be full */
-   assert(_mesa_string_buffer_space_left(buf) == 0);
-
-   /* The content should be equal */
-   assert(strcmp(buf->buf, str1) == 0);
-
-   /* Add more, so that the string is resized */
-   _mesa_string_buffer_append(buf, str2);
-
-   /* The size of the buffer should have been doubled.
-* We have 5 + 5 + 1 characters, so there should be one left.
-*/
-   assert(_mesa_string_buffer_space_left(buf) == 1);
-
-   /* The string should now be equal to str3 */
-   assert(strcmp(buf->buf, str3) == 0);
-
-   /* Compile a string with some formatting */
-   sprintf(str4, "Testing formatting %d, %f", 100, 1.0);
-
-   /* Clear the string buffer first */
-   _mesa_string_buffer_clear(buf);
-
-   /* Check that the length of the string is reset */
-   assert(buf->length == 0);
-
-   /* Then test printing some formatted text */
-   _mesa_string_buffer_printf(buf, "Testing formatting %d, %f", 100, 1.0);
-
-   /* The string should now be equal to str4 */
-   assert(strcmp(buf->buf, &str4) == 0);
-
-   /* Compile a string with some other formatting */
-   sprintf(str5, "Testing formatting %d, %x", 100, 0xDEADBEAF);
-
-   /* Concatenate str5 to str4 */
-   strcat(str4, str5);
-
-   /* Now use the formatted append function */
-   _mesa_string_buffer_printf(buf, "Testing formatting %d, %x", 100, 
0xDEADBEAF);
-
-   /* The string buffer should now be equal to str4 */
-   assert(strcmp(buf->buf, &str4) == 0);
-
-   /* Finally, clean up after us */
-   _mesa_string_buffer_destroy(buf);
-
-   return 0;
-}
diff --git a/src/util/tests/string_buffer/append_and_print.cpp 
b/src/util/tests/string_buffer/append_and_print.cpp
new file mode 100644
index 00..c3ccd8f9d1
--- /dev/null
+++ b/src/util/tests/string_buffer/append_and_print.cpp
@@ -0,0 +1,221 @@
+/*
+ * Copyright © 2017 Thomas Helland
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFR

[Mesa-dev] [PATCH 5/8] glcpp: Avoid unnecessary call to strlen

2017-08-29 Thread Thomas Helland
Length of the token was already calculated by flex and stored in yyleng,
no need to implicitly call strlen() via linear_strdup().
---
 src/compiler/glsl/glcpp/glcpp-lex.l | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/glcpp/glcpp-lex.l 
b/src/compiler/glsl/glcpp/glcpp-lex.l
index 381b97364a..93e5cb3845 100644
--- a/src/compiler/glsl/glcpp/glcpp-lex.l
+++ b/src/compiler/glsl/glcpp/glcpp-lex.l
@@ -101,7 +101,8 @@ void glcpp_set_column (int  column_no , yyscan_t yyscanner);
 #define RETURN_STRING_TOKEN(token) \
do {\
if (! parser->skipping) {   \
-   yylval->str = linear_strdup(yyextra->linalloc, yytext); 
\
+   yylval->str = linear_alloc_child(yyextra->linalloc, 
yyleng + 1);\
+   memcpy(yylval->str, yytext, yyleng + 1);\
RETURN_TOKEN_NEVER_SKIP (token);\
}   \
} while(0)
-- 
2.13.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/8] Resend of preprocessor series

2017-08-29 Thread Thomas Helland
This is a resend of the string buffer implementation and
related patches sent out back in May. I've done one more
change to the string buffer; using u_string.h for a compatible
vsnprintf version to reduce the code even more. I've not been
able to test this due to two build breakages (xmlpool and dri)
that I'm still trying to figure out of. But since I promised
to send these out this evening, I'm sending them untested.
I did test them thoroughly the last time around though,
so I believe it should be mostly good as long as I haven't
messed up the rebasing. I believe the string buffer part of
the series is the most important; the rest I've not really
gotten around to performance test much.

Thomas Helland (7):
  util: Add a string buffer implementation
  util: Add tests for the string buffer
  glsl: Change the parser to use the string buffer
  glcpp: Use string_buffer for line continuation removal
  glcpp: Avoid unnecessary call to strlen
  port to gtest
  fix test makefile

Vladislav Egorov (1):
  glcpp: Use Bloom filter before identifier search

 configure.ac  |   2 +
 src/compiler/glsl/glcpp/glcpp-lex.l   |   3 +-
 src/compiler/glsl/glcpp/glcpp-parse.y | 219 -
 src/compiler/glsl/glcpp/glcpp.h   |  18 +-
 src/compiler/glsl/glcpp/pp.c  |  64 ---
 src/util/Makefile.am  |   3 +-
 src/util/Makefile.sources |   2 +
 src/util/string_buffer.c  | 155 +++
 src/util/string_buffer.h  |  87 +
 src/util/tests/string_buffer/Makefile.am  |  38 
 src/util/tests/string_buffer/append_and_print.cpp | 221 ++
 11 files changed, 633 insertions(+), 179 deletions(-)
 create mode 100644 src/util/string_buffer.c
 create mode 100644 src/util/string_buffer.h
 create mode 100644 src/util/tests/string_buffer/Makefile.am
 create mode 100644 src/util/tests/string_buffer/append_and_print.cpp

-- 
2.13.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/8] util: Add a string buffer implementation

2017-08-29 Thread Thomas Helland
Based on Vladislav Egorovs work on the preprocessor, but split
out to a util functionality that should be universal. Setup, teardown,
memory handling and general layout is modeled around the hash_table
and the set, to make it familiar for everyone.

A notable change is that this implementation is always null terminated.
The rationale is that it will be less error-prone, as one might
access the buffer directly, thereby reading a non-terminated string.
Also, vsnprintf and friends prints the null-terminator.

V2: Address review feedback from Timothy and Grazvydas
   - Fix MINGW preprocessor check
   - Changed len from uint to int
   - Make string argument const in append function
   - Move to header and inline append function
   - Add crimp_to_fit function for resizing buffer

V3: Move include of ralloc to string_buffer.h

V4: Use u_string.h for a cross-platform working vsnprintf
---
 src/util/Makefile.sources |   2 +
 src/util/string_buffer.c  | 155 ++
 src/util/string_buffer.h  |  87 ++
 3 files changed, 244 insertions(+)
 create mode 100644 src/util/string_buffer.c
 create mode 100644 src/util/string_buffer.h

diff --git a/src/util/Makefile.sources b/src/util/Makefile.sources
index 4ed4e39f03..c7f6516a99 100644
--- a/src/util/Makefile.sources
+++ b/src/util/Makefile.sources
@@ -37,6 +37,8 @@ MESA_UTIL_FILES := \
simple_list.h \
slab.c \
slab.h \
+   string_buffer.c \
+   string_buffer.h \
strndup.h \
strtod.c \
strtod.h \
diff --git a/src/util/string_buffer.c b/src/util/string_buffer.c
new file mode 100644
index 00..e2fa612976
--- /dev/null
+++ b/src/util/string_buffer.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright © 2017 Thomas Helland
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+#include "string_buffer.h"
+
+static bool
+ensure_capacity(struct _mesa_string_buffer *str, uint32_t needed_capacity)
+{
+   if (needed_capacity <= str->capacity)
+  return true;
+
+   /* Too small, we have to resize our buffer.
+* Double until we can fit the new string.
+*/
+   uint32_t new_capacity = str->capacity * 2;
+   while (needed_capacity > new_capacity)
+  new_capacity *= 2;
+
+   str->buf = reralloc_array_size(str, str->buf, sizeof(char), new_capacity);
+   if (str->buf == NULL)
+  return false;
+
+   str->capacity = new_capacity;
+   return true;
+}
+
+struct _mesa_string_buffer *
+_mesa_string_buffer_create(void *mem_ctx, uint32_t initial_capacity)
+{
+   struct _mesa_string_buffer *str;
+   str = ralloc(mem_ctx, struct _mesa_string_buffer);
+
+   if (str == NULL)
+  return NULL;
+
+   /* If no initial capacity is set then set it to something */
+   str->capacity = initial_capacity ? initial_capacity : 8;
+   str->buf = ralloc_array(str, char, str->capacity);
+   str->length = 0;
+   str->buf[str->length] = '\0';
+   return str;
+}
+
+void
+_mesa_string_buffer_destroy(struct _mesa_string_buffer *str)
+{
+   ralloc_free(str);
+}
+
+bool
+_mesa_string_buffer_append_all(struct _mesa_string_buffer *str,
+   uint32_t num_args, ...)
+{
+   int i;
+   char* s;
+   va_list args;
+   va_start(args, num_args);
+   for (i = 0; i < num_args; i++) {
+  s = va_arg(args, char*);
+  if (!_mesa_string_buffer_append_len(str, s, strlen(s)))
+ return false;
+   }
+   va_end(args);
+   return true;
+}
+
+bool
+_mesa_string_buffer_append_len(struct _mesa_string_buffer *str,
+   const char *c, uint32_t len)
+{
+   uint32_t needed_length = str->length + len + 1;
+   if (!ensure_capacity(str, needed_length))
+  return false;
+
+   memcpy(str->buf + str->length, c, len);
+   str->length += len;
+   str->buf[str->length] = '\0';
+   return true;
+}
+
+void
+_mesa_string_buffer_clear(struct _mesa_string_buffer *str)
+{
+   str->length = 0;
+   str->buf[

Re: [Mesa-dev] [PATCH 20/47] i965/fs: Define new shader opcodes to set rounding modes

2017-08-29 Thread Francisco Jerez
Francisco Jerez  writes:

> Chema Casanova  writes:
>
>> El 25/08/17 a las 20:09, Francisco Jerez escribió:
>>> Alejandro Piñeiro  writes:
>>>
 Although it is possible to emit them directly as AND/OR on brw_fs_nir,
 having specific opcodes makes it easier to remove duplicate settings
 later.

 Signed-off-by:  Alejandro Piñeiro 
 Signed-off-by:  Jose Maria Casanova Crespo 
 ---
  src/intel/compiler/brw_eu.h |  3 +++
  src/intel/compiler/brw_eu_defines.h |  9 +
  src/intel/compiler/brw_eu_emit.c| 19 +++
  src/intel/compiler/brw_fs_generator.cpp |  8 
  src/intel/compiler/brw_shader.cpp   |  5 +
  5 files changed, 44 insertions(+)

 diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
 index a3a9c63239d..0a7f8020398 100644
 --- a/src/intel/compiler/brw_eu.h
 +++ b/src/intel/compiler/brw_eu.h
 @@ -500,6 +500,9 @@ brw_broadcast(struct brw_codegen *p,
struct brw_reg src,
struct brw_reg idx);
  
 +void
 +brw_rounding_mode(struct brw_codegen *p,
 +  enum brw_rnd_mode mode);
  /***
   * brw_eu_util.c:
   */
 diff --git a/src/intel/compiler/brw_eu_defines.h 
 b/src/intel/compiler/brw_eu_defines.h
 index 1af835d47ed..50435df2fcf 100644
 --- a/src/intel/compiler/brw_eu_defines.h
 +++ b/src/intel/compiler/brw_eu_defines.h
 @@ -388,6 +388,9 @@ enum opcode {
 SHADER_OPCODE_TYPED_SURFACE_WRITE,
 SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
  
 +   SHADER_OPCODE_RND_MODE_RTE,
 +   SHADER_OPCODE_RND_MODE_RTZ,
 +
>>> We don't need an opcode for each possible rounding mode (there's also RU
>>> and RD).  How about you add a single SHADER_OPCODE_RND_MODE opcode
>>> taking an immediate with the right rounding mode?
>> I like the proposal. It is better having a unique opcode for setting the
>> rounding mode. Changed for v2 of this patch.
>>> Also, you should be marking the rounding mode opcodes as
>>> has_side_effects(), because otherwise you're giving the scheduler the
>>> freedom of moving your rounding mode update instruction past the
>>> instruction you wanted it to have an effect on...
>> Well pointed, we already realized that it was missing while debugging
>> the latency problem of the control register. It was hiding the problem
>> re-scheduling the cr0 modification to the beginning of the shader.
 SHADER_OPCODE_MEMORY_FENCE,
  
 SHADER_OPCODE_GEN4_SCRATCH_READ,
 @@ -1233,4 +1236,10 @@ enum brw_message_target {
  /* R0 */
  # define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT27
  
 +enum PACKED brw_rnd_mode {
 +   BRW_RND_MODE_UNSPECIFIED,
 +   BRW_RND_MODE_RTE,
 +   BRW_RND_MODE_RTZ,
>>> Since you're introducing a back-end-specific rounding mode enum already,
>>> why not use the hardware values right away so you avoid hard-coding
>>> magic constants below.
>> At the end we removed  MODE_UNSPECIFIED as it isn't really needed and we
>> include Round Up and Down in the enum assignation based using the PRM
>> values. Also renamed RTE for RTNE to maintain coherence with nir
>> conversion modifiers using the same acronym as PRM.
>>
>> +enum PACKED brw_rnd_mode {
>> +   BRW_RND_MODE_RTNE = 0,  /* Round to Nearest or Even */
>> +   BRW_RND_MODE_RU = 1,    /* Round Up, toward +inf */
>> +   BRW_RND_MODE_RD = 2,    /* Round Down, toward -inf */
>> +   BRW_RND_MODE_RTZ = 3    /* Round Toward Zero */
>> +};
>>
>> I have a doubt about how to avoid the magic constants to close the v2 of
>> this patch. One approach would be using the same code structure and
>> taking advantage using the codification of rounding field. This way we
>> just formula and expect that the C compiler optimizer to guess that the
>> immediate value is a constant.
>>
>
> Hm, I'm not particularly worried about the C++ compiler behavior, it's
> most likely immaterial.
>
>> switch (mode) {
>>     case BRW_RND_MODE_RTZ:
>> -  inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
>> brw_imm_ud(0x0030u));
>> +  inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
>> brw_imm_ud(((unsigned int) mode << 4)));
>>    break;
>> -   case BRW_RND_MODE_RTE:
>> -  inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
>> brw_imm_ud(0xffcfu));
>> +   case BRW_RND_MODE_RTNE:
>> +  inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
>> brw_imm_ud(((unsigned int) mode << 4) | ~0x0030u));
>>    break;
>>     default:
>>
>
> I don't think this is particularly advantageous since you still need to
> special-case every possible rounding mode.
>
>> Another approach could be to implement a general solution for all
>> supported rounding modes by the hw including Round Up and Down using
>> bitwise operations.
>>
>> /**
>>  * Changes the floating point rounding

Re: [Mesa-dev] [PATCH 20/47] i965/fs: Define new shader opcodes to set rounding modes

2017-08-29 Thread Francisco Jerez
Chema Casanova  writes:

> El 25/08/17 a las 20:09, Francisco Jerez escribió:
>> Alejandro Piñeiro  writes:
>>
>>> Although it is possible to emit them directly as AND/OR on brw_fs_nir,
>>> having specific opcodes makes it easier to remove duplicate settings
>>> later.
>>>
>>> Signed-off-by:  Alejandro Piñeiro 
>>> Signed-off-by:  Jose Maria Casanova Crespo 
>>> ---
>>>  src/intel/compiler/brw_eu.h |  3 +++
>>>  src/intel/compiler/brw_eu_defines.h |  9 +
>>>  src/intel/compiler/brw_eu_emit.c| 19 +++
>>>  src/intel/compiler/brw_fs_generator.cpp |  8 
>>>  src/intel/compiler/brw_shader.cpp   |  5 +
>>>  5 files changed, 44 insertions(+)
>>>
>>> diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
>>> index a3a9c63239d..0a7f8020398 100644
>>> --- a/src/intel/compiler/brw_eu.h
>>> +++ b/src/intel/compiler/brw_eu.h
>>> @@ -500,6 +500,9 @@ brw_broadcast(struct brw_codegen *p,
>>>struct brw_reg src,
>>>struct brw_reg idx);
>>>  
>>> +void
>>> +brw_rounding_mode(struct brw_codegen *p,
>>> +  enum brw_rnd_mode mode);
>>>  /***
>>>   * brw_eu_util.c:
>>>   */
>>> diff --git a/src/intel/compiler/brw_eu_defines.h 
>>> b/src/intel/compiler/brw_eu_defines.h
>>> index 1af835d47ed..50435df2fcf 100644
>>> --- a/src/intel/compiler/brw_eu_defines.h
>>> +++ b/src/intel/compiler/brw_eu_defines.h
>>> @@ -388,6 +388,9 @@ enum opcode {
>>> SHADER_OPCODE_TYPED_SURFACE_WRITE,
>>> SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
>>>  
>>> +   SHADER_OPCODE_RND_MODE_RTE,
>>> +   SHADER_OPCODE_RND_MODE_RTZ,
>>> +
>> We don't need an opcode for each possible rounding mode (there's also RU
>> and RD).  How about you add a single SHADER_OPCODE_RND_MODE opcode
>> taking an immediate with the right rounding mode?
> I like the proposal. It is better having a unique opcode for setting the
> rounding mode. Changed for v2 of this patch.
>> Also, you should be marking the rounding mode opcodes as
>> has_side_effects(), because otherwise you're giving the scheduler the
>> freedom of moving your rounding mode update instruction past the
>> instruction you wanted it to have an effect on...
> Well pointed, we already realized that it was missing while debugging
> the latency problem of the control register. It was hiding the problem
> re-scheduling the cr0 modification to the beginning of the shader.
>>> SHADER_OPCODE_MEMORY_FENCE,
>>>  
>>> SHADER_OPCODE_GEN4_SCRATCH_READ,
>>> @@ -1233,4 +1236,10 @@ enum brw_message_target {
>>>  /* R0 */
>>>  # define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT 27
>>>  
>>> +enum PACKED brw_rnd_mode {
>>> +   BRW_RND_MODE_UNSPECIFIED,
>>> +   BRW_RND_MODE_RTE,
>>> +   BRW_RND_MODE_RTZ,
>> Since you're introducing a back-end-specific rounding mode enum already,
>> why not use the hardware values right away so you avoid hard-coding
>> magic constants below.
> At the end we removed  MODE_UNSPECIFIED as it isn't really needed and we
> include Round Up and Down in the enum assignation based using the PRM
> values. Also renamed RTE for RTNE to maintain coherence with nir
> conversion modifiers using the same acronym as PRM.
>
> +enum PACKED brw_rnd_mode {
> +   BRW_RND_MODE_RTNE = 0,  /* Round to Nearest or Even */
> +   BRW_RND_MODE_RU = 1,    /* Round Up, toward +inf */
> +   BRW_RND_MODE_RD = 2,    /* Round Down, toward -inf */
> +   BRW_RND_MODE_RTZ = 3    /* Round Toward Zero */
> +};
>
> I have a doubt about how to avoid the magic constants to close the v2 of
> this patch. One approach would be using the same code structure and
> taking advantage using the codification of rounding field. This way we
> just formula and expect that the C compiler optimizer to guess that the
> immediate value is a constant.
>

Hm, I'm not particularly worried about the C++ compiler behavior, it's
most likely immaterial.

> switch (mode) {
>     case BRW_RND_MODE_RTZ:
> -  inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
> brw_imm_ud(0x0030u));
> +  inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
> brw_imm_ud(((unsigned int) mode << 4)));
>    break;
> -   case BRW_RND_MODE_RTE:
> -  inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
> brw_imm_ud(0xffcfu));
> +   case BRW_RND_MODE_RTNE:
> +  inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
> brw_imm_ud(((unsigned int) mode << 4) | ~0x0030u));
>    break;
>     default:
>

I don't think this is particularly advantageous since you still need to
special-case every possible rounding mode.

> Another approach could be to implement a general solution for all
> supported rounding modes by the hw including Round Up and Down using
> bitwise operations.
>
> /**
>  * Changes the floating point rounding mode updating the control register
>  * field defined at cr0.0[5-6] bits. This function supports the changes to
>  * RTNE (00), RU (01), RD (10) and RTZ (11

Re: [Mesa-dev] [PATCH 20/47] i965/fs: Define new shader opcodes to set rounding modes

2017-08-29 Thread Chema Casanova
El 25/08/17 a las 20:09, Francisco Jerez escribió:
> Alejandro Piñeiro  writes:
>
>> Although it is possible to emit them directly as AND/OR on brw_fs_nir,
>> having specific opcodes makes it easier to remove duplicate settings
>> later.
>>
>> Signed-off-by:  Alejandro Piñeiro 
>> Signed-off-by:  Jose Maria Casanova Crespo 
>> ---
>>  src/intel/compiler/brw_eu.h |  3 +++
>>  src/intel/compiler/brw_eu_defines.h |  9 +
>>  src/intel/compiler/brw_eu_emit.c| 19 +++
>>  src/intel/compiler/brw_fs_generator.cpp |  8 
>>  src/intel/compiler/brw_shader.cpp   |  5 +
>>  5 files changed, 44 insertions(+)
>>
>> diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
>> index a3a9c63239d..0a7f8020398 100644
>> --- a/src/intel/compiler/brw_eu.h
>> +++ b/src/intel/compiler/brw_eu.h
>> @@ -500,6 +500,9 @@ brw_broadcast(struct brw_codegen *p,
>>struct brw_reg src,
>>struct brw_reg idx);
>>  
>> +void
>> +brw_rounding_mode(struct brw_codegen *p,
>> +  enum brw_rnd_mode mode);
>>  /***
>>   * brw_eu_util.c:
>>   */
>> diff --git a/src/intel/compiler/brw_eu_defines.h 
>> b/src/intel/compiler/brw_eu_defines.h
>> index 1af835d47ed..50435df2fcf 100644
>> --- a/src/intel/compiler/brw_eu_defines.h
>> +++ b/src/intel/compiler/brw_eu_defines.h
>> @@ -388,6 +388,9 @@ enum opcode {
>> SHADER_OPCODE_TYPED_SURFACE_WRITE,
>> SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
>>  
>> +   SHADER_OPCODE_RND_MODE_RTE,
>> +   SHADER_OPCODE_RND_MODE_RTZ,
>> +
> We don't need an opcode for each possible rounding mode (there's also RU
> and RD).  How about you add a single SHADER_OPCODE_RND_MODE opcode
> taking an immediate with the right rounding mode?
I like the proposal. It is better having a unique opcode for setting the
rounding mode. Changed for v2 of this patch.
> Also, you should be marking the rounding mode opcodes as
> has_side_effects(), because otherwise you're giving the scheduler the
> freedom of moving your rounding mode update instruction past the
> instruction you wanted it to have an effect on...
Well pointed, we already realized that it was missing while debugging
the latency problem of the control register. It was hiding the problem
re-scheduling the cr0 modification to the beginning of the shader.
>> SHADER_OPCODE_MEMORY_FENCE,
>>  
>> SHADER_OPCODE_GEN4_SCRATCH_READ,
>> @@ -1233,4 +1236,10 @@ enum brw_message_target {
>>  /* R0 */
>>  # define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT  27
>>  
>> +enum PACKED brw_rnd_mode {
>> +   BRW_RND_MODE_UNSPECIFIED,
>> +   BRW_RND_MODE_RTE,
>> +   BRW_RND_MODE_RTZ,
> Since you're introducing a back-end-specific rounding mode enum already,
> why not use the hardware values right away so you avoid hard-coding
> magic constants below.
At the end we removed  MODE_UNSPECIFIED as it isn't really needed and we
include Round Up and Down in the enum assignation based using the PRM
values. Also renamed RTE for RTNE to maintain coherence with nir
conversion modifiers using the same acronym as PRM.

+enum PACKED brw_rnd_mode {
+   BRW_RND_MODE_RTNE = 0,  /* Round to Nearest or Even */
+   BRW_RND_MODE_RU = 1,    /* Round Up, toward +inf */
+   BRW_RND_MODE_RD = 2,    /* Round Down, toward -inf */
+   BRW_RND_MODE_RTZ = 3    /* Round Toward Zero */
+};

I have a doubt about how to avoid the magic constants to close the v2 of
this patch. One approach would be using the same code structure and
taking advantage using the codification of rounding field. This way we
just formula and expect that the C compiler optimizer to guess that the
immediate value is a constant.

switch (mode) {
    case BRW_RND_MODE_RTZ:
-  inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
brw_imm_ud(0x0030u));
+  inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
brw_imm_ud(((unsigned int) mode << 4)));
   break;
-   case BRW_RND_MODE_RTE:
-  inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
brw_imm_ud(0xffcfu));
+   case BRW_RND_MODE_RTNE:
+  inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
brw_imm_ud(((unsigned int) mode << 4) | ~0x0030u));
   break;
    default:

Another approach could be to implement a general solution for all
supported rounding modes by the hw including Round Up and Down using
bitwise operations.

/**
 * Changes the floating point rounding mode updating the control register
 * field defined at cr0.0[5-6] bits. This function supports the changes to
 * RTNE (00), RU (01), RD (10) and RTZ (11) rounding using bitwise
operations.
 * Only RTNE and RTZ rounding are enabled at nir.
 */

void
brw_rounding_mode(struct brw_codegen *p,
  enum brw_rnd_mode mode)
{

   const unsigned int mask = 0x30u;
   const unsigned int enable_bits =  ((unsigned int) mode) << 4;
   const unsigned int disable_bits = enable_bits | ~mask;

   /* Used by RTNE rounding to set field 

[Mesa-dev] [Bug 102454] glibc 2.26 doesn't provide anymore xlocale.h

2017-08-29 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=102454

--- Comment #1 from Emil Velikov  ---
The patch in the bug report isn't quite right (aka remove the header and guards
all together).

Instead one should add a configure check - AC_LINK_IFELSE() ... which:
 - includes xlocale.h reusing the HAVE_XLOCALE_H guard
 - references strtod_l w/o any guard, setting a macro HAVE_FOO
Then update the code
 - swap the __GNU_SOURCE guards through the file with HAVE_FOO
 - HAVE_XLOCALE_H should guard _only_ the header inclusion

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/10] egl/wayland: remove dri2_surf width/height double init.

2017-08-29 Thread Eric Engestrom
On 29 August 2017 17:33:57 BST, Emil Velikov  wrote:
> On 29 August 2017 at 15:40, Eric Engestrom 
> wrote:
> > On Sunday, 2017-08-27 11:20:33 +0100, Emil Velikov wrote:
> >> From: Emil Velikov 
> >>
> >> The dimensions are already set [to 0 or the value provided by the
> >> attributes list] by the _eglInitSurface() call further up.
> >>
> >> The values are updated, as the DRI driver calls the
> DRI2/IMAGE_LOADER'
> >> get_buffers, shortly before making use of the values.
> >>
> >> Signed-off-by: Emil Velikov 
> >> ---
> >>  src/egl/drivers/dri2/platform_wayland.c | 3 ---
> >>  1 file changed, 3 deletions(-)
> >>
> >> diff --git a/src/egl/drivers/dri2/platform_wayland.c
> b/src/egl/drivers/dri2/platform_wayland.c
> >> index f9554fccde9..89e94e11f4f 100644
> >> --- a/src/egl/drivers/dri2/platform_wayland.c
> >> +++ b/src/egl/drivers/dri2/platform_wayland.c
> >> @@ -198,9 +198,6 @@ dri2_wl_create_window_surface(_EGLDriver *drv,
> _EGLDisplay *disp,
> >> dri2_surf->wl_win->private = dri2_surf;
> >> dri2_surf->wl_win->destroy_window_callback =
> destroy_window_callback;
> >>
> >> -   dri2_surf->base.Width =  -1;
> >> -   dri2_surf->base.Height = -1;
> >> -
> >
> > We actually have a local patch here that replaces this `-1`
> > initialisation with the dimensions of the wayland `window`:
> >
> > 8<
> > diff --git a/src/egl/drivers/dri2/platform_wayland.c
> b/src/egl/drivers/dri2/platform_wayland.c
> > index 85a11f4..7719407 100644
> > --- a/src/egl/drivers/dri2/platform_wayland.c
> > +++ b/src/egl/drivers/dri2/platform_wayland.c
> > @@ -183,8 +183,8 @@ dri2_wl_create_window_surface(_EGLDriver *drv,
> _EGLDisplay *disp,
> > dri2_surf->wl_win->private = dri2_surf;
> > dri2_surf->wl_win->destroy_window_callback =
> destroy_window_callback;
> >
> > -   dri2_surf->base.Width =  -1;
> > -   dri2_surf->base.Height = -1;
> > +   dri2_surf->base.Width  = window->width;
> > +   dri2_surf->base.Height = window->height;
> >
> > config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
> >  dri2_surf->base.GLColorspace);
> > >8
> >
> > This apparently fixes dEQP crashes caused by an assert:
> > deRandom.hpp:93: Unknown function: Assertion `min <= max'
> failed.
> >
> > No dEQP test is mentioned on our patch, but I think this happened on
> > dEQP-EGL.functional.resize.surface_size.*
> >
> Having another look, we seems to be rather inconsistent when we set
> them - see below.
> At the same time, I doubt any !X11 dEQP resize tests work:
>  - iirc there's no wayland/drm/surfaceless support
>  - none of the three has a .query_surface hook
> 
> -Emil
> 
> X11/dri2
>  - create_*_surface, all but !pbuffer
>  - query_surface via xcb_get_geometry
> 
> X11/dri3:
>  - create_*_surface, all
>  - query_surface via xcb_get_geometry
> 
> drm
>  - create_*_surface, all (window only)
>  - query_surface - no hook
> 
> surfaceless
>  - create_*_surface, none (pbuffer only)
>  - query_surface  - no hook
> 
> wayland
>  - create_*_surface, none (window only)

>  - query_surface  - no hook

Oh, uh… I think this might also be in a local patch ^^
We have a bunch of patches like that, that I want to upstream, but
not enough bandwidth and too many high priority tasks lining up.
I'll try and have a look tomorrow for the query_surface hook.

Cheers,
  Eric
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/19] anv: Use GNU C empty brace initializer

2017-08-29 Thread Eric Engestrom
On 29 August 2017 18:11:45 BST, Matt Turner  wrote:
> On Tue, Aug 29, 2017 at 3:11 AM, Eric Engestrom
>  wrote:
> > On Monday, 2017-08-28 14:57:13 -0700, Matt Turner wrote:
> >> Avoids Clang's warning about the current code:
> >>
> >>warning: suggest braces around initialization of subobject
> >
> > I'm not sure about this patch [1], but everything else in this
> series is:
> > Reviewed-by: Eric Engestrom 
> >
> > The "mark `UNUSED` because they are unused in some generations"
> patches
> > might be better to mark as `MAYBE_UNUSED`; they both resolve to the
> same
> > thing for compilers, but the latter is gives more information to
> humans
> > (ie. "this isn't always unused").
> >
> > [1]
> > Can you test this with GCC 4.x and 7.x? If memory serves, this init
> format
> > caused warnings on either old GCC or new GCC.
> > I think `= {{0}};` was the only format that both compilers were
> happy with.
> 
> Tested 4.9.4 and 7.1.0. No warnings from either of them about these
> initializers :)

Given this and Emil's comment, this patch is r-b me with the rest of the series 
:)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/19] anv: Use GNU C empty brace initializer

2017-08-29 Thread Emil Velikov
On 29 August 2017 at 18:10, Matt Turner  wrote:
> On Tue, Aug 29, 2017 at 3:35 AM, Emil Velikov  
> wrote:
>> On 29 August 2017 at 11:11, Eric Engestrom  wrote:
>>> On Monday, 2017-08-28 14:57:13 -0700, Matt Turner wrote:
 Avoids Clang's warning about the current code:

warning: suggest braces around initialization of subobject
>>>
>>> I'm not sure about this patch [1], but everything else in this series is:
>> IIRC "{}" is the least likely way to avoid the warnings, across
>> GCC/Clang versions.
>> It's not part of the C spec, but since it works - not sure how much to 
>> bother.
>
> I'm surprised to hear you say that. I've tested with gcc-4.9.4, 6.4.0,
> 7.1.0; and clang-4.0, and none of them warn about {}
>
Eric corrected me on IRC, but I forgot to mention here - "s/least/most/"

> I think when we went through this before (in NIR) we couldn't use {}
> because it needs to compile with MSVC, and MSVC doesn't allow that in
> C.
>
Precisely. The "most" portable solution that I know of is memset().

>> Would be great to have the issue reported (and fixed) in Clang though.
>> AFAICT both {0} and {0,} are valid, if memory serves me right.
>
> I don't really understand the nuances of {0}, {0,}, {{0}}, etc, and I
> get the sense that that's the case for nearly everyone else as well.
>
AFAICT {0} and {0,} should just work everywhere, modulo bugs.

[1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119
[2] https://bugs.llvm.org/show_bug.cgi?id=21689

> The most prominent bug I see in all of this is that {} isn't part of
> standard C (it is part of standard C++!)
Agreed. Not sure how easy it will be to convince the committee.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/19] anv: Use GNU C empty brace initializer

2017-08-29 Thread Matt Turner
On Tue, Aug 29, 2017 at 3:11 AM, Eric Engestrom
 wrote:
> On Monday, 2017-08-28 14:57:13 -0700, Matt Turner wrote:
>> Avoids Clang's warning about the current code:
>>
>>warning: suggest braces around initialization of subobject
>
> I'm not sure about this patch [1], but everything else in this series is:
> Reviewed-by: Eric Engestrom 
>
> The "mark `UNUSED` because they are unused in some generations" patches
> might be better to mark as `MAYBE_UNUSED`; they both resolve to the same
> thing for compilers, but the latter is gives more information to humans
> (ie. "this isn't always unused").
>
> [1]
> Can you test this with GCC 4.x and 7.x? If memory serves, this init format
> caused warnings on either old GCC or new GCC.
> I think `= {{0}};` was the only format that both compilers were happy with.

Tested 4.9.4 and 7.1.0. No warnings from either of them about these
initializers :)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/19] anv: Use GNU C empty brace initializer

2017-08-29 Thread Matt Turner
On Tue, Aug 29, 2017 at 3:35 AM, Emil Velikov  wrote:
> On 29 August 2017 at 11:11, Eric Engestrom  wrote:
>> On Monday, 2017-08-28 14:57:13 -0700, Matt Turner wrote:
>>> Avoids Clang's warning about the current code:
>>>
>>>warning: suggest braces around initialization of subobject
>>
>> I'm not sure about this patch [1], but everything else in this series is:
> IIRC "{}" is the least likely way to avoid the warnings, across
> GCC/Clang versions.
> It's not part of the C spec, but since it works - not sure how much to bother.

I'm surprised to hear you say that. I've tested with gcc-4.9.4, 6.4.0,
7.1.0; and clang-4.0, and none of them warn about {}

I think when we went through this before (in NIR) we couldn't use {}
because it needs to compile with MSVC, and MSVC doesn't allow that in
C.

> Would be great to have the issue reported (and fixed) in Clang though.
> AFAICT both {0} and {0,} are valid, if memory serves me right.

I don't really understand the nuances of {0}, {0,}, {{0}}, etc, and I
get the sense that that's the case for nearly everyone else as well.

The most prominent bug I see in all of this is that {} isn't part of
standard C (it is part of standard C++!)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH v2 1/2] gallivm: correct channel shift logic on big endian

2017-08-29 Thread Emil Velikov
On 23 August 2017 at 21:32, Ben Crocker  wrote:
> From: Ray Strode 
>
> lp_build_fetch_rgba_soa fetches a texel from a texture.
> Part of that process involves first gathering the element
> together from memory into a packed format, and then breaking
> out the individual color channels into separate, parallel
> arrays.
>
> The code fails to account for endianess when reading the packed
> values.
>
> This commit attempts to correct the problem by reversing the order
> the packed values are read on big endian systems.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100613
> Cc: "17.2" "17.1" 
>
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 8 +++-
>  1 file changed, 7 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c 
> b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
> index 98eb694..22c19b1 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
> @@ -650,7 +650,13 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
>   for (i = 0; i < format_desc->nr_channels; i++) {
>  struct util_format_channel_description chan_desc = 
> format_desc->channel[i];
>  unsigned blockbits = type.width;
> -unsigned vec_nr = chan_desc.shift / type.width;
> +unsigned vec_nr;
> +
> +#ifdef PIPE_ARCH_BIG_ENDIAN
> +vec_nr = (format_desc->block.bits - (chan_desc.shift + 
> chan_desc.size)) / type.width;
> +#else
> +vec_nr = chan_desc.shift / type.width;
> +#endif
>  chan_desc.shift %= type.width;
>
Roland, I think you're one of the more knowledgeable people for things gallivm.
Can you please review this patch?

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/10] egl/wayland: remove dri2_surf width/height double init.

2017-08-29 Thread Emil Velikov
On 29 August 2017 at 15:40, Eric Engestrom  wrote:
> On Sunday, 2017-08-27 11:20:33 +0100, Emil Velikov wrote:
>> From: Emil Velikov 
>>
>> The dimensions are already set [to 0 or the value provided by the
>> attributes list] by the _eglInitSurface() call further up.
>>
>> The values are updated, as the DRI driver calls the DRI2/IMAGE_LOADER'
>> get_buffers, shortly before making use of the values.
>>
>> Signed-off-by: Emil Velikov 
>> ---
>>  src/egl/drivers/dri2/platform_wayland.c | 3 ---
>>  1 file changed, 3 deletions(-)
>>
>> diff --git a/src/egl/drivers/dri2/platform_wayland.c 
>> b/src/egl/drivers/dri2/platform_wayland.c
>> index f9554fccde9..89e94e11f4f 100644
>> --- a/src/egl/drivers/dri2/platform_wayland.c
>> +++ b/src/egl/drivers/dri2/platform_wayland.c
>> @@ -198,9 +198,6 @@ dri2_wl_create_window_surface(_EGLDriver *drv, 
>> _EGLDisplay *disp,
>> dri2_surf->wl_win->private = dri2_surf;
>> dri2_surf->wl_win->destroy_window_callback = destroy_window_callback;
>>
>> -   dri2_surf->base.Width =  -1;
>> -   dri2_surf->base.Height = -1;
>> -
>
> We actually have a local patch here that replaces this `-1`
> initialisation with the dimensions of the wayland `window`:
>
> 8<
> diff --git a/src/egl/drivers/dri2/platform_wayland.c 
> b/src/egl/drivers/dri2/platform_wayland.c
> index 85a11f4..7719407 100644
> --- a/src/egl/drivers/dri2/platform_wayland.c
> +++ b/src/egl/drivers/dri2/platform_wayland.c
> @@ -183,8 +183,8 @@ dri2_wl_create_window_surface(_EGLDriver *drv, 
> _EGLDisplay *disp,
> dri2_surf->wl_win->private = dri2_surf;
> dri2_surf->wl_win->destroy_window_callback = destroy_window_callback;
>
> -   dri2_surf->base.Width =  -1;
> -   dri2_surf->base.Height = -1;
> +   dri2_surf->base.Width  = window->width;
> +   dri2_surf->base.Height = window->height;
>
> config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
>  dri2_surf->base.GLColorspace);
> >8
>
> This apparently fixes dEQP crashes caused by an assert:
> deRandom.hpp:93: Unknown function: Assertion `min <= max' failed.
>
> No dEQP test is mentioned on our patch, but I think this happened on
> dEQP-EGL.functional.resize.surface_size.*
>
Having another look, we seems to be rather inconsistent when we set
them - see below.
At the same time, I doubt any !X11 dEQP resize tests work:
 - iirc there's no wayland/drm/surfaceless support
 - none of the three has a .query_surface hook

-Emil

X11/dri2
 - create_*_surface, all but !pbuffer
 - query_surface via xcb_get_geometry

X11/dri3:
 - create_*_surface, all
 - query_surface via xcb_get_geometry

drm
 - create_*_surface, all (window only)
 - query_surface - no hook

surfaceless
 - create_*_surface, none (pbuffer only)
 - query_surface  - no hook

wayland
 - create_*_surface, none (window only)
 - query_surface  - no hook
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] i965: Refactor brw_draw_prims.

2017-08-29 Thread Plamena Manolova
brw_draw_prims needs to be refactored prior to
ARB_indirect_parameters implementation.

Signed-off-by: Plamena Manolova 
---
 src/mesa/drivers/dri/i965/brw_draw.c | 343 +++
 1 file changed, 189 insertions(+), 154 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index a8ad2ac..7597bae 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -531,7 +531,7 @@ brw_postdraw_set_buffers_need_resolve(struct brw_context 
*brw)
 
   if (!irb)
  continue;
- 
+
   brw_render_cache_set_add_bo(brw, irb->mt->bo);
   intel_miptree_finish_render(brw, irb->mt, irb->mt_level,
   irb->mt_layer, irb->layer_count,
@@ -594,21 +594,163 @@ brw_postdraw_reconcile_align_wa_slices(struct 
brw_context *brw)
  * fallback conditions.
  */
 static void
-brw_try_draw_prims(struct gl_context *ctx,
-   const struct gl_vertex_array *arrays[],
-   const struct _mesa_prim *prims,
-   GLuint nr_prims,
-   const struct _mesa_index_buffer *ib,
-   bool index_bounds_valid,
-   GLuint min_index,
-   GLuint max_index,
-   struct brw_transform_feedback_object *xfb_obj,
-   unsigned stream,
-   struct gl_buffer_object *indirect)
+brw_try_draw_prim(struct gl_context *ctx,
+  const struct gl_vertex_array *arrays[],
+  const struct _mesa_prim *prim,
+  const struct _mesa_index_buffer *ib,
+  bool index_bounds_valid,
+  GLuint min_index,
+  GLuint max_index,
+  struct brw_transform_feedback_object *xfb_obj,
+  unsigned stream,
+  struct gl_buffer_object *indirect)
 {
struct brw_context *brw = brw_context(ctx);
-   GLuint i;
bool fail_next = false;
+   int estimated_max_prim_size;
+   const int sampler_state_size = 16;
+
+   estimated_max_prim_size = 512; /* batchbuffer commands */
+   estimated_max_prim_size += BRW_MAX_TEX_UNIT *
+  (sampler_state_size + sizeof(struct gen5_sampler_default_color));
+   estimated_max_prim_size += 1024; /* gen6 VS push constants */
+   estimated_max_prim_size += 1024; /* gen6 WM push constants */
+   estimated_max_prim_size += 512; /* misc. pad */
+
+   /* Flag BRW_NEW_DRAW_CALL on every draw.  This allows us to have
+* atoms that happen on every draw call.
+*/
+   brw->ctx.NewDriverState |= BRW_NEW_DRAW_CALL;
+
+   /* Flush the batch if it's approaching full, so that we don't wrap while
+* we've got validated state that needs to be in the same batch as the
+* primitives.
+*/
+   intel_batchbuffer_require_space(brw, estimated_max_prim_size, RENDER_RING);
+   intel_batchbuffer_save_state(brw);
+
+   if (brw->num_instances != prim->num_instances ||
+   brw->basevertex != prim->basevertex ||
+   brw->baseinstance != prim->base_instance) {
+   brw->num_instances = prim->num_instances;
+   brw->basevertex = prim->basevertex;
+   brw->baseinstance = prim->base_instance;
+  if (prim->draw_id > 0) { /* For draw_id == 0 we just did this before the 
loop */
+ brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
+ brw_merge_inputs(brw, arrays);
+  }
+   }
+
+   /* Determine if we need to flag BRW_NEW_VERTICES for updating the
+* gl_BaseVertexARB or gl_BaseInstanceARB values. For indirect draw, we
+* always flag if the shader uses one of the values. For direct draws,
+* we only flag if the values change.
+*/
+   const int new_basevertex =
+  prim->indexed ? prim->basevertex : prim->start;
+   const int new_baseinstance = prim->base_instance;
+   const struct brw_vs_prog_data *vs_prog_data =
+  brw_vs_prog_data(brw->vs.base.prog_data);
+   if (prim->draw_id > 0) {
+  const bool uses_draw_parameters =
+ vs_prog_data->uses_basevertex ||
+ vs_prog_data->uses_baseinstance;
+
+  if ((uses_draw_parameters && prim->is_indirect) ||
+  (vs_prog_data->uses_basevertex &&
+   brw->draw.params.gl_basevertex != new_basevertex) ||
+  (vs_prog_data->uses_baseinstance &&
+   brw->draw.params.gl_baseinstance != new_baseinstance))
+ brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
+   }
+
+   brw->draw.params.gl_basevertex = new_basevertex;
+   brw->draw.params.gl_baseinstance = new_baseinstance;
+   brw_bo_unreference(brw->draw.draw_params_bo);
+
+   if (prim->is_indirect) {
+  /* Point draw_params_bo at the indirect buffer. */
+  brw->draw.draw_params_bo =
+  intel_buffer_object(ctx->DrawIndirectBuffer)->buffer;
+  brw_bo_reference(brw->draw.draw_params_bo);
+  brw->draw.draw_params_offset =
+  prim->indirect_offset + (prim->indexed ? 12 : 8);
+   } else {
+  /* Set draw_params_bo to NULL so brw_prep

[Mesa-dev] [PATCH 2/2] i965: Implement ARB_indirect_parameters

2017-08-29 Thread Plamena Manolova
We can implement ARB_indirect_parameters for i965 by
taking advantage of the conditional rendering mechanism.
This works by issuing maxdrawcount draw calls and using
conditional rendering to predicate each of them with
"drawcount > gl_DrawID"

Signed-off-by: Plamena Manolova 
---
 src/mesa/drivers/dri/i965/brw_context.h  |  3 +
 src/mesa/drivers/dri/i965/brw_draw.c | 97 
 src/mesa/drivers/dri/i965/brw_draw.h | 11 
 src/mesa/drivers/dri/i965/intel_extensions.c |  2 +
 4 files changed, 113 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 2274fe5..4639d5b 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -831,6 +831,9 @@ struct brw_context
   int gl_drawid;
   struct brw_bo *draw_id_bo;
   uint32_t draw_id_offset;
+
+  struct brw_bo *draw_params_count_bo;
+  uint32_t draw_params_count_offset;
} draw;
 
struct {
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index 7597bae..473958c 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -820,6 +820,11 @@ brw_end_draw_prims(struct gl_context *ctx,
brw_program_cache_check_size(brw);
brw_postdraw_reconcile_align_wa_slices(brw);
brw_postdraw_set_buffers_need_resolve(brw);
+
+   if (brw->draw.draw_params_count_bo) {
+  brw_bo_unreference(brw->draw.draw_params_count_bo);
+  brw->draw.draw_params_count_bo = NULL;
+   }
 }
 
 void
@@ -837,6 +842,8 @@ brw_draw_prims(struct gl_context *ctx,
struct brw_context *brw = brw_context(ctx);
const struct gl_vertex_array **arrays = ctx->Array._DrawArrays;
int i;
+   int predicate_state = brw->predicate.state;
+   int combine_op = MI_PREDICATE_COMBINEOP_SET;
struct brw_transform_feedback_object *xfb_obj =
   (struct brw_transform_feedback_object *) gl_xfb_obj;
 
@@ -890,12 +897,101 @@ brw_draw_prims(struct gl_context *ctx,
 * to it.
 */
 
+   if (brw->draw.draw_params_count_bo &&
+   predicate_state == BRW_PREDICATE_STATE_USE_BIT) {
+  /* We do this to empty the MI_PREDICATE_DATA register */
+  BEGIN_BATCH(4);
+  OUT_BATCH(MI_PREDICATE_DATA);
+  OUT_BATCH(0u);
+  OUT_BATCH(MI_PREDICATE_DATA + 4);
+  OUT_BATCH(0u);
+  ADVANCE_BATCH();
+
+  combine_op = MI_PREDICATE_COMBINEOP_AND;
+   }
+
for (i = 0; i < nr_prims; i++) {
+  if (brw->draw.draw_params_count_bo) {
+ struct brw_bo *draw_id_bo = brw_bo_alloc(brw->bufmgr, "draw_id", 4, 
4);
+
+ brw_bo_reference(draw_id_bo);
+ brw_bo_subdata(draw_id_bo, 0, 4, &prims[i].draw_id);
+
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);
+
+ brw_load_register_mem(brw, MI_PREDICATE_SRC0,
+   brw->draw.draw_params_count_bo,
+   brw->draw.draw_params_count_offset);
+ brw_load_register_mem(brw, MI_PREDICATE_SRC1, draw_id_bo, 0);
+
+ BEGIN_BATCH(1);
+ OUT_BATCH(GEN7_MI_PREDICATE |
+   MI_PREDICATE_LOADOP_LOADINV | combine_op |
+   MI_PREDICATE_COMPAREOP_DELTAS_EQUAL);
+ ADVANCE_BATCH();
+
+ brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT;
+
+ brw_bo_unreference(draw_id_bo);
+  }
+
   brw_try_draw_prim(ctx, arrays, &prims[i], ib, index_bounds_valid,
 min_index, max_index, xfb_obj, stream, indirect);
}
+
brw_end_draw_prims(ctx, arrays, prims, nr_prims, ib, index_bounds_valid,
   min_index, max_index, xfb_obj, stream, indirect);
+
+   brw->predicate.state = predicate_state;
+}
+
+void
+brw_draw_indirect_prims(struct gl_context *ctx,
+GLuint mode,
+struct gl_buffer_object *indirect_data,
+GLsizeiptr indirect_offset,
+unsigned draw_count,
+unsigned stride,
+struct gl_buffer_object *indirect_params,
+GLsizeiptr indirect_params_offset,
+const struct _mesa_index_buffer *ib)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct _mesa_prim *prim;
+   GLsizei i;
+
+   prim = calloc(draw_count, sizeof(*prim));
+   if (prim == NULL) {
+  _mesa_error(ctx, GL_OUT_OF_MEMORY, "gl%sDraw%sIndirect%s",
+  (draw_count > 1) ? "Multi" : "",
+  ib ? "Elements" : "Arrays",
+  indirect_params ? "CountARB" : "");
+  return;
+   }
+
+   prim[0].begin = 1;
+   prim[draw_count - 1].end = 1;
+   for (i = 0; i < draw_count; ++i, indirect_offset += stride) {
+  prim[i].mode = mode;
+  prim[i].indexed = !!ib;
+  prim[i].indirect_offset = indirect_offset;
+  prim[i].is_indirect = 1;
+  prim[i].draw_id = i;
+   }
+
+   if (indirect_params) {
+  brw

Re: [Mesa-dev] [PATCH 01/19] configure: Add and use AX_CHECK_COMPILE_FLAG

2017-08-29 Thread Matt Turner
On Tue, Aug 29, 2017 at 6:15 AM, Emil Velikov  wrote:
> On 28 August 2017 at 22:57, Matt Turner  wrote:
>
>> -# Flags to help ensure that certain portions of the code -- and only 
>> those
>> -# portions -- can be built with MSVC:
>> -# - src/util, src/gallium/auxiliary, rc/gallium/drivers/llvmpipe, and
>> -# - non-Linux/Posix OpenGL portions needs to build on MSVC 2013 (which
>> -#   supports most of C99)
>> -# - the rest has no compiler compiler restrictions
>> -MSVC2013_COMPAT_CFLAGS="-Werror=pointer-arith"
>> -MSVC2013_COMPAT_CXXFLAGS="-Werror=pointer-arith"
>> -
> Can you please preserve the comment about the MSVC2013 flags?
> With that the series is
> Reviewed-by: Emil Velikov 

Yes, thank you.

> Mildly curious:
> Did you pipe the series through ezbench for perf testing?
> Wondering if the inline and fabsf -> babs changes made any noticeable impact.

No, I cannot imagine it could cause any change. It's really just
changing when a float -> double conversion happens.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] swr: Report format max_samples=1 to maintain support for "fake" msaa.

2017-08-29 Thread Cherniak, Bruce
Gentle bump.  This fixes mesa master for Kitware's VTK testing.

> On Aug 25, 2017, at 2:59 PM, Cherniak, Bruce  wrote:
> 
> Accompanying patch "st/mesa: only try to create 1x msaa surfaces for
> 'fake' msaa" requires driver to report max_samples=1 to enable "fake"
> msaa. Previously, 0 and 1 were treated equivalently in st_init_extensions()
> and either could enable "fake" msaa.
> 
> This patch raises the swr default msaa_max_count from 0 to 1, so that
> swr_is_format_supported will report max_samples=1.
> 
> Real msaa can still be enabled by exporting SWR_MSAA_MAX_COUNT with a
> pow2 value between 2 and 16.
> 
> This patch is necessary to prevent an OpenSWR regression resulting from
> the st/mesa patch.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102038
> ---
> src/gallium/drivers/swr/swr_screen.cpp | 22 +++---
> 1 file changed, 11 insertions(+), 11 deletions(-)
> 
> diff --git a/src/gallium/drivers/swr/swr_screen.cpp 
> b/src/gallium/drivers/swr/swr_screen.cpp
> index 3287bc6fee..cc8d9955b8 100644
> --- a/src/gallium/drivers/swr/swr_screen.cpp
> +++ b/src/gallium/drivers/swr/swr_screen.cpp
> @@ -255,13 +255,13 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap 
> param)
>   return 1;
> 
>/* MSAA support
> -* If user has explicitly set max_sample_count = 0 (via 
> SWR_MSAA_MAX_COUNT)
> -* then disable all MSAA support and go back to old caps. */
> +* If user has explicitly set max_sample_count = 1 (via 
> SWR_MSAA_MAX_COUNT)
> +* then disable all MSAA support and go back to old (FAKE_SW_MSAA) caps. 
> */
>case PIPE_CAP_TEXTURE_MULTISAMPLE:
>case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
> -  return swr_screen(screen)->msaa_max_count ? 1 : 0;
> +  return (swr_screen(screen)->msaa_max_count > 1) ? 1 : 0;
>case PIPE_CAP_FAKE_SW_MSAA:
> -  return swr_screen(screen)->msaa_max_count ? 0 : 1;
> +  return (swr_screen(screen)->msaa_max_count > 1) ? 0 : 1;
> 
>   /* unsupported features */
>case PIPE_CAP_ANISOTROPIC_FILTER:
> @@ -1079,22 +1079,22 @@ swr_validate_env_options(struct swr_screen *screen)
>   screen->client_copy_limit = client_copy_limit;
> 
>/* XXX msaa under development, disable by default for now */
> -   screen->msaa_max_count = 0; /* was SWR_MAX_NUM_MULTISAMPLES; */
> +   screen->msaa_max_count = 1; /* was SWR_MAX_NUM_MULTISAMPLES; */
> 
>/* validate env override values, within range and power of 2 */
> -   int msaa_max_count = debug_get_num_option("SWR_MSAA_MAX_COUNT", 0);
> -   if (msaa_max_count) {
> -  if ((msaa_max_count < 0) || (msaa_max_count > SWR_MAX_NUM_MULTISAMPLES)
> +   int msaa_max_count = debug_get_num_option("SWR_MSAA_MAX_COUNT", 1);
> +   if (msaa_max_count != 1) {
> +  if ((msaa_max_count < 1) || (msaa_max_count > SWR_MAX_NUM_MULTISAMPLES)
> || !util_is_power_of_two(msaa_max_count)) {
>  fprintf(stderr, "SWR_MSAA_MAX_COUNT invalid: %d\n", msaa_max_count);
>  fprintf(stderr, "must be power of 2 between 1 and %d" \
> - " (or 0 to disable msaa)\n",
> + " (or 1 to disable msaa)\n",
>SWR_MAX_NUM_MULTISAMPLES);
> - msaa_max_count = 0;
> + msaa_max_count = 1;
>   }
> 
>   fprintf(stderr, "SWR_MSAA_MAX_COUNT: %d\n", msaa_max_count);
> -  if (!msaa_max_count)
> +  if (msaa_max_count == 1)
>  fprintf(stderr, "(msaa disabled)\n");
> 
>   screen->msaa_max_count = msaa_max_count;
> -- 
> 2.11.0
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] st/mesa: only try to create 1x msaa surfaces for "fake" msaa drivers

2017-08-29 Thread Cherniak, Bruce
Gentle bump.  This fixes mesa master for Kitware's VTK testing.

> On Aug 25, 2017, at 2:59 PM, Bruce Cherniak  wrote:
> 
> From: Brian Paul 
> 
> For software drivers where we want "fake" msaa support for GL 3.x, we
> treat 1 sample as being msaa.
> 
> For drivers with real msaa support, start format probing at 2x msaa.
> For drivers with fake msaa support, start format probing at 1x msaa.
> 
> This also tweaks the MaxSamples code in st_init_extensions() so that
> we use MaxSamples=1 for fake msaa.  This allows the format proble loops
> to run at least one iteration.
> 
> This fixes a llvmpipe/VTK regression from commit 6839d3369905eb02151.
> And for drivers with fake msaa support, calls such as
> glTexImage2DMultisample(samples=1) will now succeed.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102038
> ---
> src/mesa/state_tracker/st_cb_fbo.c | 13 ++---
> src/mesa/state_tracker/st_cb_texture.c | 11 ---
> src/mesa/state_tracker/st_extensions.c | 14 ++
> 3 files changed, 24 insertions(+), 14 deletions(-)
> 
> diff --git a/src/mesa/state_tracker/st_cb_fbo.c 
> b/src/mesa/state_tracker/st_cb_fbo.c
> index afc7700306..a7c286bcc5 100644
> --- a/src/mesa/state_tracker/st_cb_fbo.c
> +++ b/src/mesa/state_tracker/st_cb_fbo.c
> @@ -155,12 +155,19 @@ st_renderbuffer_alloc_storage(struct gl_context * ctx,
> *   to  and no more than the next larger sample count supported
> *   by the implementation.
> *
> -* So let's find the supported number of samples closest to NumSamples.
> +* Find the supported number of samples >= rb->NumSamples
> */
>if (rb->NumSamples > 0) {
> -  unsigned i;
> +  unsigned start, i;
> 
> -  for (i = MAX2(2, rb->NumSamples); i <= ctx->Const.MaxSamples; i++) {
> +  if (ctx->Const.MaxSamples > 1 &&  rb->NumSamples == 1) {
> + /* don't try num_samples = 1 with drivers that support real msaa */
> + start = 2;
> +  } else {
> + start = rb->NumSamples;
> +  }
> +
> +  for (i = start; i <= ctx->Const.MaxSamples; i++) {
>  format = st_choose_renderbuffer_format(st, internalFormat, i);
> 
>  if (format != PIPE_FORMAT_NONE) {
> diff --git a/src/mesa/state_tracker/st_cb_texture.c 
> b/src/mesa/state_tracker/st_cb_texture.c
> index af2052db24..b5006b05a7 100644
> --- a/src/mesa/state_tracker/st_cb_texture.c
> +++ b/src/mesa/state_tracker/st_cb_texture.c
> @@ -2739,13 +2739,18 @@ st_texture_storage(struct gl_context *ctx,
> 
>bindings = default_bindings(st, fmt);
> 
> -   /* Raise the sample count if the requested one is unsupported. */
>if (num_samples > 0) {
> +  /* Find msaa sample count which is actually supported.  For example,
> +   * if the user requests 1x but only 4x or 8x msaa is supported, we'll
> +   * choose 4x here.
> +   */
>   enum pipe_texture_target ptarget = gl_target_to_pipe(texObj->Target);
>   boolean found = FALSE;
> 
> -  /* start the query with at least two samples */
> -  num_samples = MAX2(num_samples, 2);
> +  if (ctx->Const.MaxSamples > 1 && num_samples == 1) {
> + /* don't try num_samples = 1 with drivers that support real msaa */
> + num_samples = 2;
> +  }
> 
>   for (; num_samples <= ctx->Const.MaxSamples; num_samples++) {
>  if (screen->is_format_supported(screen, fmt, ptarget,
> diff --git a/src/mesa/state_tracker/st_extensions.c 
> b/src/mesa/state_tracker/st_extensions.c
> index 904d9cd834..2008e28250 100644
> --- a/src/mesa/state_tracker/st_extensions.c
> +++ b/src/mesa/state_tracker/st_extensions.c
> @@ -1046,17 +1046,15 @@ void st_init_extensions(struct pipe_screen *screen,
>  void_formats, 32,
>  PIPE_BIND_RENDER_TARGET);
>}
> -   if (consts->MaxSamples == 1) {
> -  /* one sample doesn't really make sense */
> -  consts->MaxSamples = 0;
> -   }
> -   else if (consts->MaxSamples >= 2) {
> +
> +   if (consts->MaxSamples >= 2) {
> +  /* Real MSAA support */
>   extensions->EXT_framebuffer_multisample = GL_TRUE;
>   extensions->EXT_framebuffer_multisample_blit_scaled = GL_TRUE;
>}
> -
> -   if (consts->MaxSamples == 0 &&
> -   screen->get_param(screen, PIPE_CAP_FAKE_SW_MSAA)) {
> +   else if (consts->MaxSamples > 0 &&
> +screen->get_param(screen, PIPE_CAP_FAKE_SW_MSAA)) {
> +  /* fake MSAA support */
>   consts->FakeSWMSAA = GL_TRUE;
>   extensions->EXT_framebuffer_multisample = GL_TRUE;
>   extensions->EXT_framebuffer_multisample_blit_scaled = GL_TRUE;
> -- 
> 2.11.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeonsi: update dirty_level_mask before dispatching

2017-08-29 Thread Samuel Pitoiset
This fixes a rendering issue with Hitman when bindless textures
are enabled.

Fixes: 2263610827 ("radeonsi: flush DB caches only when transitioning from DB 
to texturing")
Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/radeon/r600_pipe_common.h | 1 +
 src/gallium/drivers/radeonsi/si_compute.c | 5 +
 2 files changed, 6 insertions(+)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 59886e..d76d4a1384 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -569,6 +569,7 @@ struct r600_common_context {
unsignedgpu_reset_counter;
unsignedlast_dirty_tex_counter;
unsignedlast_compressed_colortex_counter;
+   unsignedlast_num_draw_calls;
 
struct threaded_context *tc;
struct u_suballocator   *allocator_zeroed_memory;
diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index 3ebd22c3c1..ca334949d7 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -782,6 +782,11 @@ static void si_launch_grid(
program->shader.compilation_failed)
return;
 
+   if (sctx->b.last_num_draw_calls != sctx->b.num_draw_calls) {
+   si_update_fb_dirtiness_after_rendering(sctx);
+   sctx->b.last_num_draw_calls = sctx->b.num_draw_calls;
+   }
+
si_decompress_compute_textures(sctx);
 
/* Add buffer sizes for memory checking in need_cs_space. */
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] winsys/amdgpu: set AMDGPU_GEM_CREATE_LOCAL if possible

2017-08-29 Thread Christian König
From: Christian König 

When the kernel supports it set the local flag and
stop adding those BOs to the BO list.

Can probably be optimized much more.

Signed-off-by: Christian König 
---
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c |  8 
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.h |  2 ++
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 22 +-
 3 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 08348ed..1238d58 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -38,6 +38,10 @@
 #include 
 #include 
 
+#ifndef AMDGPU_GEM_CREATE_LOCAL
+#define AMDGPU_GEM_CREATE_LOCAL (1 << 6)
+#endif
+
 /* Set to 1 for verbose output showing committed sparse buffer ranges. */
 #define DEBUG_SPARSE_COMMITS 0
 
@@ -402,6 +406,9 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct 
amdgpu_winsys *ws,
   request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
if (flags & RADEON_FLAG_GTT_WC)
   request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+   if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
+   ws->info.drm_minor >= 20)
+  request.flags |= AMDGPU_GEM_CREATE_LOCAL;
 
r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
if (r) {
@@ -435,6 +442,7 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct 
amdgpu_winsys *ws,
bo->u.real.va_handle = va_handle;
bo->initial_domain = initial_domain;
bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
+   bo->is_local = !!(request.flags & AMDGPU_GEM_CREATE_LOCAL);
 
if (initial_domain & RADEON_DOMAIN_VRAM)
   ws->allocated_vram += align64(size, ws->info.gart_page_size);
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h 
b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
index 1311344..10b095d 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
@@ -115,6 +115,8 @@ struct amdgpu_winsys_bo {
unsigned num_fences;
unsigned max_fences;
struct pipe_fence_handle **fences;
+
+   bool is_local;
 };
 
 struct amdgpu_slab {
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 9cadfc4..0b76fba 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -1123,6 +1123,8 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
   free(handles);
   mtx_unlock(&ws->global_bo_list_lock);
} else {
+  unsigned num_handles;
+
   if (!amdgpu_add_sparse_backing_buffers(cs)) {
  r = -ENOMEM;
  goto bo_list_error;
@@ -1142,21 +1144,31 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
  }
   }
 
+  num_handles = 0;
   for (i = 0; i < cs->num_real_buffers; ++i) {
  struct amdgpu_cs_buffer *buffer = &cs->real_buffers[i];
 
+if (buffer->bo->is_local)
+continue;
+
  assert(buffer->u.real.priority_usage != 0);
 
- cs->handles[i] = buffer->bo->bo;
- cs->flags[i] = (util_last_bit64(buffer->u.real.priority_usage) - 1) / 
4;
+ cs->handles[num_handles] = buffer->bo->bo;
+ cs->flags[num_handles] = 
(util_last_bit64(buffer->u.real.priority_usage) - 1) / 4;
+++num_handles;
   }
 
   if (acs->ring_type == RING_GFX)
  ws->gfx_bo_list_counter += cs->num_real_buffers;
 
-  r = amdgpu_bo_list_create(ws->dev, cs->num_real_buffers,
-cs->handles, cs->flags,
-&cs->request.resources);
+  if (num_handles) {
+ r = amdgpu_bo_list_create(ws->dev, num_handles,
+   cs->handles, cs->flags,
+   &cs->request.resources);
+  } else {
+ r = 0;
+cs->request.resources = 0;
+  }
}
 bo_list_error:
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] radeonsi: set a per-buffer flag that disables inter-process sharing (v4)

2017-08-29 Thread Christian König
From: Marek Olšák 

For lower overhead in the CS ioctl.
Winsys allocators are not used with interprocess-sharable resources.

v2: It shouldn't crash anymore, but the kernel will reject the new flag.
v3 (christian): Rename the flag, avoid sending those buffers in the BO list.
v4 (christian): Remove setting the kernel flag for now
---
 src/gallium/drivers/radeon/r600_buffer_common.c |  7 ++
 src/gallium/drivers/radeon/radeon_winsys.h  | 20 +
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c   | 30 ++---
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c   | 27 +-
 4 files changed, 56 insertions(+), 28 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c 
b/src/gallium/drivers/radeon/r600_buffer_common.c
index dd1c209..2747ac4 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -167,6 +167,13 @@ void r600_init_resource_fields(struct r600_common_screen 
*rscreen,
 RADEON_FLAG_GTT_WC;
}
 
+   /* Only displayable single-sample textures can be shared between
+* processes. */
+   if (res->b.b.target == PIPE_BUFFER ||
+   res->b.b.nr_samples >= 2 ||
+   rtex->surface.micro_tile_mode != RADEON_MICRO_MODE_DISPLAY)
+   res->flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
+
/* If VRAM is just stolen system memory, allow both VRAM and
 * GTT, whichever has free space. If a buffer is evicted from
 * VRAM to GTT, it will stay there.
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h 
b/src/gallium/drivers/radeon/radeon_winsys.h
index b00b144..f0a0a92 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -54,6 +54,7 @@ enum radeon_bo_flag { /* bitfield */
 RADEON_FLAG_NO_CPU_ACCESS = (1 << 1),
 RADEON_FLAG_NO_SUBALLOC =   (1 << 2),
 RADEON_FLAG_SPARSE =(1 << 3),
+RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 4),
 };
 
 enum radeon_bo_usage { /* bitfield */
@@ -661,14 +662,19 @@ static inline unsigned radeon_flags_from_heap(enum 
radeon_heap heap)
 {
 switch (heap) {
 case RADEON_HEAP_VRAM_NO_CPU_ACCESS:
-return RADEON_FLAG_GTT_WC | RADEON_FLAG_NO_CPU_ACCESS;
+return RADEON_FLAG_GTT_WC |
+   RADEON_FLAG_NO_CPU_ACCESS |
+   RADEON_FLAG_NO_INTERPROCESS_SHARING;
+
 case RADEON_HEAP_VRAM:
 case RADEON_HEAP_VRAM_GTT:
 case RADEON_HEAP_GTT_WC:
-return RADEON_FLAG_GTT_WC;
+return RADEON_FLAG_GTT_WC |
+   RADEON_FLAG_NO_INTERPROCESS_SHARING;
+
 case RADEON_HEAP_GTT:
 default:
-return 0;
+return RADEON_FLAG_NO_INTERPROCESS_SHARING;
 }
 }
 
@@ -700,8 +706,14 @@ static inline int radeon_get_heap_index(enum 
radeon_bo_domain domain,
 /* NO_CPU_ACCESS implies VRAM only. */
 assert(!(flags & RADEON_FLAG_NO_CPU_ACCESS) || domain == 
RADEON_DOMAIN_VRAM);
 
+/* Resources with interprocess sharing don't use any winsys allocators. */
+if (!(flags & RADEON_FLAG_NO_INTERPROCESS_SHARING))
+return -1;
+
 /* Unsupported flags: NO_SUBALLOC, SPARSE. */
-if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_NO_CPU_ACCESS))
+if (flags & ~(RADEON_FLAG_GTT_WC |
+  RADEON_FLAG_NO_CPU_ACCESS |
+  RADEON_FLAG_NO_INTERPROCESS_SHARING))
 return -1;
 
 switch (domain) {
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 97bbe23..08348ed 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -1134,7 +1134,7 @@ amdgpu_bo_create(struct radeon_winsys *rws,
 {
struct amdgpu_winsys *ws = amdgpu_winsys(rws);
struct amdgpu_winsys_bo *bo;
-   unsigned usage = 0, pb_cache_bucket;
+   unsigned usage = 0, pb_cache_bucket = 0;
 
/* VRAM implies WC. This is not optional. */
assert(!(domain & RADEON_DOMAIN_VRAM) || flags & RADEON_FLAG_GTT_WC);
@@ -1189,19 +1189,23 @@ no_slab:
size = align64(size, ws->info.gart_page_size);
alignment = align(alignment, ws->info.gart_page_size);
 
-   int heap = radeon_get_heap_index(domain, flags);
-   assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);
-   usage = 1 << heap; /* Only set one usage bit for each heap. */
+   bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;
 
-   pb_cache_bucket = radeon_get_pb_cache_bucket_index(heap);
-   assert(pb_cache_bucket < ARRAY_SIZE(ws->bo_cache.buckets));
+   if (use_reusable_pool) {
+   int heap = radeon_get_heap_index(domain, flags);
+   assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);
+   usage = 1 << heap; /* Only set one usage bit for each heap. */
 
-   /* Get a buffer from the cache. */
-   bo = (struct amdgpu_winsys_bo*)
-pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, usage,
-p

Re: [Mesa-dev] [PATCH 08/10] egl/wayland: remove dri2_surf width/height double init.

2017-08-29 Thread Eric Engestrom
On Sunday, 2017-08-27 11:20:33 +0100, Emil Velikov wrote:
> From: Emil Velikov 
> 
> The dimensions are already set [to 0 or the value provided by the
> attributes list] by the _eglInitSurface() call further up.
> 
> The values are updated, as the DRI driver calls the DRI2/IMAGE_LOADER'
> get_buffers, shortly before making use of the values.
> 
> Signed-off-by: Emil Velikov 
> ---
>  src/egl/drivers/dri2/platform_wayland.c | 3 ---
>  1 file changed, 3 deletions(-)
> 
> diff --git a/src/egl/drivers/dri2/platform_wayland.c 
> b/src/egl/drivers/dri2/platform_wayland.c
> index f9554fccde9..89e94e11f4f 100644
> --- a/src/egl/drivers/dri2/platform_wayland.c
> +++ b/src/egl/drivers/dri2/platform_wayland.c
> @@ -198,9 +198,6 @@ dri2_wl_create_window_surface(_EGLDriver *drv, 
> _EGLDisplay *disp,
> dri2_surf->wl_win->private = dri2_surf;
> dri2_surf->wl_win->destroy_window_callback = destroy_window_callback;
>  
> -   dri2_surf->base.Width =  -1;
> -   dri2_surf->base.Height = -1;
> -

We actually have a local patch here that replaces this `-1`
initialisation with the dimensions of the wayland `window`:

8<
diff --git a/src/egl/drivers/dri2/platform_wayland.c 
b/src/egl/drivers/dri2/platform_wayland.c
index 85a11f4..7719407 100644
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -183,8 +183,8 @@ dri2_wl_create_window_surface(_EGLDriver *drv, _EGLDisplay 
*disp,
dri2_surf->wl_win->private = dri2_surf;
dri2_surf->wl_win->destroy_window_callback = destroy_window_callback;
 
-   dri2_surf->base.Width =  -1;
-   dri2_surf->base.Height = -1;
+   dri2_surf->base.Width  = window->width;
+   dri2_surf->base.Height = window->height;
 
config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
 dri2_surf->base.GLColorspace);
>8

This apparently fixes dEQP crashes caused by an assert:
deRandom.hpp:93: Unknown function: Assertion `min <= max' failed.

No dEQP test is mentioned on our patch, but I think this happened on
dEQP-EGL.functional.resize.surface_size.*

> config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
>  dri2_surf->base.GLColorspace);
>  
> -- 
> 2.14.0
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] llvmpipe: initialize llvmpipe->dirty with LP_NEW_SCISSOR

2017-08-29 Thread Emil Velikov
On 28 August 2017 at 21:20, Brian Paul  wrote:
> If llvmpipe_set_scissor_states() is never called, we still need to be sure
> that derived scissor/clip state is updated.  As of commit 743ad599a97d09b1
> that function might not be called.
>
> Fixes regressed Piglit gl-1.0-scissor-offscreen -fbo -auto test.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101709
Please add a fixes and/or stable tag when pushing the patch.

Fixes: 743ad599a97 ("st/mesa: don't set 16 scissors and 16 viewports
if they're unused")
Cc: "17.2" 

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 101851] [regression] libEGL_common.a undefined reference to '__gxx_personality_v0'

2017-08-29 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=101851

Emil Velikov  changed:

   What|Removed |Added

 Resolution|--- |FIXED
 Status|NEW |RESOLVED

--- Comment #10 from Emil Velikov  ---
Thanks all. Merged as

commit 0ac78dc92582a59d4319ebce019b4caa41fb432d
Author: Emil Velikov 
Date:   Sat Aug 26 02:37:11 2017 +0100

util: move string_to_uint_map to glsl

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] winsys/amdgpu: add BO to the global list only when RADEON_ALL_BOS is set

2017-08-29 Thread Samuel Pitoiset
Only useful when that debug option is enabled.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 20 
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c |  4 +---
 src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c |  3 +++
 src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h |  1 +
 4 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 97bbe235a4..1323be8356 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -167,10 +167,12 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf)
 
assert(bo->bo && "must not be called for slab entries");
 
-   mtx_lock(&bo->ws->global_bo_list_lock);
-   LIST_DEL(&bo->u.real.global_list_item);
-   bo->ws->num_buffers--;
-   mtx_unlock(&bo->ws->global_bo_list_lock);
+   if (bo->ws->debug_all_bos) {
+  mtx_lock(&bo->ws->global_bo_list_lock);
+  LIST_DEL(&bo->u.real.global_list_item);
+  bo->ws->num_buffers--;
+  mtx_unlock(&bo->ws->global_bo_list_lock);
+   }
 
amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
amdgpu_va_range_free(bo->u.real.va_handle);
@@ -360,10 +362,12 @@ static void amdgpu_add_buffer_to_global_list(struct 
amdgpu_winsys_bo *bo)
 
assert(bo->bo);
 
-   mtx_lock(&ws->global_bo_list_lock);
-   LIST_ADDTAIL(&bo->u.real.global_list_item, &ws->global_bo_list);
-   ws->num_buffers++;
-   mtx_unlock(&ws->global_bo_list_lock);
+   if (ws->debug_all_bos) {
+  mtx_lock(&ws->global_bo_list_lock);
+  LIST_ADDTAIL(&bo->u.real.global_list_item, &ws->global_bo_list);
+  ws->num_buffers++;
+  mtx_unlock(&ws->global_bo_list_lock);
+   }
 }
 
 static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 9cadfc4298..5ddde8e794 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -903,8 +903,6 @@ static unsigned amdgpu_cs_get_buffer_list(struct 
radeon_winsys_cs *rcs,
 return cs->num_real_buffers;
 }
 
-DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", false)
-
 static void amdgpu_add_fence_dependency(struct amdgpu_cs *acs,
 struct amdgpu_cs_buffer *buffer)
 {
@@ -1097,7 +1095,7 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
/* Create the buffer list.
 * Use a buffer list containing all allocated buffers if requested.
 */
-   if (debug_get_option_all_bos()) {
+   if (ws->debug_all_bos) {
   struct amdgpu_winsys_bo *bo;
   amdgpu_bo_handle *handles;
   unsigned num = 0;
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index a8b5e5b41a..5e0c1fd8d8 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -50,6 +50,8 @@
 static struct util_hash_table *dev_tab = NULL;
 static mtx_t dev_tab_mutex = _MTX_INITIALIZER_NP;
 
+DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", false)
+
 /* Helper function to do the ioctls needed for setup and init. */
 static bool do_winsys_init(struct amdgpu_winsys *ws, int fd)
 {
@@ -70,6 +72,7 @@ static bool do_winsys_init(struct amdgpu_winsys *ws, int fd)
}
 
ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != 
NULL;
+   ws->debug_all_bos = debug_get_option_all_bos();
 
return true;
 
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h 
b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
index 7aca612f45..de54470ede 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
@@ -79,6 +79,7 @@ struct amdgpu_winsys {
ADDR_HANDLE addrlib;
 
bool check_vm;
+   bool debug_all_bos;
 
/* List of all allocated buffers */
mtx_t global_bo_list_lock;
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 4/5] i965: perf: add support for userspace configurations

2017-08-29 Thread Emil Velikov
On 29 August 2017 at 14:51, Lionel Landwerlin
 wrote:
> On 29/08/17 14:39, Emil Velikov wrote:
>>
>> Hi Lionel,
>>
>> On 29 August 2017 at 14:05, Lionel Landwerlin
>>  wrote:
>>>
>>> This allows us to deploy new configurations without touching the
>>> kernel.
>>>
>>> v2: Detect loadable configs without creating one (Chris)
>>>
>>> Signed-off-by: Lionel Landwerlin 
>>> ---
>>>   src/mesa/drivers/dri/i965/brw_performance_query.c | 98
>>> ++-
>>>   1 file changed, 95 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c
>>> b/src/mesa/drivers/dri/i965/brw_performance_query.c
>>> index 4b585c95b7d..6ec6b417e52 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_performance_query.c
>>> +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
>>> @@ -1717,11 +1717,11 @@ read_file_uint64(const char *file, uint64_t *val)
>>>
>>>   fd = open(file, 0);
>>>   if (fd < 0)
>>> -   return false;
>>> +return false;
>>>   n = read(fd, buf, sizeof (buf) - 1);
>>>   close(fd);
>>>   if (n < 0)
>>> -   return false;
>>> +return false;
>>>
>>>   buf[n] = '\0';
>>>   *val = strtoull(buf, NULL, 0);
>>> @@ -1807,6 +1807,95 @@ read_sysfs_drm_device_file_uint64(struct
>>> brw_context *brw,
>>>  return read_file_uint64(buf, value);
>>>   }
>>>
>>> +static bool
>>> +kernel_has_dynamic_config_support(struct brw_context *brw,
>>> +  const char *sysfs_dev_dir)
>>> +{
>>> +   __DRIscreen *screen = brw->screen->driScrnPriv;
>>> +   struct hash_entry *entry;
>>> +
>>> +   hash_table_foreach(brw->perfquery.oa_metrics_table, entry) {
>>> +  struct brw_perf_query_info *query = entry->data;
>>> +  char config_path[256];
>>> +  uint64_t config_id;
>>> +
>>> +  snprintf(config_path, sizeof(config_path),
>>> +   "%s/metrics/%s/id", sysfs_dev_dir, query->guid);
>>> +
>>> +  /* Look for the test config, which we know we can't replace. */
>>> +  if (read_file_uint64(config_path, &config_id) && config_id == 1) {
>>> + uint32_t mux_regs[] = { 0x9888 /* NOA_WRITE */, 0x0 };
>>
>> The array seems to have a sentinel, yet one provides explicit array
>> size (of 1) further down.
>> Having a look through i915_drm.h the expectation is not documented.
>
>
> This is the number of pair of (register, value).
>
>>
>> Can I suggest adding a comment [in the kernel i915_drm.h], or dropping
>> the sentinel - whichever seems applicable.
>
>
> I guess I'll add a comment in the kernel.
>
>>
>>> + struct drm_i915_perf_oa_config config;
>>> +
>>
>> Unless I failed at math - the struct does not seem compat safe.
>> That is sizeof(struct drm_i915_perf_oa_config) varies across 32/64bit
>> builds.
>
>
> Hmm... What's not compat safe in there? :
>
> struct drm_i915_perf_oa_config {
> /** String formatted like "%08x-%04x-%04x-%04x-%012x" */
> char uuid[36];
>
> __u32 n_mux_regs;
> __u32 n_boolean_regs;
> __u32 n_flex_regs;
>
> __u64 mux_regs_ptr;
> __u64 boolean_regs_ptr;
> __u64 flex_regs_ptr;
> };
>
Scratch that - I misread 36 as 32, which would have left a 4 byte
whole in 64bit builds.
Pardon for the noise.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH] st/query: init result data with 0

2017-08-29 Thread Karol Herbst
yeah, I already send out a "doing it in the driver" patch for this:
https://lists.freedesktop.org/archives/mesa-dev/2017-August/167798.html

On Mon, Aug 28, 2017 at 10:22 PM, Marek Olšák  wrote:
> You can also call util_query_clear_result in the driver.
>
> Marek
>
> On Sat, Aug 26, 2017 at 1:15 AM, Karol Herbst  wrote:
>> otherwise the result might contain random data.
>>
>> fixes on nvc0:
>>  * KHR-GL45.pipeline_statistics_query_tests_ARB.functional_default_qo_values
>>  * 
>> KHR-GL45.pipeline_statistics_query_tests_ARB.functional_non_rendering_commands_do_not_affect_queries
>>
>> Signed-off-by: Karol Herbst 
>> Cc: mesa-sta...@lists.freedesktop.org
>> ---
>>  src/mesa/state_tracker/st_cb_queryobj.c | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/src/mesa/state_tracker/st_cb_queryobj.c 
>> b/src/mesa/state_tracker/st_cb_queryobj.c
>> index 4c25724b5d..9a65fe7bd9 100644
>> --- a/src/mesa/state_tracker/st_cb_queryobj.c
>> +++ b/src/mesa/state_tracker/st_cb_queryobj.c
>> @@ -211,7 +211,7 @@ get_query_result(struct pipe_context *pipe,
>>   struct st_query_object *stq,
>>   boolean wait)
>>  {
>> -   union pipe_query_result data;
>> +   union pipe_query_result data = { 0 };
>>
>> if (!stq->pq) {
>>/* Only needed in case we failed to allocate the gallium query 
>> earlier.
>> --
>> 2.14.1
>>
>> ___
>> mesa-stable mailing list
>> mesa-sta...@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-stable
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 4/5] i965: perf: add support for userspace configurations

2017-08-29 Thread Lionel Landwerlin

On 29/08/17 14:39, Emil Velikov wrote:

Hi Lionel,

On 29 August 2017 at 14:05, Lionel Landwerlin
 wrote:

This allows us to deploy new configurations without touching the
kernel.

v2: Detect loadable configs without creating one (Chris)

Signed-off-by: Lionel Landwerlin 
---
  src/mesa/drivers/dri/i965/brw_performance_query.c | 98 ++-
  1 file changed, 95 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index 4b585c95b7d..6ec6b417e52 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -1717,11 +1717,11 @@ read_file_uint64(const char *file, uint64_t *val)

  fd = open(file, 0);
  if (fd < 0)
-   return false;
+return false;
  n = read(fd, buf, sizeof (buf) - 1);
  close(fd);
  if (n < 0)
-   return false;
+return false;

  buf[n] = '\0';
  *val = strtoull(buf, NULL, 0);
@@ -1807,6 +1807,95 @@ read_sysfs_drm_device_file_uint64(struct brw_context 
*brw,
 return read_file_uint64(buf, value);
  }

+static bool
+kernel_has_dynamic_config_support(struct brw_context *brw,
+  const char *sysfs_dev_dir)
+{
+   __DRIscreen *screen = brw->screen->driScrnPriv;
+   struct hash_entry *entry;
+
+   hash_table_foreach(brw->perfquery.oa_metrics_table, entry) {
+  struct brw_perf_query_info *query = entry->data;
+  char config_path[256];
+  uint64_t config_id;
+
+  snprintf(config_path, sizeof(config_path),
+   "%s/metrics/%s/id", sysfs_dev_dir, query->guid);
+
+  /* Look for the test config, which we know we can't replace. */
+  if (read_file_uint64(config_path, &config_id) && config_id == 1) {
+ uint32_t mux_regs[] = { 0x9888 /* NOA_WRITE */, 0x0 };

The array seems to have a sentinel, yet one provides explicit array
size (of 1) further down.
Having a look through i915_drm.h the expectation is not documented.


This is the number of pair of (register, value).



Can I suggest adding a comment [in the kernel i915_drm.h], or dropping
the sentinel - whichever seems applicable.


I guess I'll add a comment in the kernel.




+ struct drm_i915_perf_oa_config config;
+

Unless I failed at math - the struct does not seem compat safe.
That is sizeof(struct drm_i915_perf_oa_config) varies across 32/64bit builds.


Hmm... What's not compat safe in there? :

struct drm_i915_perf_oa_config {
/** String formatted like "%08x-%04x-%04x-%04x-%012x" */
char uuid[36];

__u32 n_mux_regs;
__u32 n_boolean_regs;
__u32 n_flex_regs;

__u64 mux_regs_ptr;
__u64 boolean_regs_ptr;
__u64 flex_regs_ptr;
};




Some padding should fix it.

HTH
Emil



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] util: move string_to_uint_map to glsl

2017-08-29 Thread Emil Velikov
On 29 August 2017 at 11:41, Mike Lothian  wrote:
> Feel free to add my tested by for this
>
Great, thanks. Pushed to master.
Will scoop for 17.2 in a moment.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 4/5] i965: perf: add support for userspace configurations

2017-08-29 Thread Emil Velikov
Hi Lionel,

On 29 August 2017 at 14:05, Lionel Landwerlin
 wrote:
> This allows us to deploy new configurations without touching the
> kernel.
>
> v2: Detect loadable configs without creating one (Chris)
>
> Signed-off-by: Lionel Landwerlin 
> ---
>  src/mesa/drivers/dri/i965/brw_performance_query.c | 98 
> ++-
>  1 file changed, 95 insertions(+), 3 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
> b/src/mesa/drivers/dri/i965/brw_performance_query.c
> index 4b585c95b7d..6ec6b417e52 100644
> --- a/src/mesa/drivers/dri/i965/brw_performance_query.c
> +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
> @@ -1717,11 +1717,11 @@ read_file_uint64(const char *file, uint64_t *val)
>
>  fd = open(file, 0);
>  if (fd < 0)
> -   return false;
> +return false;
>  n = read(fd, buf, sizeof (buf) - 1);
>  close(fd);
>  if (n < 0)
> -   return false;
> +return false;
>
>  buf[n] = '\0';
>  *val = strtoull(buf, NULL, 0);
> @@ -1807,6 +1807,95 @@ read_sysfs_drm_device_file_uint64(struct brw_context 
> *brw,
> return read_file_uint64(buf, value);
>  }
>
> +static bool
> +kernel_has_dynamic_config_support(struct brw_context *brw,
> +  const char *sysfs_dev_dir)
> +{
> +   __DRIscreen *screen = brw->screen->driScrnPriv;
> +   struct hash_entry *entry;
> +
> +   hash_table_foreach(brw->perfquery.oa_metrics_table, entry) {
> +  struct brw_perf_query_info *query = entry->data;
> +  char config_path[256];
> +  uint64_t config_id;
> +
> +  snprintf(config_path, sizeof(config_path),
> +   "%s/metrics/%s/id", sysfs_dev_dir, query->guid);
> +
> +  /* Look for the test config, which we know we can't replace. */
> +  if (read_file_uint64(config_path, &config_id) && config_id == 1) {
> + uint32_t mux_regs[] = { 0x9888 /* NOA_WRITE */, 0x0 };
The array seems to have a sentinel, yet one provides explicit array
size (of 1) further down.
Having a look through i915_drm.h the expectation is not documented.

Can I suggest adding a comment [in the kernel i915_drm.h], or dropping
the sentinel - whichever seems applicable.

> + struct drm_i915_perf_oa_config config;
> +
Unless I failed at math - the struct does not seem compat safe.
That is sizeof(struct drm_i915_perf_oa_config) varies across 32/64bit builds.

Some padding should fix it.

HTH
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/19] configure: Add and use AX_CHECK_COMPILE_FLAG

2017-08-29 Thread Emil Velikov
On 28 August 2017 at 22:57, Matt Turner  wrote:

> -# Flags to help ensure that certain portions of the code -- and only 
> those
> -# portions -- can be built with MSVC:
> -# - src/util, src/gallium/auxiliary, rc/gallium/drivers/llvmpipe, and
> -# - non-Linux/Posix OpenGL portions needs to build on MSVC 2013 (which
> -#   supports most of C99)
> -# - the rest has no compiler compiler restrictions
> -MSVC2013_COMPAT_CFLAGS="-Werror=pointer-arith"
> -MSVC2013_COMPAT_CXXFLAGS="-Werror=pointer-arith"
> -
Can you please preserve the comment about the MSVC2013 flags?
With that the series is
Reviewed-by: Emil Velikov 

Mildly curious:
Did you pipe the series through ezbench for perf testing?
Wondering if the inline and fabsf -> babs changes made any noticeable impact.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH mesa] khronos/egl: remove dependency on Android NDK header

2017-08-29 Thread Eric Engestrom
On Friday, 2017-08-25 16:47:35 +0100, Emil Velikov wrote:
> On 24 August 2017 at 15:22, Eric Engestrom  wrote:
> > On Thursday, 2017-08-24 08:54:04 -0500, Rob Herring wrote:
> >> On Thu, Aug 24, 2017 at 7:49 AM, Eric Engestrom
> >>  wrote:
> >> > Khronos: https://github.com/KhronosGroup/EGL-Registry/pull/22
> >> > Cc: Rob Herring 
> >> > Cc: Emil Velikov 
> >> > Signed-off-by: Eric Engestrom 
> >> > ---
> >> >  include/EGL/eglplatform.h | 3 +--
> >> >  1 file changed, 1 insertion(+), 2 deletions(-)
> >> >
> >> > diff --git a/include/EGL/eglplatform.h b/include/EGL/eglplatform.h
> >> > index f045d009c0..bf9ec0bf5f 100644
> >> > --- a/include/EGL/eglplatform.h
> >> > +++ b/include/EGL/eglplatform.h
> >> > @@ -97,8 +97,7 @@ typedef void   *EGLNativeWindowType;
> >> >
> >> >  #elif defined(__ANDROID__) || defined(ANDROID)
> >> >
> >> > -#include 
> >> > -
> >> > +struct ANativeWindow;
> >> >  struct egl_native_pixmap_t;
> >>
> >> How does this work when we need to dereference the struct to call
> >> ANativeWindow::dequeueBuffer() and others?
> >
> > Right, there are two things at play here:
> > - eglplatform.h doesn't need to know the struct, so it shouldn't include
> >   a whole header but simply forward declare for the pointer.
> > - platform_android does need it, but wasn't including the proper
> >   headers, so I missed it in my initial grep.
> >
> > It seems these two issues are orthogonal after all. This khronos header
> > patch should land IMO, but platform_android needs a patch to avoid
> > breaking (incoming) and will need the proper libraries that your patch
> > 2/2 provides for the O update.
> >
> Thanks guys, for the correction.
> The two "issues" seem the same but not.
> 
> On this patch - Eric, can you please mention in the commit message why
> we don't copy the whole file.

I pushed it a couple hours before your email, sorry.
/me needs to learn not to push stuff right away, let it sit for a bit...

As for "why we don't copy the whole file", we essentially do, but we
keep our local platforms additions (WL, GBM, X11 on MacOS, and Haiku),
right? Or is there something else?

> 
> With that:
> Reviewed-by: Emil Velikov 
> 
> -Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 3/5] i965: perf: list registers to program for queries

2017-08-29 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
---
 src/mesa/drivers/dri/i965/brw_context.h | 15 ++
 src/mesa/drivers/dri/i965/brw_oa.py | 51 +
 2 files changed, 66 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 8782de814c7..5508aaf6128 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -603,6 +603,11 @@ enum brw_query_kind {
PIPELINE_STATS
 };
 
+struct brw_perf_query_register_prog {
+   uint32_t reg;
+   uint32_t val;
+};
+
 struct brw_perf_query_info
 {
enum brw_query_kind kind;
@@ -622,6 +627,16 @@ struct brw_perf_query_info
int a_offset;
int b_offset;
int c_offset;
+
+   /* Register programming for a given query */
+   struct brw_perf_query_register_prog *flex_regs;
+   uint32_t n_flex_regs;
+
+   struct brw_perf_query_register_prog *mux_regs;
+   uint32_t n_mux_regs;
+
+   struct brw_perf_query_register_prog *b_counter_regs;
+   uint32_t n_b_counter_regs;
 };
 
 /**
diff --git a/src/mesa/drivers/dri/i965/brw_oa.py 
b/src/mesa/drivers/dri/i965/brw_oa.py
index 8c35923462f..576ea6687fc 100644
--- a/src/mesa/drivers/dri/i965/brw_oa.py
+++ b/src/mesa/drivers/dri/i965/brw_oa.py
@@ -380,6 +380,45 @@ def output_counter_report(set, counter, current_offset):
 return current_offset + sizeof(c_type)
 
 
+register_types = {
+'FLEX': 'flex_regs',
+'NOA': 'mux_regs',
+'OA': 'b_counter_regs',
+}
+
+def compute_register_lengths(set):
+register_lengths = {}
+register_configs = set.findall('register_config')
+for register_config in register_configs:
+t = register_types[register_config.get('type')]
+if t not in register_lengths:
+register_lengths[t] = len(register_config.findall('register'))
+else:
+register_lengths[t] += len(register_config.findall('register'))
+
+return register_lengths
+
+
+def generate_register_configs(set):
+register_configs = set.findall('register_config')
+for register_config in register_configs:
+t = register_types[register_config.get('type')]
+
+availability = register_config.get('availability')
+if availability:
+output_availability(set, availability, register_config.get('type') 
+ ' register config')
+c_indent(3)
+
+for register in register_config.findall('register'):
+c("query->%s[query->n_%s++] = (struct 
brw_perf_query_register_prog) { .reg = %s, .val = %s };" %
+  (t, t, register.get('address'), register.get('value')))
+
+if availability:
+c_outdent(3)
+c("}")
+c("\n")
+
+
 def main():
 global c_file
 global header_file
@@ -475,6 +514,12 @@ def main():
 max_values[counter.get('symbol_name')] = output_counter_max(set, 
counter, empty_vars)
 counter_vars["$" + counter.get('symbol_name')] = counter
 
+c("\n")
+register_lengths = compute_register_lengths(set);
+for reg_type, reg_length in register_lengths.iteritems():
+c("static struct brw_perf_query_register_prog 
{0}_{1}_{2}[{3}];".format(chipset,
+   
 set.get('underscore_name'),
+   
 reg_type, reg_length))
 
 c("\nstatic struct brw_perf_query_counter 
{0}_{1}_query_counters[{2}];\n".format(chipset, set.get('underscore_name'), 
len(counters)))
 c("static struct brw_perf_query_info " + chipset + "_" + 
set.get('underscore_name') + "_query = {\n")
@@ -510,6 +555,10 @@ def main():
 .c_offset = 46,
 """))
 
+for reg_type, reg_length in register_lengths.iteritems():
+c(".{0} = {1}_{2}_{3},".format(reg_type, chipset, 
set.get('underscore_name'), reg_type))
+c(".n_{0} = 0, /* Determined at runtime */".format(reg_type))
+
 c_outdent(3)
 c("};\n")
 
@@ -528,6 +577,8 @@ def main():
 c("\nif (!query->data_size) {")
 c_indent(3)
 
+generate_register_configs(set)
+
 offset = 0
 for counter in counters:
 offset = output_counter_report(set, counter, offset)
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 5/5] i965: add a debug option to disable oa config loading

2017-08-29 Thread Lionel Landwerlin
This provides a good way to verify we haven't broken using the perf
driver on older kernels (which don't have the oa config loading
mechanism).

Signed-off-by: Lionel Landwerlin 
---
 src/intel/common/gen_debug.c  | 1 +
 src/intel/common/gen_debug.h  | 2 +-
 src/mesa/drivers/dri/i965/brw_performance_query.c | 3 ++-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/intel/common/gen_debug.c b/src/intel/common/gen_debug.c
index b604d56ef86..a17d628f577 100644
--- a/src/intel/common/gen_debug.c
+++ b/src/intel/common/gen_debug.c
@@ -57,6 +57,7 @@ static const struct debug_control debug_control[] = {
{ "vert",DEBUG_VERTS },
{ "dri", DEBUG_DRI },
{ "sf",  DEBUG_SF },
+   { "no-oaconfig", DEBUG_NO_OACONFIG },
{ "wm",  DEBUG_WM },
{ "urb", DEBUG_URB },
{ "vs",  DEBUG_VS },
diff --git a/src/intel/common/gen_debug.h b/src/intel/common/gen_debug.h
index d290303682e..8c8cce4f846 100644
--- a/src/intel/common/gen_debug.h
+++ b/src/intel/common/gen_debug.h
@@ -57,7 +57,7 @@ extern uint64_t INTEL_DEBUG;
 #define DEBUG_VERTS   (1ull << 13)
 #define DEBUG_DRI (1ull << 14)
 #define DEBUG_SF  (1ull << 15)
-/* Hole - feel free to reuse  (1ull << 16) */
+#define DEBUG_NO_OACONFIG (1ull << 16)
 #define DEBUG_WM  (1ull << 17)
 #define DEBUG_URB (1ull << 18)
 #define DEBUG_VS  (1ull << 19)
diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index 6ec6b417e52..34e79f398ab 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -2143,7 +2143,8 @@ brw_init_perf_query_info(struct gl_context *ctx)
*/
   oa_register(brw);
 
-  if (kernel_has_dynamic_config_support(brw, sysfs_dev_dir))
+  if (likely((INTEL_DEBUG & DEBUG_NO_OACONFIG) == 0) &&
+  kernel_has_dynamic_config_support(brw, sysfs_dev_dir))
  init_oa_configs(brw, sysfs_dev_dir);
   else
  enumerate_sysfs_metrics(brw, sysfs_dev_dir);
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/5] i965: perf: factorize code for availability

2017-08-29 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
---
 src/mesa/drivers/dri/i965/brw_oa.py | 28 
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_oa.py 
b/src/mesa/drivers/dri/i965/brw_oa.py
index 91f7ecb5731..8c35923462f 100644
--- a/src/mesa/drivers/dri/i965/brw_oa.py
+++ b/src/mesa/drivers/dri/i965/brw_oa.py
@@ -325,6 +325,21 @@ semantic_type_map = {
 "ratio": "event"
 }
 
+def output_availability(set, availability, counter_name):
+expression = splice_rpn_expression(set, counter_name, availability)
+lines = expression.split(' && ')
+n_lines = len(lines)
+if n_lines == 1:
+c("if (" + lines[0] + ") {")
+else:
+c("if (" + lines[0] + " &&")
+c_indent(4)
+for i in range(1, (n_lines - 1)):
+c(lines[i] + " &&")
+c(lines[(n_lines - 1)] + ") {")
+c_outdent(4)
+
+
 def output_counter_report(set, counter, current_offset):
 data_type = counter.get('data_type')
 data_type_uc = data_type.upper()
@@ -343,18 +358,7 @@ def output_counter_report(set, counter, current_offset):
 
 availability = counter.get('availability')
 if availability:
-expression = splice_rpn_expression(set, counter, availability)
-lines = expression.split(' && ')
-n_lines = len(lines)
-if n_lines == 1:
-c("if (" + lines[0] + ") {")
-else:
-c("if (" + lines[0] + " &&")
-c_indent(4)
-for i in range(1, (n_lines - 1)):
-c(lines[i] + " &&")
-c(lines[(n_lines - 1)] + ") {")
-c_outdent(4)
+output_availability(set, availability, counter.get('name'))
 c_indent(3)
 
 c("counter = &query->counters[query->n_counters++];\n")
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 4/5] i965: perf: add support for userspace configurations

2017-08-29 Thread Lionel Landwerlin
This allows us to deploy new configurations without touching the
kernel.

v2: Detect loadable configs without creating one (Chris)

Signed-off-by: Lionel Landwerlin 
---
 src/mesa/drivers/dri/i965/brw_performance_query.c | 98 ++-
 1 file changed, 95 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index 4b585c95b7d..6ec6b417e52 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -1717,11 +1717,11 @@ read_file_uint64(const char *file, uint64_t *val)
 
 fd = open(file, 0);
 if (fd < 0)
-   return false;
+return false;
 n = read(fd, buf, sizeof (buf) - 1);
 close(fd);
 if (n < 0)
-   return false;
+return false;
 
 buf[n] = '\0';
 *val = strtoull(buf, NULL, 0);
@@ -1807,6 +1807,95 @@ read_sysfs_drm_device_file_uint64(struct brw_context 
*brw,
return read_file_uint64(buf, value);
 }
 
+static bool
+kernel_has_dynamic_config_support(struct brw_context *brw,
+  const char *sysfs_dev_dir)
+{
+   __DRIscreen *screen = brw->screen->driScrnPriv;
+   struct hash_entry *entry;
+
+   hash_table_foreach(brw->perfquery.oa_metrics_table, entry) {
+  struct brw_perf_query_info *query = entry->data;
+  char config_path[256];
+  uint64_t config_id;
+
+  snprintf(config_path, sizeof(config_path),
+   "%s/metrics/%s/id", sysfs_dev_dir, query->guid);
+
+  /* Look for the test config, which we know we can't replace. */
+  if (read_file_uint64(config_path, &config_id) && config_id == 1) {
+ uint32_t mux_regs[] = { 0x9888 /* NOA_WRITE */, 0x0 };
+ struct drm_i915_perf_oa_config config;
+
+ memset(&config, 0, sizeof(config));
+
+ memcpy(config.uuid, query->guid, sizeof(config.uuid));
+
+ config.n_mux_regs = 1;
+ config.mux_regs_ptr = (uintptr_t) mux_regs;
+
+ if (ioctl(screen->fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config_id) 
< 0 &&
+ errno == ENOENT)
+return true;
+
+ break;
+  }
+   }
+
+   return false;
+}
+
+static void
+init_oa_configs(struct brw_context *brw, const char *sysfs_dev_dir)
+{
+   __DRIscreen *screen = brw->screen->driScrnPriv;
+   struct hash_entry *entry;
+
+   hash_table_foreach(brw->perfquery.oa_metrics_table, entry) {
+  struct brw_perf_query_info *query = entry->data;
+  struct drm_i915_perf_oa_config config;
+  char config_path[256];
+  uint64_t config_id;
+  int ret;
+
+  snprintf(config_path, sizeof(config_path),
+   "%s/metrics/%s/id", sysfs_dev_dir, query->guid);
+
+  if (read_file_uint64(config_path, &config_id)) {
+ if (config_id <= 1)
+continue;
+
+ ioctl(screen->fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config_id);
+  }
+
+  memset(&config, 0, sizeof(config));
+
+  memcpy(config.uuid, query->guid, sizeof(config.uuid));
+
+  config.n_mux_regs = query->n_mux_regs;
+  config.mux_regs_ptr = (uintptr_t) query->mux_regs;
+
+  config.n_boolean_regs = query->n_b_counter_regs;
+  config.boolean_regs_ptr = (uintptr_t) query->b_counter_regs;
+
+  config.n_flex_regs = query->n_flex_regs;
+  config.flex_regs_ptr = (uintptr_t) query->flex_regs;
+
+  ret = ioctl(screen->fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config);
+  if (ret < 0) {
+ DBG("Failed to load \"%s\" (%s) metrics set in kernel: %s\n",
+ query->name, query->guid, strerror(errno));
+  } else {
+ struct brw_perf_query_info *registred_query =
+append_query_info(brw);
+ *registred_query = *query;
+ registred_query->oa_metrics_set_id = ret;
+ DBG("metric set registred: id = %" PRIu64", guid = %s\n",
+ registred_query->oa_metrics_set_id, query->guid);
+  }
+   }
+}
+
 static bool
 init_oa_sys_vars(struct brw_context *brw, const char *sysfs_dev_dir)
 {
@@ -2054,7 +2143,10 @@ brw_init_perf_query_info(struct gl_context *ctx)
*/
   oa_register(brw);
 
-  enumerate_sysfs_metrics(brw, sysfs_dev_dir);
+  if (kernel_has_dynamic_config_support(brw, sysfs_dev_dir))
+ init_oa_configs(brw, sysfs_dev_dir);
+  else
+ enumerate_sysfs_metrics(brw, sysfs_dev_dir);
}
 
brw->perfquery.unaccumulated =
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 0/5] i965: add support for loadable OA configs

2017-08-29 Thread Lionel Landwerlin
Hi again,

Improvement on detection of loadable configs as suggested by Chris.
Change is in patch 4.

Cheers,

Lionel Landwerlin (5):
  i965: perf: make revision variable available
  i965: perf: factorize code for availability
  i965: perf: list registers to program for queries
  i965: perf: add support for userspace configurations
  i965: add a debug option to disable oa config loading

 src/intel/common/gen_debug.c  |   1 +
 src/intel/common/gen_debug.h  |   2 +-
 src/mesa/drivers/dri/i965/brw_context.h   |  16 
 src/mesa/drivers/dri/i965/brw_oa.py   |  80 ++---
 src/mesa/drivers/dri/i965/brw_performance_query.c | 103 +-
 src/mesa/drivers/dri/i965/intel_screen.c  |   9 +-
 src/mesa/drivers/dri/i965/intel_screen.h  |   3 +
 7 files changed, 190 insertions(+), 24 deletions(-)

--
2.14.1
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 1/5] i965: perf: make revision variable available

2017-08-29 Thread Lionel Landwerlin
This will be used in the next commit to build up register programming.

Signed-off-by: Lionel Landwerlin 
---
 src/mesa/drivers/dri/i965/brw_context.h   | 1 +
 src/mesa/drivers/dri/i965/brw_oa.py   | 1 +
 src/mesa/drivers/dri/i965/brw_performance_query.c | 4 +++-
 src/mesa/drivers/dri/i965/intel_screen.c  | 9 ++---
 src/mesa/drivers/dri/i965/intel_screen.h  | 3 +++
 5 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 2274fe5c80e..8782de814c7 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1104,6 +1104,7 @@ struct brw_context
  uint64_t subslice_mask;   /** $SubsliceMask */
  uint64_t gt_min_freq; /** $GpuMinFrequency */
  uint64_t gt_max_freq; /** $GpuMaxFrequency */
+ uint64_t revision;/** $SkuRevisionId */
   } sys_vars;
 
   /* OA metric sets, indexed by GUID, as know by Mesa at build time,
diff --git a/src/mesa/drivers/dri/i965/brw_oa.py 
b/src/mesa/drivers/dri/i965/brw_oa.py
index 254c512a7da..91f7ecb5731 100644
--- a/src/mesa/drivers/dri/i965/brw_oa.py
+++ b/src/mesa/drivers/dri/i965/brw_oa.py
@@ -175,6 +175,7 @@ hw_vars["$SubsliceMask"] = 
"brw->perfquery.sys_vars.subslice_mask"
 hw_vars["$GpuTimestampFrequency"] = 
"brw->perfquery.sys_vars.timestamp_frequency"
 hw_vars["$GpuMinFrequency"] = "brw->perfquery.sys_vars.gt_min_freq"
 hw_vars["$GpuMaxFrequency"] = "brw->perfquery.sys_vars.gt_max_freq"
+hw_vars["$SkuRevisionId"] = "brw->perfquery.sys_vars.revision"
 
 def output_rpn_equation_code(set, counter, equation, counter_vars):
 c("/* RPN equation: " + equation + " */")
diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index d8680b48793..4b585c95b7d 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -1812,6 +1812,7 @@ init_oa_sys_vars(struct brw_context *brw, const char 
*sysfs_dev_dir)
 {
const struct gen_device_info *devinfo = &brw->screen->devinfo;
uint64_t min_freq_mhz = 0, max_freq_mhz = 0;
+   __DRIscreen *screen = brw->screen->driScrnPriv;
 
if (!read_sysfs_drm_device_file_uint64(brw, sysfs_dev_dir,
   "gt_min_freq_mhz",
@@ -1826,6 +1827,8 @@ init_oa_sys_vars(struct brw_context *brw, const char 
*sysfs_dev_dir)
brw->perfquery.sys_vars.gt_min_freq = min_freq_mhz * 100;
brw->perfquery.sys_vars.gt_max_freq = max_freq_mhz * 100;
brw->perfquery.sys_vars.timestamp_frequency = devinfo->timestamp_frequency;
+
+   brw->perfquery.sys_vars.revision = intel_device_get_revision(screen->fd);
brw->perfquery.sys_vars.n_eu_slices = devinfo->num_slices;
/* Assuming uniform distribution of subslices per slices. */
brw->perfquery.sys_vars.n_eu_sub_slices = devinfo->num_subslices[0];
@@ -1848,7 +1851,6 @@ init_oa_sys_vars(struct brw_context *brw, const char 
*sysfs_dev_dir)
   } else
  unreachable("not reached");
} else {
-  __DRIscreen *screen = brw->screen->driScrnPriv;
   drm_i915_getparam_t gp;
   int ret;
   int slice_mask = 0;
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index f98475997d3..3d617f99436 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -2052,14 +2052,9 @@ set_max_gl_versions(struct intel_screen *screen)
 /**
  * Return the revision (generally the revid field of the PCI header) of the
  * graphics device.
- *
- * XXX: This function is useful to keep around even if it is not currently in
- * use. It is necessary for new platforms and revision specific workarounds or
- * features. Please don't remove it so that we know it at least continues to
- * build.
  */
-static __attribute__((__unused__)) int
-brw_get_revision(int fd)
+int
+intel_device_get_revision(int fd)
 {
struct drm_i915_getparam gp;
int revision;
diff --git a/src/mesa/drivers/dri/i965/intel_screen.h 
b/src/mesa/drivers/dri/i965/intel_screen.h
index 41e1dbdd4e9..7948617b7f0 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.h
+++ b/src/mesa/drivers/dri/i965/intel_screen.h
@@ -136,6 +136,9 @@ double get_time(void);
 const int*
 intel_supported_msaa_modes(const struct intel_screen  *screen);
 
+int
+intel_device_get_revision(int fd);
+
 static inline bool
 can_do_pipelined_register_writes(const struct intel_screen *screen)
 {
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Question about implementing viewport transfer and const load in nir

2017-08-29 Thread Rob Clark
On Sat, Aug 26, 2017 at 9:40 AM, Qiang Yu  wrote:
> Hi guys,
>
> When working on lima gp compiler, I come across two problems about
> inserting extra uniform
> and instructions in nir for the driver and don't know where's the
> right place to do it. So I'd like
> to hear your opinion and if there's other driver already did so.
>
> 1. viewport transfer
> lima gp needs embed viewport transfer into vertex shader, so need to
> insert a uniform which
> holds the scale and transfer and some instruction to apply the
> calculation to the gl_Position
> varying output. If do this in driver callback create_vs_state(), seems
> won't affect the state
> tracker to allocate uniform space for it, maybe add something in
> st_create_vp_variant()?
>
> 2. const load
> lima gp needs const be loaded just as uniform, so I have to allocate
> uniform space for them.
> Besides the same problem as 1 (where to do it), the const node may be
> eliminated after driver
> nir optimization and won't have a base filed as uniform.
>
> Seems some of these (space allocation) need be done in non-driver
> layer and some (instruction
> insertion and uniform base assign to const) can be done in driver.
>
> BTW. lima gp can only have one uniform buffer, so I can't just use a
> dedicated uniform buffer
> for viewport transfer and const uniform.
>

btw, does lima have some way to write to memory from cmdstream (ie.
without setting up a full blown draw)?  If so perhaps you could get
away with leaving some extra space at the end of your uniform buffer
that you copy driver internal uniforms into before kicking the draw?

fwiw, freedreno does have driver specific uniforms, see
ir3_driver_param.  Although normal uniforms (ie. non-UBO) get copied
into internal memory, so I just upload driver specific uniforms (as
needed) and immediates to the tail of the uniform memory before the
draw.

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 102466] gallium/auxiliary/util/u_cpu_detect.c: 2 * bad condition ?

2017-08-29 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=102466

--- Comment #3 from Emil Velikov  ---
(In reply to dcb314 from comment #2)

> I think you missed the point. Any expression ANDed with 6 will be != 6.
> 
How about when xgetbv() returns say 6 or 7 (amongst others)?

Also keep in mind that the file depends on multiple defines. Based on a _very_
old play with Mesa + cppcheck, the latter was tripping quite often, producing
false positives.

The OS/compiler request does not sound that unreasonable ;-)

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 102466] gallium/auxiliary/util/u_cpu_detect.c: 2 * bad condition ?

2017-08-29 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=102466

--- Comment #2 from dcb...@hotmail.com ---
(In reply to Gert Wollny from comment #1)
> It would be helpful to add on which OS and with which compiler and for what
> architecture you were compiling mesa, because depending on the build
> environment xgetbv() may simply return 0 as a fallback (see lines 259-277 in
> the same file).

I think you missed the point. Any expression ANDed with 6 will be != 6.

OS, compiler and arch independent. For the record, Linux, gcc and x86_64,
although I used a static analyser called cppcheck to find this bug.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/5] i965: perf: add support for userspace configurations

2017-08-29 Thread Lionel Landwerlin

On 29/08/17 12:15, Chris Wilson wrote:

Quoting Lionel Landwerlin (2017-08-29 11:58:57)

This allows us to deploy new configurations without touching the
kernel.

Signed-off-by: Lionel Landwerlin 
---
  src/mesa/drivers/dri/i965/brw_performance_query.c | 97 ++-
  1 file changed, 96 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index 4b585c95b7d..901cbb464e9 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -1807,6 +1807,96 @@ read_sysfs_drm_device_file_uint64(struct brw_context 
*brw,
 return read_file_uint64(buf, value);
  }
  
+static bool

+kernel_has_dynamic_config_support(int drm_fd, const char *sysfs_dev_dir)
+{
+struct drm_i915_perf_oa_config config;
+const char *uuid = "01234567-0123-0123-0123-0123456789ab";
+uint32_t mux_regs[] = { 0x9888 /* NOA_WRITE */, 0x0 };
+char config_path[256];
+struct stat sb;
+uint64_t config_id;
+int ret;
+
+snprintf(config_path, sizeof(config_path),
+ "%s/metrics/%s/id", sysfs_dev_dir, uuid);
+
+if (stat(config_path, &sb) == 0) {
+   if (!read_file_uint64(config_path, &config_id))
+  return false;
+
+   if (ioctl(drm_fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config_id) == 0)
+  return true;

So you acknowledge the race condition with another process also doing
the same test, or worse if they genuinely used that uuid for an actual
config.


Thanks, I should randomize that.




+}
+
+memset(&config, 0, sizeof(config));
+memcpy(config.uuid, uuid, sizeof(config.uuid));
+
+config.n_mux_regs = 1;
+config.mux_regs_ptr = (uintptr_t) mux_regs;
+
+/* Create a new config */
+ret = ioctl(drm_fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config);
+if (ret < 0)
+return false;

A trick you may like to try is to create an invalid config that
generates something other than EINVAL so that you can then distinguish
with the non-existent ioctl cmd error. Since you reserved id=0 for
invalid, a good choice would be

static bool has_dyn_config() {
uint64_t config_id = 0;
return sys_ioctl(drm_fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config_id) 
== -ENOENT;
}


Thanks!


-Chris



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/5] i965: perf: add support for userspace configurations

2017-08-29 Thread Chris Wilson
Quoting Lionel Landwerlin (2017-08-29 11:58:57)
> This allows us to deploy new configurations without touching the
> kernel.
> 
> Signed-off-by: Lionel Landwerlin 
> ---
>  src/mesa/drivers/dri/i965/brw_performance_query.c | 97 
> ++-
>  1 file changed, 96 insertions(+), 1 deletion(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
> b/src/mesa/drivers/dri/i965/brw_performance_query.c
> index 4b585c95b7d..901cbb464e9 100644
> --- a/src/mesa/drivers/dri/i965/brw_performance_query.c
> +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
> @@ -1807,6 +1807,96 @@ read_sysfs_drm_device_file_uint64(struct brw_context 
> *brw,
> return read_file_uint64(buf, value);
>  }
>  
> +static bool
> +kernel_has_dynamic_config_support(int drm_fd, const char *sysfs_dev_dir)
> +{
> +struct drm_i915_perf_oa_config config;
> +const char *uuid = "01234567-0123-0123-0123-0123456789ab";
> +uint32_t mux_regs[] = { 0x9888 /* NOA_WRITE */, 0x0 };
> +char config_path[256];
> +struct stat sb;
> +uint64_t config_id;
> +int ret;
> +
> +snprintf(config_path, sizeof(config_path),
> + "%s/metrics/%s/id", sysfs_dev_dir, uuid);
> +
> +if (stat(config_path, &sb) == 0) {
> +   if (!read_file_uint64(config_path, &config_id))
> +  return false;
> +
> +   if (ioctl(drm_fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config_id) == 0)
> +  return true;

So you acknowledge the race condition with another process also doing
the same test, or worse if they genuinely used that uuid for an actual
config.

> +}
> +
> +memset(&config, 0, sizeof(config));
> +memcpy(config.uuid, uuid, sizeof(config.uuid));
> +
> +config.n_mux_regs = 1;
> +config.mux_regs_ptr = (uintptr_t) mux_regs;
> +
> +/* Create a new config */
> +ret = ioctl(drm_fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config);
> +if (ret < 0)
> +return false;

A trick you may like to try is to create an invalid config that
generates something other than EINVAL so that you can then distinguish
with the non-existent ioctl cmd error. Since you reserved id=0 for
invalid, a good choice would be

static bool has_dyn_config() {
uint64_t config_id = 0;
return sys_ioctl(drm_fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config_id) 
== -ENOENT;
}
-Chris
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/10] egl: mixed bag of fixes/cleanups

2017-08-29 Thread Emil Velikov
[Dropping Neil's email, as it bounces]

On 29 August 2017 at 11:43, Daniel Stone  wrote:

   egl: allow RGB565 formats in eglCreateWaylandBufferFromImageWL
>>>
>>> I've been avoiding trying to expand use of this API, in all honesty.
>>>
>> Any particular reason?
>>
>> Quick search on the webs + my system shows no users of the extension,
>> apart from weston-nested.
>> Even there it's not a hard requirement.
>>
>> Any ideas if the extension was ever used [elsewhere], should we
>> consider deprecating it?
>
> I haven't seen much use of it, myself. It's also only really useful
> for wl_drm; I'd prefer to push zwp_linux_dmabuf instead. It also
> relies on a more perfect proxying of wl_drm than we necessarily
> actually implement.
>
Right. So I guess as weston-nested is updated to use wl_dmabuf we
could sketch an evil plan ;-)


   egl: add juicy comment about WL_bind_wayland_display +
 wl_drm/wl_dmabuf
>>>
>>> It depends on a couple of things, really. Firstly, on Mesa's general
>>> feature deprecation schedule. Secondly, on external support
>>> (Weston/Mutter/Enlightenment are fine, no support yet in
>>> KWin/WLC/Smithay/...). Thirdly, on how cleanly we can separate it. If
>>> we can break out all the BindWaylandDisplay code more than we already
>>> have, it might be easier to maintain. And lastly, on NVIDIA: I believe
>>> being a Streams provider requires BindWaylandDisplay, so we might need
>>> that to be replaced before we can rip it out of Mesa.
>>>
>>> Having the comment expanded to cover these might be nice.
>>
>> Fully agree on all points, although I think the priority is in the opposite 
>> way.
>> But above all does the extension make sense, or even yet, can it be
>> implemented with wl_dmabuf?
>
> The one thing it provides over and above wl_dmabuf is device
> advertisement, to guide the implementation towards a particular GPU.
> So maybe it makes sense to keep it just for that, purely for render
> nodes.
>
Ack, so it'll be a fairly trivial implementation.

> Also VA-API still uses wl_drm, so we need to wean that away from it first.
>
Right, that's why I flipped the order ;-) Although they are
considering breaking the ABI with the v2.0 branch.

For which I have some sweet patches - purge wl_drm, forbid use of card nodes...
Need to finish that one of the of these days.


   egl/wayland: group wl_win specific code together
   egl/wayland: make sure HAS_$FORMAT is set for wl_dmabuf
>>>
>>> Reviewed-by: Daniel Stone 
>>>
>> As above - I'm guessing the r-b stands for both patches?
>
> Yep!
>
Ack.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 102466] gallium/auxiliary/util/u_cpu_detect.c: 2 * bad condition ?

2017-08-29 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=102466

--- Comment #1 from Gert Wollny  ---
It would be helpful to add on which OS and with which compiler and for what
architecture you were compiling mesa, because depending on the build
environment xgetbv() may simply return 0 as a fallback (see lines 259-277 in
the same file).

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/5] i965: add a debug option to disable oa config loading

2017-08-29 Thread Lionel Landwerlin
This provides a good way to verify we haven't broken using the perf
driver on older kernels (which don't have the oa config loading
mechanism).

Signed-off-by: Lionel Landwerlin 
---
 src/intel/common/gen_debug.c  | 1 +
 src/intel/common/gen_debug.h  | 2 +-
 src/mesa/drivers/dri/i965/brw_performance_query.c | 3 ++-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/intel/common/gen_debug.c b/src/intel/common/gen_debug.c
index b604d56ef86..a17d628f577 100644
--- a/src/intel/common/gen_debug.c
+++ b/src/intel/common/gen_debug.c
@@ -57,6 +57,7 @@ static const struct debug_control debug_control[] = {
{ "vert",DEBUG_VERTS },
{ "dri", DEBUG_DRI },
{ "sf",  DEBUG_SF },
+   { "no-oaconfig", DEBUG_NO_OACONFIG },
{ "wm",  DEBUG_WM },
{ "urb", DEBUG_URB },
{ "vs",  DEBUG_VS },
diff --git a/src/intel/common/gen_debug.h b/src/intel/common/gen_debug.h
index d290303682e..8c8cce4f846 100644
--- a/src/intel/common/gen_debug.h
+++ b/src/intel/common/gen_debug.h
@@ -57,7 +57,7 @@ extern uint64_t INTEL_DEBUG;
 #define DEBUG_VERTS   (1ull << 13)
 #define DEBUG_DRI (1ull << 14)
 #define DEBUG_SF  (1ull << 15)
-/* Hole - feel free to reuse  (1ull << 16) */
+#define DEBUG_NO_OACONFIG (1ull << 16)
 #define DEBUG_WM  (1ull << 17)
 #define DEBUG_URB (1ull << 18)
 #define DEBUG_VS  (1ull << 19)
diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index 901cbb464e9..16e356e70b2 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -2146,7 +2146,8 @@ brw_init_perf_query_info(struct gl_context *ctx)
*/
   oa_register(brw);
 
-  if (kernel_has_dynamic_config_support(screen->fd, sysfs_dev_dir))
+  if (likely((INTEL_DEBUG & DEBUG_NO_OACONFIG) == 0) &&
+  kernel_has_dynamic_config_support(screen->fd, sysfs_dev_dir))
  init_oa_configs(brw, sysfs_dev_dir);
   else
  enumerate_sysfs_metrics(brw, sysfs_dev_dir);
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/5] i965: perf: list registers to program for queries

2017-08-29 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
---
 src/mesa/drivers/dri/i965/brw_context.h | 15 ++
 src/mesa/drivers/dri/i965/brw_oa.py | 51 +
 2 files changed, 66 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 8782de814c7..5508aaf6128 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -603,6 +603,11 @@ enum brw_query_kind {
PIPELINE_STATS
 };
 
+struct brw_perf_query_register_prog {
+   uint32_t reg;
+   uint32_t val;
+};
+
 struct brw_perf_query_info
 {
enum brw_query_kind kind;
@@ -622,6 +627,16 @@ struct brw_perf_query_info
int a_offset;
int b_offset;
int c_offset;
+
+   /* Register programming for a given query */
+   struct brw_perf_query_register_prog *flex_regs;
+   uint32_t n_flex_regs;
+
+   struct brw_perf_query_register_prog *mux_regs;
+   uint32_t n_mux_regs;
+
+   struct brw_perf_query_register_prog *b_counter_regs;
+   uint32_t n_b_counter_regs;
 };
 
 /**
diff --git a/src/mesa/drivers/dri/i965/brw_oa.py 
b/src/mesa/drivers/dri/i965/brw_oa.py
index 8c35923462f..576ea6687fc 100644
--- a/src/mesa/drivers/dri/i965/brw_oa.py
+++ b/src/mesa/drivers/dri/i965/brw_oa.py
@@ -380,6 +380,45 @@ def output_counter_report(set, counter, current_offset):
 return current_offset + sizeof(c_type)
 
 
+register_types = {
+'FLEX': 'flex_regs',
+'NOA': 'mux_regs',
+'OA': 'b_counter_regs',
+}
+
+def compute_register_lengths(set):
+register_lengths = {}
+register_configs = set.findall('register_config')
+for register_config in register_configs:
+t = register_types[register_config.get('type')]
+if t not in register_lengths:
+register_lengths[t] = len(register_config.findall('register'))
+else:
+register_lengths[t] += len(register_config.findall('register'))
+
+return register_lengths
+
+
+def generate_register_configs(set):
+register_configs = set.findall('register_config')
+for register_config in register_configs:
+t = register_types[register_config.get('type')]
+
+availability = register_config.get('availability')
+if availability:
+output_availability(set, availability, register_config.get('type') 
+ ' register config')
+c_indent(3)
+
+for register in register_config.findall('register'):
+c("query->%s[query->n_%s++] = (struct 
brw_perf_query_register_prog) { .reg = %s, .val = %s };" %
+  (t, t, register.get('address'), register.get('value')))
+
+if availability:
+c_outdent(3)
+c("}")
+c("\n")
+
+
 def main():
 global c_file
 global header_file
@@ -475,6 +514,12 @@ def main():
 max_values[counter.get('symbol_name')] = output_counter_max(set, 
counter, empty_vars)
 counter_vars["$" + counter.get('symbol_name')] = counter
 
+c("\n")
+register_lengths = compute_register_lengths(set);
+for reg_type, reg_length in register_lengths.iteritems():
+c("static struct brw_perf_query_register_prog 
{0}_{1}_{2}[{3}];".format(chipset,
+   
 set.get('underscore_name'),
+   
 reg_type, reg_length))
 
 c("\nstatic struct brw_perf_query_counter 
{0}_{1}_query_counters[{2}];\n".format(chipset, set.get('underscore_name'), 
len(counters)))
 c("static struct brw_perf_query_info " + chipset + "_" + 
set.get('underscore_name') + "_query = {\n")
@@ -510,6 +555,10 @@ def main():
 .c_offset = 46,
 """))
 
+for reg_type, reg_length in register_lengths.iteritems():
+c(".{0} = {1}_{2}_{3},".format(reg_type, chipset, 
set.get('underscore_name'), reg_type))
+c(".n_{0} = 0, /* Determined at runtime */".format(reg_type))
+
 c_outdent(3)
 c("};\n")
 
@@ -528,6 +577,8 @@ def main():
 c("\nif (!query->data_size) {")
 c_indent(3)
 
+generate_register_configs(set)
+
 offset = 0
 for counter in counters:
 offset = output_counter_report(set, counter, offset)
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/5] i965: perf: add support for userspace configurations

2017-08-29 Thread Lionel Landwerlin
This allows us to deploy new configurations without touching the
kernel.

Signed-off-by: Lionel Landwerlin 
---
 src/mesa/drivers/dri/i965/brw_performance_query.c | 97 ++-
 1 file changed, 96 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index 4b585c95b7d..901cbb464e9 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -1807,6 +1807,96 @@ read_sysfs_drm_device_file_uint64(struct brw_context 
*brw,
return read_file_uint64(buf, value);
 }
 
+static bool
+kernel_has_dynamic_config_support(int drm_fd, const char *sysfs_dev_dir)
+{
+struct drm_i915_perf_oa_config config;
+const char *uuid = "01234567-0123-0123-0123-0123456789ab";
+uint32_t mux_regs[] = { 0x9888 /* NOA_WRITE */, 0x0 };
+char config_path[256];
+struct stat sb;
+uint64_t config_id;
+int ret;
+
+snprintf(config_path, sizeof(config_path),
+ "%s/metrics/%s/id", sysfs_dev_dir, uuid);
+
+if (stat(config_path, &sb) == 0) {
+   if (!read_file_uint64(config_path, &config_id))
+  return false;
+
+   if (ioctl(drm_fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config_id) == 0)
+  return true;
+}
+
+memset(&config, 0, sizeof(config));
+memcpy(config.uuid, uuid, sizeof(config.uuid));
+
+config.n_mux_regs = 1;
+config.mux_regs_ptr = (uintptr_t) mux_regs;
+
+/* Create a new config */
+ret = ioctl(drm_fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config);
+if (ret < 0)
+return false;
+
+config_id = ret;
+ioctl(drm_fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config_id);
+
+return true;
+}
+
+static void
+init_oa_configs(struct brw_context *brw, const char *sysfs_dev_dir)
+{
+   __DRIscreen *screen = brw->screen->driScrnPriv;
+   struct hash_entry *entry;
+
+   hash_table_foreach(brw->perfquery.oa_metrics_table, entry) {
+  struct brw_perf_query_info *query = entry->data;
+  struct drm_i915_perf_oa_config config;
+  char config_path[256];
+  uint64_t config_id;
+  int ret;
+
+  snprintf(config_path, sizeof(config_path),
+   "%s/metrics/%s/id", sysfs_dev_dir, query->guid);
+
+  if (read_file_uint64(config_path, &config_id)) {
+ if (config_id <= 1)
+continue;
+
+ ioctl(screen->fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config_id);
+  }
+
+  memset(&config, 0, sizeof(config));
+
+  memcpy(config.uuid, query->guid, sizeof(config.uuid));
+
+  config.n_mux_regs = query->n_mux_regs;
+  config.mux_regs_ptr = (uintptr_t) query->mux_regs;
+
+  config.n_boolean_regs = query->n_b_counter_regs;
+  config.boolean_regs_ptr = (uintptr_t) query->b_counter_regs;
+
+  config.n_flex_regs = query->n_flex_regs;
+  config.flex_regs_ptr = (uintptr_t) query->flex_regs;
+
+  ret = ioctl(screen->fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config);
+  if (ret < 0) {
+ DBG("Failed to load \"%s\" (%s) metrics set in kernel: %s\n",
+ query->name, query->guid, strerror(errno));
+  } else {
+ struct brw_perf_query_info *registred_query =
+append_query_info(brw);
+ *registred_query = *query;
+ registred_query->oa_metrics_set_id = ret;
+ DBG("metric set registred: id = %" PRIu64", guid = %s\n",
+ registred_query->oa_metrics_set_id, query->guid);
+  }
+   }
+}
+
 static bool
 init_oa_sys_vars(struct brw_context *brw, const char *sysfs_dev_dir)
 {
@@ -2045,6 +2135,8 @@ brw_init_perf_query_info(struct gl_context *ctx)
get_sysfs_dev_dir(brw, sysfs_dev_dir, sizeof(sysfs_dev_dir)) &&
init_oa_sys_vars(brw, sysfs_dev_dir))
{
+  __DRIscreen *screen = brw->screen->driScrnPriv;
+
   brw->perfquery.oa_metrics_table =
  _mesa_hash_table_create(NULL, _mesa_key_hash_string,
  _mesa_key_string_equal);
@@ -2054,7 +2146,10 @@ brw_init_perf_query_info(struct gl_context *ctx)
*/
   oa_register(brw);
 
-  enumerate_sysfs_metrics(brw, sysfs_dev_dir);
+  if (kernel_has_dynamic_config_support(screen->fd, sysfs_dev_dir))
+ init_oa_configs(brw, sysfs_dev_dir);
+  else
+ enumerate_sysfs_metrics(brw, sysfs_dev_dir);
}
 
brw->perfquery.unaccumulated =
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/5] i965: perf: make revision variable available

2017-08-29 Thread Lionel Landwerlin
This will be used in the next commit to build up register programming.

Signed-off-by: Lionel Landwerlin 
---
 src/mesa/drivers/dri/i965/brw_context.h   | 1 +
 src/mesa/drivers/dri/i965/brw_oa.py   | 1 +
 src/mesa/drivers/dri/i965/brw_performance_query.c | 4 +++-
 src/mesa/drivers/dri/i965/intel_screen.c  | 9 ++---
 src/mesa/drivers/dri/i965/intel_screen.h  | 3 +++
 5 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 2274fe5c80e..8782de814c7 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1104,6 +1104,7 @@ struct brw_context
  uint64_t subslice_mask;   /** $SubsliceMask */
  uint64_t gt_min_freq; /** $GpuMinFrequency */
  uint64_t gt_max_freq; /** $GpuMaxFrequency */
+ uint64_t revision;/** $SkuRevisionId */
   } sys_vars;
 
   /* OA metric sets, indexed by GUID, as know by Mesa at build time,
diff --git a/src/mesa/drivers/dri/i965/brw_oa.py 
b/src/mesa/drivers/dri/i965/brw_oa.py
index 254c512a7da..91f7ecb5731 100644
--- a/src/mesa/drivers/dri/i965/brw_oa.py
+++ b/src/mesa/drivers/dri/i965/brw_oa.py
@@ -175,6 +175,7 @@ hw_vars["$SubsliceMask"] = 
"brw->perfquery.sys_vars.subslice_mask"
 hw_vars["$GpuTimestampFrequency"] = 
"brw->perfquery.sys_vars.timestamp_frequency"
 hw_vars["$GpuMinFrequency"] = "brw->perfquery.sys_vars.gt_min_freq"
 hw_vars["$GpuMaxFrequency"] = "brw->perfquery.sys_vars.gt_max_freq"
+hw_vars["$SkuRevisionId"] = "brw->perfquery.sys_vars.revision"
 
 def output_rpn_equation_code(set, counter, equation, counter_vars):
 c("/* RPN equation: " + equation + " */")
diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index d8680b48793..4b585c95b7d 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -1812,6 +1812,7 @@ init_oa_sys_vars(struct brw_context *brw, const char 
*sysfs_dev_dir)
 {
const struct gen_device_info *devinfo = &brw->screen->devinfo;
uint64_t min_freq_mhz = 0, max_freq_mhz = 0;
+   __DRIscreen *screen = brw->screen->driScrnPriv;
 
if (!read_sysfs_drm_device_file_uint64(brw, sysfs_dev_dir,
   "gt_min_freq_mhz",
@@ -1826,6 +1827,8 @@ init_oa_sys_vars(struct brw_context *brw, const char 
*sysfs_dev_dir)
brw->perfquery.sys_vars.gt_min_freq = min_freq_mhz * 100;
brw->perfquery.sys_vars.gt_max_freq = max_freq_mhz * 100;
brw->perfquery.sys_vars.timestamp_frequency = devinfo->timestamp_frequency;
+
+   brw->perfquery.sys_vars.revision = intel_device_get_revision(screen->fd);
brw->perfquery.sys_vars.n_eu_slices = devinfo->num_slices;
/* Assuming uniform distribution of subslices per slices. */
brw->perfquery.sys_vars.n_eu_sub_slices = devinfo->num_subslices[0];
@@ -1848,7 +1851,6 @@ init_oa_sys_vars(struct brw_context *brw, const char 
*sysfs_dev_dir)
   } else
  unreachable("not reached");
} else {
-  __DRIscreen *screen = brw->screen->driScrnPriv;
   drm_i915_getparam_t gp;
   int ret;
   int slice_mask = 0;
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index f98475997d3..3d617f99436 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -2052,14 +2052,9 @@ set_max_gl_versions(struct intel_screen *screen)
 /**
  * Return the revision (generally the revid field of the PCI header) of the
  * graphics device.
- *
- * XXX: This function is useful to keep around even if it is not currently in
- * use. It is necessary for new platforms and revision specific workarounds or
- * features. Please don't remove it so that we know it at least continues to
- * build.
  */
-static __attribute__((__unused__)) int
-brw_get_revision(int fd)
+int
+intel_device_get_revision(int fd)
 {
struct drm_i915_getparam gp;
int revision;
diff --git a/src/mesa/drivers/dri/i965/intel_screen.h 
b/src/mesa/drivers/dri/i965/intel_screen.h
index 41e1dbdd4e9..7948617b7f0 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.h
+++ b/src/mesa/drivers/dri/i965/intel_screen.h
@@ -136,6 +136,9 @@ double get_time(void);
 const int*
 intel_supported_msaa_modes(const struct intel_screen  *screen);
 
+int
+intel_device_get_revision(int fd);
+
 static inline bool
 can_do_pipelined_register_writes(const struct intel_screen *screen)
 {
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/5] i965: perf: factorize code for availability

2017-08-29 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
---
 src/mesa/drivers/dri/i965/brw_oa.py | 28 
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_oa.py 
b/src/mesa/drivers/dri/i965/brw_oa.py
index 91f7ecb5731..8c35923462f 100644
--- a/src/mesa/drivers/dri/i965/brw_oa.py
+++ b/src/mesa/drivers/dri/i965/brw_oa.py
@@ -325,6 +325,21 @@ semantic_type_map = {
 "ratio": "event"
 }
 
+def output_availability(set, availability, counter_name):
+expression = splice_rpn_expression(set, counter_name, availability)
+lines = expression.split(' && ')
+n_lines = len(lines)
+if n_lines == 1:
+c("if (" + lines[0] + ") {")
+else:
+c("if (" + lines[0] + " &&")
+c_indent(4)
+for i in range(1, (n_lines - 1)):
+c(lines[i] + " &&")
+c(lines[(n_lines - 1)] + ") {")
+c_outdent(4)
+
+
 def output_counter_report(set, counter, current_offset):
 data_type = counter.get('data_type')
 data_type_uc = data_type.upper()
@@ -343,18 +358,7 @@ def output_counter_report(set, counter, current_offset):
 
 availability = counter.get('availability')
 if availability:
-expression = splice_rpn_expression(set, counter, availability)
-lines = expression.split(' && ')
-n_lines = len(lines)
-if n_lines == 1:
-c("if (" + lines[0] + ") {")
-else:
-c("if (" + lines[0] + " &&")
-c_indent(4)
-for i in range(1, (n_lines - 1)):
-c(lines[i] + " &&")
-c(lines[(n_lines - 1)] + ") {")
-c_outdent(4)
+output_availability(set, availability, counter.get('name'))
 c_indent(3)
 
 c("counter = &query->counters[query->n_counters++];\n")
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/5] i965: add support for loadable OA configs

2017-08-29 Thread Lionel Landwerlin
Hi,

Some recent work in the i915 kernel driver enables us to load OA
configs from userspace, so new configs can be deployed without having
to modify the kernel. This series makes use of this feature in i965 if
available.

Cheers,

Lionel Landwerlin (5):
  i965: perf: make revision variable available
  i965: perf: factorize code for availability
  i965: perf: list registers to program for queries
  i965: perf: add support for userspace configurations
  i965: add a debug option to disable oa config loading

 src/intel/common/gen_debug.c  |   1 +
 src/intel/common/gen_debug.h  |   2 +-
 src/mesa/drivers/dri/i965/brw_context.h   |  16 
 src/mesa/drivers/dri/i965/brw_oa.py   |  80 ++---
 src/mesa/drivers/dri/i965/brw_performance_query.c | 102 +-
 src/mesa/drivers/dri/i965/intel_screen.c  |   9 +-
 src/mesa/drivers/dri/i965/intel_screen.h  |   3 +
 7 files changed, 191 insertions(+), 22 deletions(-)

--
2.14.1
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >