from:"Rob Clark"

[Mesa-dev] [PATCH 3/6] gallium/u_blitter: split out a helper for common clear state

2016-08-13 Thread Rob Clark

Signed-off-by: Rob Clark <robdcl...@gmail.com>
---
 src/gallium/auxiliary/util/u_blitter.c | 38 ++
 src/gallium/auxiliary/util/u_blitter.h |  5 +
 2 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_blitter.c 
b/src/gallium/auxiliary/util/u_blitter.c
index 9fbef9b..e008100 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -1268,19 +1268,13 @@ static void *get_clear_blend_state(struct 
blitter_context_priv *ctx,
return ctx->blend_clear[index];
 }
 
-static void util_blitter_clear_custom(struct blitter_context *blitter,
-  unsigned width, unsigned height,
-  unsigned num_layers,
-  unsigned clear_buffers,
-  const union pipe_color_union *color,
-  double depth, unsigned stencil,
-  void *custom_blend, void *custom_dsa)
+void util_blitter_common_clear_setup(struct blitter_context *blitter,
+ unsigned width, unsigned height,
+ unsigned clear_buffers,
+ void *custom_blend, void *custom_dsa)
 {
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
struct pipe_context *pipe = ctx->base.pipe;
-   struct pipe_stencil_ref sr = { { 0 } };
-
-   assert(ctx->has_layered || num_layers <= 1);
 
util_blitter_set_running_flag(blitter);
blitter_check_saved_vertex_states(ctx);
@@ -1306,14 +1300,32 @@ static void util_blitter_clear_custom(struct 
blitter_context *blitter,
   pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
}
 
+   pipe->set_sample_mask(pipe, ~0);
+   blitter_set_dst_dimensions(ctx, width, height);
+}
+
+static void util_blitter_clear_custom(struct blitter_context *blitter,
+  unsigned width, unsigned height,
+  unsigned num_layers,
+  unsigned clear_buffers,
+  const union pipe_color_union *color,
+  double depth, unsigned stencil,
+  void *custom_blend, void *custom_dsa)
+{
+   struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
+   struct pipe_context *pipe = ctx->base.pipe;
+   struct pipe_stencil_ref sr = { { 0 } };
+
+   assert(ctx->has_layered || num_layers <= 1);
+
+   util_blitter_common_clear_setup(blitter, width, height, clear_buffers,
+   custom_blend, custom_dsa);
+
sr.ref_value[0] = stencil & 0xff;
pipe->set_stencil_ref(pipe, &sr);
 
pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
bind_fs_write_all_cbufs(ctx);
-   pipe->set_sample_mask(pipe, ~0);
-
-   blitter_set_dst_dimensions(ctx, width, height);
 
if (num_layers > 1 && ctx->has_layered) {
   blitter_set_common_draw_rect_state(ctx, FALSE, TRUE);
diff --git a/src/gallium/auxiliary/util/u_blitter.h 
b/src/gallium/auxiliary/util/u_blitter.h
index 0f5da6b..d7d9f4a 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -542,6 +542,11 @@ util_blitter_save_render_condition(struct blitter_context 
*blitter,
blitter->saved_render_cond_cond = condition;
 }
 
+void util_blitter_common_clear_setup(struct blitter_context *blitter,
+ unsigned width, unsigned height,
+ unsigned clear_buffers,
+ void *custom_blend, void *custom_dsa);
+
 void util_blitter_set_running_flag(struct blitter_context *blitter);
 void util_blitter_unset_running_flag(struct blitter_context *blitter);
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/6] freedreno: support for using generic clear path

2016-08-13 Thread Rob Clark

Since clears are more or less just normal draws, there isn't that much
benefit in having a hand-rolled clear path.  Add support to use u_blitter
instead if the gen-specific backend doesn't implement ctx->clear().

Signed-off-by: Rob Clark <robdcl...@gmail.com>
---
 src/gallium/drivers/freedreno/freedreno_draw.c | 62 ++
 src/gallium/drivers/freedreno/freedreno_resource.c | 20 +++
 src/gallium/drivers/freedreno/freedreno_resource.h |  4 ++
 3 files changed, 76 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c 
b/src/gallium/drivers/freedreno/freedreno_draw.c
index 715ad21..2570d4d 100644
--- a/src/gallium/drivers/freedreno/freedreno_draw.c
+++ b/src/gallium/drivers/freedreno/freedreno_draw.c
@@ -203,6 +203,60 @@ fd_draw_vbo(struct pipe_context *pctx, const struct 
pipe_draw_info *info)
fd_batch_check_size(batch);
 }
 
+/* Generic clear implementation (partially) using u_blitter: */
+static void
+fd_blitter_clear(struct pipe_context *pctx, unsigned buffers,
+   const union pipe_color_union *color, double depth, unsigned 
stencil)
+{
+   struct fd_context *ctx = fd_context(pctx);
+   struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
+   struct blitter_context *blitter = ctx->blitter;
+
+   fd_blitter_pipe_begin(ctx, false, true, FD_STAGE_CLEAR);
+
+   util_blitter_common_clear_setup(blitter, pfb->width, pfb->height,
+   buffers, NULL, NULL);
+
+   struct pipe_stencil_ref sr = {
+   .ref_value = { stencil & 0xff }
+   };
+   pctx->set_stencil_ref(pctx, &sr);
+
+   struct pipe_constant_buffer cb = {
+   .buffer_size = 16,
+   .user_buffer = &color->ui,
+   };
+   pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, &cb);
+
+   struct pipe_viewport_state vp = {
+   .scale = { 0.5f * pfb->width, -0.5f * pfb->height, depth },
+   .translate = { 0.5f * pfb->width,  0.5f * pfb->height, 0.0f },
+   };
+   pctx->set_viewport_states(pctx, 0, 1, &vp);
+
+   pctx->bind_vertex_elements_state(pctx, ctx->solid_vbuf_state.vtx);
+   pctx->set_vertex_buffers(pctx, blitter->vb_slot, 1,
+   &ctx->solid_vbuf_state.vertexbuf.vb[0]);
+   pctx->set_stream_output_targets(pctx, 0, NULL, NULL);
+   pctx->bind_vs_state(pctx, ctx->solid_prog.vp);
+   pctx->bind_fs_state(pctx, ctx->solid_prog.fp);
+
+   struct pipe_draw_info info = {
+   .mode = PIPE_PRIM_MAX,/* maps to DI_PT_RECTLIST */
+   .count = 2,
+   .instance_count = 1,
+   };
+   ctx->draw_vbo(ctx, &info);
+
+   util_blitter_restore_constant_buffer_state(blitter);
+   util_blitter_restore_vertex_states(blitter);
+   util_blitter_restore_fragment_states(blitter);
+   util_blitter_restore_render_cond(blitter);
+   util_blitter_unset_running_flag(blitter);
+
+   fd_blitter_pipe_end(ctx);
+}
+
 /* TODO figure out how to make better use of existing state mechanism
  * for clear (and possibly gmem->mem / mem->gmem) so we can (a) keep
  * track of what state really actually changes, and (b) reduce the code
@@ -274,6 +328,14 @@ fd_clear(struct pipe_context *pctx, unsigned buffers,
util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
util_format_short_name(pipe_surface_format(pfb->zsbuf)));
 
+   /* if per-gen backend doesn't implement ctx->clear(), fall back
+* to the generic blitter clear:
+*/
+   if (!ctx->clear) {
+   fd_blitter_clear(pctx, buffers, color, depth, stencil);
+   return;
+   }
+
fd_hw_query_set_stage(batch, batch->draw, FD_STAGE_CLEAR);
 
ctx->clear(ctx, buffers, color, depth, stencil);
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c 
b/src/gallium/drivers/freedreno/freedreno_resource.c
index 9ac9550..2a4fd74 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -107,16 +107,13 @@ realloc_bo(struct fd_resource *rsc, uint32_t size)
fd_bc_invalidate_resource(rsc, true);
 }
 
-static void fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, 
bool discard);
-static void fd_blitter_pipe_end(struct fd_context *ctx);
-
 static void
 do_blit(struct fd_context *ctx, const struct pipe_blit_info *blit, bool 
fallback)
 {
/* TODO size threshold too?? */
if ((blit->src.resource->target != PIPE_BUFFER) && !fallback) {
/* do blit on gpu: */
-   fd_blitter_pipe_begin(ctx, false, true);
+   fd_blitter_pipe_begin(ctx, false, true, FD_STAGE_BLIT);
util_blitter_blit(ctx->blitter, blit);
fd_blitter_pipe_end(ctx);
} else {
@@ -939,7 +936,7 @@ fd_blitter_pipe_co

[Mesa-dev] [PATCH 0/6] Custom u_blitter clear

2016-08-13 Thread Rob Clark

So, for adreno, clears are basically just normal draws.  So I've been
meaning to get rid of a bunch of generation specific hand-rolled clear
draw code.  I got more motivated when I realized a bunch more stuff
would need to be fixed up to make MSAA work if I kept the hand-rolled
clear path.

The problem with using u_blitter's clear path as-is is that it is
measurably slower on adreno.  We *really* want to use a uniform for
the clear color and pre-baked VBOs.  Versus passing everything thru
the VS.

I thought about changing u_blitter (but apparently that might be
slower on other hw), or making u_blitter support both cases.  But
instead went the route of exposing more of the u_blitter internal
APIs so they could be re-used from driver specific blit code.  This
lets me re-use the shaders and pre-baked VBOs that I also use for
GMEM->MEM blits (which could not reasonably re-use a generic
u_blitter path).

So I end up re-using u_blitter's state restore and some of the common
clear state setup, but doing the VBO/shader/etc state setup myself in
terms of the pipe_context API (so that it is generation independent).

Rob Clark (6):
  gallium/u_blitter: export some functions
  gallium/u_blitter: add helper to save FS const buffer state
  gallium/u_blitter: split out a helper for common clear state
  freedreno: support for using generic clear path
  freedreno/a3xx: use generic clear path
  freedreno/a4xx: use generic clear path

 src/gallium/auxiliary/util/u_blitter.c | 193 ++
 src/gallium/auxiliary/util/u_blitter.h |  29 +++
 src/gallium/drivers/freedreno/a3xx/fd3_context.c   |   3 +-
 src/gallium/drivers/freedreno/a3xx/fd3_draw.c  | 201 +--
 src/gallium/drivers/freedreno/a4xx/fd4_context.c   |   3 +-
 src/gallium/drivers/freedreno/a4xx/fd4_draw.c  | 216 +
 src/gallium/drivers/freedreno/freedreno_draw.c |  62 ++
 src/gallium/drivers/freedreno/freedreno_resource.c |  20 +-
 src/gallium/drivers/freedreno/freedreno_resource.h |   4 +
 9 files changed, 223 insertions(+), 508 deletions(-)

-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/6] gallium/u_blitter: add helper to save FS const buffer state

2016-08-13 Thread Rob Clark

Not (currently) state that is overridden by u_blitter itself, but
drivers with custom blit/clear which are reusing part of the u_blitter
infrastructure will use it.

Signed-off-by: Rob Clark <robdcl...@gmail.com>
---
 src/gallium/auxiliary/util/u_blitter.c | 10 ++
 src/gallium/auxiliary/util/u_blitter.h | 15 +++
 2 files changed, 25 insertions(+)

diff --git a/src/gallium/auxiliary/util/u_blitter.c 
b/src/gallium/auxiliary/util/u_blitter.c
index 8e5ed45..9fbef9b 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -283,6 +283,7 @@ struct blitter_context *util_blitter_create(struct 
pipe_context *pipe)
   ctx->rs_discard_state = pipe->create_rasterizer_state(pipe, &rs_state);
}
 
+   ctx->base.cb_slot = 0; /* 0 for now */
ctx->base.vb_slot = 0; /* 0 for now */
 
/* vertex elements states */
@@ -698,6 +699,15 @@ void util_blitter_restore_textures(struct blitter_context 
*blitter)
ctx->base.saved_num_sampler_views = ~0;
 }
 
+void util_blitter_restore_constant_buffer_state(struct blitter_context 
*blitter)
+{
+   struct pipe_context *pipe = blitter->pipe;
+
+   pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, blitter->cb_slot,
+&blitter->saved_fs_constant_buffer);
+   pipe_resource_reference(&blitter->saved_fs_constant_buffer.buffer, NULL);
+}
+
 static void blitter_set_rectangle(struct blitter_context_priv *ctx,
   int x1, int y1, int x2, int y2,
   float depth)
diff --git a/src/gallium/auxiliary/util/u_blitter.h 
b/src/gallium/auxiliary/util/u_blitter.h
index 06e21e6..0f5da6b 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -111,6 +111,9 @@ struct blitter_context
unsigned saved_num_sampler_views;
struct pipe_sampler_view *saved_sampler_views[PIPE_MAX_SAMPLERS];
 
+   unsigned cb_slot;
+   struct pipe_constant_buffer saved_fs_constant_buffer;
+
unsigned vb_slot;
struct pipe_vertex_buffer saved_vertex_buffer;
 
@@ -486,6 +489,17 @@ util_blitter_save_fragment_sampler_views(struct 
blitter_context *blitter,
 }
 
 static inline void
+util_blitter_save_fragment_constant_buffer_slot(
+  struct blitter_context *blitter,
+  struct pipe_constant_buffer *constant_buffers)
+{
+   pipe_resource_reference(&blitter->saved_fs_constant_buffer.buffer,
+   constant_buffers[blitter->cb_slot].buffer);
+   memcpy(&blitter->saved_fs_constant_buffer, 
&constant_buffers[blitter->cb_slot],
+  sizeof(struct pipe_constant_buffer));
+}
+
+static inline void
 util_blitter_save_vertex_buffer_slot(struct blitter_context *blitter,
  struct pipe_vertex_buffer *vertex_buffers)
 {
@@ -536,6 +550,7 @@ void util_blitter_restore_fragment_states(struct 
blitter_context *blitter);
 void util_blitter_restore_render_cond(struct blitter_context *blitter);
 void util_blitter_restore_fb_state(struct blitter_context *blitter);
 void util_blitter_restore_textures(struct blitter_context *blitter);
+void util_blitter_restore_constant_buffer_state(struct blitter_context 
*blitter);
 
 #ifdef __cplusplus
 }
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/6] gallium/u_blitter: export some functions

2016-08-13 Thread Rob Clark

Signed-off-by: Rob Clark <robdcl...@gmail.com>
---
 src/gallium/auxiliary/util/u_blitter.c | 145 +
 src/gallium/auxiliary/util/u_blitter.h |   9 ++
 2 files changed, 84 insertions(+), 70 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_blitter.c 
b/src/gallium/auxiliary/util/u_blitter.c
index d06b3a8..8e5ed45 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -510,26 +510,26 @@ void util_blitter_set_texture_multisample(struct 
blitter_context *blitter,
ctx->has_texture_multisample = supported;
 }
 
-static void blitter_set_running_flag(struct blitter_context_priv *ctx)
+void util_blitter_set_running_flag(struct blitter_context *blitter)
 {
-   if (ctx->base.running) {
+   if (blitter->running) {
   _debug_printf("u_blitter:%i: Caught recursion. This is a driver bug.\n",
 __LINE__);
}
-   ctx->base.running = TRUE;
+   blitter->running = TRUE;
 
-   ctx->base.pipe->set_active_query_state(ctx->base.pipe, false);
+   blitter->pipe->set_active_query_state(blitter->pipe, false);
 }
 
-static void blitter_unset_running_flag(struct blitter_context_priv *ctx)
+void util_blitter_unset_running_flag(struct blitter_context *blitter)
 {
-   if (!ctx->base.running) {
+   if (!blitter->running) {
   _debug_printf("u_blitter:%i: Caught recursion. This is a driver bug.\n",
 __LINE__);
}
-   ctx->base.running = FALSE;
+   blitter->running = FALSE;
 
-   ctx->base.pipe->set_active_query_state(ctx->base.pipe, true);
+   blitter->pipe->set_active_query_state(blitter->pipe, true);
 }
 
 static void blitter_check_saved_vertex_states(struct blitter_context_priv *ctx)
@@ -543,8 +543,9 @@ static void blitter_check_saved_vertex_states(struct 
blitter_context_priv *ctx)
assert(ctx->base.saved_rs_state != INVALID_PTR);
 }
 
-static void blitter_restore_vertex_states(struct blitter_context_priv *ctx)
+void util_blitter_restore_vertex_states(struct blitter_context *blitter)
 {
+   struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
struct pipe_context *pipe = ctx->base.pipe;
unsigned i;
 
@@ -601,8 +602,9 @@ static void blitter_check_saved_fragment_states(struct 
blitter_context_priv *ctx
assert(ctx->base.saved_blend_state != INVALID_PTR);
 }
 
-static void blitter_restore_fragment_states(struct blitter_context_priv *ctx)
+void util_blitter_restore_fragment_states(struct blitter_context *blitter)
 {
+   struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
struct pipe_context *pipe = ctx->base.pipe;
 
/* Fragment shader. */
@@ -644,8 +646,9 @@ static void blitter_disable_render_cond(struct 
blitter_context_priv *ctx)
}
 }
 
-static void blitter_restore_render_cond(struct blitter_context_priv *ctx)
+void util_blitter_restore_render_cond(struct blitter_context *blitter)
 {
+   struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
struct pipe_context *pipe = ctx->base.pipe;
 
if (ctx->base.saved_render_cond_query) {
@@ -656,8 +659,9 @@ static void blitter_restore_render_cond(struct 
blitter_context_priv *ctx)
}
 }
 
-static void blitter_restore_fb_state(struct blitter_context_priv *ctx)
+void util_blitter_restore_fb_state(struct blitter_context *blitter)
 {
+   struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
struct pipe_context *pipe = ctx->base.pipe;
 
pipe->set_framebuffer_state(pipe, &ctx->base.saved_fb_state);
@@ -670,8 +674,9 @@ static void blitter_check_saved_textures(struct 
blitter_context_priv *ctx)
assert(ctx->base.saved_num_sampler_views != ~0u);
 }
 
-static void blitter_restore_textures(struct blitter_context_priv *ctx)
+void util_blitter_restore_textures(struct blitter_context *blitter)
 {
+   struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
struct pipe_context *pipe = ctx->base.pipe;
unsigned i;
 
@@ -1267,7 +1272,7 @@ static void util_blitter_clear_custom(struct 
blitter_context *blitter,
 
assert(ctx->has_layered || num_layers <= 1);
 
-   blitter_set_running_flag(ctx);
+   util_blitter_set_running_flag(blitter);
blitter_check_saved_vertex_states(ctx);
blitter_check_saved_fragment_states(ctx);
blitter_disable_render_cond(ctx);
@@ -1311,10 +1316,10 @@ static void util_blitter_clear_custom(struct 
blitter_context *blitter,
   UTIL_BLITTER_ATTRIB_COLOR, color);
}
 
-   blitter_restore_vertex_states(ctx);
-   blitter_restore_fragment_states(ctx);
-   blitter_restore_render_cond(ctx);
-   blitter_unset_running_flag(ctx);
+   util_blitter_restore_vertex_states(blitter);
+   util_blitter_restore_fragment_states(blitter);
+   util_blitter_restore_render_cond(blitter);
+   util_blitter_unset_running_flag(blitter);
 }
 
 void util_blit

Re: [Mesa-dev] cairo as state tracker

2016-08-09 Thread Rob Clark

On Tue, Aug 9, 2016 at 11:11 AM, Enrico Weigelt, metux IT consult
 wrote:
> On 07.08.2016 12:50, Marek Olšák wrote:
>
>> It would mainly be a futile task if it had to compete with their
>> official Mesa driver.
>
> Not quite. Would give us all of gallium's capabilities also for
> the intel chips, for example having lots of different state trackers.
> (coming back to my original intention of cairo as a gallium st)
>

If you don't realize the complexity of a gpu driver, or the 100's of
thousands of hours that have gone into i965, it's easy to say 'let's
throw that all away and start from scratch with a gallium driver' ;-)

There is ilo.. I suppose if someone cared enough they could add NIR
support and figure out how to share the compiler back-end with i965,
so it wouldn't be *completely* starting from scratch.  But there is
still a big delta between ilo and i965 in terms of features and
supported hw gen's.

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Moving amdgpu/addrlib into a git submodule

2016-08-09 Thread Rob Clark

On Tue, Aug 9, 2016 at 9:47 AM, Nicolai Hähnle  wrote:
> Hi everybody,
>
> addrlib is the addressing and alignment calculator which is used by
> radeonsi. It's developed (and also used) internally at AMD, and so far we've
> had one open source copy living in the Mesa repository at
> src/gallium/winsys/amdgpu/drm/addrlib.
>
> The question of using addrlib in non-Mesa parts of our open-source stack has
> come up, in particular in relation to compute. We'd obviously like to share
> the code rather than having multiple copies flying around. Since the
> interface of addrlib is slow-moving but unstable, shared linking is not an
> option.
>
> I think the best way forward is to create a dedicated repository for addrlib
> which is then integrated into Mesa as a git submodule.
>
> The point of this email is to gather feedback from the Mesa community on
> this plan, which is explicitly:
>
> (0) Create an addrlib repository, say amd/addrlib on fd.o.
> (1) Add it as a git submodule to the Mesa repository.
> (2) Fix up whatever aspects of the build system that may be affected
> (perhaps for building source tarballs).
> (3) Go back to mostly ignoring addrlib, except for trying to get better at
> syncing with the internal closed-source version.
>
> From initial experiments, the impact on users interested in radeon is that
> they will have to run `git submodule init` and then occasionally `git
> submodule update`. Users who do not build radeonsi should be able to ignore
> the change completely.

tbh, git submodules are more annoying than they need to be, and I'm
not really terribly excited to use that for something that is a build
dependency.  Maybe just move it into libdrm instead?

BR,
-R

> There are alternatives. For example, ROCm uses Google's repo tool already.
> But for Mesa, git submodule looks like a lightweight, well supported and
> overall conservative option that everybody should already have installed. If
> there are good arguments for something else, let's hear them!
>
> Another point: if we proceed with this plan, I think we should consider
> moving addrlib into src/amd/addrlib. There are two reasons: First,
> transitioning to a submodule *without* changing the directory is probably
> more fragile, i.e. what happens when you switch between checkouts before and
> after the transition. Second, if/when radv ends up being merged into Mesa
> master, it makes sense to have addrlib there anyway.
>
> Thoughts? Complaints? Praise?
> Nicolai
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] dri: use 'bool' instead 'int' for boolean variables

2016-08-08 Thread Rob Clark

how strict are we on that actually?  I think whenever we add new gl
entry points (ie. implementing new extension, etc) we kinda sorta
break ABI, don't we?  I thought mixing/matching libGL vs *_dri.so was
at least not recommended..

BR,
-R

On Mon, Aug 8, 2016 at 12:22 PM, Ian Romanick  wrote:
> I'm pretty sure this breaks ABI.  Did you try using an unpatched libGL
> with a patched *_dri.so (and vice-versa)?
>
> On 08/01/2016 06:21 AM, Jan Ziak wrote:
>> Signed-off-by: Jan Ziak (http://atom-symbol.net) <0xe2.0x9a.0...@gmail.com>
>> ---
>>  src/egl/drivers/dri2/egl_dri2.h   | 19 ++-
>>  src/gallium/auxiliary/vl/vl_winsys_dri3.c |  3 ++-
>>  src/glx/dri3_priv.h   |  5 +++--
>>  src/loader/loader.c   |  7 ---
>>  src/loader/loader.h   |  4 +++-
>>  5 files changed, 22 insertions(+), 16 deletions(-)
>>
>> diff --git a/src/egl/drivers/dri2/egl_dri2.h 
>> b/src/egl/drivers/dri2/egl_dri2.h
>> index 4577875..e406443 100644
>> --- a/src/egl/drivers/dri2/egl_dri2.h
>> +++ b/src/egl/drivers/dri2/egl_dri2.h
>> @@ -28,6 +28,7 @@
>>  #ifndef EGL_DRI2_INCLUDED
>>  #define EGL_DRI2_INCLUDED
>>
>> +#include <stdbool.h>
>>  #include 
>>
>>  #ifdef HAVE_X11_PLATFORM
>> @@ -160,7 +161,7 @@ struct dri2_egl_display
>> int   dri2_major;
>> int   dri2_minor;
>> __DRIscreen  *dri_screen;
>> -   int   own_dri_screen;
>> +   bool  own_dri_screen;
>> const __DRIconfig   **driver_configs;
>> void *driver;
>> const __DRIcoreExtension   *core;
>> @@ -181,8 +182,8 @@ struct dri2_egl_display
>>  * dri2_make_current (tracks if there are active contexts/surfaces). */
>> int   ref_count;
>>
>> -   int   own_device;
>> -   int   invalidate_available;
>> +   bool  own_device;
>> +   bool  invalidate_available;
>> int   min_swap_interval;
>> int   max_swap_interval;
>> int   default_swap_interval;
>> @@ -201,7 +202,7 @@ struct dri2_egl_display
>>  #ifdef HAVE_X11_PLATFORM
>> xcb_connection_t *conn;
>> int  screen;
>> -   int  swap_available;
>> +   bool swap_available;
>>  #ifdef HAVE_DRI3
>> struct loader_dri3_extensions loader_dri3_ext;
>>  #endif
>> @@ -214,13 +215,13 @@ struct dri2_egl_display
>> struct wl_drm*wl_drm;
>> struct wl_shm*wl_shm;
>> struct wl_event_queue*wl_queue;
>> -   intauthenticated;
>> +   bool  authenticated;
>> intformats;
>> uint32_t  capabilities;
>>  #endif
>>
>> -   int  is_render_node;
>> -   intis_different_gpu;
>> +   bool is_render_node;
>> +   bool is_different_gpu;
>>  };
>>
>>  struct dri2_egl_context
>> @@ -244,7 +245,7 @@ struct dri2_egl_surface
>> __DRIdrawable   *dri_drawable;
>> __DRIbuffer  buffers[5];
>> int  buffer_count;
>> -   int  have_fake_front;
>> +   bool have_fake_front;
>>
>>  #ifdef HAVE_X11_PLATFORM
>> xcb_drawable_t   drawable;
>> @@ -282,7 +283,7 @@ struct dri2_egl_surface
>>  #ifdef HAVE_DRM_PLATFORM
>>struct gbm_bo   *bo;
>>  #endif
>> -  int locked;
>> +  boollocked;
>>int age;
>> } color_buffers[4], *back, *current;
>>  #endif
>> diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri3.c 
>> b/src/gallium/auxiliary/vl/vl_winsys_dri3.c
>> index 493e645..7cb6c18 100644
>> --- a/src/gallium/auxiliary/vl/vl_winsys_dri3.c
>> +++ b/src/gallium/auxiliary/vl/vl_winsys_dri3.c
>> @@ -25,6 +25,7 @@
>>   *
>>   **/
>>
>> +#include <stdbool.h>
>>  #include 
>>
>>  #include 
>> @@ -627,7 +628,7 @@ vl_dri3_screen_create(Display *display, int screen)
>> xcb_dri3_open_reply_t *open_reply;
>> xcb_get_geometry_cookie_t geom_cookie;
>> xcb_get_geometry_reply_t *geom_reply;
>> -   int is_different_gpu;
>> +   bool is_different_gpu;
>> int fd;
>>
>> assert(display);
>> diff --git a/src/glx/dri3_priv.h b/src/glx/dri3_priv.h
>> index 0822377..bfb0786 100644
>> --- a/src/glx/dri3_priv.h
>> +++ b/src/glx/dri3_priv.h
>> @@ -54,6 +54,7 @@
>>   *   Kristian Høgsberg (k...@redhat.com)
>>   */
>>
>> +#include <stdbool.h>
>>  #include 
>>  #include 
>>  #include 
>> @@ -78,7 +79,7 @@ struct dri3_display
>> int dri3Minor;
>>
>> /* Present bits */
>> -   int hasPresent;
>> +   bool hasPresent;
>> int presentMajor;
>> int presentMinor;
>>  };
>> @@ -101,7 +102,7 @@ struct dri3_screen {
>>
>> void *driver;
>>

Re: [Mesa-dev] [PATCH 1/3] gallium: add a pipe_context parameter to fence_finish

2016-08-06 Thread Rob Clark

On Sat, Aug 6, 2016 at 11:32 AM, Marek Olšák  wrote:
> From: Marek Olšák 
>
> required by glClientWaitSync (GL 4.5 Core spec) that can optionally flush
> the context

Thanks, I've been meaning to add the same thing but haven't had time
to work on the native fd fence stuff in a bit..

mind squashing in http://hastebin.com/raw/cijubeduqe to avoid spurious
indentation change?

with that, r-b

BR,
-R

> ---
>  src/gallium/docs/source/context.rst  | 10 --
>  src/gallium/drivers/ddebug/dd_draw.c |  2 +-
>  src/gallium/drivers/ddebug/dd_screen.c   |  4 +++-
>  src/gallium/drivers/freedreno/freedreno_fence.c  |  5 +++--
>  src/gallium/drivers/freedreno/freedreno_fence.h  |  5 +++--
>  src/gallium/drivers/i915/i915_screen.c   |  1 +
>  src/gallium/drivers/ilo/ilo_screen.c |  1 +
>  src/gallium/drivers/llvmpipe/lp_flush.c  |  3 ++-
>  src/gallium/drivers/llvmpipe/lp_screen.c |  1 +
>  src/gallium/drivers/nouveau/nouveau_screen.c |  1 +
>  src/gallium/drivers/r300/r300_screen.c   |  1 +
>  src/gallium/drivers/radeon/r600_pipe_common.c|  1 +
>  src/gallium/drivers/radeon/r600_query.c  |  2 +-
>  src/gallium/drivers/rbug/rbug_screen.c   |  6 +++---
>  src/gallium/drivers/softpipe/sp_fence.c  |  1 +
>  src/gallium/drivers/softpipe/sp_flush.c  |  2 +-
>  src/gallium/drivers/svga/svga_context.c  |  4 ++--
>  src/gallium/drivers/svga/svga_screen.c   |  1 +
>  src/gallium/drivers/swr/swr_context.cpp  |  4 ++--
>  src/gallium/drivers/swr/swr_draw.cpp |  2 +-
>  src/gallium/drivers/swr/swr_fence.cpp|  1 +
>  src/gallium/drivers/swr/swr_fence.h  |  1 +
>  src/gallium/drivers/swr/swr_query.cpp|  4 ++--
>  src/gallium/drivers/swr/swr_screen.cpp   |  6 +++---
>  src/gallium/drivers/swr/swr_state.cpp|  2 +-
>  src/gallium/drivers/trace/tr_screen.c|  5 -
>  src/gallium/drivers/vc4/vc4_fence.c  |  1 +
>  src/gallium/drivers/virgl/virgl_screen.c |  1 +
>  src/gallium/include/pipe/p_screen.h  | 15 ---
>  src/gallium/state_trackers/clover/core/event.cpp |  4 ++--
>  src/gallium/state_trackers/dri/dri2.c|  6 +++---
>  src/gallium/state_trackers/dri/dri_drawable.c|  2 +-
>  src/gallium/state_trackers/glx/xlib/xm_api.c |  2 +-
>  src/gallium/state_trackers/nine/swapchain9.c |  6 +++---
>  src/gallium/state_trackers/vdpau/presentation.c  |  4 ++--
>  src/gallium/state_trackers/xa/xa_context.c   |  2 +-
>  src/gallium/state_trackers/xvmc/surface.c|  2 +-
>  src/mesa/state_tracker/st_cb_flush.c |  2 +-
>  src/mesa/state_tracker/st_cb_syncobj.c   |  4 ++--
>  39 files changed, 77 insertions(+), 50 deletions(-)
>
> diff --git a/src/gallium/docs/source/context.rst 
> b/src/gallium/docs/source/context.rst
> index e646ea0..af3d266 100644
> --- a/src/gallium/docs/source/context.rst
> +++ b/src/gallium/docs/source/context.rst
> @@ -459,26 +459,24 @@ processed even where they're known to be obscured.
>
>
>  Flushing
>  
>
>  ``flush``
>
>  PIPE_FLUSH_END_OF_FRAME: Whether the flush marks the end of frame.
>
>  PIPE_FLUSH_DEFERRED: It is not required to flush right away, but it is 
> required
> -to return a valid fence. The behavior of fence_finish or any other call isn't
> -changed. The only side effect can be that fence_finish will wait a little
> -longer. No guidance is given as to how drivers should implement fence_finish
> -with deferred flushes. If some drivers can't do deferred flushes safely, they
> -should just ignore the flag.
> -
> +to return a valid fence. If fence_finish is called with the returned fence
> +and the context is still unflushed, and the ctx parameter of fence_finish is
> +equal to the context where the fence was created, fence_finish will flush
> +the context.
>
>
>  ``flush_resource``
>
>  Flush the resource cache, so that the resource can be used
>  by an external client. Possible usage:
>  - flushing a resource before presenting it on the screen
>  - flushing a resource if some other process or device wants to use it
>  This shouldn't be used to flush caches if the resource is only managed
>  by a single pipe_screen and is not shared with another process.
> diff --git a/src/gallium/drivers/ddebug/dd_draw.c 
> b/src/gallium/drivers/ddebug/dd_draw.c
> index c1bfdaf..97325e4 100644
> --- a/src/gallium/drivers/ddebug/dd_draw.c
> +++ b/src/gallium/drivers/ddebug/dd_draw.c
> @@ -541,21 +541,21 @@ dd_flush_and_check_hang(struct dd_context *dctx,
> bool idle;
>
> assert(timeout_ms > 0);
>
> pipe->flush(pipe, &fence, flush_flags);
> if (flush_fence)
>screen->fence_reference(screen, flush_fence, fence);
> if (!fence)
>return false;
>
> -   idle = screen->fence_finish(screen, fence, timeout_ms * 100);
> +   idle =

Re: [Mesa-dev] [PATCH 1/3] freedreno/a2xx: fix comparison out of range warnings

2016-08-06 Thread Rob Clark

Thanks.. my expectation is that ~0 gets cast to the same enum on both
sides of the != so I *think* we should be good.

BR,
-R

On Sat, Aug 6, 2016 at 9:05 AM, Francesco Ansanelli  wrote:
> Hello,
>
> I did more tests... and I didn't experience any error.
> I attach last version of the script.
>
> Cheers,
> Francesco
>
> 2016-08-04 19:50 GMT+02:00 Francesco Ansanelli :
>>
>> Hi Eric,
>>
>> after your question:
>>
>> [snip]
>> I expect, with enums having varying size, that this cast will
>> not always work.
>> [snip]
>>
>> I started a script for testing with variable enum sizes and compilers...
>> If it makes sense to you, I'll try to loop from 1 to n (suggestions?)
>> and build with gcc and clang (and others?) to find the case you're talking
>> about.
>> I'm also thinking about a negative test...
>> What do you think?
>>
>> Cheers,
>> Francesco
>>
>> 2016-08-01 14:10 GMT+02:00 Eric Engestrom :
>>>
>>> On Sat, Jul 30, 2016 at 09:49:57AM +0200, Francesco Ansanelli wrote:
>>> > Signed-off-by: Francesco Ansanelli 
>>> > ---
>>> >  src/gallium/drivers/freedreno/a2xx/fd2_screen.c |8 
>>> >  1 file changed, 4 insertions(+), 4 deletions(-)
>>> >
>>> > diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_screen.c
>>> > b/src/gallium/drivers/freedreno/a2xx/fd2_screen.c
>>> > index fe4849b..007b9e6 100644
>>> > --- a/src/gallium/drivers/freedreno/a2xx/fd2_screen.c
>>> > +++ b/src/gallium/drivers/freedreno/a2xx/fd2_screen.c
>>> > @@ -61,7 +61,7 @@ fd2_screen_is_format_supported(struct pipe_screen
>>> > *pscreen,
>>> >
>>> >   if ((usage & (PIPE_BIND_SAMPLER_VIEW |
>>> >   PIPE_BIND_VERTEX_BUFFER)) &&
>>> > - (fd2_pipe2surface(format) != ~0u)) {
>>> > + (fd2_pipe2surface(format) != (enum
>>> > a2xx_sq_surfaceformat)~0)) {
>>>
>>> You said the compiler warning goes away, but is the condition guaranteed
>>> to hit? I expect, with enums having varying size, that this cast will
>>> not always work. I agree with Rob Herring, adding the error value to the
>>> enum is better.
>>>
>>> >   retval |= usage & (PIPE_BIND_SAMPLER_VIEW |
>>> >   PIPE_BIND_VERTEX_BUFFER);
>>> >   }
>>> > @@ -70,7 +70,7 @@ fd2_screen_is_format_supported(struct pipe_screen
>>> > *pscreen,
>>> >   PIPE_BIND_DISPLAY_TARGET |
>>> >   PIPE_BIND_SCANOUT |
>>> >   PIPE_BIND_SHARED)) &&
>>> > - (fd2_pipe2color(format) != ~0u)) {
>>> > + (fd2_pipe2color(format) != (enum
>>> > a2xx_colorformatx)~0)) {
>>> >   retval |= usage & (PIPE_BIND_RENDER_TARGET |
>>> >   PIPE_BIND_DISPLAY_TARGET |
>>> >   PIPE_BIND_SCANOUT |
>>> > @@ -78,12 +78,12 @@ fd2_screen_is_format_supported(struct pipe_screen
>>> > *pscreen,
>>> >   }
>>> >
>>> >   if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
>>> > - (fd_pipe2depth(format) != ~0u)) {
>>> > + (fd_pipe2depth(format) != (enum
>>> > adreno_rb_depth_format)~0)) {
>>> >   retval |= PIPE_BIND_DEPTH_STENCIL;
>>> >   }
>>> >
>>> >   if ((usage & PIPE_BIND_INDEX_BUFFER) &&
>>> > - (fd_pipe2index(format) != ~0u)) {
>>> > + (fd_pipe2index(format) != (enum
>>> > pc_di_index_size)~0)) {
>>> >   retval |= PIPE_BIND_INDEX_BUFFER;
>>> >   }
>>> >
>>> > --
>>> > 1.7.9.5
>>> >
>>> > ___
>>> > mesa-dev mailing list
>>> > mesa-dev@lists.freedesktop.org
>>> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>>
>
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] src: replace RTLD_NOW with RTLD_LAZY

2016-08-06 Thread Rob Clark

On Sat, Aug 6, 2016 at 4:28 AM, Enrico Weigelt, metux IT consult
 wrote:
> On 06.08.2016 04:05, ⚛ wrote:
>
> >> Question 2: Is there a reason for _not_ linking radeonsi_dri.so,
>> swrastg_dri.so, etc, directly to Mesa's libGL.so? The Gallium
>> *_dri.so libraries are the same inode on the filesystem.
>
> Sure about that ?
>
> nekrad@orion:~/MESA/lib/dri$ ls -lai
> total 508348
> 6750710 drwxrwxr-x 2 nekrad nekrad 4096 Aug  3 03:50 .
> 6750702 drwxrwxr-x 4 nekrad nekrad 4096 Aug  3 03:50 ..
> 6714176 -rwxr-xr-x 5 nekrad nekrad 51025670 Aug  3 03:50 i915_dri.so
> 6714176 -rwxr-xr-x 5 nekrad nekrad 51025670 Aug  3 03:50 i965_dri.so
> 6714275 -rwxr-xr-x 5 nekrad nekrad 53071749 Aug  3 03:50 kms_swrast_dri.so
> 6714176 -rwxr-xr-x 5 nekrad nekrad 51025670 Aug  3 03:50
> nouveau_vieux_dri.so
> 6714176 -rwxr-xr-x 5 nekrad nekrad 51025670 Aug  3 03:50 r200_dri.so
> 6714275 -rwxr-xr-x 5 nekrad nekrad 53071749 Aug  3 03:50 r300_dri.so
> 6714275 -rwxr-xr-x 5 nekrad nekrad 53071749 Aug  3 03:50 r600_dri.so
> 6714176 -rwxr-xr-x 5 nekrad nekrad 51025670 Aug  3 03:50 radeon_dri.so
> 6714275 -rwxr-xr-x 5 nekrad nekrad 53071749 Aug  3 03:50 swrast_dri.so
> 6714275 -rwxr-xr-x 5 nekrad nekrad 53071749 Aug  3 03:50 vmwgfx_dri.so
>
> I see only some of them are the same.
> By the way: why not using symlinks here ?

gallium vs classic drivers, I guess?

I don't remember the reason for hardlinks, but I seem to remember
there was one.  Emil would probably know.

BR,
-R

> (hardlinks can easily break up ...)
>
>> Question 3: Isn't the current status quo (i.e: not linking
>> radeonsi_dri.so directly to libGL.so) also a micro-optimization that
>> can hide certain bugs?
>
> Just curious: how much space would it add if drivers were linked
> into libGL.so ? (IOW: how much redundancy is there between the
> individual *_dri.so's ?)
>
> --mtx
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] src: replace RTLD_NOW with RTLD_LAZY

2016-08-06 Thread Rob Clark

On Sat, Aug 6, 2016 at 3:01 AM, Eric Anholt <e...@anholt.net> wrote:
> Rob Clark <robdcl...@gmail.com> writes:
>
>> On Fri, Aug 5, 2016 at 8:42 PM, Jan Ziak <0xe2.0x9a.0...@gmail.com> wrote:
>>> Mesa source code prior to this patch uses both RTLD_NOW and RTLD_LAZY.
>>> This patch removes all RTLD_NOW in favor of RTLD_LAZY.
>>>
>>> In comparison to early binding, lazy binding reduces CPU instruction count
>>> of small GL apps (e.g: glxinfo) by 6 million instructions.
>>> Larger apps won't notice the difference.
>>
>> tbh, I don't know the background of existing places that use RTLD_LAZY
>> instead of RTLD_NOW (but my experience w/ xserver using LAZY has not
>> been positive, so I think going the other direction seems like a good
>> idea).. But I'm not sure that optimizing for glxinfo is the best goal.
>> I know that at least for freedreno a lot of the startup time for small
>> real gl apps (ie. something that mostly matters for piglit runs) goes
>> to constructing regalloc interference graph..  maybe there is some way
>> to leverage what is being done for on-disk shader cache to cache some
>> of this up-front work and make a meaningful reduction in startup cost
>> for things that actually do a bit more than glxinfo.  (Plus speeding
>> up piglit runs is actually a real world benefit..)
>
> I do think that RTLD_LAZY makes sense, and there's no reason to waste
> the CPU time if we don't need it.  If nothing else, we all run a lot of
> piglit processes that all create contexts.  As far as "what if there are
> unresolved symbols or something?", I think if we have symbols not being
> covered by piglit even once, we've already lost.

well, for something like shader_runner, I wonder if there is some way
to tell what % of symbols actually get resolved?  Maybe it is lower
than I was expecting.

> For your regalloc, have you looked at i965's direct q value calculation
> in brw_fs_reg_allocate.cpp?  That might save you a ton of time.  That
> said, I was skimming a paper recently that seemed to be saying that if
> you can assume a not-completely-general set of register classes, you can
> do the equivalent of the pq test without the giant table.

I do actually compute the q values, like i965.  I do have more regs
(but have restricted things to fewer classes).  Oh, and a bunch of
half-precision regs too, but fewer classes there since I need to use
full precision for args to texture sample instructions so that removes
a couple permutations.

Anyways, I haven't looked at it for a while, but probably just comes
down to overhead being more noticeable on slower devices ;-)

I wouldn't mind having a look at that paper if you can find it again.

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] src: replace RTLD_NOW with RTLD_LAZY

2016-08-05 Thread Rob Clark

On Fri, Aug 5, 2016 at 8:42 PM, Jan Ziak <0xe2.0x9a.0...@gmail.com> wrote:
> Mesa source code prior to this patch uses both RTLD_NOW and RTLD_LAZY.
> This patch removes all RTLD_NOW in favor of RTLD_LAZY.
>
> In comparison to early binding, lazy binding reduces CPU instruction count
> of small GL apps (e.g: glxinfo) by 6 million instructions.
> Larger apps won't notice the difference.

tbh, I don't know the background of existing places that use RTLD_LAZY
instead of RTLD_NOW (but my experience w/ xserver using LAZY has not
been positive, so I think going the other direction seems like a good
idea).. But I'm not sure that optimizing for glxinfo is the best goal.
I know that at least for freedreno a lot of the startup time for small
real gl apps (ie. something that mostly matters for piglit runs) goes
to constructing regalloc interference graph..  maybe there is some way
to leverage what is being done for on-disk shader cache to cache some
of this up-front work and make a meaningful reduction in startup cost
for things that actually do a bit more than glxinfo.  (Plus speeding
up piglit runs is actually a real world benefit..)

BR,
-R


> Signed-off-by: Jan Ziak (http://atom-symbol.net) <0xe2.0x9a.0...@gmail.com>
> ---
>  src/egl/drivers/dri2/egl_dri2.c |  6 +++---
>  src/gbm/backends/dri/gbm_dri.c  |  6 +++---
>  src/glx/apple/apple_cgl.c   |  2 +-
>  src/glx/dri_common.c| 10 +-
>  4 files changed, 12 insertions(+), 12 deletions(-)
>
> diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
> index a5cab68..3c9f58e 100644
> --- a/src/egl/drivers/dri2/egl_dri2.c
> +++ b/src/egl/drivers/dri2/egl_dri2.c
> @@ -449,12 +449,12 @@ dri2_open_driver(_EGLDisplay *disp)
>  #if GLX_USE_TLS
>snprintf(path, sizeof path,
>"%.*s/tls/%s_dri.so", len, p, dri2_dpy->driver_name);
> -  dri2_dpy->driver = dlopen(path, RTLD_NOW | RTLD_GLOBAL);
> +  dri2_dpy->driver = dlopen(path, RTLD_LAZY | RTLD_GLOBAL);
>  #endif
>if (dri2_dpy->driver == NULL) {
>  snprintf(path, sizeof path,
>   "%.*s/%s_dri.so", len, p, dri2_dpy->driver_name);
> -dri2_dpy->driver = dlopen(path, RTLD_NOW | RTLD_GLOBAL);
> +dri2_dpy->driver = dlopen(path, RTLD_LAZY | RTLD_GLOBAL);
>  if (dri2_dpy->driver == NULL)
> _eglLog(_EGL_DEBUG, "failed to open %s: %s\n", path, dlerror());
>}
> @@ -464,7 +464,7 @@ dri2_open_driver(_EGLDisplay *disp)
>
>  #ifdef ANDROID
>snprintf(path, sizeof path, "%.*s/gallium_dri.so", len, p);
> -  dri2_dpy->driver = dlopen(path, RTLD_NOW | RTLD_GLOBAL);
> +  dri2_dpy->driver = dlopen(path, RTLD_LAZY | RTLD_GLOBAL);
>if (dri2_dpy->driver == NULL)
>   _eglLog(_EGL_DEBUG, "failed to open %s: %s\n", path, dlerror());
>else
> diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
> index c3626e3..a1921b7 100644
> --- a/src/gbm/backends/dri/gbm_dri.c
> +++ b/src/gbm/backends/dri/gbm_dri.c
> @@ -332,12 +332,12 @@ dri_open_driver(struct gbm_dri_device *dri)
>  #if GLX_USE_TLS
>snprintf(path, sizeof path,
> "%.*s/tls/%s_dri.so", len, p, dri->base.driver_name);
> -  dri->driver = dlopen(path, RTLD_NOW | RTLD_GLOBAL);
> +  dri->driver = dlopen(path, RTLD_LAZY| RTLD_GLOBAL);
>  #endif
>if (dri->driver == NULL) {
>   snprintf(path, sizeof path,
>"%.*s/%s_dri.so", len, p, dri->base.driver_name);
> - dri->driver = dlopen(path, RTLD_NOW | RTLD_GLOBAL);
> + dri->driver = dlopen(path, RTLD_LAZY | RTLD_GLOBAL);
>}
>/* not need continue to loop all paths once the driver is found */
>if (dri->driver != NULL)
> @@ -345,7 +345,7 @@ dri_open_driver(struct gbm_dri_device *dri)
>
>  #ifdef ANDROID
>snprintf(path, sizeof path, "%.*s/gallium_dri.so", len, p);
> -  dri->driver = dlopen(path, RTLD_NOW | RTLD_GLOBAL);
> +  dri->driver = dlopen(path, RTLD_LAZY | RTLD_GLOBAL);
>if (dri->driver == NULL)
>   sprintf("failed to open %s: %s\n", path, dlerror());
>else
> diff --git a/src/glx/apple/apple_cgl.c b/src/glx/apple/apple_cgl.c
> index 648ed86..a458d34 100644
> --- a/src/glx/apple/apple_cgl.c
> +++ b/src/glx/apple/apple_cgl.c
> @@ -75,7 +75,7 @@ apple_cgl_init(void)
> }
>
> (void) dlerror();/*drain dlerror */
> -   h = dlopen(opengl_framework_path, RTLD_NOW);
> +   h = dlopen(opengl_framework_path, RTLD_LAZY);
>
> if (NULL == h) {
>fprintf(stderr, "error: unable to dlopen %s : %s\n",
> diff --git a/src/glx/dri_common.c b/src/glx/dri_common.c
> index 6728d38..c7b7562 100644
> --- a/src/glx/dri_common.c
> +++ b/src/glx/dri_common.c
> @@ -42,8 +42,8 @@
>  #include "dri_common.h"
>  #include "loader.h"
>
> -#ifndef RTLD_NOW
> -#define RTLD_NOW 0
> +#ifndef RTLD_LAZY
> +#define RTLD_LAZY 0
>  #endif
>  #ifndef RTLD_GLOBAL
>  #define RTLD_GLOBAL 0
> @@ -103,7 +103,7 @@

Re: [Mesa-dev] dead code

2016-08-03 Thread Rob Clark

On Wed, Aug 3, 2016 at 2:48 PM, Rob Clark <robdcl...@gmail.com> wrote:
> On Wed, Aug 3, 2016 at 2:18 PM, Enrico Weigelt, metux IT consult
> <enrico.weig...@gr13.net> wrote:
>> On 03.08.2016 13:25, Rob Clark wrote:
>>
>>> Probably it would be on a case-by-case basis.  There are at least a
>>> few places with some useful debug code, ie. not the kind that you'd
>>> normally enable, but stuff you'd want if you were making changes in
>>> those areas..
>>
>> In those cases, shouldn't we instead introduce proper #define's ?
>
> At least in the cases I am thinking of, I'm not sure I really see the
> value in that..  it isn't really stuff I'd ask a user to turn on.  (If
> it came to that, I'd just ask the user to send me an apitrace so that
> I could debug, and possibly change some #if 0 to #if 1, and/or add
> other debug code in the process.)
>
> There is a DEBUG define that is enabled for --enable-debug builds, but
> most of the #if 0 debug code I've seen is stuff that is either too
> much overhead, or too much printf spam for debug builds.
>
> Anyways, I'm not saying there aren't some #if 0's that could be
> removed.. some look relatively trivial and easy enough to recreate.
> And/or useless.  I'm just saying we shouldn't blindly remove them all.
>
> And I wouldn't be surprised if there were some '#ifdef SOMETHING's
> that are actually worth removing.

just fyi, as far as clean-up tasks, fixing coverity[1] issues is a
good thing.. it does require requesting access, since I guess by
default the issues are not visible without being granted access (which
in more security sensitive projects, at least, makes sense)

[1] https://scan.coverity.com/projects/mesa
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] dead code

2016-08-03 Thread Rob Clark

On Wed, Aug 3, 2016 at 2:18 PM, Enrico Weigelt, metux IT consult
<enrico.weig...@gr13.net> wrote:
> On 03.08.2016 13:25, Rob Clark wrote:
>
>> Probably it would be on a case-by-case basis.  There are at least a
>> few places with some useful debug code, ie. not the kind that you'd
>> normally enable, but stuff you'd want if you were making changes in
>> those areas..
>
> In those cases, shouldn't we instead introduce proper #define's ?

At least in the cases I am thinking of, I'm not sure I really see the
value in that..  it isn't really stuff I'd ask a user to turn on.  (If
it came to that, I'd just ask the user to send me an apitrace so that
I could debug, and possibly change some #if 0 to #if 1, and/or add
other debug code in the process.)

There is a DEBUG define that is enabled for --enable-debug builds, but
most of the #if 0 debug code I've seen is stuff that is either too
much overhead, or too much printf spam for debug builds.

Anyways, I'm not saying there aren't some #if 0's that could be
removed.. some look relatively trivial and easy enough to recreate.
And/or useless.  I'm just saying we shouldn't blindly remove them all.

And I wouldn't be surprised if there were some '#ifdef SOMETHING's
that are actually worth removing.

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] dead code

2016-08-03 Thread Rob Clark

On Wed, Aug 3, 2016 at 5:00 AM, Enrico Weigelt, metux IT consult
 wrote:
> Hi folks,
>
>
> I've seen quite a lot of #if 0's - looks like dead code.
> Should we remove that ?
>

Probably it would be on a case-by-case basis.  There are at least a
few places with some useful debug code, ie. not the kind that you'd
normally enable, but stuff you'd want if you were making changes in
those areas..

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] ttn: Make FRAG_RESULT_DEPTH be a float variable to match gtn and ptn.

2016-08-01 Thread Rob Clark

On Mon, Aug 1, 2016 at 1:45 PM, Eric Anholt  wrote:
> This lets TTN-using drivers handle FRAG_RESULT_DEPTH the same between all
> their source paths.

lgtm.. and nice to lose an extra tgsi hack ;-)

r-b


> ---
>  src/gallium/auxiliary/nir/tgsi_to_nir.c  | 19 ---
>  src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c |  6 --
>  src/gallium/drivers/freedreno/ir3/ir3_shader.c   |  1 -
>  src/gallium/drivers/vc4/vc4_program.c|  2 +-
>  4 files changed, 17 insertions(+), 11 deletions(-)
>
> diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c 
> b/src/gallium/auxiliary/nir/tgsi_to_nir.c
> index 65eca6f9e864..3d80ef06f13d 100644
> --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
> +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
> @@ -1918,9 +1918,22 @@ ttn_add_output_stores(struct ttn_compile *c)
>   nir_intrinsic_instr *store =
>  nir_intrinsic_instr_create(b->shader, 
> nir_intrinsic_store_output);
>   unsigned loc = var->data.driver_location + i;
> - store->num_components = 4;
> - store->src[0].reg.reg = c->output_regs[loc].reg;
> - store->src[0].reg.base_offset = c->output_regs[loc].offset;
> +
> + nir_src src = nir_src_for_reg(c->output_regs[loc].reg);
> + src.reg.base_offset = c->output_regs[loc].offset;
> +
> + if (c->build.shader->stage == MESA_SHADER_FRAGMENT &&
> + var->data.location == FRAG_RESULT_DEPTH) {
> +/* TGSI uses TGSI_SEMANTIC_POSITION.z for the depth output, while
> + * NIR uses a single float FRAG_RESULT_DEPTH.
> + */
> +src = nir_src_for_ssa(nir_channel(b, nir_ssa_for_src(b, src, 4), 
> 2));
> +store->num_components = 1;
> + } else {
> +store->num_components = 4;
> + }
> + store->src[0] = src;
> +
>   nir_intrinsic_set_base(store, loc);
>   nir_intrinsic_set_write_mask(store, 0xf);
>   store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
> diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c 
> b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
> index 14d5e50f992a..083ba00c7099 100644
> --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
> +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
> @@ -2489,12 +2489,6 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
> /* fixup input/outputs: */
> for (i = 0; i < so->outputs_count; i++) {
> so->outputs[i].regid = ir->outputs[i*4]->regs[0]->num;
> -   /* preserve hack for depth output.. tgsi writes depth to .z,
> -* but what we give the hw is the scalar register:
> -*/
> -   if (so->shader->from_tgsi && (so->type == SHADER_FRAGMENT) &&
> -   (so->outputs[i].slot == FRAG_RESULT_DEPTH))
> -   so->outputs[i].regid += 2;
> }
>
> /* Note that some or all channels of an input may be unused: */
> diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c 
> b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
> index 87bdcf91a674..ac48132026c4 100644
> --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
> +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
> @@ -294,7 +294,6 @@ ir3_shader_create(struct ir3_compiler *compiler,
> tgsi_dump(cso->tokens, 0);
> }
> nir = ir3_tgsi_to_nir(cso->tokens);
> -   shader->from_tgsi = true;
> }
> /* do first pass optimization, ignoring the key: */
> shader->nir = ir3_optimize_nir(shader, nir, NULL);
> diff --git a/src/gallium/drivers/vc4/vc4_program.c 
> b/src/gallium/drivers/vc4/vc4_program.c
> index 0afd8c6e36eb..a6507181 100644
> --- a/src/gallium/drivers/vc4/vc4_program.c
> +++ b/src/gallium/drivers/vc4/vc4_program.c
> @@ -1223,7 +1223,7 @@ emit_frag_end(struct vc4_compile *c)
>  if (c->output_position_index != -1) {
>  qir_FTOI_dest(c, qir_reg(QFILE_TLB_Z_WRITE, 0),
>qir_FMUL(c,
> -   
> c->outputs[c->output_position_index + 2],
> +   
> c->outputs[c->output_position_index],
> qir_uniform_f(c, 
> 0xff)))->cond = discard_cond;
>  } else {
>  qir_MOV_dest(c, qir_reg(QFILE_TLB_Z_WRITE, 0),
> --
> 2.8.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] freedreno/a2xx: fix comparison out of range warnings

2016-08-01 Thread Rob Clark

On Mon, Aug 1, 2016 at 8:10 AM, Eric Engestrom
 wrote:
> On Sat, Jul 30, 2016 at 09:49:57AM +0200, Francesco Ansanelli wrote:
>> Signed-off-by: Francesco Ansanelli 
>> ---
>>  src/gallium/drivers/freedreno/a2xx/fd2_screen.c |8 
>>  1 file changed, 4 insertions(+), 4 deletions(-)
>>
>> diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_screen.c 
>> b/src/gallium/drivers/freedreno/a2xx/fd2_screen.c
>> index fe4849b..007b9e6 100644
>> --- a/src/gallium/drivers/freedreno/a2xx/fd2_screen.c
>> +++ b/src/gallium/drivers/freedreno/a2xx/fd2_screen.c
>> @@ -61,7 +61,7 @@ fd2_screen_is_format_supported(struct pipe_screen *pscreen,
>>
>>   if ((usage & (PIPE_BIND_SAMPLER_VIEW |
>>   PIPE_BIND_VERTEX_BUFFER)) &&
>> - (fd2_pipe2surface(format) != ~0u)) {
>> + (fd2_pipe2surface(format) != (enum 
>> a2xx_sq_surfaceformat)~0)) {
>
> You said the compiler warning goes away, but is the condition guaranteed
> to hit? I expect, with enums having varying size, that this cast will
> not always work. I agree with Rob Herring, adding the error value to the
> enum is better.

iirc, error value in enums caused some problems since it resulted in
some cases with enum values that are too many bits for the
corresponding bitfield in register and/or instructions.

BR,
-R

>>   retval |= usage & (PIPE_BIND_SAMPLER_VIEW |
>>   PIPE_BIND_VERTEX_BUFFER);
>>   }
>> @@ -70,7 +70,7 @@ fd2_screen_is_format_supported(struct pipe_screen *pscreen,
>>   PIPE_BIND_DISPLAY_TARGET |
>>   PIPE_BIND_SCANOUT |
>>   PIPE_BIND_SHARED)) &&
>> - (fd2_pipe2color(format) != ~0u)) {
>> + (fd2_pipe2color(format) != (enum 
>> a2xx_colorformatx)~0)) {
>>   retval |= usage & (PIPE_BIND_RENDER_TARGET |
>>   PIPE_BIND_DISPLAY_TARGET |
>>   PIPE_BIND_SCANOUT |
>> @@ -78,12 +78,12 @@ fd2_screen_is_format_supported(struct pipe_screen 
>> *pscreen,
>>   }
>>
>>   if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
>> - (fd_pipe2depth(format) != ~0u)) {
>> + (fd_pipe2depth(format) != (enum 
>> adreno_rb_depth_format)~0)) {
>>   retval |= PIPE_BIND_DEPTH_STENCIL;
>>   }
>>
>>   if ((usage & PIPE_BIND_INDEX_BUFFER) &&
>> - (fd_pipe2index(format) != ~0u)) {
>> + (fd_pipe2index(format) != (enum pc_di_index_size)~0)) {
>>   retval |= PIPE_BIND_INDEX_BUFFER;
>>   }
>>
>> --
>> 1.7.9.5
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] virglrenderer regression in commit ad4f0f1941677c

2016-08-01 Thread Rob Clark

On Mon, Aug 1, 2016 at 1:30 AM, Marc-André Lureau  wrote:
>
> Hi
>
> - Original Message -
>> On Fri, Jul 29, 2016 at 3:57 PM, Rob Herring  wrote:
>> > Hi,
>> >
>> > This commit in virglrenderer causes a regression in Android for me.
>> > The parameters that get passed in are last_level = 8, width = 1. I'm
>> > not really sure if this is valid (I'm guessing there should be some
>> > min width?), or where I should be looking to fix this. Any ideas?
>>
>> what is the height?  last mip-map level is basically the number of
>> times you could halve the width+height (round up to 1) until it is 1x1.
>> So something like 1x512 would have last_level=8.  (I might be off by
>> one right-shift..)
>>
>> Try this:
>>
>> - if (args->last_level > (floor(log2(MAX2(args->width, args->width))) + 1))
>> + if (args->last_level > (floor(log2(MAX2(args->width, args->height))) + 1))
>
> Oh, looks like what I wanted but I wouldn't see the typo even after 
> re-reading...
>
> please send a patch on the virgl mailing list.


tbh, I don't even have a virgl setup (pseudo-patch was just from
looking at cgit)..  so I'd be more than happy if someone else could
actually test that change and send a patch.

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] u_vbuf: fix potentially bogus assert

2016-07-30 Thread Rob Clark

There are cases where we hit u_vbuf path due to alignment or pitch-
alignment restrictions, but for an output-format that u_vbuf does not
support translating (yet the driver does support natively).  In which
case we hit the memcpy() path and don't care that u_vbuf doesn't
understand it.

Fixes crash with debug build of mesa in:
dEQP-GLES3.functional.vertex_arrays.single_attribute.strides.fixed.user_ptr_stride17_components2_quads1

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95000
Signed-off-by: Rob Clark <robdcl...@gmail.com>
---
 src/gallium/auxiliary/util/u_vbuf.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_vbuf.c 
b/src/gallium/auxiliary/util/u_vbuf.c
index 5b4e527..532e7c0 100644
--- a/src/gallium/auxiliary/util/u_vbuf.c
+++ b/src/gallium/auxiliary/util/u_vbuf.c
@@ -627,6 +627,7 @@ u_vbuf_translate_begin(struct u_vbuf *mgr,
for (i = 0; i < mgr->ve->count; i++) {
   struct translate_key *k;
   struct translate_element *te;
+  enum pipe_format output_format = mgr->ve->native_format[i];
   unsigned bit, vb_index = mgr->ve->ve[i].vertex_buffer_index;
   bit = 1 << vb_index;
 
@@ -644,7 +645,8 @@ u_vbuf_translate_begin(struct u_vbuf *mgr,
  }
   }
   assert(type < VB_NUM);
-  assert(translate_is_output_format_supported(mgr->ve->native_format[i]));
+  if (mgr->ve->ve[i].src_format != output_format)
+ assert(translate_is_output_format_supported(output_format));
   /*printf("velem=%i type=%i\n", i, type);*/
 
   /* Add the vertex element. */
@@ -657,7 +659,7 @@ u_vbuf_translate_begin(struct u_vbuf *mgr,
   te->input_buffer = vb_index;
   te->input_format = mgr->ve->ve[i].src_format;
   te->input_offset = mgr->ve->ve[i].src_offset;
-  te->output_format = mgr->ve->native_format[i];
+  te->output_format = output_format;
   te->output_offset = k->output_stride;
 
   k->output_stride += mgr->ve->native_format_size[i];
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] virglrenderer regression in commit ad4f0f1941677c

2016-07-30 Thread Rob Clark

On Fri, Jul 29, 2016 at 3:57 PM, Rob Herring  wrote:
> Hi,
>
> This commit in virglrenderer causes a regression in Android for me.
> The parameters that get passed in are last_level = 8, width = 1. I'm
> not really sure if this is valid (I'm guessing there should be some
> min width?), or where I should be looking to fix this. Any ideas?

what is the height?  last mip-map level is basically the number of
times you could halve the width+height (round up to 1) until it is 1x1.
So something like 1x512 would have last_level=8.  (I might be off by
one right-shift..)

Try this:

- if (args->last_level > (floor(log2(MAX2(args->width, args->width))) + 1))
+ if (args->last_level > (floor(log2(MAX2(args->width, args->height))) + 1))

BR,
-R

> commit ad4f0f1941677c6cd78bcd14348cd99ae7dd7527
> Author: Marc-André Lureau 
> Date:   Tue Jan 19 14:37:50 2016 +0100
>
> renderer: reject large LOD values
>
> Or we could sit for a very long time in some further loops.
>
> Fix found thanks to american fuzzy lop.
>
> Signed-off-by: Marc-André Lureau 
>
> Thanks,
> Rob
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH mesa] gallium/freedreno: move cast to avoid integer overflow

2016-07-29 Thread Rob Clark

On Fri, Jul 29, 2016 at 8:33 PM, Eric Engestrom  wrote:
> Previously, the bitshift would be performed on a simple int (32 bits on
> most systems), overflow, and then be cast to 64 bits.

and actually 32b on 100% of systems that had a2xx ;-)

I've picked this up locally and will push soon.. thanks

BR,
-R

> CovID: 1362461
> Signed-off-by: Eric Engestrom 
> ---
>  src/gallium/drivers/freedreno/a2xx/fd2_compiler.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c 
> b/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c
> index 6829544..39418fc 100644
> --- a/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c
> +++ b/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c
> @@ -402,10 +402,10 @@ add_src_reg(struct fd2_compile_context *ctx, struct 
> ir2_instruction *alu,
> swiz[3] = swiz_vals[src->SwizzleW];
> swiz[4] = '\0';
>
> -   if ((ctx->need_sync & (uint64_t)(1 << num)) &&
> +   if ((ctx->need_sync & ((uint64_t)1 << num)) &&
> !(flags & IR2_REG_CONST)) {
> alu->sync = true;
> -   ctx->need_sync &= ~(uint64_t)(1 << num);
> +   ctx->need_sync &= ~((uint64_t)1 << num);
> }
>
> return ir2_reg_create(alu, num, swiz, flags);
> --
> 2.9.0
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH mesa] freedreno/a2xx: add missing `break` after error message

2016-07-29 Thread Rob Clark

hmm, these were actually intended to fall-thru to the 'x' case, just
to give the hw something sensible.  (Not that it is a case that should
ever happen.)  Maybe better to use unreachable()?

BR,
-R

On Fri, Jul 29, 2016 at 8:27 PM, Eric Engestrom  wrote:
> CovID: 1362455, 1362456, 1362457
> Signed-off-by: Eric Engestrom 
> ---
>  src/gallium/drivers/freedreno/a2xx/ir-a2xx.c | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c 
> b/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c
> index 2b62b3a..e2fea5d 100644
> --- a/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c
> +++ b/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c
> @@ -536,6 +536,7 @@ static uint32_t reg_fetch_src_swiz(struct ir2_register 
> *reg, uint32_t n)
> switch (reg->swizzle[i]) {
> default:
> ERROR_MSG("invalid fetch src swizzle: %s", 
> reg->swizzle);
> +   break;
> case 'x': swiz |= 0x0; break;
> case 'y': swiz |= 0x1; break;
> case 'z': swiz |= 0x2; break;
> @@ -562,6 +563,7 @@ static uint32_t reg_fetch_dst_swiz(struct ir2_register 
> *reg)
> switch (reg->swizzle[i]) {
> default:
> ERROR_MSG("invalid dst swizzle: %s", 
> reg->swizzle);
> +   break;
> case 'x': swiz |= 0x0; break;
> case 'y': swiz |= 0x1; break;
> case 'z': swiz |= 0x2; break;
> @@ -622,6 +624,7 @@ static uint32_t reg_alu_src_swiz(struct ir2_register *reg)
> switch (reg->swizzle[i]) {
> default:
> ERROR_MSG("invalid vector src swizzle: %s", 
> reg->swizzle);
> +   break;
> case 'x': swiz |= (0x0 - i) & 0x3; break;
> case 'y': swiz |= (0x1 - i) & 0x3; break;
> case 'z': swiz |= (0x2 - i) & 0x3; break;
> --
> 2.9.0
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH mesa] freedreno/a2xx: remove duplicate assignment

2016-07-29 Thread Rob Clark

thanks, applied locally and will push in next day or two..

BR,
-R

On Fri, Jul 29, 2016 at 8:23 PM, Eric Engestrom  wrote:
> CovID: 1362445, 1362446
> Signed-off-by: Eric Engestrom 
> ---
>  src/gallium/drivers/freedreno/a2xx/ir-a2xx.h | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/drivers/freedreno/a2xx/ir-a2xx.h 
> b/src/gallium/drivers/freedreno/a2xx/ir-a2xx.h
> index 822e5ec..36ed204 100644
> --- a/src/gallium/drivers/freedreno/a2xx/ir-a2xx.h
> +++ b/src/gallium/drivers/freedreno/a2xx/ir-a2xx.h
> @@ -158,7 +158,7 @@ static inline struct ir2_instruction *
>  ir2_instr_create_vtx_fetch(struct ir2_cf *cf, int ci, int cis,
> enum a2xx_sq_surfaceformat fmt, bool is_signed, int stride)
>  {
> -   struct ir2_instruction *instr = instr = ir2_instr_create(cf, 
> IR2_FETCH);
> +   struct ir2_instruction *instr = ir2_instr_create(cf, IR2_FETCH);
> instr->fetch.opc = VTX_FETCH;
> instr->fetch.const_idx = ci;
> instr->fetch.const_idx_sel = cis;
> @@ -170,7 +170,7 @@ ir2_instr_create_vtx_fetch(struct ir2_cf *cf, int ci, int 
> cis,
>  static inline struct ir2_instruction *
>  ir2_instr_create_tex_fetch(struct ir2_cf *cf, int ci)
>  {
> -   struct ir2_instruction *instr = instr = ir2_instr_create(cf, 
> IR2_FETCH);
> +   struct ir2_instruction *instr = ir2_instr_create(cf, IR2_FETCH);
> instr->fetch.opc = TEX_FETCH;
> instr->fetch.const_idx = ci;
> return instr;
> --
> 2.9.0
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] glsl: fix uninitialized instance variable

2016-07-29 Thread Rob Clark

thanks, pushed

On Fri, Jul 29, 2016 at 7:56 AM, Jan Ziak <0xe2.0x9a.0...@gmail.com> wrote:
> Valgrind detected that variable ir_copy_propagation_visitor::killed_all
> is uninitialized.
>
> Signed-off-by: Jan Ziak (http://atom-symbol.net) <0xe2.0x9a.0...@gmail.com>
> ---
>  src/compiler/glsl/opt_copy_propagation.cpp | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/compiler/glsl/opt_copy_propagation.cpp 
> b/src/compiler/glsl/opt_copy_propagation.cpp
> index 807ba8f..443905d 100644
> --- a/src/compiler/glsl/opt_copy_propagation.cpp
> +++ b/src/compiler/glsl/opt_copy_propagation.cpp
> @@ -76,6 +76,7 @@
>mem_ctx = ralloc_context(0);
>this->acp = new(mem_ctx) exec_list;
>this->kills = new(mem_ctx) exec_list;
> +  killed_all = false;
> }
> ~ir_copy_propagation_visitor()
> {
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/4] freedreno/ir3: Add missing braces in initializer

2016-07-29 Thread Rob Clark

yes please

BR,
-R

On Fri, Jul 29, 2016 at 1:37 AM, Francesco Ansanelli
<franci...@gmail.com> wrote:
> I did a test with:
>
> static struct ir3_shader_key key;
> memset(&key, 0, sizeof(key));
>
> both gcc and clang seem happy with it.
> Let me know if you want me to send a patch.
>
>
> 2016-07-29 1:53 GMT+02:00 Rob Clark <robdcl...@gmail.com>:
>>
>> On Thu, Jul 28, 2016 at 6:44 PM, Rob Herring <r...@kernel.org> wrote:
>> > On Thu, Jul 28, 2016 at 1:07 PM, Rob Clark <robdcl...@gmail.com> wrote:
>> >> tbh, I haven't used anything as ancient as 4.6 in a while.. these days
>> >> I'm using 6.1 and even with 5.x I don't remember seeing that warning.
>> >
>> > FWIW, clang 3.8 throws the warning too.
>> >
>>
>> I think that since it is only that one spot (which, btw, is only hit
>> w/ FD_MESA_DEBUG=shaderdb, ie. only for running shaderdb tests), we
>> could also go old-school and just memset() it..
>>
>> BR,
>> -R
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] freedreno/a2xx: Fix sign compare warnings

2016-07-28 Thread Rob Clark

On Thu, Jul 28, 2016 at 6:36 PM, Rob Herring  wrote:
> On Thu, Jun 30, 2016 at 12:16 PM, Francesco Ansanelli
>  wrote:
>> ---
>>  src/gallium/drivers/freedreno/a2xx/fd2_screen.c |8 
>>  1 file changed, 4 insertions(+), 4 deletions(-)
>>
>> diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_screen.c 
>> b/src/gallium/drivers/freedreno/a2xx/fd2_screen.c
>> index c2baa6f..fe4849b 100644
>> --- a/src/gallium/drivers/freedreno/a2xx/fd2_screen.c
>> +++ b/src/gallium/drivers/freedreno/a2xx/fd2_screen.c
>> @@ -61,7 +61,7 @@ fd2_screen_is_format_supported(struct pipe_screen *pscreen,
>>
>> if ((usage & (PIPE_BIND_SAMPLER_VIEW |
>> PIPE_BIND_VERTEX_BUFFER)) &&
>> -   (fd2_pipe2surface(format) != ~0)) {
>> +   (fd2_pipe2surface(format) != ~0u)) {
>
> FYI, all these still warn with LLVM, but now the warning is about
> being out of range:
>
> external/mesa3d/src/gallium/drivers/freedreno/a2xx/fd2_screen.c:64:30:
> warning: comparison of constant 4294967295 with expression of type
> 'enum a2xx_sq_surfaceformat' is always true
> [-Wtautological-constant-out-of-range-compare]
> (fd2_pipe2surface(format) != ~0u)) {
>   ^  ~~~

bleh.. I guess it depends on whether enums are signed or unsigned?

maybe (enum a2xx_sq_surfaceformat)~0 then?

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/4] freedreno/ir3: Add missing braces in initializer

2016-07-28 Thread Rob Clark

On Thu, Jul 28, 2016 at 6:44 PM, Rob Herring <r...@kernel.org> wrote:
> On Thu, Jul 28, 2016 at 1:07 PM, Rob Clark <robdcl...@gmail.com> wrote:
>> tbh, I haven't used anything as ancient as 4.6 in a while.. these days
>> I'm using 6.1 and even with 5.x I don't remember seeing that warning.
>
> FWIW, clang 3.8 throws the warning too.
>

I think that since it is only that one spot (which, btw, is only hit
w/ FD_MESA_DEBUG=shaderdb, ie. only for running shaderdb tests), we
could also go old-school and just memset() it..

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/4] freedreno/ir3: Add missing braces in initializer

2016-07-28 Thread Rob Clark

tbh, I haven't used anything as ancient as 4.6 in a while.. these days
I'm using 6.1 and even with 5.x I don't remember seeing that warning.

Maybe we can just drop -Wmissing-field-initializers??

BR,
-R

On Thu, Jul 28, 2016 at 1:16 PM, Francesco Ansanelli
<franci...@gmail.com> wrote:
> Hi,
>
> with my version of the compiler:
>
> gcc (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3
>
> and original code ({0}) I get:
>
> src/gallium/drivers/freedreno/ir3/ir3_shader.c:312:17: warning: missing
> braces around initializer [-Wmissing-braces]
> src/gallium/drivers/freedreno/ir3/ir3_shader.c:312:17: warning: (near
> initialization for ‘key.<anonymous>’) [-Wmissing-braces]
> src/gallium/drivers/freedreno/ir3/ir3_shader.c:312:17: warning: missing
> initializer [-Wmissing-field-initializers]
> src/gallium/drivers/freedreno/ir3/ir3_shader.c:312:17: warning: (near
> initialization for ‘key.<anonymous>.<anonymous>.has_per_samp’)
> [-Wmissing-field-initializers]
> src/gallium/drivers/freedreno/ir3/ir3_shader.c:312:17: warning: missing
> initializer [-Wmissing-field-initializers]
> src/gallium/drivers/freedreno/ir3/ir3_shader.c:312:17: warning: (near
> initialization for ‘key.vsaturate_s’) [-Wmissing-field-initializers]
>
> with the change suggested by Eric ({{{0}}}) I get:
>
> src/gallium/drivers/freedreno/ir3/ir3_shader.c:312:17: warning: missing
> initializer [-Wmissing-field-initializers]
> src/gallium/drivers/freedreno/ir3/ir3_shader.c:312:17: warning: (near
> initialization for ‘key.<anonymous>.<anonymous>.has_per_samp’)
> [-Wmissing-field-initializers]
> src/gallium/drivers/freedreno/ir3/ir3_shader.c:312:17: warning: missing
> initializer [-Wmissing-field-initializers]
> src/gallium/drivers/freedreno/ir3/ir3_shader.c:312:17: warning: (near
> initialization for ‘key.vsaturate_s’) [-Wmissing-field-initializers]
>
> With my latest suggestion no warnings on that row, neither from clang:
>
> Ubuntu clang version 3.4-1ubuntu3~precise2 (tags/RELEASE_34/final) (based on
> LLVM 3.4)
>
> Up to you how to proceed..
>
> Cheers
> Francesco
>
>
> 2016-07-28 12:54 GMT+02:00 Rob Clark <robdcl...@gmail.com>:
>>
>> On Thu, Jul 28, 2016 at 6:31 AM, Emil Velikov <emil.l.veli...@gmail.com>
>> wrote:
>> > On 28 July 2016 at 09:21, Eric Engestrom <eric.engest...@imgtec.com>
>> > wrote:
>> >> On Wed, Jul 27, 2016 at 07:37:56PM +0200, Francesco Ansanelli wrote:
>> >>> Hello,
>> >>>
>> >>> unluckily this didn't fix the warning...
>> >>> I've done some more compile testing and found this solution:
>> >>>
>> >>> - static struct ir3_shader_key key = {{0}};
>> >>> +static struct ir3_shader_key key = {{{0, 0, 0, 0, 0, 0, 0, 0}}, 0, 0,
>> >>> 0,
>> >>> 0, 0, 0, 0, 0};
>> >>
>> >> So there are 3 levels of structs? The proper fix IMHO should then be
>> >> {{{0}}}, as the rest of the 0's are implied.
>> >> Can you test if that fixes it?
>> >>
>> > The original code is perfect (follows the spec) as-is. There's been a
>> > number of gcc bugs on the topic, hence why sometimes people prefer
>> > memset.
>> > Which one people will opt for is quite subjective but I'd rather leave
>> > the call to Rob.
>>
>> I'm leaning towards just going back to {0}, since at least gcc (at
>> least modern versions) didn't seem to complain about that, but do
>> complain about {{0}}
>>
>> BR,
>> -R
>>
>> > -Emil
>> > ___
>> > mesa-dev mailing list
>> > mesa-dev@lists.freedesktop.org
>> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/4] freedreno/ir3: Add missing braces in initializer

2016-07-28 Thread Rob Clark

On Thu, Jul 28, 2016 at 6:31 AM, Emil Velikov  wrote:
> On 28 July 2016 at 09:21, Eric Engestrom  wrote:
>> On Wed, Jul 27, 2016 at 07:37:56PM +0200, Francesco Ansanelli wrote:
>>> Hello,
>>>
>>> unluckily this didn't fix the warning...
>>> I've done some more compile testing and found this solution:
>>>
>>> - static struct ir3_shader_key key = {{0}};
>>> +static struct ir3_shader_key key = {{{0, 0, 0, 0, 0, 0, 0, 0}}, 0, 0, 0,
>>> 0, 0, 0, 0, 0};
>>
>> So there are 3 levels of structs? The proper fix IMHO should then be
>> {{{0}}}, as the rest of the 0's are implied.
>> Can you test if that fixes it?
>>
> The original code is perfect (follows the spec) as-is. There's been a
> number of gcc bugs on the topic, hence why sometimes people prefer
> memset.
> Which one people will opt for is quite subjective but I'd rather leave
> the call to Rob.

I'm leaning towards just going back to {0}, since at least gcc (at
least modern versions) didn't seem to complain about that, but do
complain about {{0}}

BR,
-R

> -Emil
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/9] st/mesa: completely rewrite state atoms

2016-07-25 Thread Rob Clark

On Mon, Jul 25, 2016 at 1:19 PM, Marek Olšák <mar...@gmail.com> wrote:
> On Mon, Jul 25, 2016 at 5:42 PM, Rob Clark <robdcl...@gmail.com> wrote:
>> On Mon, Jul 25, 2016 at 11:16 AM, Brian Paul <bri...@vmware.com> wrote:
>>> On 07/18/2016 07:11 AM, Marek Olšák wrote:
>>>>
>>>> @@ -183,49 +107,42 @@ static void check_attrib_edgeflag(struct st_context
>>>> *st)
>>>>
>>>>   void st_validate_state( struct st_context *st, enum st_pipeline pipeline
>>>> )
>>>>   {
>>>> -   const struct st_tracked_state **atoms;
>>>> -   struct st_state_flags *state;
>>>> -   GLuint num_atoms;
>>>> -   GLuint i;
>>>> +   uint64_t dirty, pipeline_mask;
>>>> +   uint32_t dirty_lo, dirty_hi;
>>>> +
>>>> +   /* Get Mesa driver state. */
>>>> +   st->dirty |= st->ctx->NewDriverState & ST_ALL_STATES_MASK;
>>>> +   st->ctx->NewDriverState = 0;
>>>>
>>>>  /* Get pipeline state. */
>>>>  switch (pipeline) {
>>>> -case ST_PIPELINE_RENDER:
>>>> -  atoms = render_atoms;
>>>> -  num_atoms = ARRAY_SIZE(render_atoms);
>>>> -  state = &st->dirty;
>>>> +   case ST_PIPELINE_RENDER:
>>>> +  check_attrib_edgeflag(st);
>>>> +  check_program_state(st);
>>>> +  st_manager_validate_framebuffers(st);
>>>> +
>>>> +  pipeline_mask = ST_PIPELINE_RENDER_STATE_MASK;
>>>> break;
>>>>  case ST_PIPELINE_COMPUTE:
>>>> -  atoms = compute_atoms;
>>>> -  num_atoms = ARRAY_SIZE(compute_atoms);
>>>> -  state = &st->dirty_cp;
>>>> +  pipeline_mask = ST_PIPELINE_COMPUTE_STATE_MASK;
>>>> break;
>>>>  default:
>>>> unreachable("Invalid pipeline specified");
>>>>  }
>>>>
>>>> -   /* Get Mesa driver state. */
>>>> -   st->dirty.st |= st->ctx->NewDriverState;
>>>> -   st->dirty_cp.st |= st->ctx->NewDriverState;
>>>> -   st->ctx->NewDriverState = 0;
>>>> -
>>>> -   if (pipeline == ST_PIPELINE_RENDER) {
>>>> -  check_attrib_edgeflag(st);
>>>> -
>>>> -  check_program_state(st);
>>>> -
>>>> -  st_manager_validate_framebuffers(st);
>>>> -   }
>>>> -
>>>> -   if (state->st == 0 && state->mesa == 0)
>>>> +   dirty = st->dirty & pipeline_mask;
>>>> +   if (!dirty)
>>>> return;
>>>>
>>>> -   /*printf("%s %x/%x\n", __func__, state->mesa, state->st);*/
>>>> +   dirty_lo = dirty;
>>>> +   dirty_hi = dirty >> 32;
>>>>
>>>> -   for (i = 0; i < num_atoms; i++) {
>>>> -  if (check_state(state, &atoms[i]->dirty))
>>>> - atoms[i]->update( st );
>>>> -   }
>>>> +   /* Update states. */
>>>> +   while (dirty_lo)
>>>> +  atoms[u_bit_scan(&dirty_lo)]->update(st);
>>>> +   while (dirty_hi)
>>>> +  atoms[32 + u_bit_scan(&dirty_hi)]->update(st);
>>>>
>>>
>>> Could we just use the u_bit_scan64() function and avoid the hi/lo split?
>>
>> fwiw, we actually did discuss that on irc, but I guess no one
>> summarized on email thread..
>>
>> Marek's concern was that would generate worse code on 32b since it
>> would pull the right-shift into the loop.
>>
>> I'm not entirely sure if gcc would be clever enough in this case or
>> not.  I guess someone needs to compare generated asm in both cases.
>> And either use u_bit_scan64() if the compiler is clever enough, or add
>> a comment explaining the reason.
>
> Yeah, I added this comment before the loops:
> "Don't use u_bit_scan64, it may be slower on 32-bit."
>
> On 32-bit, ffsll is an if-then-else expression with some arithmetic
> and shifting one bit to the left is another if-then-else expression.

fwiw, I did spend a bit of time this evening playing around with this,
and the dirty_hi/dirty_lo approach w/ 32b/i686 build works out to be
something like 12 instructions shorter for the loop body, ie. gcc
isn't clever enough (total instruction count increases by doubling the
loops but I think that doesn't matter)..  given that this is a hot
spot in profiles that I've looked at, it might even be worth having
some #ifdef 64b / #else.. but ofc that could be left as a future
exercise if someone cares..  either way, r-b

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] vc4: add hash table look-up for exported dmabufs

2016-07-25 Thread Rob Clark

On Mon, Jul 25, 2016 at 8:47 PM, Eric Anholt  wrote:
> Rob Herring  writes:
>
>> It is necessary to reuse existing BOs when dmabufs are imported. There
>> are 2 cases that need to be handled. dmabufs can be created/exported and
>> imported by the same process and can be imported multiple times.
>> Copying other drivers, add a hash table to track exported BOs so the
>> BOs get reused.
>>
>> Cc: Eric Anholt 
>> Signed-off-by: Rob Herring 
>
> Looks good to me, other than a bit of funny whitespace that I'll fix up.
> I built a piglit test for this today (want to go take a look at those?),
> and once I get a piglit run through, I'll push the change.

I don't suppose you have a branch somewhere?  I should probably give
that a try..  I've had
https://trello.com/c/x34U0kTQ/114-teach-piglit-about-libdrm-freedreno-for-dmabuf-tests
on my todo list for a while (but a generic gbm based solution seems
even better than teaching piglit about libdrm_$drivername ;-))

BR,
-R


> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/9] st/mesa: completely rewrite state atoms

2016-07-25 Thread Rob Clark

On Mon, Jul 25, 2016 at 11:16 AM, Brian Paul  wrote:
> On 07/18/2016 07:11 AM, Marek Olšák wrote:
>>
>> From: Marek Olšák 
>>
>> The goal is to do this in st_validate_state:
>> while (dirty)
>>atoms[u_bit_scan(&dirty)]->update(st);
>>
>> That implies that atoms can't specify which flags they consume.
>> There is exactly one ST_NEW_* flag for each atom. (58 flags in total)
>>
>> There are macros that combine multiple flags into one for easier use.
>>
>> All _NEW_* flags are translated into ST_NEW_* flags in
>> st_invalidate_state.
>> st/mesa doesn't keep the _NEW_* flags after that.
>>
>> torcs is 2% faster between the previous patch and the end of this series.
>> ---
>>   src/mesa/state_tracker/st_atom.c   | 153 +-
>>   src/mesa/state_tracker/st_atom.h   | 210
>> +
>>   src/mesa/state_tracker/st_atom_array.c |   4 -
>>   src/mesa/state_tracker/st_atom_atomicbuf.c |  24 ---
>>   src/mesa/state_tracker/st_atom_blend.c |   4 -
>>   src/mesa/state_tracker/st_atom_clip.c  |   4 -
>>   src/mesa/state_tracker/st_atom_constbuf.c  |  48 --
>>   src/mesa/state_tracker/st_atom_depth.c |   4 -
>>   src/mesa/state_tracker/st_atom_framebuffer.c   |   4 -
>>   src/mesa/state_tracker/st_atom_image.c |  24 ---
>>   src/mesa/state_tracker/st_atom_list.h  |  75 +
>>   src/mesa/state_tracker/st_atom_msaa.c  |   8 -
>>   src/mesa/state_tracker/st_atom_pixeltransfer.c |   4 -
>>   src/mesa/state_tracker/st_atom_rasterizer.c|  16 --
>>   src/mesa/state_tracker/st_atom_sampler.c   |   4 -
>>   src/mesa/state_tracker/st_atom_scissor.c   |   8 -
>>   src/mesa/state_tracker/st_atom_shader.c|  24 ---
>>   src/mesa/state_tracker/st_atom_stipple.c   |   5 -
>>   src/mesa/state_tracker/st_atom_storagebuf.c|  24 ---
>>   src/mesa/state_tracker/st_atom_tess.c  |   4 -
>>   src/mesa/state_tracker/st_atom_texture.c   |  24 ---
>>   src/mesa/state_tracker/st_atom_viewport.c  |   4 -
>>   src/mesa/state_tracker/st_cb_bitmap.c  |  10 +-
>>   src/mesa/state_tracker/st_cb_bufferobjects.c   |  10 +-
>>   src/mesa/state_tracker/st_cb_compute.c |   2 +-
>>   src/mesa/state_tracker/st_cb_feedback.c|   2 +-
>>   src/mesa/state_tracker/st_cb_program.c |  38 ++---
>>   src/mesa/state_tracker/st_cb_texture.c |   2 +-
>>   src/mesa/state_tracker/st_context.c| 100 ++--
>>   src/mesa/state_tracker/st_context.h|  42 +
>>   src/mesa/state_tracker/st_draw.c   |   4 +-
>>   src/mesa/state_tracker/st_manager.c|   4 +-
>>   32 files changed, 377 insertions(+), 516 deletions(-)
>>   create mode 100644 src/mesa/state_tracker/st_atom_list.h
>>
>> diff --git a/src/mesa/state_tracker/st_atom.c
>> b/src/mesa/state_tracker/st_atom.c
>> index 9d5cc0f..5843d2a 100644
>> --- a/src/mesa/state_tracker/st_atom.c
>> +++ b/src/mesa/state_tracker/st_atom.c
>> @@ -37,87 +37,18 @@
>>   #include "st_manager.h"
>>
>>
>> -/**
>> - * This is used to initialize st->render_atoms[].
>> - */
>> -static const struct st_tracked_state *render_atoms[] =
>> -{
>> -   &st_update_depth_stencil_alpha,
>> -   &st_update_clip,
>> -
>> -   &st_update_fp,
>> -   &st_update_gp,
>> -   &st_update_tep,
>> -   &st_update_tcp,
>> -   &st_update_vp,
>> -
>> -   &st_update_rasterizer,
>> -   &st_update_polygon_stipple,
>> -   &st_update_viewport,
>> -   &st_update_scissor,
>> -   &st_update_window_rectangles,
>> -   &st_update_blend,
>> -   &st_update_vertex_texture,
>> -   &st_update_fragment_texture,
>> -   &st_update_geometry_texture,
>> -   &st_update_tessctrl_texture,
>> -   &st_update_tesseval_texture,
>> -   &st_update_sampler, /* depends on update_*_texture for swizzle */
>> -   &st_bind_vs_images,
>> -   &st_bind_tcs_images,
>> -   &st_bind_tes_images,
>> -   &st_bind_gs_images,
>> -   &st_bind_fs_images,
>> -   &st_update_framebuffer, /* depends on update_*_texture and
>> bind_*_images */
>> -   &st_update_msaa,
>> -   &st_update_sample_shading,
>> -   &st_update_vs_constants,
>> -   &st_update_tcs_constants,
>> -   &st_update_tes_constants,
>> -   &st_update_gs_constants,
>> -   &st_update_fs_constants,
>> -   &st_bind_vs_ubos,
>> -   &st_bind_tcs_ubos,
>> -   &st_bind_tes_ubos,
>> -   &st_bind_fs_ubos,
>> -   &st_bind_gs_ubos,
>> -   &st_bind_vs_atomics,
>> -   &st_bind_tcs_atomics,
>> -   &st_bind_tes_atomics,
>> -   &st_bind_fs_atomics,
>> -   &st_bind_gs_atomics,
>> -   &st_bind_vs_ssbos,
>> -   &st_bind_tcs_ssbos,
>> -   &st_bind_tes_ssbos,
>> -   &st_bind_fs_ssbos,
>> -   &st_bind_gs_ssbos,
>> -   &st_update_pixel_transfer,
>> -   &st_update_tess,
>> -
>> -   /* this must be done after the vertex program update */
>> -   &st_update_array
>> -};
>> -
>> -
>> -/**
>> - * This is used to initialize st->compute_atoms[].
>> - */
>> -static const struct st_tracked_state *compute_atoms[] =
>> +/* The list state update functions. */
>> +static const struct st_tracked_state *atoms[] =
>>   {
>> -   &st_update_cp,
>> -

Re: [Mesa-dev] [PATCH 00/56] Die copy-and-paste code, die

2016-07-22 Thread Rob Clark

On Tue, Jul 19, 2016 at 3:24 PM, Ian Romanick  wrote:
> After seeing Dave's series to add support GL_ARB_shader_gpu_int64 and
> thinking about adding support for 8- and  16-bit integers, I decided
> that something had to be done about the cut-and-paste madness that is
> ir_constant_expression.cpp.  I decided to take a page from Jason's book
> and generate it from a machine description of the expressions.  The
> result is this series.

fwiw, I did go down the path of adding half-precision types in IR for
a spell, but ended up deciding it was a maddening explosion of
builtin-types and builtin-functions..

At some point (ie. might be a month or two or more before I get to
it), I was going to go back to my original idea of just keeping things
as-is in glsl IR (as type+precision-qualifier for <=32b types), and
dealing with it in glsl->nir.  Basically just need to add a
precision-visitor (which I think I already pushed on a branch
somewhere) to infer an expression's precision, and wire that up in a
few places (function inlining and various other places where
intermediate vars are created in ir), and mapping type+precision to
nir type in glsl_to_nir.

(Not an argument against autogenerating some of the IR bits..  I think
that would be a really good idea, I'm a much bigger fan of how some of
that is handled in NIR.. maybe there is some room to autogen some of
the builtin-types and builtin-functions, which might make me
reconsider..)

BR,
-R

> You may notice from some of the earlier patches in this series that I
> started this work over a year ago.  The previous work was an attempt to
> generate opt_algebraic.cpp which was ultimately abandoned.  It may be
> worth picking that up again.
>
> I haven't done *anything* for SCons, so hopefully Jose or someone can
> help out there.
>
> All of this is available at:
>
> https://cgit.freedesktop.org/~idr/mesa/log/?h=generated-glsl-ir
>
> Other possible follow-up work:
>
>  - A few expressions don't have constant evaluation support.  I don't
>think I've seen a real shader use any of these, so there's a reason
>we haven't "missed" them.
>
> - frexp_sig
> - frexp_exp
> - vote_any
> - vote_all
> - vote_eq
> - imul_high
> - carry
> - borrow
>
>  - Generate validation code for expressions.  A few times while
>developing this series I had questions about what the IR actually
>supported.  In quite a few cases the IR support is different from
>what GLSL supports.  I would often look to ir_validate.cpp to answer
>these questions, and I was almost always disappointed.  I would
>really like to see missing type information added to
>ir_expression_operation.py and generate a much more complete set of
>validations for ir_expression.  Then the assertions remaining in
>ir_expression_operation_constant.h could be removed.
>
>  - Generate ir_expression constructors.
>
>  - Generate ir_builder.  We don't support every possible expression in
>ir_builder, so this may not be worth the effort.
>
>  src/compiler/Android.glsl.gen.mk |2 +-
>  src/compiler/Makefile.glsl.am|   24 +-
>  src/compiler/Makefile.sources|8 +-
>  src/compiler/glsl/.gitignore |2 +
>  src/compiler/glsl/ir.cpp |  133 +-
>  src/compiler/glsl/ir.h   |  329 +
>  src/compiler/glsl/ir_constant_expression.cpp | 1332 ++
>  src/compiler/glsl/ir_expression_operation.py |  738 ++
>  src/compiler/glsl/ir_print_visitor.cpp   |2 +-
>  src/compiler/glsl/ir_validate.cpp|   11 +
>  src/compiler/glsl/lower_mat_op_to_vec.cpp|2 +-
>  src/mesa/Makefile.sources|1 +
>  src/mesa/drivers/dri/i965/Makefile.am|1 +
>  13 files changed, 901 insertions(+), 1684 deletions(-)
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] Rename the DEBUG macro to MESA_DEBUG

2016-07-22 Thread Rob Clark

On Fri, Jul 22, 2016 at 4:39 AM, Vedran Miletić <ved...@miletic.net> wrote:
> On 07/22/2016 03:37 AM, Rob Clark wrote:
>>
>> On Thu, Jul 21, 2016 at 9:35 PM, Rob Clark <robdcl...@gmail.com> wrote:
>>>
>>> On Thu, Jul 21, 2016 at 1:48 PM, Vedran Miletić <ved...@miletic.net>
>>> wrote:
>>>>
>>>> LLVM and Mesa both define the DEBUG macro in incompatible ways. As a
>>>> general practice, we should avoid using such generic names when it is
>>>> possible to do so.
>>>>
>>>> This patch renames all occurrences of the DEBUG macro to MESA_DEBUG,
>>>> and removes workarounds previously used to enable building Mesa with
>>>> LLVM (pop_macro() and push_macro() function calls).
>>>>
>>>> Please let me know if I missed any.
>>>
>>>
>>> I guess at least some in-flight patches (at least my
>>> pipe_mutex_assert_locked() patch, but I guess DEBUG is common enough
>>> that it might affect others).. not sure if there is a better way to
>>> deal with that without things falling through the cracks..  maybe
>>> introduce MESA_DEBUG which is the same as DEBUG first, and then a
>>> later patch to remove DEBUG.  Or at least including sed/etc rule to
>>> re-do the mass-change on a later baseline in the commit msg?
>>>
>>> I don't mind rebasing my patch, just more worried about things falling
>>> through the cracks with other in-progress stuff, since it seems like
>>> the end result would be a silent fail to enable intended debug code..
>>
>>
>> btw, possibly tilting at windmills here, but afaik we don't export
>> DEBUG outside the mesa codebase.. so actually it should be llvm that
>> s/DEBUG/LLVM_DEBUG/
>>
>> BR,
>> -R
>
>
> Regarding in-flight patches, I did this change manually ("it can't be that
> hard, right, there's just a bunch of them") but I suppose it could be
> scripted and I would prefer this approach to having both macros at the same
> time.

well, I wouldn't expect both macros to exist at the same time
forever.. and it would let you avoid the flag-day patch.  At any rate,
if the patch could be easily regenerated via sed or whatever, I guess
I'd be less concerned about that.

My main concern is that we silently lose some debug code.. for example
when backporting to release branches or rebasing work-in-progress
stuff, etc.  Not sure there is a way to catch that other than
follow-up audits.

> Regarding s/DEBUG/LLVM_DEBUG/, I understand the reasoning and agree that
> ideally LLVM should rename the macro and not export macros with generic
> names. However, to avoid potential future conflicts, Mesa should use
> non-generic macro names anyhow.

yeah, hence the 'tilting at windmills' comment..  mostly just
grumbling about how llvm is kind of a pita as a dependency ;-)

BR,
-R

> Regards,
> Vedran
>
> --
> Vedran Miletić
> vedran.miletic.net
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] Rename the DEBUG macro to MESA_DEBUG

2016-07-21 Thread Rob Clark

On Thu, Jul 21, 2016 at 9:35 PM, Rob Clark <robdcl...@gmail.com> wrote:
> On Thu, Jul 21, 2016 at 1:48 PM, Vedran Miletić <ved...@miletic.net> wrote:
>> LLVM and Mesa both define the DEBUG macro in incompatible ways. As a
>> general practice, we should avoid using such generic names when it is
>> possible to do so.
>>
>> This patch renames all occurrences of the DEBUG macro to MESA_DEBUG,
>> and removes workarounds previously used to enable building Mesa with
>> LLVM (pop_macro() and push_macro() function calls).
>>
>> Please let me know if I missed any.
>
> I guess at least some in-flight patches (at least my
> pipe_mutex_assert_locked() patch, but I guess DEBUG is common enough
> that it might affect others).. not sure if there is a better way to
> deal with that without things falling through the cracks..  maybe
> introduce MESA_DEBUG which is the same as DEBUG first, and then a
> later patch to remove DEBUG.  Or at least including sed/etc rule to
> re-do the mass-change on a later baseline in the commit msg?
>
> I don't mind rebasing my patch, just more worried about things falling
> through the cracks with other in-progress stuff, since it seems like
> the end result would be a silent fail to enable intended debug code..

btw, possibly tilting at windmills here, but afaik we don't export
DEBUG outside the mesa codebase.. so actually it should be llvm that
s/DEBUG/LLVM_DEBUG/

BR,
-R

> BR,
> -R
>
>> Signed-off-by: Vedran Miletić <ved...@miletic.net>
>> ---
>>  configure.ac   |  2 +-
>>  src/compiler/glsl/ir_validate.cpp  |  4 +-
>>  src/compiler/nir/nir.h |  6 +-
>>  src/compiler/nir/nir_metadata.c|  4 +-
>>  src/compiler/nir/nir_validate.c|  5 +-
>>  src/egl/drivers/haiku/egl_haiku.cpp|  6 +-
>>  src/egl/main/eglconfig.c   |  6 +-
>>  src/gallium/auxiliary/draw/draw_cliptest_tmp.h |  4 +-
>>  src/gallium/auxiliary/gallivm/lp_bld_debug.h   | 12 ++--
>>  src/gallium/auxiliary/gallivm/lp_bld_init.c| 16 +++---
>>  src/gallium/auxiliary/gallivm/lp_bld_misc.cpp  | 23 ++--
>>  src/gallium/auxiliary/gallivm/lp_bld_struct.c  | 16 +++---
>>  src/gallium/auxiliary/os/os_memory.h   |  6 +-
>>  src/gallium/auxiliary/os/os_misc.c |  4 +-
>>  src/gallium/auxiliary/os/os_misc.h |  6 +-
>>  .../auxiliary/pipebuffer/pb_buffer_fenced.c| 10 ++--
>>  src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c |  6 +-
>>  src/gallium/auxiliary/tgsi/tgsi_exec.c | 16 +++---
>>  src/gallium/auxiliary/tgsi/tgsi_ureg.c |  8 +--
>>  src/gallium/auxiliary/util/u_cache.c   | 16 +++---
>>  src/gallium/auxiliary/util/u_cpu_detect.c  |  8 +--
>>  src/gallium/auxiliary/util/u_debug.c   | 18 +++---
>>  src/gallium/auxiliary/util/u_debug.h   | 66 
>> +++---
>>  src/gallium/auxiliary/util/u_debug_flush.c |  4 +-
>>  src/gallium/auxiliary/util/u_debug_flush.h |  6 +-
>>  src/gallium/auxiliary/util/u_debug_image.c |  4 +-
>>  src/gallium/auxiliary/util/u_debug_image.h |  8 +--
>>  src/gallium/drivers/freedreno/ir3/ir3.c| 16 +++---
>>  src/gallium/drivers/freedreno/ir3/ir3.h| 18 +++---
>>  src/gallium/drivers/freedreno/ir3/ir3_print.c  |  4 +-
>>  src/gallium/drivers/freedreno/ir3/ir3_ra.c |  4 +-
>>  src/gallium/drivers/i915/i915_debug.c  |  6 +-
>>  src/gallium/drivers/i915/i915_debug.h  |  6 +-
>>  src/gallium/drivers/ilo/core/ilo_debug.h   | 17 +++---
>>  src/gallium/drivers/llvmpipe/lp_debug.h|  6 +-
>>  src/gallium/drivers/llvmpipe/lp_perf.h |  6 +-
>>  src/gallium/drivers/llvmpipe/lp_rast.c |  4 +-
>>  src/gallium/drivers/llvmpipe/lp_rast.h |  4 +-
>>  src/gallium/drivers/llvmpipe/lp_rast_priv.h|  6 +-
>>  src/gallium/drivers/llvmpipe/lp_scene.c|  4 +-
>>  src/gallium/drivers/llvmpipe/lp_screen.c   |  8 +--
>>  src/gallium/drivers/llvmpipe/lp_setup_line.c   |  4 +-
>>  src/gallium/drivers/llvmpipe/lp_setup_point.c  |  4 +-
>>  src/gallium/drivers/llvmpipe/lp_state_sampler.c|  4 +-
>>  src/gallium/drivers/llvmpipe/lp_test_main.c|  4 +-
>>  src/gallium/drivers/llvmpipe/lp_texture.c  | 24 
>>  src/gallium/drivers/llvmpipe/lp_texture.h  |  4 +-
>>  .../drivers/nouveau/codegen/nv50_ir_drive

Re: [Mesa-dev] [PATCH] Rename the DEBUG macro to MESA_DEBUG

2016-07-21 Thread Rob Clark

On Thu, Jul 21, 2016 at 1:48 PM, Vedran Miletić  wrote:
> LLVM and Mesa both define the DEBUG macro in incompatible ways. As a
> general practice, we should avoid using such generic names when it is
> possible to do so.
>
> This patch renames all occurrences of the DEBUG macro to MESA_DEBUG,
> and removes workarounds previously used to enable building Mesa with
> LLVM (pop_macro() and push_macro() function calls).
>
> Please let me know if I missed any.

I guess at least some in-flight patches (at least my
pipe_mutex_assert_locked() patch, but I guess DEBUG is common enough
that it might affect others).. not sure if there is a better way to
deal with that without things falling through the cracks..  maybe
introduce MESA_DEBUG which is the same as DEBUG first, and then a
later patch to remove DEBUG.  Or at least including sed/etc rule to
re-do the mass-change on a later baseline in the commit msg?

I don't mind rebasing my patch, just more worried about things falling
through the cracks with other in-progress stuff, since it seems like
the end result would be a silent fail to enable intended debug code..

BR,
-R

> Signed-off-by: Vedran Miletić 
> ---
>  configure.ac   |  2 +-
>  src/compiler/glsl/ir_validate.cpp  |  4 +-
>  src/compiler/nir/nir.h |  6 +-
>  src/compiler/nir/nir_metadata.c|  4 +-
>  src/compiler/nir/nir_validate.c|  5 +-
>  src/egl/drivers/haiku/egl_haiku.cpp|  6 +-
>  src/egl/main/eglconfig.c   |  6 +-
>  src/gallium/auxiliary/draw/draw_cliptest_tmp.h |  4 +-
>  src/gallium/auxiliary/gallivm/lp_bld_debug.h   | 12 ++--
>  src/gallium/auxiliary/gallivm/lp_bld_init.c| 16 +++---
>  src/gallium/auxiliary/gallivm/lp_bld_misc.cpp  | 23 ++--
>  src/gallium/auxiliary/gallivm/lp_bld_struct.c  | 16 +++---
>  src/gallium/auxiliary/os/os_memory.h   |  6 +-
>  src/gallium/auxiliary/os/os_misc.c |  4 +-
>  src/gallium/auxiliary/os/os_misc.h |  6 +-
>  .../auxiliary/pipebuffer/pb_buffer_fenced.c| 10 ++--
>  src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c |  6 +-
>  src/gallium/auxiliary/tgsi/tgsi_exec.c | 16 +++---
>  src/gallium/auxiliary/tgsi/tgsi_ureg.c |  8 +--
>  src/gallium/auxiliary/util/u_cache.c   | 16 +++---
>  src/gallium/auxiliary/util/u_cpu_detect.c  |  8 +--
>  src/gallium/auxiliary/util/u_debug.c   | 18 +++---
>  src/gallium/auxiliary/util/u_debug.h   | 66 
> +++---
>  src/gallium/auxiliary/util/u_debug_flush.c |  4 +-
>  src/gallium/auxiliary/util/u_debug_flush.h |  6 +-
>  src/gallium/auxiliary/util/u_debug_image.c |  4 +-
>  src/gallium/auxiliary/util/u_debug_image.h |  8 +--
>  src/gallium/drivers/freedreno/ir3/ir3.c| 16 +++---
>  src/gallium/drivers/freedreno/ir3/ir3.h| 18 +++---
>  src/gallium/drivers/freedreno/ir3/ir3_print.c  |  4 +-
>  src/gallium/drivers/freedreno/ir3/ir3_ra.c |  4 +-
>  src/gallium/drivers/i915/i915_debug.c  |  6 +-
>  src/gallium/drivers/i915/i915_debug.h  |  6 +-
>  src/gallium/drivers/ilo/core/ilo_debug.h   | 17 +++---
>  src/gallium/drivers/llvmpipe/lp_debug.h|  6 +-
>  src/gallium/drivers/llvmpipe/lp_perf.h |  6 +-
>  src/gallium/drivers/llvmpipe/lp_rast.c |  4 +-
>  src/gallium/drivers/llvmpipe/lp_rast.h |  4 +-
>  src/gallium/drivers/llvmpipe/lp_rast_priv.h|  6 +-
>  src/gallium/drivers/llvmpipe/lp_scene.c|  4 +-
>  src/gallium/drivers/llvmpipe/lp_screen.c   |  8 +--
>  src/gallium/drivers/llvmpipe/lp_setup_line.c   |  4 +-
>  src/gallium/drivers/llvmpipe/lp_setup_point.c  |  4 +-
>  src/gallium/drivers/llvmpipe/lp_state_sampler.c|  4 +-
>  src/gallium/drivers/llvmpipe/lp_test_main.c|  4 +-
>  src/gallium/drivers/llvmpipe/lp_texture.c  | 24 
>  src/gallium/drivers/llvmpipe/lp_texture.h  |  4 +-
>  .../drivers/nouveau/codegen/nv50_ir_driver.h   |  6 +-
>  .../drivers/nouveau/codegen/nv50_ir_inlines.h  |  4 +-
>  src/gallium/drivers/nouveau/nouveau_screen.h   |  4 +-
>  src/gallium/drivers/nouveau/nouveau_statebuf.h | 10 ++--
>  src/gallium/drivers/nouveau/nv50/nv50_program.c|  6 +-
>  src/gallium/drivers/nouveau/nvc0/nvc0_program.c| 14 ++---
>  src/gallium/drivers/nouveau/nvc0/nve4_compute.c| 12 ++--
>  src/gallium/drivers/r300/r300_cb.h |  6 +-
>  src/gallium/drivers/r300/r300_context.c|  6 +-
>  src/gallium/drivers/r300/r300_cs.h |  6 +-
>  src/gallium/drivers/softpipe/sp_tex_sample.c   |  4 +-
>  src/gallium/drivers/svga/svga_debug.h  |  8 +--
>  src/gallium/drivers/svga/svga_draw.c

Re: [Mesa-dev] [PATCH 07/10] egl/android: Make drm_gralloc headers optional

2016-07-21 Thread Rob Clark

On Thu, Jul 21, 2016 at 6:07 PM, Stéphane Marchesin
<stephane.marche...@gmail.com> wrote:
> On Tue, Jul 19, 2016 at 6:36 AM, Rob Clark <robdcl...@gmail.com> wrote:
>> On Tue, Jul 19, 2016 at 6:54 AM, Emil Velikov <emil.l.veli...@gmail.com> 
>> wrote:
>>> On 19 July 2016 at 04:21, Tomasz Figa <tf...@chromium.org> wrote:
>>>> On Tue, Jul 19, 2016 at 2:35 AM, Emil Velikov <emil.l.veli...@gmail.com> 
>>>> wrote:
>>>>> On 18 July 2016 at 16:38, Tomasz Figa <tf...@chromium.org> wrote:
>>>>>> On Mon, Jul 18, 2016 at 11:58 PM, Emil Velikov 
>>>>>> <emil.l.veli...@gmail.com> wrote:
>>>>>>> On 18 July 2016 at 13:02, Tomasz Figa <tf...@chromium.org> wrote:
>>>>>>>> On Mon, Jul 18, 2016 at 7:28 PM, Emil Velikov 
>>>>>>>> <emil.l.veli...@gmail.com> wrote:
>>>>>>>>> Hi Tomasz,
>>>>>>>>>
>>>>>>>>> On 15 July 2016 at 08:53, Tomasz Figa <tf...@chromium.org> wrote:
>>>>>>>>>
>>>>>>>>>> +#define DRM_RENDER_DEV_NAME  "%s/renderD%d"
>>>>>>>>>> +
>>>>>>>>>> +static int
>>>>>>>>>> +droid_open_device(_EGLDisplay *dpy)
>>>>>>>>>> +{
>>>>>>>>>> +   struct dri2_egl_display *dri2_dpy = dpy->DriverData;
>>>>>>>>>> +   const int limit = 64;
>>>>>>>>>> +   const int base = 128;
>>>>>>>>>> +   int fd;
>>>>>>>>>> +   int i;
>>>>>>>>>> +
>>>>>>>>>> +   for (i = 0; i < limit; ++i) {
>>>>>>>>>> +  char *card_path;
>>>>>>>>>> +  if (asprintf(&card_path, DRM_RENDER_DEV_NAME, DRM_DIR_NAME, 
>>>>>>>>>> base + i) < 0)
>>>>>>>>> Why do we need any of this ? What gralloc implementation are you guys 
>>>>>>>>> using ?
>>>>>>>>
>>>>>>>> We are using our heavily rewritten fork of some old drm_gralloc
>>>>>>>> release. It supports only render nodes and PRIME FDs and doesn't
>>>>>>>> export the DRI device FD outside of its internals (which isn't
>>>>>>>> actually even fully correct, at least for PRIME and render nodes, see
>>>>>>>> my reply to Rob's comments).
>>>>>>>>
>>>>>>> That explain it, since https://chromium.googlesource.com/ does not
>>>>>>> have gralloc, and
>>>>>>> https://android.googlesource.com/platform/external/drm_gralloc/ has
>>>>>>> both the DRM_FD define and the gem/flink function(s)?
>>>>>>>
>>>>>>> Can I suggest porting the fd drm_gralloc/gbm_gralloc patches to your
>>>>>>> private copy/repo. This way we'll have some consistency throughout
>>>>>>> gralloc implementations
>>>>>>
>>>>>> I'd prefer if any code using flink names was not added back. On top of
>>>>>> that, our drm_gralloc doesn't really have much in common with that
>>>>>> from android-x86 anymore (as I said, it was heavily rewritten) and
>>>>>> there is not even a chance that with its current design flink names
>>>>>> could even work.
>>>>>>
>>>>>> Also I'm wondering why we want to consider current brokenness of
>>>>>> drm_gralloc as something to be consistent with. It's supposed to be a
>>>>>> HAL library providing an uniform abstraction, but it exports private
>>>>>> APIs on the side instead. Moreover, as I mentioned before, flink names
>>>>>> are considered insecure and it would be really much better if we could
>>>>>> just forget about them.
>>>>>>
>>>>>>> and you can use gbm_gralloc directly in the
>>>>>>> (hopefully) not too distant future.
>>>>>>
>>>>>> I agree with this part, though. gbm_gralloc is definitely something
>>>>>> that we might want to migrate to in the future. Although it's a bit
>>>>>> lacking at the moment, so it might need a bit more time to develop the
>>>>>> missing bits. [I'm CCing Gurcheta

[Mesa-dev] [PATCH] os: add pipe_mutex_assert_locked()

2016-07-21 Thread Rob Clark

Would be nice if we could also have lockdep, like in the linux kernel.
But this is better than nothing.

Signed-off-by: Rob Clark <robdcl...@gmail.com>
---
 src/gallium/auxiliary/os/os_thread.h | 16 
 1 file changed, 16 insertions(+)

diff --git a/src/gallium/auxiliary/os/os_thread.h 
b/src/gallium/auxiliary/os/os_thread.h
index be8adcc..ec8adbc 100644
--- a/src/gallium/auxiliary/os/os_thread.h
+++ b/src/gallium/auxiliary/os/os_thread.h
@@ -116,6 +116,22 @@ typedef mtx_t pipe_mutex;
 #define pipe_mutex_unlock(mutex) \
(void) mtx_unlock(&(mutex))
 
+#define pipe_mutex_assert_locked(mutex) \
+   __pipe_mutex_assert_locked(&(mutex))
+
+static inline void
+__pipe_mutex_assert_locked(pipe_mutex *mutex)
+{
+#ifdef DEBUG
+   /* NOTE: this would not work for recursive mutexes, but
+* pipe_mutex doesn't support those
+*/
+   int ret = mtx_trylock(mutex);
+   assert(ret == thrd_busy);
+   if (ret == thrd_success)
+  mtx_unlock(mutex);
+#endif
+}
 
 /* pipe_condvar
  */
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] anv: add function to get prime buffer from memory+image

2016-07-20 Thread Rob Clark

On Wed, Jul 20, 2016 at 7:51 AM, Daniel Stone  wrote:
> On 20 July 2016 at 13:47, Daniel Stone  wrote:
>> On 19 July 2016 at 20:47, Jonathan  wrote:
>>> +typedef VkResult (VKAPI_PTR *PFN_vkGetDmaBufINTEL)(VkDevice device, 
>>> VkDeviceMemory mem, VkImage image, int *fd, uint32_t *pitch);
>>
>> Some things you should consider adding to this:
>>   - multi-plane support for multi-buffer formats (multiple fds,
>> multiple pitches, per-plane offset parameter)
>>   - an out parameter for format, using the DRM FourCC format codes
>>   - out parameters for a DRM modifier per-plane, to account for tiling
>> etc (and no longer calling anv_gem_set_tiling)
>
> Oops, hit send too early. Being able to export a VkFence to a kernel
> fence fd, as a companion, would also be incredibly helpful.
>

I wonder if android has already defined some extensions for this?
(Well I guess they must have already, no idea if public..)

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 07/10] egl/android: Make drm_gralloc headers optional

2016-07-19 Thread Rob Clark

On Tue, Jul 19, 2016 at 11:40 AM, Emil Velikov <emil.l.veli...@gmail.com> wrote:
> On 19 July 2016 at 14:36, Rob Clark <robdcl...@gmail.com> wrote:
>> On Tue, Jul 19, 2016 at 6:54 AM, Emil Velikov <emil.l.veli...@gmail.com> 
>> wrote:
>>> On 19 July 2016 at 04:21, Tomasz Figa <tf...@chromium.org> wrote:
>>>> On Tue, Jul 19, 2016 at 2:35 AM, Emil Velikov <emil.l.veli...@gmail.com> 
>>>> wrote:
>>>>> On 18 July 2016 at 16:38, Tomasz Figa <tf...@chromium.org> wrote:
>>>>>> On Mon, Jul 18, 2016 at 11:58 PM, Emil Velikov 
>>>>>> <emil.l.veli...@gmail.com> wrote:
>>>>>>> On 18 July 2016 at 13:02, Tomasz Figa <tf...@chromium.org> wrote:
>>>>>>>> On Mon, Jul 18, 2016 at 7:28 PM, Emil Velikov 
>>>>>>>> <emil.l.veli...@gmail.com> wrote:
>>>>>>>>> Hi Tomasz,
>>>>>>>>>
>>>>>>>>> On 15 July 2016 at 08:53, Tomasz Figa <tf...@chromium.org> wrote:
>>>>>>>>>
>>>>>>>>>> +#define DRM_RENDER_DEV_NAME  "%s/renderD%d"
>>>>>>>>>> +
>>>>>>>>>> +static int
>>>>>>>>>> +droid_open_device(_EGLDisplay *dpy)
>>>>>>>>>> +{
>>>>>>>>>> +   struct dri2_egl_display *dri2_dpy = dpy->DriverData;
>>>>>>>>>> +   const int limit = 64;
>>>>>>>>>> +   const int base = 128;
>>>>>>>>>> +   int fd;
>>>>>>>>>> +   int i;
>>>>>>>>>> +
>>>>>>>>>> +   for (i = 0; i < limit; ++i) {
>>>>>>>>>> +  char *card_path;
>>>>>>>>>> +  if (asprintf(&card_path, DRM_RENDER_DEV_NAME, DRM_DIR_NAME, 
>>>>>>>>>> base + i) < 0)
>>>>>>>>> Why do we need any of this ? What gralloc implementation are you guys 
>>>>>>>>> using ?
>>>>>>>>
>>>>>>>> We are using our heavily rewritten fork of some old drm_gralloc
>>>>>>>> release. It supports only render nodes and PRIME FDs and doesn't
>>>>>>>> export the DRI device FD outside of its internals (which isn't
>>>>>>>> actually even fully correct, at least for PRIME and render nodes, see
>>>>>>>> my reply to Rob's comments).
>>>>>>>>
>>>>>>> That explain it, since https://chromium.googlesource.com/ does not
>>>>>>> have gralloc, and
>>>>>>> https://android.googlesource.com/platform/external/drm_gralloc/ has
>>>>>>> both the DRM_FD define and the gem/flink function(s)?
>>>>>>>
>>>>>>> Can I suggest porting the fd drm_gralloc/gbm_gralloc patches to your
>>>>>>> private copy/repo. This way we'll have some consistency throughout
>>>>>>> gralloc implementations
>>>>>>
>>>>>> I'd prefer if any code using flink names was not added back. On top of
>>>>>> that, our drm_gralloc doesn't really have much in common with that
>>>>>> from android-x86 anymore (as I said, it was heavily rewritten) and
>>>>>> there is not even a chance that with its current design flink names
>>>>>> could even work.
>>>>>>
>>>>>> Also I'm wondering why we want to consider current brokenness of
>>>>>> drm_gralloc as something to be consistent with. It's supposed to be a
>>>>>> HAL library providing an uniform abstraction, but it exports private
>>>>>> APIs on the side instead. Moreover, as I mentioned before, flink names
>>>>>> are considered insecure and it would be really much better if we could
>>>>>> just forget about them.
>>>>>>
>>>>>>> and you can use gbm_gralloc directly in the
>>>>>>> (hopefully) not too distant future.
>>>>>>
>>>>>> I agree with this part, though. gbm_gralloc is definitely something
>>>>>> that we might want to migrate to in the future. Although it's a bit
>>>>>> lacking at the moment, so it might need a bit more time to develop the
>>>>>> missing bits. [I'm CCing Gurchetan, who was inves

Re: [Mesa-dev] [PATCH 3/9] st/mesa: completely rewrite state atoms

2016-07-19 Thread Rob Clark

()

On Mon, Jul 18, 2016 at 9:11 AM, Marek Olšák  wrote:
> From: Marek Olšák 
>
> The goal is to do this in st_validate_state:
>while (dirty)
>   atoms[u_bit_scan()]->update(st);
>
> That implies that atoms can't specify which flags they consume.
> There is exactly one ST_NEW_* flag for each atom. (58 flags in total)
>
> There are macros that combine multiple flags into one for easier use.
>
> All _NEW_* flags are translated into ST_NEW_* flags in st_invalidate_state.
> st/mesa doesn't keep the _NEW_* flags after that.
>
> torcs is 2% faster between the previous patch and the end of this series.
> ---
>  src/mesa/state_tracker/st_atom.c   | 153 +-
>  src/mesa/state_tracker/st_atom.h   | 210 
> +
>  src/mesa/state_tracker/st_atom_array.c |   4 -
>  src/mesa/state_tracker/st_atom_atomicbuf.c |  24 ---
>  src/mesa/state_tracker/st_atom_blend.c |   4 -
>  src/mesa/state_tracker/st_atom_clip.c  |   4 -
>  src/mesa/state_tracker/st_atom_constbuf.c  |  48 --
>  src/mesa/state_tracker/st_atom_depth.c |   4 -
>  src/mesa/state_tracker/st_atom_framebuffer.c   |   4 -
>  src/mesa/state_tracker/st_atom_image.c |  24 ---
>  src/mesa/state_tracker/st_atom_list.h  |  75 +
>  src/mesa/state_tracker/st_atom_msaa.c  |   8 -
>  src/mesa/state_tracker/st_atom_pixeltransfer.c |   4 -
>  src/mesa/state_tracker/st_atom_rasterizer.c|  16 --
>  src/mesa/state_tracker/st_atom_sampler.c   |   4 -
>  src/mesa/state_tracker/st_atom_scissor.c   |   8 -
>  src/mesa/state_tracker/st_atom_shader.c|  24 ---
>  src/mesa/state_tracker/st_atom_stipple.c   |   5 -
>  src/mesa/state_tracker/st_atom_storagebuf.c|  24 ---
>  src/mesa/state_tracker/st_atom_tess.c  |   4 -
>  src/mesa/state_tracker/st_atom_texture.c   |  24 ---
>  src/mesa/state_tracker/st_atom_viewport.c  |   4 -
>  src/mesa/state_tracker/st_cb_bitmap.c  |  10 +-
>  src/mesa/state_tracker/st_cb_bufferobjects.c   |  10 +-
>  src/mesa/state_tracker/st_cb_compute.c |   2 +-
>  src/mesa/state_tracker/st_cb_feedback.c|   2 +-
>  src/mesa/state_tracker/st_cb_program.c |  38 ++---
>  src/mesa/state_tracker/st_cb_texture.c |   2 +-
>  src/mesa/state_tracker/st_context.c| 100 ++--
>  src/mesa/state_tracker/st_context.h|  42 +
>  src/mesa/state_tracker/st_draw.c   |   4 +-
>  src/mesa/state_tracker/st_manager.c|   4 +-
>  32 files changed, 377 insertions(+), 516 deletions(-)
>  create mode 100644 src/mesa/state_tracker/st_atom_list.h
>
> diff --git a/src/mesa/state_tracker/st_atom.c 
> b/src/mesa/state_tracker/st_atom.c
> index 9d5cc0f..5843d2a 100644
> --- a/src/mesa/state_tracker/st_atom.c
> +++ b/src/mesa/state_tracker/st_atom.c
> @@ -37,87 +37,18 @@
>  #include "st_manager.h"
>
>
> -/**
> - * This is used to initialize st->render_atoms[].
> - */
> -static const struct st_tracked_state *render_atoms[] =
> -{
> -   &st_update_depth_stencil_alpha,
> -   &st_update_clip,
> -
> -   &st_update_fp,
> -   &st_update_gp,
> -   &st_update_tep,
> -   &st_update_tcp,
> -   &st_update_vp,
> -
> -   &st_update_rasterizer,
> -   &st_update_polygon_stipple,
> -   &st_update_viewport,
> -   &st_update_scissor,
> -   &st_update_window_rectangles,
> -   &st_update_blend,
> -   &st_update_vertex_texture,
> -   &st_update_fragment_texture,
> -   &st_update_geometry_texture,
> -   &st_update_tessctrl_texture,
> -   &st_update_tesseval_texture,
> -   &st_update_sampler, /* depends on update_*_texture for swizzle */
> -   &st_bind_vs_images,
> -   &st_bind_tcs_images,
> -   &st_bind_tes_images,
> -   &st_bind_gs_images,
> -   &st_bind_fs_images,
> -   &st_update_framebuffer, /* depends on update_*_texture and bind_*_images 
> */
> -   &st_update_msaa,
> -   &st_update_sample_shading,
> -   &st_update_vs_constants,
> -   &st_update_tcs_constants,
> -   &st_update_tes_constants,
> -   &st_update_gs_constants,
> -   &st_update_fs_constants,
> -   &st_bind_vs_ubos,
> -   &st_bind_tcs_ubos,
> -   &st_bind_tes_ubos,
> -   &st_bind_fs_ubos,
> -   &st_bind_gs_ubos,
> -   &st_bind_vs_atomics,
> -   &st_bind_tcs_atomics,
> -   &st_bind_tes_atomics,
> -   &st_bind_fs_atomics,
> -   &st_bind_gs_atomics,
> -   &st_bind_vs_ssbos,
> -   &st_bind_tcs_ssbos,
> -   &st_bind_tes_ssbos,
> -   &st_bind_fs_ssbos,
> -   &st_bind_gs_ssbos,
> -   &st_update_pixel_transfer,
> -   &st_update_tess,
> -
> -   /* this must be done after the vertex program update */
> -   &st_update_array
> -};
> -
> -
> -/**
> - * This is used to initialize st->compute_atoms[].
> - */
> -static const struct st_tracked_state *compute_atoms[] =
> +/* The list state update functions. */
> +static const struct st_tracked_state *atoms[] =
>  {
> -   &st_update_cp,
> -   &st_update_compute_texture,
> -   &st_update_sampler, /* depends on update_compute_texture for swizzle */
> -   &st_update_cs_constants,
> -   &st_bind_cs_ubos,
> -   &st_bind_cs_atomics,
> -   &st_bind_cs_ssbos,
> -   &st_bind_cs_images,
>

Re: [Mesa-dev] [PATCH 07/10] egl/android: Make drm_gralloc headers optional

2016-07-19 Thread Rob Clark

On Tue, Jul 19, 2016 at 6:54 AM, Emil Velikov  wrote:
> On 19 July 2016 at 04:21, Tomasz Figa  wrote:
>> On Tue, Jul 19, 2016 at 2:35 AM, Emil Velikov  
>> wrote:
>>> On 18 July 2016 at 16:38, Tomasz Figa  wrote:
 On Mon, Jul 18, 2016 at 11:58 PM, Emil Velikov  
 wrote:
> On 18 July 2016 at 13:02, Tomasz Figa  wrote:
>> On Mon, Jul 18, 2016 at 7:28 PM, Emil Velikov  
>> wrote:
>>> Hi Tomasz,
>>>
>>> On 15 July 2016 at 08:53, Tomasz Figa  wrote:
>>>
 +#define DRM_RENDER_DEV_NAME  "%s/renderD%d"
 +
 +static int
 +droid_open_device(_EGLDisplay *dpy)
 +{
 +   struct dri2_egl_display *dri2_dpy = dpy->DriverData;
 +   const int limit = 64;
 +   const int base = 128;
 +   int fd;
 +   int i;
 +
 +   for (i = 0; i < limit; ++i) {
 +  char *card_path;
 +  if (asprintf(&card_path, DRM_RENDER_DEV_NAME, DRM_DIR_NAME, 
 base + i) < 0)
>>> Why do we need any of this ? What gralloc implementation are you guys 
>>> using ?
>>
>> We are using our heavily rewritten fork of some old drm_gralloc
>> release. It supports only render nodes and PRIME FDs and doesn't
>> export the DRI device FD outside of its internals (which isn't
>> actually even fully correct, at least for PRIME and render nodes, see
>> my reply to Rob's comments).
>>
> That explain it, since https://chromium.googlesource.com/ does not
> have gralloc, and
> https://android.googlesource.com/platform/external/drm_gralloc/ has
> both the DRM_FD define and the gem/flink function(s)?
>
> Can I suggest porting the fd drm_gralloc/gbm_gralloc patches to your
> private copy/repo. This way we'll have some consistency throughout
> gralloc implementations

 I'd prefer if any code using flink names was not added back. On top of
 that, our drm_gralloc doesn't really have much in common with that
 from android-x86 anymore (as I said, it was heavily rewritten) and
 there is not even a chance that with its current design flink names
 could even work.

 Also I'm wondering why we want to consider current brokenness of
 drm_gralloc as something to be consistent with. It's supposed to be a
 HAL library providing an uniform abstraction, but it exports private
 APIs on the side instead. Moreover, as I mentioned before, flink names
 are considered insecure and it would be really much better if we could
 just forget about them.

> and you can use gbm_gralloc directly in the
> (hopefully) not too distant future.

 I agree with this part, though. gbm_gralloc is definitely something
 that we might want to migrate to in the future. Although it's a bit
 lacking at the moment, so it might need a bit more time to develop the
 missing bits. [I'm CCing Gurchetan, who was investigating GBM-backed
 gralloc usable for our purposes.]

 In any case, the missing flink API is quite easy to handle and can be
 just stubbed out in a local header as you suggested. I don't think it
 would hurt anyone and would definitely help us and anyone not willing
 to export any private APIs from their gralloc and rely only on the
 public HAL API.

>>> Looks like I wasn't clear enough here, really sorry about that. No
>>> objection on nuking _any_ of the gem/flink paths, but hoping to have
>>> the behaviour consistent with the one described in
>>> get_native_buffer_fd.
>>
>> Did you mean having the PRIME FD in native_handle_t::data[0]?
>>
>> If so, it's more or less guaranteed by the API, because all file
>> descriptors in handle have to be stored in first N (equals to
>> native_handle_t::numFds) ints of native_handle_t::data[] for
>> respective general code to properly transfer the FDs through binder
>> when sharing between processes.
>>
>> Our gralloc currently supports only one PRIME FD per buffer (no
>> separate memory planes for planar YUV) and stores it exactly in
>> native_handle_t::data[0].
>>
> Wasn't sure if the PRIME FD is at idx 0. Glad to hear it's there, thanks.
>
>>>
>
>>>
>>> Afaict the latter must provide reasonable result for
>>> hw_get_module(GRALLOC_HARDWARE_MODULE_ID...) and as it's missing the
>>> perform hook existing code should work just fine. Right ?
>>
>> Existing code would fail with -1 as file descriptor, wouldn't it? Or
>> I'm failing to see something?
>>
> Nope you're spot on - I had a dull moment. May I suggest reverting the
> patch which removed the GRALLOC_MODULE_PERFORM_GET_DRM_FD handling in
> your gralloc ? Reason being is that the proposed code is very 'flaky'
> and can open the wrong render node on systems which

Re: [Mesa-dev] [RFC] gallium/u_queue: add barrier function

2016-07-18 Thread Rob Clark

possibly.. although sprinkling queue_barrier() calls (which is at
least useful for debugging, although I think I won't use it in the end
after debugging) hasn't found the issue yet.  I did at least find an
issue w/ fence handling (I was grabbing the fence # potentially before
the batch was flushed), but that also doesn't seem to be the issue I
am seeing.

The idea of having a ring of N fences (where N is given by
max_jobs-1), rather than embedding the fence in the refcnt'd batch, is
interesting, and sounds like it might solve some problems.  I may end
up doing that..

BR,
-R

On Mon, Jul 18, 2016 at 7:34 PM, Marek Olšák <mar...@gmail.com> wrote:
> I think your issue is that you have self-releasing jobs with the
> cleanup callback and you automatically lose fences that way, so there
> is no way to wait for completion.
>
> Since you have only 1 thread with N jobs at most, I suggest you keep
> N+1 fences around (a ring of fences) that you reuse for new jobs and
> keep a pointer to the most-recently-used fence. That way you know
> which fence you need to wait on to make the whole queue idle.
>
> Marek
>
> On Mon, Jul 18, 2016 at 10:25 PM, Rob Clark <robdcl...@gmail.com> wrote:
>> Helper to block until all previous jobs are complete.
>> ---
>> So I think this might end up being useful to me in some cases.. but
>> the implementation only works for a single threaded queue (which is
>> all I need).  I could also just put a helper in my driver code.
>>
>> Opinions?
>>
>>  src/gallium/auxiliary/util/u_queue.c | 12 
>>  src/gallium/auxiliary/util/u_queue.h |  2 ++
>>  2 files changed, 14 insertions(+)
>>
>> diff --git a/src/gallium/auxiliary/util/u_queue.c 
>> b/src/gallium/auxiliary/util/u_queue.c
>> index 838464f..861faca 100644
>> --- a/src/gallium/auxiliary/util/u_queue.c
>> +++ b/src/gallium/auxiliary/util/u_queue.c
>> @@ -242,3 +242,15 @@ util_queue_add_job(struct util_queue *queue,
>> pipe_condvar_signal(queue->has_queued_cond);
>> pipe_mutex_unlock(queue->lock);
>>  }
>> +
>> +static void dummy_execute(void *job, int thread_index) {}
>> +
>> +/* blocks until all previously queued jobs complete: */
>> +void util_queue_barrier(struct util_queue *queue)
>> +{
>> +   struct util_queue_fence fence;
>> +   util_queue_fence_init(&fence);
>> +   util_queue_add_job(queue, &fence /*dummy*/, &fence, dummy_execute, NULL);
>> +   util_queue_job_wait(&fence);
>> +   util_queue_fence_destroy(&fence);
>> +}
>> diff --git a/src/gallium/auxiliary/util/u_queue.h 
>> b/src/gallium/auxiliary/util/u_queue.h
>> index 59646cc..8a22ee0 100644
>> --- a/src/gallium/auxiliary/util/u_queue.h
>> +++ b/src/gallium/auxiliary/util/u_queue.h
>> @@ -85,6 +85,8 @@ void util_queue_add_job(struct util_queue *queue,
>>
>>  void util_queue_job_wait(struct util_queue_fence *fence);
>>
>> +void util_queue_barrier(struct util_queue *queue);
>> +
>>  /* util_queue needs to be cleared to zeroes for this to work */
>>  static inline bool
>>  util_queue_is_initialized(struct util_queue *queue)
>> --
>> 2.7.4
>>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [RFC] gallium/u_queue: add barrier function

2016-07-18 Thread Rob Clark

On Mon, Jul 18, 2016 at 4:34 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote:
> On 18.07.2016 22:25, Rob Clark wrote:
>>
>> Helper to block until all previous jobs are complete.
>> ---
>> So I think this might end up being useful to me in some cases.. but
>> the implementation only works for a single threaded queue (which is
>> all I need).  I could also just put a helper in my driver code.
>>
>> Opinions?
>
>
> What do you need it for? ISTR Marek had a half-finished patch for
> dependencies, maybe that does what you need and is more expressive?

no, I don't think dependencies would really help me..

This issue I'm chasing down is a race condition which, I think,
amounts to we initially flush a batch when we don't think we need to
wait for it to complete, and later get a flush_resource() or a flush()
and realize we did actually need it to complete, but no longer have a
ref to the batch (or its contained fence)..

BR,
-R

> Cheers,
> Nicolai
>
>
>>
>>   src/gallium/auxiliary/util/u_queue.c | 12 
>>   src/gallium/auxiliary/util/u_queue.h |  2 ++
>>   2 files changed, 14 insertions(+)
>>
>> diff --git a/src/gallium/auxiliary/util/u_queue.c
>> b/src/gallium/auxiliary/util/u_queue.c
>> index 838464f..861faca 100644
>> --- a/src/gallium/auxiliary/util/u_queue.c
>> +++ b/src/gallium/auxiliary/util/u_queue.c
>> @@ -242,3 +242,15 @@ util_queue_add_job(struct util_queue *queue,
>>  pipe_condvar_signal(queue->has_queued_cond);
>>  pipe_mutex_unlock(queue->lock);
>>   }
>> +
>> +static void dummy_execute(void *job, int thread_index) {}
>> +
>> +/* blocks until all previously queued jobs complete: */
>> +void util_queue_barrier(struct util_queue *queue)
>> +{
>> +   struct util_queue_fence fence;
>> +   util_queue_fence_init(&fence);
>> +   util_queue_add_job(queue, &fence /*dummy*/, &fence, dummy_execute,
>> NULL);
>> +   util_queue_job_wait(&fence);
>> +   util_queue_fence_destroy(&fence);
>> +}
>> diff --git a/src/gallium/auxiliary/util/u_queue.h
>> b/src/gallium/auxiliary/util/u_queue.h
>> index 59646cc..8a22ee0 100644
>> --- a/src/gallium/auxiliary/util/u_queue.h
>> +++ b/src/gallium/auxiliary/util/u_queue.h
>> @@ -85,6 +85,8 @@ void util_queue_add_job(struct util_queue *queue,
>>
>>   void util_queue_job_wait(struct util_queue_fence *fence);
>>
>> +void util_queue_barrier(struct util_queue *queue);
>> +
>>   /* util_queue needs to be cleared to zeroes for this to work */
>>   static inline bool
>>   util_queue_is_initialized(struct util_queue *queue)
>>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [RFC] gallium/u_queue: add barrier function

2016-07-18 Thread Rob Clark

Helper to block until all previous jobs are complete.
---
So I think this might end up being useful to me in some cases.. but
the implementation only works for a single threaded queue (which is
all I need).  I could also just put a helper in my driver code.

Opinions?

 src/gallium/auxiliary/util/u_queue.c | 12 
 src/gallium/auxiliary/util/u_queue.h |  2 ++
 2 files changed, 14 insertions(+)

diff --git a/src/gallium/auxiliary/util/u_queue.c 
b/src/gallium/auxiliary/util/u_queue.c
index 838464f..861faca 100644
--- a/src/gallium/auxiliary/util/u_queue.c
+++ b/src/gallium/auxiliary/util/u_queue.c
@@ -242,3 +242,15 @@ util_queue_add_job(struct util_queue *queue,
pipe_condvar_signal(queue->has_queued_cond);
pipe_mutex_unlock(queue->lock);
 }
+
+static void dummy_execute(void *job, int thread_index) {}
+
+/* blocks until all previously queued jobs complete: */
+void util_queue_barrier(struct util_queue *queue)
+{
+   struct util_queue_fence fence;
+   util_queue_fence_init(&fence);
+   util_queue_add_job(queue, &fence /*dummy*/, &fence, dummy_execute, NULL);
+   util_queue_job_wait(&fence);
+   util_queue_fence_destroy(&fence);
+}
diff --git a/src/gallium/auxiliary/util/u_queue.h 
b/src/gallium/auxiliary/util/u_queue.h
index 59646cc..8a22ee0 100644
--- a/src/gallium/auxiliary/util/u_queue.h
+++ b/src/gallium/auxiliary/util/u_queue.h
@@ -85,6 +85,8 @@ void util_queue_add_job(struct util_queue *queue,
 
 void util_queue_job_wait(struct util_queue_fence *fence);
 
+void util_queue_barrier(struct util_queue *queue);
+
 /* util_queue needs to be cleared to zeroes for this to work */
 static inline bool
 util_queue_is_initialized(struct util_queue *queue)
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [RFC mesa] Introduce .editorconfig

2016-07-18 Thread Rob Clark

On Sun, Jul 17, 2016 at 7:46 PM, Eric Engestrom  wrote:
> diff --git a/src/gallium/drivers/freedreno/.editorconfig 
> b/src/gallium/drivers/freedreno/.editorconfig
> new file mode 100644
> index 000..cc8e11f
> --- /dev/null
> +++ b/src/gallium/drivers/freedreno/.editorconfig
> @@ -0,0 +1,2 @@
> +[*.{c,h}]
> +indent_style = tab

fyi, I guess also add:

   tab_width = 4

and not sure if there is a way to tell it to use K&R rules?

But looks like a good idea to me, thanks :-)

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] gallium: add PIPE_FLUSH_DEFERRED

2016-07-18 Thread Rob Clark

On Mon, Jul 18, 2016 at 9:24 AM, Marek Olšák <mar...@gmail.com> wrote:
> On Mon, Jul 18, 2016 at 2:25 PM, Rob Clark <robdcl...@gmail.com> wrote:
>> On Mon, Jul 18, 2016 at 8:16 AM, Marek Olšák <mar...@gmail.com> wrote:
>>> From: Marek Olšák <marek.ol...@amd.com>
>>>
>>> There are 2 uses:
>>> - Asynchronous flushing for multithreaded drivers.
>>> - Return a fence without flushing (mid-command-buffer fence). The driver
>>>   can defer flushing until fence_finish is called.
>>
>> This should also be useful to me when I get a chance to rebase the
>> gallium bits of the egl fence-fd patchset.  I guess the one question
>> is what the behaviour is in screen->fence_finish().  I think I have a
>> solution for that in freedreno (if I end up going the
>> flush-from-u_queue route, since I'd end up with enough locking to
>> flush without a ctx), although maybe that isn't the most general
>> solution for other drivers.  I wonder if we should add an optional
>> pipe_context ptr to fence_finish() for the cases when there is a
>> context bound?
>>
>> Either way, I guess we need a bit more documentation about that.  With
>> that resolved, r-b
>
> The behavior of fence_finish isn't changed. The only side effect can
> be that fence_finish will wait a little longer. No guidance is given
> as to how drivers should implement fence_finish with deferred flushes.
> If some drivers can't do deferred flushes safely, they should just
> ignore the flag.

ok, mind adding something to that effect to the gallium docs?

I believe, at least for egl fences (less sure about glx), it would be
possible to not have the restriction that driver must be able to flush
a fence without a context.  And for the context-bound case, passing an
optional pipe_context to fence_finish() (ie. NULL if no ctx bound)
would be sufficient:

"
If the sync object being blocked upon will not be signaled in finite
time (for example, by an associated fence command issued previously,
but not yet flushed to the graphics pipeline), then
eglClientWaitSyncKHR may wait forever. To help prevent this behavior
(footnote1), if the EGL_SYNC_FLUSH_COMMANDS_BIT_KHR bit is set in
<flags>, and <sync> is unsignaled when eglClientWaitSyncKHR is
called, then the equivalent of Flush() will be performed for the
current API context (i.e., the context returned by
eglGetCurrentContext()) before blocking on <sync>. If no context is
current for the bound API, the EGL_SYNC_FLUSH_COMMANDS_BIT_KHR bit
is ignored.
"

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] gallium: add PIPE_FLUSH_DEFERRED

2016-07-18 Thread Rob Clark

On Mon, Jul 18, 2016 at 8:16 AM, Marek Olšák  wrote:
> From: Marek Olšák 
>
> There are 2 uses:
> - Asynchronous flushing for multithreaded drivers.
> - Return a fence without flushing (mid-command-buffer fence). The driver
>   can defer flushing until fence_finish is called.

This should also be useful to me when I get a chance to rebase the
gallium bits of the egl fence-fd patchset.  I guess the one question
is what the behaviour is in screen->fence_finish().  I think I have a
solution for that in freedreno (if I end up going the
flush-from-u_queue route, since I'd end up with enough locking to
flush without a ctx), although maybe that isn't the most general
solution for other drivers.  I wonder if we should add an optional
pipe_context ptr to fence_finish() for the cases when there is a
context bound?

Either way, I guess we need a bit more documentation about that.  With
that resolved, r-b

> This is required to make Bioshock Infinite faster, which creates
> 1000 fences (flushes) per frame.

*ouch*

BR,
-R

> ---
>  src/gallium/docs/source/context.rst| 5 +
>  src/gallium/include/pipe/p_defines.h   | 3 ++-
>  src/mesa/state_tracker/st_cb_syncobj.c | 2 +-
>  3 files changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/docs/source/context.rst 
> b/src/gallium/docs/source/context.rst
> index 05c6f11..2f803ac 100644
> --- a/src/gallium/docs/source/context.rst
> +++ b/src/gallium/docs/source/context.rst
> @@ -463,6 +463,11 @@ Flushing
>
>  ``flush``
>
> +PIPE_FLUSH_END_OF_FRAME: Whether the flush marks the end of frame.
> +PIPE_FLUSH_DEFERRED: It is not required to flush right away, but it is 
> required
> +to return a valid fence.
> +
> +
>
>  ``flush_resource``
>
> diff --git a/src/gallium/include/pipe/p_defines.h 
> b/src/gallium/include/pipe/p_defines.h
> index 62fa673..a2f5193 100644
> --- a/src/gallium/include/pipe/p_defines.h
> +++ b/src/gallium/include/pipe/p_defines.h
> @@ -348,7 +348,8 @@ enum pipe_transfer_usage
>   */
>  enum pipe_flush_flags
>  {
> -   PIPE_FLUSH_END_OF_FRAME = (1 << 0)
> +   PIPE_FLUSH_END_OF_FRAME = (1 << 0),
> +   PIPE_FLUSH_DEFERRED = (1 << 1),
>  };
>
>  /**
> diff --git a/src/mesa/state_tracker/st_cb_syncobj.c 
> b/src/mesa/state_tracker/st_cb_syncobj.c
> index ec2687f..69f2a28 100644
> --- a/src/mesa/state_tracker/st_cb_syncobj.c
> +++ b/src/mesa/state_tracker/st_cb_syncobj.c
> @@ -73,7 +73,7 @@ static void st_fence_sync(struct gl_context *ctx, struct 
> gl_sync_object *obj,
> assert(condition == GL_SYNC_GPU_COMMANDS_COMPLETE && flags == 0);
> assert(so->fence == NULL);
>
> -   pipe->flush(pipe, &so->fence, 0);
> +   pipe->flush(pipe, &so->fence, PIPE_FLUSH_DEFERRED);
>  }
>
>  static void st_check_sync(struct gl_context *ctx, struct gl_sync_object *obj)
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 00/12] render reordering for optimized tile buffer usage

2016-07-16 Thread Rob Clark

On Fri, Jul 8, 2016 at 12:14 PM, Rob Clark <robdcl...@gmail.com> wrote:
> One of the annoying things, since pipe_resource is per-screen, not
> per-context, I end up having to push batch_cache down into screen.
> Which means that, for example, one context switching fb state could
> force flushing a batch from another context.  Eventually if I push of
> gmem+submit to a per-context helper thread, that should help keep
> things properly serialized.  Although I still need some (currently
> missing) mutexes to serialize batch_cache access, etc.  Also it means
> that the upper limit on # of batches is per-screen, not per-context.
> Not really sure what to do about that.  I really wish resources were
> not shared across contexts (but rather just use flink or dmabuf when
> you need to share), but I guess it is too late for that now :-(

so what sorts of scenarios do I have to care about multiple contexts
sharing a pipe_resource?  Pushing batch_cache down into screen is
awfully painful.  I more or less have it working, but not terribly
happy with all the extra locking and complexity.

If I restrict things so that a pipe_resource could only be active (ie.
pending read or write) by a single context at a time (without blocking
on a fence when switching between contexts) I can move batch_cache
back up to the context, and make a whole lot of pain go away..

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] mesa/st: reduce size of state->st bitmask

2016-07-14 Thread Rob Clark

On Thu, Jul 14, 2016 at 4:41 PM, Gustaw Smolarczyk <wielkie...@gmail.com> wrote:
> 2016-07-14 22:14 GMT+02:00 Rob Clark <robdcl...@gmail.com>:
>> In d035d50 this changed to 64b.. which I'm pretty sure was
>> unintentional.  Revert it back to 32b so the entire state struct
>> is a nice round 64b (cache-line size).
> Actually, cache line size, at least on most x86 and ARM processors, is
> 64B (bytes) not 64b (bits).

oh, yeah, right.. a bit of a think-o

(all the same, I think this need only be 32b)

BR,
-R


> Regards,
> Gustaw
>>
>> (Not sure that it would actually be measurable, but I did notice
>> that check_state() was hot in some benchmarks.)
>>
>> Signed-off-by: Rob Clark <robdcl...@gmail.com>
>> ---
>>  src/mesa/state_tracker/st_context.h | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/src/mesa/state_tracker/st_context.h 
>> b/src/mesa/state_tracker/st_context.h
>> index cc25e06..18394eb 100644
>> --- a/src/mesa/state_tracker/st_context.h
>> +++ b/src/mesa/state_tracker/st_context.h
>> @@ -70,7 +70,7 @@ struct u_upload_mgr;
>>
>>  struct st_state_flags {
>> GLbitfield mesa;  /**< Mask of _NEW_x flags */
>> -   uint64_t st;  /**< Mask of ST_NEW_x flags */
>> +   uint32_t st;  /**< Mask of ST_NEW_x flags */
>>  };
>>
>>  struct st_tracked_state {
>> --
>> 2.7.4
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] mesa/st: reduce size of state->st bitmask

2016-07-14 Thread Rob Clark

In d035d50 this changed to 64b.. which I'm pretty sure was
unintentional.  Revert it back to 32b so the entire state struct
is a nice round 64b (cache-line size).

(Not sure that it would actually be measurable, but I did notice
that check_state() was hot in some benchmarks.)

Signed-off-by: Rob Clark <robdcl...@gmail.com>
---
 src/mesa/state_tracker/st_context.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_context.h 
b/src/mesa/state_tracker/st_context.h
index cc25e06..18394eb 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -70,7 +70,7 @@ struct u_upload_mgr;
 
 struct st_state_flags {
GLbitfield mesa;  /**< Mask of _NEW_x flags */
-   uint64_t st;  /**< Mask of ST_NEW_x flags */
+   uint32_t st;  /**< Mask of ST_NEW_x flags */
 };
 
 struct st_tracked_state {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] gallium/u_queue: add optional cleanup callback

2016-07-14 Thread Rob Clark

Adds a second optional cleanup callback, called after the fence is
signaled.  This is needed if, for example, the queue has the last
reference to the object that embeds the util_queue_fence.  In this
case we cannot drop the ref in the main callback, since that would
result in the fence being destroyed before it is signaled.

Signed-off-by: Rob Clark <robdcl...@gmail.com>
---
v2: drop the util_queue_add_job2() and just fixup existing callers

 src/gallium/auxiliary/util/u_queue.c| 6 +-
 src/gallium/auxiliary/util/u_queue.h| 6 +-
 src/gallium/drivers/radeonsi/si_state_shaders.c | 3 ++-
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c   | 2 +-
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c   | 2 +-
 5 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_queue.c 
b/src/gallium/auxiliary/util/u_queue.c
index ac3afa1..838464f 100644
--- a/src/gallium/auxiliary/util/u_queue.c
+++ b/src/gallium/auxiliary/util/u_queue.c
@@ -91,6 +91,8 @@ static PIPE_THREAD_ROUTINE(util_queue_thread_func, input)
   if (job.job) {
  job.execute(job.job, thread_index);
  util_queue_fence_signal(job.fence);
+ if (job.cleanup)
+job.cleanup(job.job, thread_index);
   }
}
 
@@ -213,7 +215,8 @@ void
 util_queue_add_job(struct util_queue *queue,
void *job,
struct util_queue_fence *fence,
-   util_queue_execute_func execute)
+   util_queue_execute_func execute,
+   util_queue_execute_func cleanup)
 {
struct util_queue_job *ptr;
 
@@ -232,6 +235,7 @@ util_queue_add_job(struct util_queue *queue,
ptr->job = job;
ptr->fence = fence;
ptr->execute = execute;
+   ptr->cleanup = cleanup;
queue->write_idx = (queue->write_idx + 1) % queue->max_jobs;
 
queue->num_queued++;
diff --git a/src/gallium/auxiliary/util/u_queue.h 
b/src/gallium/auxiliary/util/u_queue.h
index f70d646..59646cc 100644
--- a/src/gallium/auxiliary/util/u_queue.h
+++ b/src/gallium/auxiliary/util/u_queue.h
@@ -50,6 +50,7 @@ struct util_queue_job {
void *job;
struct util_queue_fence *fence;
util_queue_execute_func execute;
+   util_queue_execute_func cleanup;
 };
 
 /* Put this into your context. */
@@ -75,10 +76,13 @@ void util_queue_destroy(struct util_queue *queue);
 void util_queue_fence_init(struct util_queue_fence *fence);
 void util_queue_fence_destroy(struct util_queue_fence *fence);
 
+/* optional cleanup callback is called after fence is signaled: */
 void util_queue_add_job(struct util_queue *queue,
 void *job,
 struct util_queue_fence *fence,
-util_queue_execute_func execute);
+util_queue_execute_func execute,
+util_queue_execute_func cleanup);
+
 void util_queue_job_wait(struct util_queue_fence *fence);
 
 /* util_queue needs to be cleared to zeroes for this to work */
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index c24130d..a423296 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1330,7 +1330,8 @@ static void *si_create_shader_selector(struct 
pipe_context *ctx,
si_init_shader_selector_async(sel, -1);
else
util_queue_add_job(>shader_compiler_queue, sel,
-   >ready, si_init_shader_selector_async);
+   >ready, si_init_shader_selector_async,
+   NULL);
 
return sel;
 }
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 1302f29..75bbfeb 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -1054,7 +1054,7 @@ static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
   if ((flags & RADEON_FLUSH_ASYNC) &&
   util_queue_is_initialized(>cs_queue)) {
  util_queue_add_job(>cs_queue, cs, >flush_completed,
-amdgpu_cs_submit_ib);
+amdgpu_cs_submit_ib, NULL);
   } else {
  amdgpu_cs_submit_ib(cs, 0);
   }
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index ed34a2c..8dc8b04 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -587,7 +587,7 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs 
*rcs,
 
 if (util_queue_is_initialized(>ws->cs_queue)) {
 util_queue_add_job(>ws->cs_queue, cs, >flush_completed,
-   radeon_drm_cs_emit_ioctl_oneshot);
+   radeon_drm_

Re: [Mesa-dev] mesa from git fails to compile

2016-07-14 Thread Rob Clark

On Thu, Jul 14, 2016 at 10:41 AM, Eric Engestrom
 wrote:
> On Thu, Jul 14, 2016 at 12:24:32PM +0200, Pali Rohár wrote:
>> Any news? Or possible fix?
>
> Have you tried Emil's suggestion, ie. upgrading to at least 0.8.0?
>
> Build system wizards:
> Any way to check the version and abort the compilation before running
> into this issue?  If it helps, this prints the version:
> python <<< 'import mako; print(mako.__version__)'

maybe just check in configure.ac?

BR,
-R

>>
>> On Sunday 10 July 2016 18:26:28 Jason Ekstrand wrote:
>> > This smells like strange Python problems.  Ccing the resident Python 
>> > expert.
>> > Hello, compiling mesa from git is failing on this error:
>> >
>> > Making all in isl
>> > make[5]: Entering directory `/«PKGBUILDDIR»/build/dri/src/intel/isl'
>> > python2.7  ../../../../../src/intel/isl/gen_format_layout.py \
>> > --csv ../../../../../src/intel/isl/isl_format_layout.csv --out
>> > isl_format_layout.c
>> > Traceback (most recent call last):
>> >   File "../../../../../src/intel/isl/gen_format_layout.py", line 92, in
>> > <module>
>> > output_encoding='utf-8')
>> > TypeError: __init__() got an unexpected keyword argument 'future_imports'
>> > make[5]: *** [isl_format_layout.c] Error 1
>> > make[5]: Leaving directory `/«PKGBUILDDIR»/build/dri/src/intel/isl'
>> > make[4]: *** [all-recursive] Error 1
>> > make[4]: Leaving directory `/«PKGBUILDDIR»/build/dri/src/intel'
>> > make[3]: *** [all-recursive] Error 1
>> > make[3]: Leaving directory `/«PKGBUILDDIR»/build/dri/src'
>> > make[2]: *** [all] Error 2
>> > make[2]: Leaving directory `/«PKGBUILDDIR»/build/dri/src'
>> > make[1]: *** [all-recursive] Error 1
>> > make[1]: Leaving directory `/«PKGBUILDDIR»/build/dri'
>> > make: *** [debian/stamp/x86_64-linux-gnu-build-dri] Error 2
>> >
>> > Any idea where is problem and how to fix it?
>> >
>> > Full build log is available at:
>> >
>> > https://launchpad.net/~pali/+archive/ubuntu/graphics-drivers/+build/10446196/+files/buildlog_ubuntu-precise-amd64.mesa-lts-trusty_11.3.0-git201607100358.5c17fb2~ubuntu12.04.1_BUILDING.txt.gz
>> >
>> > --
>> > Pali Rohár
>> > pali.ro...@gmail.com
>> >
>> > ___
>> > mesa-dev mailing list
>> > mesa-dev@lists.freedesktop.org
>> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>> --
>> Pali Rohár
>> pali.ro...@gmail.com
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] freedreno/a2xx: Fix sign compare warnings

2016-07-14 Thread Rob Clark

I've pushed 'em, thx

On Wed, Jul 13, 2016 at 1:03 PM, Francesco Ansanelli
 wrote:
> Yes, please.
>
> Cheers,
> Francesco
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] freedreno/a2xx: Fix sign compare warnings

2016-07-13 Thread Rob Clark

On Thu, Jun 30, 2016 at 1:16 PM, Francesco Ansanelli
 wrote:
>

this looks like the right thing to do.. r-b for the series.  Do you
need me to push?

BR,
-R

> ---
>  src/gallium/drivers/freedreno/a2xx/fd2_screen.c |8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_screen.c 
> b/src/gallium/drivers/freedreno/a2xx/fd2_screen.c
> index c2baa6f..fe4849b 100644
> --- a/src/gallium/drivers/freedreno/a2xx/fd2_screen.c
> +++ b/src/gallium/drivers/freedreno/a2xx/fd2_screen.c
> @@ -61,7 +61,7 @@ fd2_screen_is_format_supported(struct pipe_screen *pscreen,
>
> if ((usage & (PIPE_BIND_SAMPLER_VIEW |
> PIPE_BIND_VERTEX_BUFFER)) &&
> -   (fd2_pipe2surface(format) != ~0)) {
> +   (fd2_pipe2surface(format) != ~0u)) {
> retval |= usage & (PIPE_BIND_SAMPLER_VIEW |
> PIPE_BIND_VERTEX_BUFFER);
> }
> @@ -70,7 +70,7 @@ fd2_screen_is_format_supported(struct pipe_screen *pscreen,
> PIPE_BIND_DISPLAY_TARGET |
> PIPE_BIND_SCANOUT |
> PIPE_BIND_SHARED)) &&
> -   (fd2_pipe2color(format) != ~0)) {
> +   (fd2_pipe2color(format) != ~0u)) {
> retval |= usage & (PIPE_BIND_RENDER_TARGET |
> PIPE_BIND_DISPLAY_TARGET |
> PIPE_BIND_SCANOUT |
> @@ -78,12 +78,12 @@ fd2_screen_is_format_supported(struct pipe_screen 
> *pscreen,
> }
>
> if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
> -   (fd_pipe2depth(format) != ~0)) {
> +   (fd_pipe2depth(format) != ~0u)) {
> retval |= PIPE_BIND_DEPTH_STENCIL;
> }
>
> if ((usage & PIPE_BIND_INDEX_BUFFER) &&
> -   (fd_pipe2index(format) != ~0)) {
> +   (fd_pipe2index(format) != ~0u)) {
> retval |= PIPE_BIND_INDEX_BUFFER;
> }
>
> --
> 1.7.9.5
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] gallium/u_queue: add optional cleanup callback

2016-07-13 Thread Rob Clark

Adds a second optional cleanup callback, called after the fence is
signaled.  This is needed if, for example, the queue has the last
reference to the object that embeds the util_queue_fence.  In this
case we cannot drop the ref in the main callback, since that would
result in the fence being destroyed before it is signaled.

Signed-off-by: Rob Clark <robdcl...@gmail.com>
---
Maybe adding util_queue_add_job2() is a bit overkill.. although I
think Marek has some in-flight stuff using u_queue, so maybe this
approach is less conflicty?

 src/gallium/auxiliary/util/u_queue.c | 12 
 src/gallium/auxiliary/util/u_queue.h | 21 +
 2 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_queue.c 
b/src/gallium/auxiliary/util/u_queue.c
index ac3afa1..74f6a67 100644
--- a/src/gallium/auxiliary/util/u_queue.c
+++ b/src/gallium/auxiliary/util/u_queue.c
@@ -91,6 +91,8 @@ static PIPE_THREAD_ROUTINE(util_queue_thread_func, input)
   if (job.job) {
  job.execute(job.job, thread_index);
  util_queue_fence_signal(job.fence);
+ if (job.cleanup)
+job.cleanup(job.job, thread_index);
   }
}
 
@@ -210,10 +212,11 @@ util_queue_fence_destroy(struct util_queue_fence *fence)
 }
 
 void
-util_queue_add_job(struct util_queue *queue,
-   void *job,
-   struct util_queue_fence *fence,
-   util_queue_execute_func execute)
+util_queue_add_job2(struct util_queue *queue,
+void *job,
+struct util_queue_fence *fence,
+util_queue_execute_func execute,
+util_queue_execute_func cleanup)
 {
struct util_queue_job *ptr;
 
@@ -232,6 +235,7 @@ util_queue_add_job(struct util_queue *queue,
ptr->job = job;
ptr->fence = fence;
ptr->execute = execute;
+   ptr->cleanup = cleanup;
queue->write_idx = (queue->write_idx + 1) % queue->max_jobs;
 
queue->num_queued++;
diff --git a/src/gallium/auxiliary/util/u_queue.h 
b/src/gallium/auxiliary/util/u_queue.h
index f70d646..ec9d21b 100644
--- a/src/gallium/auxiliary/util/u_queue.h
+++ b/src/gallium/auxiliary/util/u_queue.h
@@ -50,6 +50,7 @@ struct util_queue_job {
void *job;
struct util_queue_fence *fence;
util_queue_execute_func execute;
+   util_queue_execute_func cleanup;
 };
 
 /* Put this into your context. */
@@ -75,10 +76,22 @@ void util_queue_destroy(struct util_queue *queue);
 void util_queue_fence_init(struct util_queue_fence *fence);
 void util_queue_fence_destroy(struct util_queue_fence *fence);
 
-void util_queue_add_job(struct util_queue *queue,
-void *job,
-struct util_queue_fence *fence,
-util_queue_execute_func execute);
+/* optional cleanup callback is called after fence is signaled: */
+void util_queue_add_job2(struct util_queue *queue,
+ void *job,
+ struct util_queue_fence *fence,
+ util_queue_execute_func execute,
+ util_queue_execute_func cleanup);
+
+static inline void
+util_queue_add_job(struct util_queue *queue,
+   void *job,
+   struct util_queue_fence *fence,
+   util_queue_execute_func execute)
+{
+   util_queue_add_job2(queue, job, fence, execute, NULL);
+}
+
 void util_queue_job_wait(struct util_queue_fence *fence);
 
 /* util_queue needs to be cleared to zeroes for this to work */
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 04/13] todo! dri: Questions about fence fd ownership

2016-07-10 Thread Rob Clark

On Fri, Jul 8, 2016 at 8:00 PM, Chad Versace  wrote:
> See the comments.
> ---
>  include/GL/internal/dri_interface.h | 9 +
>  1 file changed, 9 insertions(+)
>
> diff --git a/include/GL/internal/dri_interface.h 
> b/include/GL/internal/dri_interface.h
> index 99c83ec..051ddb4 100644
> --- a/include/GL/internal/dri_interface.h
> +++ b/include/GL/internal/dri_interface.h
> @@ -418,6 +418,12 @@ struct __DRI2fenceExtensionRec {
>  *
>  * \param ctx the context associated with the fence
>  * \param fd  the fence fd or -1
> +*
> +* TODO(chadv): Who owns the fence fd when the caller imports it (when
> +* fd != -1)? Does EGL or the driver own it?
> +*
> +* TODO(chadv): Who owns the fence fd when the driver creates it (when
> +* fd == -1)? Does EGL or the driver own it?

btw, thanks for picking up this patchset.. it was on my TODO list for
a while, but never quite bubbled up to the top..

When the caller creates an egl fence from an fd, the driver takes
ownership (and the caller must dup() if it wants to hold on to the fd
itself).  When caller calls eglDupNativeFenceFD() the caller takes
ownership of the returned fd (so the driver must dup()).

As far as internal ownership, I don't 100% remember what my plan was..
I guess the driver needs to dup(), since at the egl api level the
fence can be destroyed while the internal dri2_egl_sync is still live
due to driver holding a ref.

BR,
-R

>  */
> void *(*create_fence_fd)(__DRIcontext *ctx, int fd);
>
> @@ -430,6 +436,9 @@ struct __DRI2fenceExtensionRec {
>  *
>  * \param screen  the screen associated with the fence
>  * \param fence   the fence
> +*
> +* TODO(chadv): Should the driver return its actual fence fd? Or should it
> +* return a dup'd fd?
>  */
> int (*get_fence_fd)(__DRIscreen *screen, void *fence);
>  };
> --
> 2.9.0.rc2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 00/12] render reordering for optimized tile buffer usage

2016-07-08 Thread Rob Clark

On Sat, Jul 2, 2016 at 12:52 PM, Rob Clark <robdcl...@gmail.com> wrote:
> So, games/apps that are aware of how a tiler gpu works will make an
> effort to avoid mid-batch (tile pass) updates to textures, UBOs, etc,
> since this will force a flush, and extra resolve (tile->mem) and
> restore (mem->tile) in the next batch.  They also avoid unnecessary
> framebuffer switches, for the same reason.
>
> But turns out that many games, benchmarks, etc, aren't very good at
> this.  But what if we could re-order the batches (and potentially
> shadow texture/UBO/etc resources) to minimize the tile passes and
> unnecessary resolve/restore?
>
> This is based on a rough idea that Eric suggested a while back, and
> a few other experiments that I have been trying recently.  It boils
> down to three parts:
>
> 1) Add an fd_batch object, which tracks cmdstream being built for that
>particular tile pass.  State that is global to the tile pass is
>move from fd_context to fd_batch.  (Mostly the framebuffer state,
>but also so internal tracking that is done to decide whether to
>use GMEM or sysmem/bypass mode, etc.)
>
>Tracking of resources written/read in the batch is also moved from
>ctx to batch.

So, it turned out that tracking only the most recent batch that reads
a resource leads to unnecessary dependencies, and results in batches
getting force-flushed (to avoid a dependency loop) when otherwise not
needed.

I initially did things this way so I could have a single list_head in
the pipe_resource, and to avoid needing to track a 'struct set' of
batches per pipe_resource.  But we really need a way to allow tracking
of multiple batches that read a resource without introducing an
artificial dependency between the reading batches.

So I came up with a different approach after discussing a few
different options with glisse.  It involves putting an upper bound on
the # of batches at 32 (although 64 would be a possibility).  In the
batch, we end up needing a hash-set to track resources accessed by the
batch.  But in the resource we only need a bitmask of which batches
access this resource (and a single 'struct fd_batch *write_batch' for
most recent writer).  (And I check the bitmask to short-circuit the
hashset lookup/insert in the common case.)

So now I'm getting ~+20% on manhattan, and a bit more improvement in
xonotic than before.  There are still a few glitches in xonotic (the
increased re-ordering exposes that occlusion query is completely
broken and queries need some work to dtrt in the face of re-ordering).
And the map in the upper left corner somehow doesn't show the
outline/map of the level (just the dots where the players are at).
Not sure yet what is going on there.

Mostly I only hit forced flushes due to hitting upper limit on # of
batches during game startup, when it is doing a lot of texture uploads
and mipmap generation, but not yet submitted any rendering that uses
those textures.  And an upper-bound on un-flushed batches in that sort
of scenario actually seems like a good thing.  Although I could
probably be more clever about picking which batch(es) to flush in that
scenario.  The upper limit could be problematic if someone uploaded
layer 0 to a bunch of textures, and then generated mipmap for all of
the textures (as opposed to interleaving upload/genmipmap).  I guess
you probably have to go out of your way to be that stupid, so meh?

One of the annoying things, since pipe_resource is per-screen, not
per-context, I end up having to push batch_cache down into screen.
Which means that, for example, one context switching fb state could
force flushing a batch from another context.  Eventually if I push of
gmem+submit to a per-context helper thread, that should help keep
things properly serialized.  Although I still need some (currently
missing) mutexes to serialize batch_cache access, etc.  Also it means
that the upper limit on # of batches is per-screen, not per-context.
Not really sure what to do about that.  I really wish resources were
not shared across contexts (but rather just use flink or dmabuf when
you need to share), but I guess it is too late for that now :-(

https://github.com/freedreno/mesa/commit/e23dac02234de1c688efbad58758fdf9d837c94b

BR,
-R

> 2) Add a batch-cache.  Previously, whenever new framebuffer state is
>set, it forced a flush.  Now (if reordering is enabled), we use
>the framebuffer state as key into a hashtable to map it to an
>existing batch (if there is one, otherwise construct a new batch
>and add it to the table).
>
>When a resource is marked as read/written by a batch, which is
>already pending access by another batch, a dependency between the
>two batches is added.
>
>TODO there is probably a bit more room for improvement here.  See
>below analysis of supertuxkart.
>
> 3) Shadow resources.  Mid-bat

Re: [Mesa-dev] Mesa 12.0.0 release candidate 4

2016-07-06 Thread Rob Clark

On Thu, Jun 23, 2016 at 9:35 AM, Emil Velikov  wrote:
> Hi all,
>
> On 21 June 2016 at 15:35, Emil Velikov  wrote:
>> The fourth release candidate for Mesa 12.0.0 is now available.
>>
>> Note: this is the final release candidate, with Mesa 12.0.0 expected in a 
>> couple of days.
>>
> Considering the requests, from different parties, the final release
> will be out tomorrow Friday after 20:00 GMT.
>
> All your nominations (that have landed in master, if
> applicable) will be included, but do let me know if certain patch(es)
> should be included/excluded from the release.

btw, in case you missed my note on IRC, these would be good to have on
the 12.0 branch:

7295428 freedreno: fix crash on smaller gpus and higher resolutions
01ccb0d i965: don't drop const initializers in vector splitting
f78a6b1 glsl: add driconf to zero-init unintialized vars

BR,
-R

> Thanks
> Emil
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 00/12] render reordering for optimized tile buffer usage

2016-07-06 Thread Rob Clark

On Sat, Jul 2, 2016 at 12:52 PM, Rob Clark <robdcl...@gmail.com> wrote:
> So, games/apps that are aware of how a tiler gpu works will make an
> effort to avoid mid-batch (tile pass) updates to textures, UBOs, etc,
> since this will force a flush, and extra resolve (tile->mem) and
> restore (mem->tile) in the next batch.  They also avoid unnecessary
> framebuffer switches, for the same reason.
>
> But turns out that many games, benchmarks, etc, aren't very good at
> this.  But what if we could re-order the batches (and potentially
> shadow texture/UBO/etc resources) to minimize the tile passes and
> unnecessary resolve/restore?
>
> This is based on a rough idea that Eric suggested a while back, and
> a few other experiments that I have been trying recently.  It boils
> down to three parts:
>
> 1) Add an fd_batch object, which tracks cmdstream being built for that
>particular tile pass.  State that is global to the tile pass is
>moved from fd_context to fd_batch.  (Mostly the framebuffer state,
>but also some internal tracking that is done to decide whether to
>use GMEM or sysmem/bypass mode, etc.)
>
>Tracking of resources written/read in the batch is also moved from
>ctx to batch.
>
> 2) Add a batch-cache.  Previously, whenever new framebuffer state is
>set, it forced a flush.  Now (if reordering is enabled), we use
>the framebuffer state as key into a hashtable to map it to an
>existing batch (if there is one, otherwise construct a new batch
>and add it to the table).
>
>When a resource is marked as read/written by a batch, which is
>already pending access by another batch, a dependency between the
>two batches is added.

so I noticed one slightly annoying thing.. u_blitter doesn't clear the
vtx sampler state, and only updates the first sampler for frag state..
resulting in us adding a bunch of unneeded dependencies during a
blitter draw..

not sure if it is better to fix u_blitter or hack around it in the driver..

BR,
-R

>TODO there is probably a bit more room for improvement here.  See
>below analysis of supertuxkart.
>
> 3) Shadow resources.  Mid-batch UBO updates or uploading new contents
>to an in-use texture is sadly too common.  Traditional (non-tiler)
>gpu's could solve this with a staging buffer, and blitting from the
>staging to real buffer at the appropriate spot in the cmdstream.
>But this doesn't work for a tiling gpu, since we'll need the old
>contents again when we move on to the next tile.  To solve this,
>allocate a new buffer and back-blit the previous contents to the
>new buffer.  The existing buffer becomes a shadow and is unref'd
>(the backing GEM object is kept alive since it is referenced by
>the cmdstream).
>
>For example, a texture upload + mipmap gen turns into transfer_map
>for level zero (glTexSubImage*, etc), followed by blits to the
>remaining mipmap levels (glGenerateMipmap()).  So in transfer_map()
>if writing new contents into the buffer would trigger a flush or
>stall, we shadow the existing buffer, and blit the remaining levels
>from old to new.  Each blit turns into a batch (different frame-
>buffer state), and is not immediately flushed, but just hangs out
>in the batch cache.  When the next blit (from glGenerateMipmap())
>overwrites the contents from the back-blit, we realize this and
>drop the previous rendering to the batch, so in many cases the
>back-blit ends up discarded.
>
>
>
> Results:
>
> supertuxkart was a big winner, with an overall ~30% boost, making the
> new render engine finally playable on most levels.  Fps varies a lot
> by level, but on average going from 14-19fps to 20-25fps.
>
> (Sadly, the old render engine, which was much faster on lower end hw,
> seems to be in disrepair.)
>
> I did also add some instrumentation to collect some stats on # of
> different sorts of batches.  Since supertuxkart --profile-laps is
> not repeatable, I could not directly compare results there, but I
> could compare an apitrace replay of stk level:
>
>   normal:  batch_sysmem=10398, batch_gmem=6958, batch_restore=3864
>   reorder: batch_sysmem=16825, batch_gmem=6956, batch_restore=3863
>   (for 792 frames)
>
> I was expecting a drop in gmem batches, and restores, because stk
> does two problematic things: (1) render target switches, ie. clear,
> switch fb, clear, switch fb, draw, etc., and (2) mid-batch UBO
> update.
>
> I've looked a bit into the render target switches, but it seems like
> it is mixing/matching zsbuf and cbuf's in a way that makes them map
> to different batches.  Ie:
>
>set fb: zsbuf=A, cbuf[0]=B
>clear color0
>clear stencil

Re: [Mesa-dev] [PATCH] vbo: fix attr reset

2016-07-06 Thread Rob Clark

On Wed, Jul 6, 2016 at 9:53 AM, Brian Paul <bri...@vmware.com> wrote:
> On 07/05/2016 08:57 PM, Mathias Fröhlich wrote:
>>
>> On Tuesday, July 05, 2016 15:47:45 Rob Clark wrote:
>>
>>  > In bc4e0c4 (vbo: Use a bitmask to track the active arrays in
>> vbo_exec*.)
>>
>>  > we stopped looping over all the attributes and resetting all slots.
>>
>>  > Which exposed an issue in vbo_exec_bind_arrays() for handling GENERIC0
>>
>>  > vs. POS.
>>
>>  >
>>
>>  > Split out a helper which can reset a particular slot, so that
>>
>>  > vbo_exec_bind_arrays() can re-use it to reset POS.
>
>
> Rob, can you mention in the commit msg that this fixes 0ad (I think that was
> it)?

yup, it was 0ad.. I'll add a note in the commit msg

BR,
-R


> -Brian
>
>
>
>>
>> I had a similar problem with the pos/generic0 handling
>>
>> fixed, but obviously missed this!
>>
>> The fix looks good to me.
>>
>> Thanks for taking care!
>>
>> Reviewed-by: Mathias Fröhlich <mathias.froehl...@web.de>
>>
>>  >
>>
>>  > Signed-off-by: Rob Clark <robdcl...@gmail.com>
>>
>>  > ---
>>
>>  > src/mesa/vbo/vbo_exec.h | 1 +
>>
>>  > src/mesa/vbo/vbo_exec_api.c | 14 --
>>
>>  > src/mesa/vbo/vbo_exec_draw.c | 2 +-
>>
>>  > 3 files changed, 10 insertions(+), 7 deletions(-)
>>
>>  >
>>
>>  > diff --git a/src/mesa/vbo/vbo_exec.h b/src/mesa/vbo/vbo_exec.h
>>
>>  > index 5e20cf6..4f11f17 100644
>>
>>  > --- a/src/mesa/vbo/vbo_exec.h
>>
>>  > +++ b/src/mesa/vbo/vbo_exec.h
>>
>>  > @@ -152,6 +152,7 @@ void vbo_exec_invalidate_state( struct gl_context
>> *ctx, GLbitfield new_state );
>>
>>  >
>>
>>  > /* Internal functions:
>>
>>  > */
>>
>>  > +void vbo_reset_attr(struct vbo_exec_context *exec, GLuint attr);
>>
>>  >
>>
>>  > void vbo_exec_vtx_init( struct vbo_exec_context *exec );
>>
>>  > void vbo_exec_vtx_destroy( struct vbo_exec_context *exec );
>>
>>  > diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
>>
>>  > index e02bb90..32f15db 100644
>>
>>  > --- a/src/mesa/vbo/vbo_exec_api.c
>>
>>  > +++ b/src/mesa/vbo/vbo_exec_api.c
>>
>>  > @@ -1275,16 +1275,18 @@ void vbo_exec_FlushVertices( struct
>> gl_context *ctx, GLuint flags )
>>
>>  > #endif
>>
>>  > }
>>
>>  >
>>
>>  > +void vbo_reset_attr(struct vbo_exec_context *exec, GLuint attr)
>>
>>  > +{
>>
>>  > + exec->vtx.attrsz[attr] = 0;
>>
>>  > + exec->vtx.attrtype[attr] = GL_FLOAT;
>>
>>  > + exec->vtx.active_sz[attr] = 0;
>>
>>  > +}
>>
>>  >
>>
>>  > static void reset_attrfv( struct vbo_exec_context *exec )
>>
>>  > -{
>>
>>  > +{
>>
>>  > while (exec->vtx.enabled) {
>>
>>  > const int i = u_bit_scan64(&exec->vtx.enabled);
>>
>>  > - assert(exec->vtx.attrsz[i]);
>>
>>  > -
>>
>>  > - exec->vtx.attrsz[i] = 0;
>>
>>  > - exec->vtx.attrtype[i] = GL_FLOAT;
>>
>>  > - exec->vtx.active_sz[i] = 0;
>>
>>  > + vbo_reset_attr(exec, i);
>>
>>  > }
>>
>>  >
>>
>>  > exec->vtx.vertex_size = 0;
>>
>>  > diff --git a/src/mesa/vbo/vbo_exec_draw.c
>> b/src/mesa/vbo/vbo_exec_draw.c
>>
>>  > index 8d1b2c0..0ef3081 100644
>>
>>  > --- a/src/mesa/vbo/vbo_exec_draw.c
>>
>>  > +++ b/src/mesa/vbo/vbo_exec_draw.c
>>
>>  > @@ -213,7 +213,7 @@ vbo_exec_bind_arrays( struct gl_context *ctx )
>>
>>  > exec->vtx.inputs[VERT_ATTRIB_GENERIC0] = exec->vtx.inputs[0];
>>
>>  > exec->vtx.attrsz[VERT_ATTRIB_GENERIC0] = exec->vtx.attrsz[0];
>>
>>  > exec->vtx.attrptr[VERT_ATTRIB_GENERIC0] = exec->vtx.attrptr[0];
>>
>>  > - exec->vtx.attrsz[0] = 0;
>>
>>  > + vbo_reset_attr(exec, VERT_ATTRIB_POS);
>>
>>  > exec->vtx.enabled &= (~BITFIELD64_BIT(VBO_ATTRIB_POS));
>>
>>  > exec->vtx.enabled |= BITFIELD64_BIT(VBO_ATTRIB_GENERIC0);
>>
>>  > }
>>
>>  >
>>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] vbo: fix attr reset

2016-07-05 Thread Rob Clark

In bc4e0c4 (vbo: Use a bitmask to track the active arrays in vbo_exec*.)
we stopped looping over all the attributes and resetting all slots.
Which exposed an issue in vbo_exec_bind_arrays() for handling GENERIC0
vs. POS.

Split out a helper which can reset a particular slot, so that
vbo_exec_bind_arrays() can re-use it to reset POS.

Signed-off-by: Rob Clark <robdcl...@gmail.com>
---
 src/mesa/vbo/vbo_exec.h  |  1 +
 src/mesa/vbo/vbo_exec_api.c  | 14 --
 src/mesa/vbo/vbo_exec_draw.c |  2 +-
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/mesa/vbo/vbo_exec.h b/src/mesa/vbo/vbo_exec.h
index 5e20cf6..4f11f17 100644
--- a/src/mesa/vbo/vbo_exec.h
+++ b/src/mesa/vbo/vbo_exec.h
@@ -152,6 +152,7 @@ void vbo_exec_invalidate_state( struct gl_context *ctx, 
GLbitfield new_state );
 
 /* Internal functions:
  */
+void vbo_reset_attr(struct vbo_exec_context *exec, GLuint attr);
 
 void vbo_exec_vtx_init( struct vbo_exec_context *exec );
 void vbo_exec_vtx_destroy( struct vbo_exec_context *exec );
diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
index e02bb90..32f15db 100644
--- a/src/mesa/vbo/vbo_exec_api.c
+++ b/src/mesa/vbo/vbo_exec_api.c
@@ -1275,16 +1275,18 @@ void vbo_exec_FlushVertices( struct gl_context *ctx, 
GLuint flags )
 #endif
 }
 
+void vbo_reset_attr(struct vbo_exec_context *exec, GLuint attr)
+{
+   exec->vtx.attrsz[attr] = 0;
+   exec->vtx.attrtype[attr] = GL_FLOAT;
+   exec->vtx.active_sz[attr] = 0;
+}
 
 static void reset_attrfv( struct vbo_exec_context *exec )
-{   
+{
while (exec->vtx.enabled) {
   const int i = u_bit_scan64(&exec->vtx.enabled);
-  assert(exec->vtx.attrsz[i]);
-
-  exec->vtx.attrsz[i] = 0;
-  exec->vtx.attrtype[i] = GL_FLOAT;
-  exec->vtx.active_sz[i] = 0;
+  vbo_reset_attr(exec, i);
}
 
exec->vtx.vertex_size = 0;
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 8d1b2c0..0ef3081 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -213,7 +213,7 @@ vbo_exec_bind_arrays( struct gl_context *ctx )
  exec->vtx.inputs[VERT_ATTRIB_GENERIC0] = exec->vtx.inputs[0];
  exec->vtx.attrsz[VERT_ATTRIB_GENERIC0] = exec->vtx.attrsz[0];
  exec->vtx.attrptr[VERT_ATTRIB_GENERIC0] = exec->vtx.attrptr[0];
- exec->vtx.attrsz[0] = 0;
+ vbo_reset_attr(exec, VERT_ATTRIB_POS);
  exec->vtx.enabled &= (~BITFIELD64_BIT(VBO_ATTRIB_POS));
  exec->vtx.enabled |= BITFIELD64_BIT(VBO_ATTRIB_GENERIC0);
   }
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 28/29] vbo: Use a bitmask to track the active arrays in vbo_exec*.

2016-07-05 Thread Rob Clark

ok, so either (or both) of:

---
diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
index e02bb90..a7ae50b 100644
--- a/src/mesa/vbo/vbo_exec_api.c
+++ b/src/mesa/vbo/vbo_exec_api.c
@@ -1278,9 +1278,11 @@ void vbo_exec_FlushVertices( struct gl_context
*ctx, GLuint flags )

 static void reset_attrfv( struct vbo_exec_context *exec )
 {
+   /* counter-part to trick in vbo_exec_bind_arrays().. */
+   if (exec->vtx.active_sz[0])
+  exec->vtx.enabled |= (1 << VERT_ATTRIB_POS);
while (exec->vtx.enabled) {
   const int i = u_bit_scan64(&exec->vtx.enabled);
-  assert(exec->vtx.attrsz[i]);

   exec->vtx.attrsz[i] = 0;
   exec->vtx.attrtype[i] = GL_FLOAT;
---

or

---
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 8d1b2c0..cbac3be 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -214,6 +214,7 @@ vbo_exec_bind_arrays( struct gl_context *ctx )
  exec->vtx.attrsz[VERT_ATTRIB_GENERIC0] = exec->vtx.attrsz[0];
  exec->vtx.attrptr[VERT_ATTRIB_GENERIC0] = exec->vtx.attrptr[0];
  exec->vtx.attrsz[0] = 0;
+ exec->vtx.active_sz[0] = 0;
  exec->vtx.enabled &= (~BITFIELD64_BIT(VBO_ATTRIB_POS));
  exec->vtx.enabled |= BITFIELD64_BIT(VBO_ATTRIB_GENERIC0);
   }
---

will fix it...

BR,
-R


On Tue, Jul 5, 2016 at 2:56 PM, Rob Clark <robdcl...@gmail.com> wrote:
> So, this is a bit sad, but this breaks things for 0ad.. and maybe
> others.  I have an api-trace:
>
>   https://people.freedesktop.org/~robclark/0ad-cycladic-archipelago.trace.xz
>
> The problem is the interaction with the VERT_ATTRIB_POS /
> VERT_ATTRIB_GENERIC0 switcharoo in vbo_exec_bind_arrays(), although
> not entirely sure what the best thing to do is.  At any rate, it
> leaves a stale value in exec->vtx.active_sz[0], which results that
> vbo_exec_fixup_vertex() never happens..
>
> BR,
> -R
>
> On Tue, Jun 14, 2016 at 1:00 AM,  <mathias.froehl...@gmx.net> wrote:
>> From: Mathias Fröhlich <mathias.froehl...@web.de>
>>
>> The use of a bitmask makes functions iterating only active
>> attributes less visible in profiles.
>>
>> v2: Use _mesa_bit_scan{,64} instead of open coding.
>> v3: Use u_bit_scan{,64} instead of _mesa_bit_scan{,64}.
>>
>> Reviewed-by: Brian Paul <bri...@vmware.com>
>> Signed-off-by: Mathias Fröhlich <mathias.froehl...@web.de>
>> ---
>>  src/mesa/vbo/vbo_exec.h  |   1 +
>>  src/mesa/vbo/vbo_exec_api.c  | 146 
>> ++-
>>  src/mesa/vbo/vbo_exec_draw.c |   2 +
>>  3 files changed, 79 insertions(+), 70 deletions(-)
>>
>> diff --git a/src/mesa/vbo/vbo_exec.h b/src/mesa/vbo/vbo_exec.h
>> index 27bff4a..5e20cf6 100644
>> --- a/src/mesa/vbo/vbo_exec.h
>> +++ b/src/mesa/vbo/vbo_exec.h
>> @@ -101,6 +101,7 @@ struct vbo_exec_context
>>GLuint max_vert; /**< Max number of vertices allowed in buffer */
>>struct vbo_exec_copied_vtx copied;
>>
>> +  GLbitfield64 enabled; /**< mask of enabled vbo arrays. */
>>GLubyte attrsz[VBO_ATTRIB_MAX];   /**< nr. of attrib components 
>> (1..4) */
>>GLenum attrtype[VBO_ATTRIB_MAX];  /**< GL_FLOAT, GL_DOUBLE, GL_INT, 
>> etc */
>>GLubyte active_sz[VBO_ATTRIB_MAX];  /**< attrib size (nr. 32-bit 
>> words) */
>> diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
>> index 7534599..e02bb90 100644
>> --- a/src/mesa/vbo/vbo_exec_api.c
>> +++ b/src/mesa/vbo/vbo_exec_api.c
>> @@ -42,6 +42,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
>>  #include "main/api_arrayelt.h"
>>  #include "main/api_validate.h"
>>  #include "main/dispatch.h"
>> +#include "util/bitscan.h"
>>
>>  #include "vbo_context.h"
>>  #include "vbo_noop.h"
>> @@ -167,54 +168,56 @@ static void vbo_exec_copy_to_current( struct 
>> vbo_exec_context *exec )
>>  {
>> struct gl_context *ctx = exec->ctx;
>> struct vbo_context *vbo = vbo_context(ctx);
>> -   GLuint i;
>> +   GLbitfield64 enabled = exec->vtx.enabled & 
>> (~BITFIELD64_BIT(VBO_ATTRIB_POS));
>>
>> -   for (i = VBO_ATTRIB_POS+1 ; i < VBO_ATTRIB_MAX ; i++) {
>> -  if (exec->vtx.attrsz[i]) {
>> - /* Note: the exec->vtx.current[i] pointers point into the
>> -  * ctx->Current.Attrib and ctx->Light.Material.Attrib arrays.
>> -  */
>> -GLfloat *current = (GLfloat *)vbo->currval[i].P

Re: [Mesa-dev] [PATCH 28/29] vbo: Use a bitmask to track the active arrays in vbo_exec*.

2016-07-05 Thread Rob Clark

So, this is a bit sad, but this breaks things for 0ad.. and maybe
others.  I have an api-trace:

  https://people.freedesktop.org/~robclark/0ad-cycladic-archipelago.trace.xz

The problem is the interaction with the VERT_ATTRIB_POS /
VERT_ATTRIB_GENERIC0 switcharoo in vbo_exec_bind_arrays(), although
not entirely sure what the best thing to do is.  At any rate, it
leaves a stale value in exec->vtx.active_sz[0], which results that
vbo_exec_fixup_vertex() never happens..

BR,
-R

On Tue, Jun 14, 2016 at 1:00 AM,   wrote:
> From: Mathias Fröhlich 
>
> The use of a bitmask makes functions iterating only active
> attributes less visible in profiles.
>
> v2: Use _mesa_bit_scan{,64} instead of open coding.
> v3: Use u_bit_scan{,64} instead of _mesa_bit_scan{,64}.
>
> Reviewed-by: Brian Paul 
> Signed-off-by: Mathias Fröhlich 
> ---
>  src/mesa/vbo/vbo_exec.h  |   1 +
>  src/mesa/vbo/vbo_exec_api.c  | 146 
> ++-
>  src/mesa/vbo/vbo_exec_draw.c |   2 +
>  3 files changed, 79 insertions(+), 70 deletions(-)
>
> diff --git a/src/mesa/vbo/vbo_exec.h b/src/mesa/vbo/vbo_exec.h
> index 27bff4a..5e20cf6 100644
> --- a/src/mesa/vbo/vbo_exec.h
> +++ b/src/mesa/vbo/vbo_exec.h
> @@ -101,6 +101,7 @@ struct vbo_exec_context
>GLuint max_vert; /**< Max number of vertices allowed in buffer */
>struct vbo_exec_copied_vtx copied;
>
> +  GLbitfield64 enabled; /**< mask of enabled vbo arrays. */
>GLubyte attrsz[VBO_ATTRIB_MAX];   /**< nr. of attrib components (1..4) 
> */
>GLenum attrtype[VBO_ATTRIB_MAX];  /**< GL_FLOAT, GL_DOUBLE, GL_INT, 
> etc */
>GLubyte active_sz[VBO_ATTRIB_MAX];  /**< attrib size (nr. 32-bit 
> words) */
> diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
> index 7534599..e02bb90 100644
> --- a/src/mesa/vbo/vbo_exec_api.c
> +++ b/src/mesa/vbo/vbo_exec_api.c
> @@ -42,6 +42,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
>  #include "main/api_arrayelt.h"
>  #include "main/api_validate.h"
>  #include "main/dispatch.h"
> +#include "util/bitscan.h"
>
>  #include "vbo_context.h"
>  #include "vbo_noop.h"
> @@ -167,54 +168,56 @@ static void vbo_exec_copy_to_current( struct 
> vbo_exec_context *exec )
>  {
> struct gl_context *ctx = exec->ctx;
> struct vbo_context *vbo = vbo_context(ctx);
> -   GLuint i;
> +   GLbitfield64 enabled = exec->vtx.enabled & 
> (~BITFIELD64_BIT(VBO_ATTRIB_POS));
>
> -   for (i = VBO_ATTRIB_POS+1 ; i < VBO_ATTRIB_MAX ; i++) {
> -  if (exec->vtx.attrsz[i]) {
> - /* Note: the exec->vtx.current[i] pointers point into the
> -  * ctx->Current.Attrib and ctx->Light.Material.Attrib arrays.
> -  */
> -GLfloat *current = (GLfloat *)vbo->currval[i].Ptr;
> - fi_type tmp[8]; /* space for doubles */
> - int dmul = exec->vtx.attrtype[i] == GL_DOUBLE ? 2 : 1;
> -
> - if (exec->vtx.attrtype[i] == GL_DOUBLE) {
> -memset(tmp, 0, sizeof(tmp));
> -memcpy(tmp, exec->vtx.attrptr[i], exec->vtx.attrsz[i] * 
> sizeof(GLfloat));
> - } else {
> -COPY_CLEAN_4V_TYPE_AS_UNION(tmp,
> -exec->vtx.attrsz[i],
> -exec->vtx.attrptr[i],
> -exec->vtx.attrtype[i]);
> - }
> +   while (enabled) {
> +  const int i = u_bit_scan64(&enabled);
> +
> +  /* Note: the exec->vtx.current[i] pointers point into the
> +   * ctx->Current.Attrib and ctx->Light.Material.Attrib arrays.
> +   */
> +  GLfloat *current = (GLfloat *)vbo->currval[i].Ptr;
> +  fi_type tmp[8]; /* space for doubles */
> +  int dmul = exec->vtx.attrtype[i] == GL_DOUBLE ? 2 : 1;
> +
> +  assert(exec->vtx.attrsz[i]);
> +
> +  if (exec->vtx.attrtype[i] == GL_DOUBLE) {
> + memset(tmp, 0, sizeof(tmp));
> + memcpy(tmp, exec->vtx.attrptr[i], exec->vtx.attrsz[i] * 
> sizeof(GLfloat));
> +  } else {
> + COPY_CLEAN_4V_TYPE_AS_UNION(tmp,
> + exec->vtx.attrsz[i],
> + exec->vtx.attrptr[i],
> + exec->vtx.attrtype[i]);
> +  }
>
> - if (exec->vtx.attrtype[i] != vbo->currval[i].Type ||
> - memcmp(current, tmp, 4 * sizeof(GLfloat) * dmul) != 0) {
> -memcpy(current, tmp, 4 * sizeof(GLfloat) * dmul);
> +  if (exec->vtx.attrtype[i] != vbo->currval[i].Type ||
> +  memcmp(current, tmp, 4 * sizeof(GLfloat) * dmul) != 0) {
> + memcpy(current, tmp, 4 * sizeof(GLfloat) * dmul);
>
> -/* Given that we explicitly state size here, there is no need
> - * for the COPY_CLEAN above, could just copy 16 bytes and be
> - * done.  The only problem is when Mesa accesses ctx->Current
> - * directly.
> -

[Mesa-dev] [PATCH 09/12] freedreno: batch re-ordering support

2016-07-02 Thread Rob Clark

For now, not enabled by default, but can be enabled (on a3xx/a4xx) with
FD_MESA_DEBUG=reorder.

Signed-off-by: Rob Clark <robdcl...@gmail.com>
---
 src/gallium/drivers/freedreno/freedreno_batch.c| 168 ++---
 src/gallium/drivers/freedreno/freedreno_batch.h|   1 +
 src/gallium/drivers/freedreno/freedreno_context.c  |  38 ++---
 src/gallium/drivers/freedreno/freedreno_context.h  |   2 -
 src/gallium/drivers/freedreno/freedreno_query_hw.c |   2 +-
 src/gallium/drivers/freedreno/freedreno_resource.c |   6 +-
 src/gallium/drivers/freedreno/freedreno_resource.h |   1 +
 src/gallium/drivers/freedreno/freedreno_screen.c   |   9 ++
 src/gallium/drivers/freedreno/freedreno_screen.h   |   2 +
 src/gallium/drivers/freedreno/freedreno_state.c|  15 +-
 src/gallium/drivers/freedreno/freedreno_util.h |   1 +
 11 files changed, 188 insertions(+), 57 deletions(-)

diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c 
b/src/gallium/drivers/freedreno/freedreno_batch.c
index 5c6ae76..9d5bcf8 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.c
+++ b/src/gallium/drivers/freedreno/freedreno_batch.c
@@ -25,26 +25,20 @@
  */
 
 #include "util/list.h"
+#include "util/set.h"
+#include "util/hash_table.h"
 #include "util/u_string.h"
 
 #include "freedreno_batch.h"
 #include "freedreno_context.h"
 #include "freedreno_resource.h"
 
-struct fd_batch *
-fd_batch_create(struct fd_context *ctx)
+static void
+batch_init(struct fd_batch *batch)
 {
-   struct fd_batch *batch = CALLOC_STRUCT(fd_batch);
-   static unsigned seqno = 0;
+   struct fd_context *ctx = batch->ctx;
unsigned size = 0;
 
-   if (!batch)
-   return NULL;
-
-   pipe_reference_init(&batch->reference, 1);
-   batch->seqno = ++seqno;
-   batch->ctx = ctx;
-
/* if kernel is too old to support unlimited # of cmd buffers, we
 * have no option but to allocate large worst-case sizes so that
 * we don't need to grow the ringbuffer.  Performance is likely to
@@ -62,7 +56,11 @@ fd_batch_create(struct fd_context *ctx)
fd_ringbuffer_set_parent(batch->draw, batch->gmem);
fd_ringbuffer_set_parent(batch->binning, batch->gmem);
 
-   list_inithead(&batch->used_resources);
+   batch->cleared = batch->partial_cleared = 0;
+   batch->restore = batch->resolve = 0;
+   batch->needs_flush = false;
+   batch->gmem_reason = 0;
+   batch->num_draws = 0;
 
/* reset maximal bounds: */
batch->max_scissor.minx = batch->max_scissor.miny = ~0;
@@ -73,16 +71,37 @@ fd_batch_create(struct fd_context *ctx)
if (is_a3xx(ctx->screen))
util_dynarray_init(&batch->rbrc_patches);
 
-   return batch;
+   assert(LIST_IS_EMPTY(&batch->used_resources));
 }
 
-void
-__fd_batch_destroy(struct fd_batch *batch)
+struct fd_batch *
+fd_batch_create(struct fd_context *ctx)
 {
-   fd_bc_invalidate_batch(batch);
+   struct fd_batch *batch = CALLOC_STRUCT(fd_batch);
+   static unsigned seqno = 0;
 
-   util_copy_framebuffer_state(&batch->framebuffer, NULL);
+   if (!batch)
+   return NULL;
+
+   DBG("%p", batch);
+
+   pipe_reference_init(&batch->reference, 1);
+   batch->seqno = ++seqno;
+   batch->ctx = ctx;
+
+   list_inithead(&batch->used_resources);
+
+   batch_init(batch);
+
+   batch->dependencies = _mesa_set_create(NULL, _mesa_hash_pointer,
+   _mesa_key_pointer_equal);
 
+   return batch;
+}
+
+static void
+batch_fini(struct fd_batch *batch)
+{
fd_ringbuffer_del(batch->draw);
fd_ringbuffer_del(batch->binning);
fd_ringbuffer_del(batch->gmem);
@@ -91,6 +110,51 @@ __fd_batch_destroy(struct fd_batch *batch)
 
if (is_a3xx(batch->ctx->screen))
util_dynarray_fini(&batch->rbrc_patches);
+}
+
+static void
+batch_reset(struct fd_batch *batch)
+{
+   struct set_entry *entry;
+
+   DBG("%p", batch);
+
+   batch_fini(batch);
+   batch_init(batch);
+
+   set_foreach(batch->dependencies, entry) {
+   struct fd_batch *dep = (struct fd_batch *)entry->key;
+   _mesa_set_remove(batch->dependencies, entry);
+   fd_batch_reference(&dep, NULL);
+   }
+}
+
+void
+fd_batch_reset(struct fd_batch *batch)
+{
+   if (batch->needs_flush)
+   batch_reset(batch);
+}
+
+static void
+unref_batch(struct set_entry *entry)
+{
+   struct fd_batch *batch = (struct fd_batch *)entry->key;
+   fd_batch_reference(&batch, NULL);
+}
+
+void
+__fd_batch_destroy(struct fd_batch *batch)
+{
+   fd_bc_invalidate_batch(batch);
+
+   DBG("%p", batch);
+
+   util_copy_framebuffer_state(&batch->framebuffer, NULL);
+
+   batch_fini(batch);
+
+   _mesa_set_destroy(batch->dependencie

[Mesa-dev] [PATCH 07/12] freedreno: move more batch related tracking to fd_batch

2016-07-02 Thread Rob Clark

To flush batches out of order, the gmem code needs to not depend on
state from fd_context (since that may apply to a more recent batch).
So this all moves into batch.

The one exception is the gmem/pipe/tile state itself.  But this is
only used from gmem code (and batches are flushed serially).  The
alternative would be having to re-calculate GMEM layout on every
batch, even if the dimensions of the render targets are the same.

Note: This opens up the possibility of pushing gmem/submit into a
helper thread.

Signed-off-by: Rob Clark <robdcl...@gmail.com>
---
 src/gallium/drivers/freedreno/a2xx/fd2_draw.c  |   6 +-
 src/gallium/drivers/freedreno/a2xx/fd2_emit.c  |   8 +-
 src/gallium/drivers/freedreno/a2xx/fd2_gmem.c  |  63 -
 src/gallium/drivers/freedreno/a3xx/fd3_context.c   |   4 -
 src/gallium/drivers/freedreno/a3xx/fd3_context.h   |   5 -
 src/gallium/drivers/freedreno/a3xx/fd3_draw.c  |  15 +--
 src/gallium/drivers/freedreno/a3xx/fd3_emit.c  |  15 ++-
 src/gallium/drivers/freedreno/a3xx/fd3_gmem.c  | 145 +++--
 src/gallium/drivers/freedreno/a4xx/fd4_draw.c  |  11 +-
 src/gallium/drivers/freedreno/a4xx/fd4_draw.h  |  13 +-
 src/gallium/drivers/freedreno/a4xx/fd4_emit.c  |  16 +--
 src/gallium/drivers/freedreno/a4xx/fd4_gmem.c  | 121 -
 src/gallium/drivers/freedreno/freedreno_batch.c|  22 +++-
 src/gallium/drivers/freedreno/freedreno_batch.h|  66 ++
 src/gallium/drivers/freedreno/freedreno_context.c  |  19 +--
 src/gallium/drivers/freedreno/freedreno_context.h  |  77 ++-
 src/gallium/drivers/freedreno/freedreno_draw.c |  82 ++--
 src/gallium/drivers/freedreno/freedreno_draw.h |  15 ++-
 src/gallium/drivers/freedreno/freedreno_gmem.c |  96 +++---
 src/gallium/drivers/freedreno/freedreno_gmem.h |   6 +-
 src/gallium/drivers/freedreno/freedreno_query_hw.c |   2 +-
 src/gallium/drivers/freedreno/freedreno_resource.c |   2 +-
 src/gallium/drivers/freedreno/freedreno_state.c|   6 +-
 23 files changed, 418 insertions(+), 397 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_draw.c 
b/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
index 030e6f6..a824018 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
@@ -107,7 +107,7 @@ fd2_draw_vbo(struct fd_context *ctx, const struct 
pipe_draw_info *info)
OUT_RING(ring, info->max_index);/* VGT_MAX_VTX_INDX */
OUT_RING(ring, info->min_index);/* VGT_MIN_VTX_INDX */
 
-   fd_draw_emit(ctx, ring, ctx->primtypes[info->mode],
+   fd_draw_emit(ctx->batch, ring, ctx->primtypes[info->mode],
 IGNORE_VISIBILITY, info);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
@@ -126,7 +126,7 @@ fd2_clear(struct fd_context *ctx, unsigned buffers,
 {
struct fd2_context *fd2_ctx = fd2_context(ctx);
struct fd_ringbuffer *ring = ctx->batch->draw;
-   struct pipe_framebuffer_state *fb = &ctx->framebuffer;
+   struct pipe_framebuffer_state *fb = &ctx->batch->framebuffer;
uint32_t reg, colr = 0;
 
if ((buffers & PIPE_CLEAR_COLOR) && fb->nr_cbufs)
@@ -266,7 +266,7 @@ fd2_clear(struct fd_context *ctx, unsigned buffers,
OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
 
-   fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
+   fd_draw(ctx->batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, 
NULL);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c 
b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
index 0327803..b3a1b3d 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
@@ -250,10 +250,10 @@ fd2_emit_state(struct fd_context *ctx, uint32_t dirty)
OUT_RING(ring, xy2d(scissor->maxx,   /* 
PA_SC_WINDOW_SCISSOR_BR */
scissor->maxy));
 
-   ctx->max_scissor.minx = MIN2(ctx->max_scissor.minx, 
scissor->minx);
-   ctx->max_scissor.miny = MIN2(ctx->max_scissor.miny, 
scissor->miny);
-   ctx->max_scissor.maxx = MAX2(ctx->max_scissor.maxx, 
scissor->maxx);
-   ctx->max_scissor.maxy = MAX2(ctx->max_scissor.maxy, 
scissor->maxy);
+   ctx->batch->max_scissor.minx = 
MIN2(ctx->batch->max_scissor.minx, scissor->minx);
+   ctx->batch->max_scissor.miny = 
MIN2(ctx->batch->max_scissor.miny, scissor->miny);
+   ctx->batch->max_scissor.maxx = 
MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
+   ctx->batch->max_scissor.maxy = 
MAX2(ctx->

[Mesa-dev] [PATCH 11/12] freedreno: shadow textures if possible to avoid stall/flush

2016-07-02 Thread Rob Clark

To make batch re-ordering useful, we need to be able to create shadow
resources to avoid a flush/stall in transfer_map().  For example,
uploading new texture contents or updating a UBO mid-batch.  In these
cases, we want to clone the buffer, and update the new buffer, leaving
the old buffer (whose reference is held by cmdstream) as a shadow.

This is done by blitting the remaining other levels (and whatever part
of current level that is not discarded) from the old/shadow buffer to
the new one.

Signed-off-by: Rob Clark <robdcl...@gmail.com>
---
 src/gallium/drivers/freedreno/freedreno_context.h  |   5 +
 src/gallium/drivers/freedreno/freedreno_resource.c | 208 -
 src/gallium/drivers/freedreno/freedreno_util.h |   6 +
 3 files changed, 210 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/freedreno/freedreno_context.h 
b/src/gallium/drivers/freedreno/freedreno_context.h
index 012f452..ca3c01b 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -296,6 +296,11 @@ struct fd_context {
bool cond_cond; /* inverted rendering condition */
uint cond_mode;
 
+   /* Are we in process of shadowing a resource? Used to detect recursion
+* in transfer_map, and skip unneeded synchronization.
+*/
+   bool in_shadow;
+
struct pipe_debug_callback debug;
 
/* GMEM/tile handling fxns: */
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c 
b/src/gallium/drivers/freedreno/freedreno_resource.c
index d7603b2..8ff0d4a 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -116,6 +116,171 @@ realloc_bo(struct fd_resource *rsc, uint32_t size)
util_range_set_empty(&rsc->valid_buffer_range);
 }
 
+static void fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond);
+static void fd_blitter_pipe_end(struct fd_context *ctx);
+
+static void
+do_blit(struct fd_context *ctx, const struct pipe_blit_info *blit, bool 
fallback)
+{
+   /* TODO size threshold too?? */
+   if ((blit->src.resource->target != PIPE_BUFFER) && !fallback) {
+   /* do blit on gpu: */
+   fd_blitter_pipe_begin(ctx, false);
+   util_blitter_blit(ctx->blitter, blit);
+   fd_blitter_pipe_end(ctx);
+   } else {
+   /* do blit on cpu: */
+   util_resource_copy_region(&ctx->base,
+   blit->dst.resource, blit->dst.level, 
blit->dst.box.x,
+   blit->dst.box.y, blit->dst.box.z,
+   blit->src.resource, blit->src.level, 
&blit->src.box);
+   }
+}
+
+static bool
+fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc,
+   unsigned level, unsigned usage, const struct pipe_box *box)
+{
+   struct pipe_context *pctx = &ctx->base;
+   struct pipe_resource *prsc = &rsc->base.b;
+   bool fallback = false;
+
+   /* TODO: somehow munge dimensions and format to copy unsupported
+* render target format to something that is supported?
+*/
+   if (!pctx->screen->is_format_supported(pctx->screen,
+   prsc->format, prsc->target, prsc->nr_samples,
+   PIPE_BIND_RENDER_TARGET))
+   fallback = true;
+
+   /* these cases should be handled elsewhere.. just for future
+* reference in case this gets split into a more generic(ish)
+* helper.
+*/
+   debug_assert(!(usage & PIPE_TRANSFER_READ));
+   debug_assert(!(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE));
+
+   /* if we do a gpu blit to clone the whole resource, we'll just
+* end up stalling on that.. so only allow if we can discard
+* current range (and blit, possibly cpu or gpu, the rest)
+*/
+   if (!(usage & PIPE_TRANSFER_DISCARD_RANGE))
+   return false;
+
+   bool whole_level = util_texrange_covers_whole_level(prsc, level,
+   box->x, box->y, box->z, box->width, box->height, box->depth);
+
+   /* TODO need to be more clever about current level */
+   if ((prsc->target > PIPE_TEXTURE_2D) && !whole_level)
+   return false;
+
+   struct pipe_resource *pshadow =
+   pctx->screen->resource_create(pctx->screen, prsc);
+
+   if (!pshadow)
+   return false;
+
+   assert(!ctx->in_shadow);
+   ctx->in_shadow = true;
+
+   /* get rid of any references that batch-cache might have to us (which
+* should empty/destroy rsc->batches hashset)
+*/
+   fd_bc_invalidate_resource(rsc);
+
+   /* Swap the backing bo's, so shadow becomes the old buffer,
+* blit from shadow to new buffer.  From here on out, we
+* c

[Mesa-dev] [PATCH 12/12] freedreno: support discarding previous rendering in special cases

2016-07-02 Thread Rob Clark

Basically, to "DCE" blits triggered by resource shadowing, in cases
where the levels are immediately completely overwritten.  For example,
mid-frame texture upload to level zero triggers shadowing and back-blits
to the remaining levels, which are immediately overwritten by
glGenerateMipmap().

Signed-off-by: Rob Clark <robdcl...@gmail.com>
---
 src/gallium/drivers/freedreno/freedreno_context.h  |  6 ++
 src/gallium/drivers/freedreno/freedreno_draw.c | 10 ++
 src/gallium/drivers/freedreno/freedreno_resource.c | 21 -
 3 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/freedreno/freedreno_context.h 
b/src/gallium/drivers/freedreno/freedreno_context.h
index ca3c01b..5b3d707 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -301,6 +301,12 @@ struct fd_context {
 */
bool in_shadow;
 
+   /* Ie. in blit situation where we no longer care about previous 
framebuffer
+* contents.  Main point is to eliminate blits from 
fd_try_shadow_resource().
+* For example, in case of texture upload + gen-mipmaps.
+*/
+   bool discard;
+
struct pipe_debug_callback debug;
 
/* GMEM/tile handling fxns: */
diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c 
b/src/gallium/drivers/freedreno/freedreno_draw.c
index 481fb3d..fd4fc08 100644
--- a/src/gallium/drivers/freedreno/freedreno_draw.c
+++ b/src/gallium/drivers/freedreno/freedreno_draw.c
@@ -84,6 +84,11 @@ fd_draw_vbo(struct pipe_context *pctx, const struct 
pipe_draw_info *info)
return;
}
 
+   if (ctx->discard) {
+   fd_batch_reset(ctx->batch);
+   ctx->discard = false;
+   }
+
/*
 * Figure out the buffers/features we need:
 */
@@ -209,6 +214,11 @@ fd_clear(struct pipe_context *pctx, unsigned buffers,
if (!fd_render_condition_check(pctx))
return;
 
+   if (ctx->discard) {
+   fd_batch_reset(ctx->batch);
+   ctx->discard = false;
+   }
+
/* for bookkeeping about which buffers have been cleared (and thus
 * can fully or partially skip mem2gmem) we need to ignore buffers
 * that have already had a draw, in case apps do silly things like
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c 
b/src/gallium/drivers/freedreno/freedreno_resource.c
index 8ff0d4a..46bb7c7 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -116,7 +116,7 @@ realloc_bo(struct fd_resource *rsc, uint32_t size)
util_range_set_empty(>valid_buffer_range);
 }
 
-static void fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond);
+static void fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, 
bool discard);
 static void fd_blitter_pipe_end(struct fd_context *ctx);
 
 static void
@@ -125,7 +125,7 @@ do_blit(struct fd_context *ctx, const struct pipe_blit_info 
*blit, bool fallback
/* TODO size threshold too?? */
if ((blit->src.resource->target != PIPE_BUFFER) && !fallback) {
/* do blit on gpu: */
-   fd_blitter_pipe_begin(ctx, false);
+   fd_blitter_pipe_begin(ctx, false, true);
util_blitter_blit(ctx->blitter, blit);
fd_blitter_pipe_end(ctx);
} else {
@@ -897,7 +897,8 @@ fd_blitter_pipe_copy_region(struct fd_context *ctx,
if (!util_blitter_is_copy_supported(ctx->blitter, dst, src))
return false;
 
-   fd_blitter_pipe_begin(ctx, false);
+   /* TODO we could discard if dst box covers dst level fully.. */
+   fd_blitter_pipe_begin(ctx, false, false);
util_blitter_copy_texture(ctx->blitter,
dst, dst_level, dstx, dsty, dstz,
src, src_level, src_box);
@@ -967,6 +968,7 @@ fd_blit(struct pipe_context *pctx, const struct 
pipe_blit_info *blit_info)
 {
struct fd_context *ctx = fd_context(pctx);
struct pipe_blit_info info = *blit_info;
+   bool discard = false;
 
if (info.src.resource->nr_samples > 1 &&
info.dst.resource->nr_samples <= 1 &&
@@ -979,6 +981,13 @@ fd_blit(struct pipe_context *pctx, const struct 
pipe_blit_info *blit_info)
if (info.render_condition_enable && !fd_render_condition_check(pctx))
return;
 
+   if (!info.scissor_enable && !info.alpha_blend) {
+   discard = util_texrange_covers_whole_level(info.dst.resource,
+   info.dst.level, info.dst.box.x, info.dst.box.y,
+   info.dst.box.z, info.dst.box.width,
+   info.dst.box.height, info.dst.box.depth);
+   }
+

[Mesa-dev] [PATCH 04/12] freedreno: introduce fd_batch

2016-07-02 Thread Rob Clark

From: Rob Clark <robcl...@freedesktop.org>

Introduce the batch object, to track a batch/submit's worth of
ringbuffers and other bookkeeping.  In this first step, just move
the ringbuffers into batch, since that is mostly uninteresting
churn.

For now there is just a single batch at a time.  Note that one
outcome of this change is that ringbuffers (rb's) are allocated/freed
on each use.  But the expectation is that the bo pool in
libdrm_freedreno will save us the GEM bo alloc/free, which was the
initial reason to implement an rb pool in gallium.

The purpose of the batch is to eventually facilitate out-of-order
rendering, with batches associated to framebuffer state, and
tracking the dependencies on other batches.

Signed-off-by: Rob Clark <robdcl...@gmail.com>
---
 src/gallium/drivers/freedreno/Makefile.sources |  2 +
 src/gallium/drivers/freedreno/a2xx/fd2_draw.c  |  6 +-
 src/gallium/drivers/freedreno/a2xx/fd2_emit.c  |  7 +-
 src/gallium/drivers/freedreno/a3xx/fd3_draw.c  |  8 +-
 src/gallium/drivers/freedreno/a3xx/fd3_emit.c  |  8 +-
 src/gallium/drivers/freedreno/a3xx/fd3_emit.h  |  2 +-
 src/gallium/drivers/freedreno/a3xx/fd3_gmem.c  |  7 +-
 src/gallium/drivers/freedreno/a4xx/fd4_draw.c  | 30 +++
 src/gallium/drivers/freedreno/a4xx/fd4_emit.c  |  8 +-
 src/gallium/drivers/freedreno/a4xx/fd4_emit.h  |  2 +-
 src/gallium/drivers/freedreno/a4xx/fd4_gmem.c  |  7 +-
 src/gallium/drivers/freedreno/freedreno_batch.c| 93 ++
 src/gallium/drivers/freedreno/freedreno_batch.h| 72 +
 src/gallium/drivers/freedreno/freedreno_context.c  | 85 +++-
 src/gallium/drivers/freedreno/freedreno_context.h  | 33 ++--
 src/gallium/drivers/freedreno/freedreno_draw.c | 17 +---
 src/gallium/drivers/freedreno/freedreno_gmem.c | 20 ++---
 src/gallium/drivers/freedreno/freedreno_query_hw.c |  4 +-
 src/gallium/drivers/freedreno/freedreno_resource.c |  4 +-
 src/gallium/drivers/freedreno/freedreno_util.h |  9 +--
 20 files changed, 247 insertions(+), 177 deletions(-)
 create mode 100644 src/gallium/drivers/freedreno/freedreno_batch.c
 create mode 100644 src/gallium/drivers/freedreno/freedreno_batch.h

diff --git a/src/gallium/drivers/freedreno/Makefile.sources 
b/src/gallium/drivers/freedreno/Makefile.sources
index edba369..4ba8c9d 100644
--- a/src/gallium/drivers/freedreno/Makefile.sources
+++ b/src/gallium/drivers/freedreno/Makefile.sources
@@ -2,6 +2,8 @@ C_SOURCES := \
adreno_common.xml.h \
adreno_pm4.xml.h \
disasm.h \
+   freedreno_batch.c \
+   freedreno_batch.h \
freedreno_context.c \
freedreno_context.h \
freedreno_draw.c \
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_draw.c 
b/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
index 14620ac..030e6f6 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
@@ -76,13 +76,13 @@ emit_vertexbufs(struct fd_context *ctx)
// NOTE I believe the 0x78 (or 0x9c in solid_vp) relates to the
// CONST(20,0) (or CONST(26,0) in soliv_vp)
 
-   fd2_emit_vertex_bufs(ctx->ring, 0x78, bufs, vtx->num_elements);
+   fd2_emit_vertex_bufs(ctx->batch->draw, 0x78, bufs, vtx->num_elements);
 }
 
 static bool
 fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
 {
-   struct fd_ringbuffer *ring = ctx->ring;
+   struct fd_ringbuffer *ring = ctx->batch->draw;
 
if (ctx->dirty & FD_DIRTY_VTXBUF)
emit_vertexbufs(ctx);
@@ -125,7 +125,7 @@ fd2_clear(struct fd_context *ctx, unsigned buffers,
const union pipe_color_union *color, double depth, unsigned 
stencil)
 {
struct fd2_context *fd2_ctx = fd2_context(ctx);
-   struct fd_ringbuffer *ring = ctx->ring;
+   struct fd_ringbuffer *ring = ctx->batch->draw;
struct pipe_framebuffer_state *fb = >framebuffer;
uint32_t reg, colr = 0;
 
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c 
b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
index f8d03ca..0327803 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
@@ -184,7 +184,7 @@ fd2_emit_state(struct fd_context *ctx, uint32_t dirty)
 {
struct fd2_blend_stateobj *blend = fd2_blend_stateobj(ctx->blend);
struct fd2_zsa_stateobj *zsa = fd2_zsa_stateobj(ctx->zsa);
-   struct fd_ringbuffer *ring = ctx->ring;
+   struct fd_ringbuffer *ring = ctx->batch->draw;
 
/* NOTE: we probably want to eventually refactor this so each state
 * object handles emitting it's own state..  although the mapping of
@@ -443,10 +443,9 @@ fd2_emit_restore(struct fd_context *ctx, struct 
fd_ringbuffer *ring)
 }
 
 static void
-fd2_emit_ib(struct fd_ringbuffer *ring, struct fd_ringmarker *start,
-   struct fd_ringmarker *end)

[Mesa-dev] [PATCH 08/12] freedreno: add batch-cache

2016-07-02 Thread Rob Clark

Note that I originally also had an entry-point that would construct a
key and do a lookup from a pipe_surface.  I ended up not needing that
(yet?), but it is easy enough to re-introduce later if we need it for
the blit path.

Signed-off-by: Rob Clark <robdcl...@gmail.com>
---
 src/gallium/drivers/freedreno/Makefile.sources |   2 +
 src/gallium/drivers/freedreno/freedreno_batch.c|   5 +
 src/gallium/drivers/freedreno/freedreno_batch.h|   6 +
 .../drivers/freedreno/freedreno_batch_cache.c  | 246 +
 .../drivers/freedreno/freedreno_batch_cache.h  |  51 +
 src/gallium/drivers/freedreno/freedreno_context.c  |   4 +
 src/gallium/drivers/freedreno/freedreno_context.h  |   3 +
 src/gallium/drivers/freedreno/freedreno_resource.c |   2 +
 src/gallium/drivers/freedreno/freedreno_resource.h |   6 +
 9 files changed, 325 insertions(+)
 create mode 100644 src/gallium/drivers/freedreno/freedreno_batch_cache.c
 create mode 100644 src/gallium/drivers/freedreno/freedreno_batch_cache.h

diff --git a/src/gallium/drivers/freedreno/Makefile.sources 
b/src/gallium/drivers/freedreno/Makefile.sources
index 4ba8c9d..92d9186 100644
--- a/src/gallium/drivers/freedreno/Makefile.sources
+++ b/src/gallium/drivers/freedreno/Makefile.sources
@@ -4,6 +4,8 @@ C_SOURCES := \
disasm.h \
freedreno_batch.c \
freedreno_batch.h \
+   freedreno_batch_cache.c \
+   freedreno_batch_cache.h \
freedreno_context.c \
freedreno_context.h \
freedreno_draw.c \
diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c 
b/src/gallium/drivers/freedreno/freedreno_batch.c
index 1fbce43..5c6ae76 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.c
+++ b/src/gallium/drivers/freedreno/freedreno_batch.c
@@ -79,7 +79,10 @@ fd_batch_create(struct fd_context *ctx)
 void
 __fd_batch_destroy(struct fd_batch *batch)
 {
+   fd_bc_invalidate_batch(batch);
+
util_copy_framebuffer_state(>framebuffer, NULL);
+
fd_ringbuffer_del(batch->draw);
fd_ringbuffer_del(batch->binning);
fd_ringbuffer_del(batch->gmem);
@@ -120,6 +123,8 @@ fd_batch_flush(struct fd_batch *batch)
}
 
assert(LIST_IS_EMPTY(>used_resources));
+   batch->needs_flush = false;
+   fd_bc_invalidate_batch(batch);
 }
 
 void
diff --git a/src/gallium/drivers/freedreno/freedreno_batch.h 
b/src/gallium/drivers/freedreno/freedreno_batch.h
index 4607250..d500f95 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.h
+++ b/src/gallium/drivers/freedreno/freedreno_batch.h
@@ -119,6 +119,12 @@ struct fd_batch {
 
/** list of resources used by currently-unsubmitted batch */
struct list_head used_resources;
+
+   /** key in batch-cache (if not null): */
+   const void *key;
+
+   /** set of dependent batches.. holds refs to dependent batches: */
+   struct set *dependencies;
 };
 
 struct fd_batch * fd_batch_create(struct fd_context *ctx);
diff --git a/src/gallium/drivers/freedreno/freedreno_batch_cache.c 
b/src/gallium/drivers/freedreno/freedreno_batch_cache.c
new file mode 100644
index 000..bd47251
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_batch_cache.c
@@ -0,0 +1,246 @@
+/*
+ * Copyright (C) 2016 Rob Clark <robcl...@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 
THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *Rob Clark <robcl...@freedesktop.org>
+ */
+
+#include "util/hash_table.h"
+#include "util/set.h"
+#include "util/list.h"
+#include "util/u_string.h"
+
+#include "freedreno_batch.h"
+#include "freedreno_batch_cache.h"
+#include "freedreno_context.h"
+#include "freedreno_resource.h"
+
+/* Overview:
+ *
+ *   The batch cache provides lookup for mapping pipe_framebuffer_state
+ *   to a batch.
+ *
+ *   It do

[Mesa-dev] [PATCH 10/12] freedreno: spiff up some debug traces

2016-07-02 Thread Rob Clark

Make it easier to track batches, to ensure things happen properly when
they are reordered.

Signed-off-by: Rob Clark <robdcl...@gmail.com>
---
 src/gallium/drivers/freedreno/freedreno_draw.c | 6 --
 src/gallium/drivers/freedreno/freedreno_gmem.c | 6 --
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c 
b/src/gallium/drivers/freedreno/freedreno_draw.c
index b947762..481fb3d 100644
--- a/src/gallium/drivers/freedreno/freedreno_draw.c
+++ b/src/gallium/drivers/freedreno/freedreno_draw.c
@@ -170,7 +170,8 @@ fd_draw_vbo(struct pipe_context *pctx, const struct 
pipe_draw_info *info)
/* and any buffers used, need to be resolved: */
batch->resolve |= buffers;
 
-   DBG("%x num_draws=%u (%s/%s)", buffers, batch->num_draws,
+   DBG("%p: %x %ux%u num_draws=%u (%s/%s)", batch, buffers,
+   pfb->width, pfb->height, batch->num_draws,
util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
util_format_short_name(pipe_surface_format(pfb->zsbuf)));
 
@@ -242,7 +243,8 @@ fd_clear(struct pipe_context *pctx, unsigned buffers,
batch->gmem_reason |= FD_GMEM_CLEARS_DEPTH_STENCIL;
}
 
-   DBG("%x depth=%f, stencil=%u (%s/%s)", buffers, depth, stencil,
+   DBG("%p: %x %ux%u depth=%f, stencil=%u (%s/%s)", batch, buffers,
+   pfb->width, pfb->height, depth, stencil,
util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
util_format_short_name(pipe_surface_format(pfb->zsbuf)));
 
diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c 
b/src/gallium/drivers/freedreno/freedreno_gmem.c
index 9ca7f5f..a075a8b 100644
--- a/src/gallium/drivers/freedreno/freedreno_gmem.c
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
@@ -383,7 +383,8 @@ fd_gmem_render_tiles(struct fd_batch *batch)
ctx->stats.batch_total++;
 
if (sysmem) {
-   DBG("rendering sysmem (%s/%s)",
+   DBG("%p: rendering sysmem %ux%u (%s/%s)",
+   batch, pfb->width, pfb->height,

util_format_short_name(pipe_surface_format(pfb->cbufs[0])),

util_format_short_name(pipe_surface_format(pfb->zsbuf)));
fd_hw_query_prepare(ctx, 1);
@@ -392,7 +393,8 @@ fd_gmem_render_tiles(struct fd_batch *batch)
} else {
struct fd_gmem_stateobj *gmem = >gmem;
calculate_tiles(batch);
-   DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, 
gmem->nbins_y,
+   DBG("%p: rendering %dx%d tiles %ux%u (%s/%s)",
+   batch, pfb->width, pfb->height, gmem->nbins_x, 
gmem->nbins_y,

util_format_short_name(pipe_surface_format(pfb->cbufs[0])),

util_format_short_name(pipe_surface_format(pfb->zsbuf)));
fd_hw_query_prepare(ctx, gmem->nbins_x * gmem->nbins_y);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 06/12] freedreno: dynamically sized/growable cmd buffers

2016-07-02 Thread Rob Clark

From: Rob Clark <robcl...@freedesktop.org>

Signed-off-by: Rob Clark <robdcl...@gmail.com>
---
 src/gallium/drivers/freedreno/freedreno_batch.c | 25 
 src/gallium/drivers/freedreno/freedreno_util.h  | 31 ++---
 2 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c 
b/src/gallium/drivers/freedreno/freedreno_batch.c
index 51a61d9..6d17a42 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.c
+++ b/src/gallium/drivers/freedreno/freedreno_batch.c
@@ -36,6 +36,7 @@ fd_batch_create(struct fd_context *ctx)
 {
struct fd_batch *batch = CALLOC_STRUCT(fd_batch);
static unsigned seqno = 0;
+   unsigned size = 0;
 
if (!batch)
return NULL;
@@ -44,13 +45,18 @@ fd_batch_create(struct fd_context *ctx)
batch->seqno = ++seqno;
batch->ctx = ctx;
 
-   /* TODO how to pick a good size?  Or maybe we should introduce
-* fd_ringlist?  Also, make sure size is aligned with bo-cache
-* bucket size, since otherwise that will round up size..
+   /* if kernel is too old to support unlimited # of cmd buffers, we
+* have no option but to allocate large worst-case sizes so that
+* we don't need to grow the ringbuffer.  Performance is likely to
+* suffer, but there is no good alternative.
 */
-   batch->draw= fd_ringbuffer_new(ctx->screen->pipe, 0x1);
-   batch->binning = fd_ringbuffer_new(ctx->screen->pipe, 0x1);
-   batch->gmem= fd_ringbuffer_new(ctx->screen->pipe, 0x1);
+   if (fd_device_version(ctx->screen->dev) < FD_VERSION_UNLIMITED_CMDS) {
+   size = 0x10;
+   }
+
+   batch->draw= fd_ringbuffer_new(ctx->screen->pipe, size);
+   batch->binning = fd_ringbuffer_new(ctx->screen->pipe, size);
+   batch->gmem= fd_ringbuffer_new(ctx->screen->pipe, size);
 
fd_ringbuffer_set_parent(batch->gmem, NULL);
fd_ringbuffer_set_parent(batch->draw, batch->gmem);
@@ -117,10 +123,9 @@ fd_batch_resource_used(struct fd_batch *batch, struct 
fd_resource *rsc,
 void
 fd_batch_check_size(struct fd_batch *batch)
 {
-   /* TODO eventually support having a list of draw/binning rb's
-* and if we are too close to the end, add another to the
-* list.  For now we just flush.
-*/
+   if (fd_device_version(batch->ctx->screen->dev) >= 
FD_VERSION_UNLIMITED_CMDS)
+   return;
+
struct fd_ringbuffer *ring = batch->draw;
if (((ring->cur - ring->start) > (ring->size/4 - 0x1000)) ||
(fd_mesa_debug & FD_DBG_FLUSH))
diff --git a/src/gallium/drivers/freedreno/freedreno_util.h 
b/src/gallium/drivers/freedreno/freedreno_util.h
index b6b91f9..8f125d9 100644
--- a/src/gallium/drivers/freedreno/freedreno_util.h
+++ b/src/gallium/drivers/freedreno/freedreno_util.h
@@ -238,13 +238,8 @@ OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo,
 
 static inline void BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords)
 {
-   if ((ring->cur + ndwords) >= ring->end) {
-   /* this probably won't really work if we have multiple tiles..
-* but it is ok for 2d..  we might need different behavior
-* depending on 2d or 3d pipe.
-*/
-   DBG("uh oh..");
-   }
+   if (ring->cur + ndwords >= ring->end)
+   fd_ringbuffer_grow(ring, ndwords);
 }
 
 static inline void
@@ -255,6 +250,13 @@ OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, 
uint16_t cnt)
 }
 
 static inline void
+OUT_PKT2(struct fd_ringbuffer *ring)
+{
+   BEGIN_RING(ring, 1);
+   OUT_RING(ring, CP_TYPE2_PKT);
+}
+
+static inline void
 OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
 {
BEGIN_RING(ring, cnt+1);
@@ -271,9 +273,7 @@ OUT_WFI(struct fd_ringbuffer *ring)
 static inline void
 __OUT_IB(struct fd_ringbuffer *ring, bool prefetch, struct fd_ringbuffer 
*target)
 {
-   uint32_t dwords = target->cur - target->start;
-
-   assert(dwords > 0);
+   unsigned count = fd_ringbuffer_cmd_count(target);
 
/* for debug after a lock up, write a unique counter value
 * to scratch6 for each IB, to make it easier to match up
@@ -283,9 +283,14 @@ __OUT_IB(struct fd_ringbuffer *ring, bool prefetch, struct 
fd_ringbuffer *target
 */
emit_marker(ring, 6);
 
-   OUT_PKT3(ring, prefetch ? CP_INDIRECT_BUFFER_PFE : 
CP_INDIRECT_BUFFER_PFD, 2);
-   fd_ringbuffer_emit_reloc_ring_full(ring, target, 0);
-   OUT_RING(ring, dwords);
+   for (unsigned i = 0; i < count; i++) {
+   uint32_t dwords;
+   OUT_PKT3(ring, prefetch ? CP_INDIRECT_BUFFER_PFE : 
CP_INDIRECT_BUFFER_PFD,

[Mesa-dev] [PATCH 05/12] freedreno: push resource tracking down into batch

2016-07-02 Thread Rob Clark

From: Rob Clark <robcl...@freedesktop.org>

Signed-off-by: Rob Clark <robdcl...@gmail.com>
---
 src/gallium/drivers/freedreno/freedreno_batch.c| 35 ++
 src/gallium/drivers/freedreno/freedreno_batch.h|  7 +
 src/gallium/drivers/freedreno/freedreno_context.c  | 11 ---
 src/gallium/drivers/freedreno/freedreno_context.h  |  3 --
 src/gallium/drivers/freedreno/freedreno_draw.c | 31 ---
 src/gallium/drivers/freedreno/freedreno_resource.c |  3 +-
 src/gallium/drivers/freedreno/freedreno_resource.h |  3 +-
 7 files changed, 51 insertions(+), 42 deletions(-)

diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c 
b/src/gallium/drivers/freedreno/freedreno_batch.c
index c202ff0..51a61d9 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.c
+++ b/src/gallium/drivers/freedreno/freedreno_batch.c
@@ -24,10 +24,12 @@
  *    Rob Clark <robcl...@freedesktop.org>
  */
 
+#include "util/list.h"
 #include "util/u_string.h"
 
 #include "freedreno_batch.h"
 #include "freedreno_context.h"
+#include "freedreno_resource.h"
 
 struct fd_batch *
 fd_batch_create(struct fd_context *ctx)
@@ -54,6 +56,8 @@ fd_batch_create(struct fd_context *ctx)
fd_ringbuffer_set_parent(batch->draw, batch->gmem);
fd_ringbuffer_set_parent(batch->binning, batch->gmem);
 
+   list_inithead(>used_resources);
+
return batch;
 }
 
@@ -76,7 +80,38 @@ __fd_batch_describe(char* buf, const struct fd_batch *batch)
 void
 fd_batch_flush(struct fd_batch *batch)
 {
+   struct fd_resource *rsc, *rsc_tmp;
+
fd_gmem_render_tiles(batch->ctx);
+
+   /* go through all the used resources and clear their reading flag */
+   LIST_FOR_EACH_ENTRY_SAFE(rsc, rsc_tmp, >used_resources, list) {
+   debug_assert(rsc->pending_batch == batch);
+   debug_assert(rsc->status != 0);
+   rsc->status = 0;
+   fd_batch_reference(>pending_batch, NULL);
+   list_delinit(>list);
+   }
+
+   assert(LIST_IS_EMPTY(>used_resources));
+}
+
+void
+fd_batch_resource_used(struct fd_batch *batch, struct fd_resource *rsc,
+   enum fd_resource_status status)
+{
+   rsc->status |= status;
+
+   if (rsc->stencil)
+   rsc->stencil->status |= status;
+
+   /* TODO resources can actually be shared across contexts,
+* so I'm not sure a single list-head will do the trick?
+*/
+   debug_assert((rsc->pending_batch == batch) || !rsc->pending_batch);
+   list_delinit(>list);
+   list_addtail(>list, >used_resources);
+   fd_batch_reference(>pending_batch, batch);
 }
 
 void
diff --git a/src/gallium/drivers/freedreno/freedreno_batch.h 
b/src/gallium/drivers/freedreno/freedreno_batch.h
index 2134624..69779d8 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.h
+++ b/src/gallium/drivers/freedreno/freedreno_batch.h
@@ -32,6 +32,8 @@
 #include "freedreno_util.h"
 
 struct fd_context;
+struct fd_resource;
+enum fd_resource_status;
 
 /* A batch tracks everything about a cmdstream batch/submit, including the
  * ringbuffers used for binning, draw, and gmem cmds, list of associated
@@ -48,11 +50,16 @@ struct fd_batch {
struct fd_ringbuffer *binning;
/** tiling/gmem (IB0) cmdstream: */
struct fd_ringbuffer *gmem;
+
+   /** list of resources used by currently-unsubmitted batch */
+   struct list_head used_resources;
 };
 
 struct fd_batch * fd_batch_create(struct fd_context *ctx);
 
 void fd_batch_flush(struct fd_batch *batch);
+void fd_batch_resource_used(struct fd_batch *batch, struct fd_resource *rsc,
+   enum fd_resource_status status);
 void fd_batch_check_size(struct fd_batch *batch);
 
 /* not called directly: */
diff --git a/src/gallium/drivers/freedreno/freedreno_context.c 
b/src/gallium/drivers/freedreno/freedreno_context.c
index 70af52e..2c193e4 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -45,7 +45,6 @@ void
 fd_context_render(struct pipe_context *pctx)
 {
struct fd_context *ctx = fd_context(pctx);
-   struct fd_resource *rsc, *rsc_tmp;
 
DBG("needs_flush: %d", ctx->needs_flush);
 
@@ -61,16 +60,6 @@ fd_context_render(struct pipe_context *pctx)
ctx->cleared = ctx->partial_cleared = ctx->restore = ctx->resolve = 0;
ctx->gmem_reason = 0;
ctx->num_draws = 0;
-
-   /* go through all the used resources and clear their reading flag */
-   LIST_FOR_EACH_ENTRY_SAFE(rsc, rsc_tmp, >used_resources, list) {
-   debug_assert(rsc->status != 0);
-   rsc->status = 0;
-   rsc->pending_ctx = NULL;
-   list_delinit(>list);
-   }
-
-   assert(LIST_IS_EMPTY(>use

[Mesa-dev] [PATCH 03/12] list: fix list_replace() for empty lists

2016-07-02 Thread Rob Clark

Before, when the *from* list was empty, it would happily copy the
list_head next/prev (i.e. pointers back to the *from* list_head itself)
into *to*, leaving things in a confused state and causing much mayhem.

Signed-off-by: Rob Clark <robdcl...@gmail.com>
---
 src/util/list.h | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/util/list.h b/src/util/list.h
index f0dec5d..e8a99ac 100644
--- a/src/util/list.h
+++ b/src/util/list.h
@@ -71,12 +71,18 @@ static inline void list_addtail(struct list_head *item, 
struct list_head *list)
 list->prev = item;
 }
 
+static inline bool list_empty(struct list_head *list);
+
 static inline void list_replace(struct list_head *from, struct list_head *to)
 {
-to->prev = from->prev;
-to->next = from->next;
-from->next->prev = to;
-from->prev->next = to;
+if (list_empty(from)) {
+list_inithead(to);
+} else {
+to->prev = from->prev;
+to->next = from->next;
+from->next->prev = to;
+from->prev->next = to;
+}
 }
 
 static inline void list_del(struct list_head *item)
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 02/12] gallium: un-inline pipe_surface_desc

2016-07-02 Thread Rob Clark

Want to re-use this struct, so un-inline it.

Signed-off-by: Rob Clark <robdcl...@gmail.com>
---
 src/gallium/include/pipe/p_state.h | 23 ---
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/src/gallium/include/pipe/p_state.h 
b/src/gallium/include/pipe/p_state.h
index f4bee38..ad7bf0f 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -387,6 +387,17 @@ struct pipe_sampler_state
union pipe_color_union border_color;
 };
 
+union pipe_surface_desc {
+   struct {
+  unsigned level;
+  unsigned first_layer:16;
+  unsigned last_layer:16;
+   } tex;
+   struct {
+  unsigned first_element;
+  unsigned last_element;
+   } buf;
+};
 
 /**
  * A view into a texture that can be bound to a color render target /
@@ -405,17 +416,7 @@ struct pipe_surface
 
unsigned writable:1;  /**< writable shader resource */
 
-   union {
-  struct {
- unsigned level;
- unsigned first_layer:16;
- unsigned last_layer:16;
-  } tex;
-  struct {
- unsigned first_element;
- unsigned last_element;
-  } buf;
-   } u;
+   union pipe_surface_desc u;
 };
 
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 01/12] gallium/util: make util_copy_framebuffer_state(src=NULL) work

2016-07-02 Thread Rob Clark

From: Rob Clark <robcl...@freedesktop.org>

Be more consistent with the other u_inlines util_copy_xyz_state()
helpers and support NULL src.

Signed-off-by: Rob Clark <robcl...@freedesktop.org>
---
 src/gallium/auxiliary/util/u_framebuffer.c | 37 +-
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_framebuffer.c 
b/src/gallium/auxiliary/util/u_framebuffer.c
index f2108a1..c2948a5 100644
--- a/src/gallium/auxiliary/util/u_framebuffer.c
+++ b/src/gallium/auxiliary/util/u_framebuffer.c
@@ -86,22 +86,37 @@ util_copy_framebuffer_state(struct pipe_framebuffer_state 
*dst,
 {
unsigned i;
 
-   dst->width = src->width;
-   dst->height = src->height;
+   if (src) {
+  dst->width = src->width;
+  dst->height = src->height;
 
-   dst->samples = src->samples;
-   dst->layers  = src->layers;
+  dst->samples = src->samples;
+  dst->layers  = src->layers;
 
-   for (i = 0; i < src->nr_cbufs; i++)
-      pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]);
+  for (i = 0; i < src->nr_cbufs; i++)
+ pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]);
 
-   /* Set remaining dest cbuf pointers to NULL */
-   for ( ; i < ARRAY_SIZE(dst->cbufs); i++)
-      pipe_surface_reference(&dst->cbufs[i], NULL);
+  /* Set remaining dest cbuf pointers to NULL */
+  for ( ; i < ARRAY_SIZE(dst->cbufs); i++)
+ pipe_surface_reference(&dst->cbufs[i], NULL);
 
-   dst->nr_cbufs = src->nr_cbufs;
+  dst->nr_cbufs = src->nr_cbufs;
 
-   pipe_surface_reference(&dst->zsbuf, src->zsbuf);
+  pipe_surface_reference(&dst->zsbuf, src->zsbuf);
+   } else {
+  dst->width = 0;
+  dst->height = 0;
+
+  dst->samples = 0;
+  dst->layers  = 0;
+
+  for (i = 0 ; i < ARRAY_SIZE(dst->cbufs); i++)
+ pipe_surface_reference(&dst->cbufs[i], NULL);
+
+  dst->nr_cbufs = 0;
+
+  pipe_surface_reference(&dst->zsbuf, NULL);
+   }
 }
 
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 00/12] render reordering for optimized tile buffer usage

2016-07-02 Thread Rob Clark


med:
  normal:  batch_sysmem=0,batch_gmem=18055, batch_restore=3748
  reorder: batch_sysmem=2220, batch_gmem=14483, batch_restore=174
  (10510 frames)

high:
  normal:  batch_sysmem=63072, batch_gmem=62692, batch_restore=48384
  reorder: batch_sysmem=65429, batch_gmem=58284, batch_restore=43971
  (10510 frames)

ultra:
  normal:  batch_sysmem=63072, batch_gmem=81318, batch_restore=66863
  reorder: batch_sysmem=65869, batch_gmem=71360, batch_restore=56939
  (10510 frames)

So in all cases a nice drop in tile passes (batch_gmem) and reduction
in number of times we need to move back from system memory to tile
buffer (batch_restore).  High/ultra still has a lot of restores per
frame, so maybe there is still some room for improvement.  Not sure
yet if it is the same sort of thing going on as supertuxkart.

I would expect to see some gains in manhattan and possibly trex, but
unfortunately it is mostly using compressed textures that util_blitter
cannot blit, so the resource shadowing back-blit ends up on the CPU
(which ends up flushing previous mipmap generation and stalling, which
kind of defeats the purpose).  I'm not entirely sure what to do here.
Since we don't need scaling/filtering/etc we could map things to a
different format which can be rendered to, but I think we end up
needing to also lie about the width/height.  Which works ok for fb
state (we take w/h from the pipe_surface, not the pipe_resource).  But
not on the src (tex state) side.  Possibly we could add w/h to
pipe_sampler_view to solve this?  Solving this should at least bring
about +15% in manhattan, and maybe a bit in trex.


At any rate, the freedreno bits end up depending on some libdrm
patches[1] which in turn depend on some kernel stuff I have queued up
for 4.8.  So it will be some time before it lands.  But I'd like to
get the first three patches reviewed and pushed.  And suggestions
about the remaining issues welcome, since there is still some room
for further gains.

[1] https://github.com/freedreno/libdrm/commits/fd-next

Rob Clark (12):
  gallium/util: make util_copy_framebuffer_state(src=NULL) work
  gallium: un-inline pipe_surface_desc
  list: fix list_replace() for empty lists
  freedreno: introduce fd_batch
  freedreno: push resource tracking down into batch
  freedreno: dynamically sized/growable cmd buffers
  freedreno: move more batch related tracking to fd_batch
  freedreno: add batch-cache
  freedreno: batch re-ordering support
  freedreno: spiff up some debug traces
  freedreno: shadow textures if possible to avoid stall/flush
  freedreno: support discarding previous rendering in special cases

 src/gallium/auxiliary/util/u_framebuffer.c |  37 ++-
 src/gallium/drivers/freedreno/Makefile.sources |   4 +
 src/gallium/drivers/freedreno/a2xx/fd2_draw.c  |  12 +-
 src/gallium/drivers/freedreno/a2xx/fd2_emit.c  |  15 +-
 src/gallium/drivers/freedreno/a2xx/fd2_gmem.c  |  63 ++---
 src/gallium/drivers/freedreno/a3xx/fd3_context.c   |   4 -
 src/gallium/drivers/freedreno/a3xx/fd3_context.h   |   5 -
 src/gallium/drivers/freedreno/a3xx/fd3_draw.c  |  23 +-
 src/gallium/drivers/freedreno/a3xx/fd3_emit.c  |  23 +-
 src/gallium/drivers/freedreno/a3xx/fd3_emit.h  |   2 +-
 src/gallium/drivers/freedreno/a3xx/fd3_gmem.c  | 146 +--
 src/gallium/drivers/freedreno/a4xx/fd4_draw.c  |  41 +--
 src/gallium/drivers/freedreno/a4xx/fd4_draw.h  |  13 +-
 src/gallium/drivers/freedreno/a4xx/fd4_emit.c  |  24 +-
 src/gallium/drivers/freedreno/a4xx/fd4_emit.h  |   2 +-
 src/gallium/drivers/freedreno/a4xx/fd4_gmem.c  | 122 -
 src/gallium/drivers/freedreno/freedreno_batch.c| 280 +
 src/gallium/drivers/freedreno/freedreno_batch.h| 152 +++
 .../drivers/freedreno/freedreno_batch_cache.c  | 246 ++
 .../drivers/freedreno/freedreno_batch_cache.h  |  51 
 src/gallium/drivers/freedreno/freedreno_context.c  | 131 ++
 src/gallium/drivers/freedreno/freedreno_context.h  | 123 ++---
 src/gallium/drivers/freedreno/freedreno_draw.c | 132 +-
 src/gallium/drivers/freedreno/freedreno_draw.h |  15 +-
 src/gallium/drivers/freedreno/freedreno_gmem.c | 110 
 src/gallium/drivers/freedreno/freedreno_gmem.h |   6 +-
 src/gallium/drivers/freedreno/freedreno_query_hw.c |   8 +-
 src/gallium/drivers/freedreno/freedreno_resource.c | 242 --
 src/gallium/drivers/freedreno/freedreno_resource.h |  10 +-
 src/gallium/drivers/freedreno/freedreno_screen.c   |   9 +
 src/gallium/drivers/freedreno/freedreno_screen.h   |   2 +
 src/gallium/drivers/freedreno/freedreno_state.c|  19 +-
 src/gallium/drivers/freedreno/freedreno_util.h |  43 ++--
 src/gallium/include/pipe/p_state.h |  23 +-
 src/util/list.h|  14 +-
 35 files changed, 1486 insertions(+), 666 deletions(-)
 create mode 100644 src/gallium/drivers/freedreno/freedreno_batch.c
 cre

Re: [Mesa-dev] [PATCH v3] mesa/st: Use 'struct nir_shader' instead of 'nir_shader'.

2016-06-30 Thread Rob Clark

On Wed, Jun 29, 2016 at 11:40 PM, Vinson Lee <v...@freedesktop.org> wrote:
> Fix this build error with GCC 4.4.
>
>   CC state_tracker/st_nir_lower_builtin.lo
> In file included from state_tracker/st_nir_lower_builtin.c:61:
> state_tracker/st_nir.h:34: error: redefinition of typedef ‘nir_shader’
> ../../src/compiler/nir/nir.h:1830: note: previous declaration of ‘nir_shader’ 
> was here
>
> Suggested-by: Rob Clark <robcl...@freedesktop.org>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96235
> Signed-off-by: Vinson Lee <v...@freedesktop.org>
> Reviewed-by: Jason Ekstrand <ja...@jlekstrand.net>

Thanks,

Reviewed-by: Rob Clark <robdcl...@gmail.com>

> ---
> v3 - Removed st_nir_lower_builtin.c changes.
>
>  src/mesa/state_tracker/st_nir.h |   12 ++--
>  1 files changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_nir.h b/src/mesa/state_tracker/st_nir.h
> index 49ba573..4f3c606 100644
> --- a/src/mesa/state_tracker/st_nir.h
> +++ b/src/mesa/state_tracker/st_nir.h
> @@ -31,14 +31,14 @@
>  extern "C" {
>  #endif
>
> -typedef struct nir_shader nir_shader;
> +struct nir_shader;
>
> -void st_nir_lower_builtin(nir_shader *shader);
> -nir_shader * st_glsl_to_nir(struct st_context *st, struct gl_program *prog,
> -struct gl_shader_program *shader_program,
> -gl_shader_stage stage);
> +void st_nir_lower_builtin(struct nir_shader *shader);
> +struct nir_shader * st_glsl_to_nir(struct st_context *st, struct gl_program 
> *prog,
> +  struct gl_shader_program *shader_program,
> +  gl_shader_stage stage);
>
> -void st_finalize_nir(struct st_context *st, struct gl_program *prog, 
> nir_shader *nir);
> +void st_finalize_nir(struct st_context *st, struct gl_program *prog, struct 
> nir_shader *nir);
>
>  struct gl_program *
>  st_nir_get_mesa_program(struct gl_context *ctx,
> --
> 1.7.1
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] glsl: add driconf to zero-init unintialized vars

2016-06-29 Thread Rob Clark

On Wed, Jun 29, 2016 at 12:43 AM, Eirik Byrkjeflot Anonsen
<ei...@eirikba.org> wrote:
> Rob Clark <robdcl...@gmail.com> writes:
>
>> On Tue, Jun 28, 2016 at 11:28 AM, Marek Olšák <mar...@gmail.com> wrote:
>>> On Mon, Jun 27, 2016 at 9:28 PM, Rob Clark <robdcl...@gmail.com> wrote:
>>>> On Mon, Jun 27, 2016 at 3:06 PM, Kenneth Graunke <kenn...@whitecape.org> 
>>>> wrote:
>>>>> On Monday, June 27, 2016 11:43:28 AM PDT Matt Turner wrote:
>>>>>> On Mon, Jun 27, 2016 at 4:44 AM, Rob Clark <robdcl...@gmail.com> wrote:
>>>>>> > On Mon, Jun 27, 2016 at 7:13 AM, Alan Swanson 
>>>>>> > <rei...@improbability.net> wrote:
>>>>>> >> On 2016-06-25 13:37, Rob Clark wrote:
>>>>>> >>>
>>>>>> >>> Some games are sloppy.. perhaps because it is defined behavior for 
>>>>>> >>> DX or
>>>>>> >>> perhaps because nv blob driver defaults things to zero.
>>>>>> >>>
>>>>>> >>> So add driconf param to force uninitialized variables to default to 
>>>>>> >>> zero.
>>>>>> >>>
>>>>>> >>> This issue was observed with rust, from steam store.  But has 
>>>>>> >>> surfaced
>>>>>> >>> elsewhere in the past.
>>>>>> >>>
>>>>>> >>> Signed-off-by: Rob Clark <robcl...@freedesktop.org>
>>>>>> >>> ---
>>>>>> >>> Note that I left out the drirc bit, since not entirely sure how to
>>>>>> >>> identify this game.  (I don't actually have the game, just working 
>>>>>> >>> off
>>>>>> >>> of an apitrace)
>>>>>> >>>
>>>>>> >>> Possibly worth mentioning that for the shaders using uninitialized 
>>>>>> >>> vars
>>>>>> >>> having zero-initializers lets constant-propagation get rid of a whole
>>>>>> >>> lot of instructions.  One shader I saw dropped to less than half of
>>>>>> >>> it's original instruction count.
>>>>>> >>
>>>>>> >>
>>>>>> >> If the default for uninitialised variables is undefined, then with the
>>>>>> >> reported shader optimisations why bother with the (DRI) option when
>>>>>> >> zeroing could still essentially be classed as undefined?
>>>>>> >>
>>>>>> >> Cuts the patch down to just the src/compiler/glsl/ast_to_hir.cpp 
>>>>>> >> change.
>>>>>> >
>>>>>> > I did suggest that on #dri-devel, but Jason had a theoretical example
>>>>>> > where it would hurt.. iirc something like:
>>>>>> >
>>>>>> >   float maybe_undef;
>>>>>> >   for (int i = 0; i < some_uniform_at_least_one; i++)
>>>>>> >  maybe_undef = ...
>>>>>> >
>>>>>> > also, he didn't want to hide shader bugs that app should fix.
>>>>>> >
>>>>>> > It would be interesting to rush shaderdb w/ glsl_zero_init=true and
>>>>>> > see what happens, but I didn't get around to that yet.
>>>>>>
>>>>>> Here's what I get on i965. It's not a clear win.
>>>>>>
>>>>>> total instructions in shared programs: 5249030 -> 5249002 (-0.00%)
>>>>>> instructions in affected programs: 28936 -> 28908 (-0.10%)
>>>>>> helped: 66
>>>>>> HURT: 132
>>>>>>
>>>>>> total cycles in shared programs: 57966694 -> 57956306 (-0.02%)
>>>>>> cycles in affected programs: 1136118 -> 1125730 (-0.91%)
>>>>>> helped: 78
>>>>>> HURT: 106
>>>>>
>>>>> I suspect most of the help is because we're missing undef optimizations,
>>>>> such as CSE...while zero could be CSE'd.  (I have a patch, but it hurts
>>>>> things too...)
>>>>
>>>> right, I was thinking that treating undef as zero in constant-folding
>>>> would have the same effect.. ofc it might make shader bugs less
>>>> obvious.
>>>>
>>>> Btw, does anyone know what fglrx does?  Afaiu nv blob treats undef as
>>>> zero.  If fglrx does the same, I suppose that strengthens the argument
>>>> for "just do this unconditionally".
>>>
>>> No idea what fglrx does, but LLVM does eliminate code with undefined
>>> inputs. Initializing everything to 0 might make that worse.
>>
>> hmm, treating as zero does eliminate a lot.. anyway, I guess we'll
>> stick w/ driconf.
>>
>> fwiw, with some help from the reporter, we figured out that this is
>> the bit that I need to squash into drirc:
>>
>> 
>> 
>> 
>
> Not knowing a lot about drirc, I suspect you should have a double quote
> at the end of glsl_zero_init as well?

yup, that was a typo

BR,
-R

> eirik
>
>> now, if I could talk somebody into a r-b for this and the i965 fix? ;-)
>>
>> BR,
>> -R
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] glsl: add driconf to zero-init unintialized vars

2016-06-28 Thread Rob Clark

On Tue, Jun 28, 2016 at 11:28 AM, Marek Olšák <mar...@gmail.com> wrote:
> On Mon, Jun 27, 2016 at 9:28 PM, Rob Clark <robdcl...@gmail.com> wrote:
>> On Mon, Jun 27, 2016 at 3:06 PM, Kenneth Graunke <kenn...@whitecape.org> 
>> wrote:
>>> On Monday, June 27, 2016 11:43:28 AM PDT Matt Turner wrote:
>>>> On Mon, Jun 27, 2016 at 4:44 AM, Rob Clark <robdcl...@gmail.com> wrote:
>>>> > On Mon, Jun 27, 2016 at 7:13 AM, Alan Swanson <rei...@improbability.net> 
>>>> > wrote:
>>>> >> On 2016-06-25 13:37, Rob Clark wrote:
>>>> >>>
>>>> >>> Some games are sloppy.. perhaps because it is defined behavior for DX 
>>>> >>> or
>>>> >>> perhaps because nv blob driver defaults things to zero.
>>>> >>>
>>>> >>> So add driconf param to force uninitialized variables to default to 
>>>> >>> zero.
>>>> >>>
>>>> >>> This issue was observed with rust, from steam store.  But has surfaced
>>>> >>> elsewhere in the past.
>>>> >>>
>>>> >>> Signed-off-by: Rob Clark <robcl...@freedesktop.org>
>>>> >>> ---
>>>> >>> Note that I left out the drirc bit, since not entirely sure how to
>>>> >>> identify this game.  (I don't actually have the game, just working off
>>>> >>> of an apitrace)
>>>> >>>
>>>> >>> Possibly worth mentioning that for the shaders using uninitialized vars
>>>> >>> having zero-initializers lets constant-propagation get rid of a whole
>>>> >>> lot of instructions.  One shader I saw dropped to less than half of
>>>> >>> it's original instruction count.
>>>> >>
>>>> >>
>>>> >> If the default for uninitialised variables is undefined, then with the
>>>> >> reported shader optimisations why bother with the (DRI) option when
>>>> >> zeroing could still essentially be classed as undefined?
>>>> >>
>>>> >> Cuts the patch down to just the src/compiler/glsl/ast_to_hir.cpp change.
>>>> >
>>>> > I did suggest that on #dri-devel, but Jason had a theoretical example
>>>> > where it would hurt.. iirc something like:
>>>> >
>>>> >   float maybe_undef;
>>>> >   for (int i = 0; i < some_uniform_at_least_one; i++)
>>>> >  maybe_undef = ...
>>>> >
>>>> > also, he didn't want to hide shader bugs that app should fix.
>>>> >
>>>> > It would be interesting to rush shaderdb w/ glsl_zero_init=true and
>>>> > see what happens, but I didn't get around to that yet.
>>>>
>>>> Here's what I get on i965. It's not a clear win.
>>>>
>>>> total instructions in shared programs: 5249030 -> 5249002 (-0.00%)
>>>> instructions in affected programs: 28936 -> 28908 (-0.10%)
>>>> helped: 66
>>>> HURT: 132
>>>>
>>>> total cycles in shared programs: 57966694 -> 57956306 (-0.02%)
>>>> cycles in affected programs: 1136118 -> 1125730 (-0.91%)
>>>> helped: 78
>>>> HURT: 106
>>>
>>> I suspect most of the help is because we're missing undef optimizations,
>>> such as CSE...while zero could be CSE'd.  (I have a patch, but it hurts
>>> things too...)
>>
>> right, I was thinking that treating undef as zero in constant-folding
>> would have the same effect.. ofc it might make shader bugs less
>> obvious.
>>
>> Btw, does anyone know what fglrx does?  Afaiu nv blob treats undef as
>> zero.  If fglrx does the same, I suppose that strengthens the argument
>> for "just do this unconditionally".
>
> No idea what fglrx does, but LLVM does eliminate code with undefined
> inputs. Initializing everything to 0 might make that worse.

hmm, treating as zero does eliminate a lot.. anyway, I guess we'll
stick w/ driconf.

fwiw, with some help from the reporter, we figured out that this is
the bit that I need to squash into drirc:





now, if I could talk somebody into a r-b for this and the i965 fix? ;-)

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] mesa/st: Include nir.h for nir_shader symbol.

2016-06-28 Thread Rob Clark

On Mon, Jun 27, 2016 at 10:08 PM, Matt Turner  wrote:
> On Mon, Jun 27, 2016 at 6:45 PM, Vinson Lee  wrote:
>> Fix this build error with GCC 4.4.
>>
>>   CC state_tracker/st_nir_lower_builtin.lo
>> In file included from state_tracker/st_nir_lower_builtin.c:61:
>> state_tracker/st_nir.h:34: error: redefinition of typedef ‘nir_shader’
>> ../../src/compiler/nir/nir.h:1830: note: previous declaration of 
>> ‘nir_shader’ was here
>
> This error seems to imply that nir.h is already being included somehow.
>
> Does just removing the typedef solve the problem? Can we figure out
> how nir.h is already being included and remove that?

nir.h is coming from st_nir_lower_builtin.c which #includes st_nir.h..

Perhaps the thing to do is drop the typedef, and just fwd declare
'struct nir_shader', and use 'struct nir_shader' instead of
'nir_shader' in st_nir.h

Already half of the world gets recompiled when you touch nir.h, and
I'd rather not make that worse..

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] glsl: add driconf to zero-init unintialized vars

2016-06-27 Thread Rob Clark

On Mon, Jun 27, 2016 at 3:06 PM, Kenneth Graunke <kenn...@whitecape.org> wrote:
> On Monday, June 27, 2016 11:43:28 AM PDT Matt Turner wrote:
>> On Mon, Jun 27, 2016 at 4:44 AM, Rob Clark <robdcl...@gmail.com> wrote:
>> > On Mon, Jun 27, 2016 at 7:13 AM, Alan Swanson <rei...@improbability.net> 
>> > wrote:
>> >> On 2016-06-25 13:37, Rob Clark wrote:
>> >>>
>> >>> Some games are sloppy.. perhaps because it is defined behavior for DX or
>> >>> perhaps because nv blob driver defaults things to zero.
>> >>>
>> >>> So add driconf param to force uninitialized variables to default to zero.
>> >>>
>> >>> This issue was observed with rust, from steam store.  But has surfaced
>> >>> elsewhere in the past.
>> >>>
>> >>> Signed-off-by: Rob Clark <robcl...@freedesktop.org>
>> >>> ---
>> >>> Note that I left out the drirc bit, since not entirely sure how to
>> >>> identify this game.  (I don't actually have the game, just working off
>> >>> of an apitrace)
>> >>>
>> >>> Possibly worth mentioning that for the shaders using uninitialized vars
>> >>> having zero-initializers lets constant-propagation get rid of a whole
>> >>> lot of instructions.  One shader I saw dropped to less than half of
>> >>> it's original instruction count.
>> >>
>> >>
>> >> If the default for uninitialised variables is undefined, then with the
>> >> reported shader optimisations why bother with the (DRI) option when
>> >> zeroing could still essentially be classed as undefined?
>> >>
>> >> Cuts the patch down to just the src/compiler/glsl/ast_to_hir.cpp change.
>> >
>> > I did suggest that on #dri-devel, but Jason had a theoretical example
>> > where it would hurt.. iirc something like:
>> >
>> >   float maybe_undef;
>> >   for (int i = 0; i < some_uniform_at_least_one; i++)
>> >  maybe_undef = ...
>> >
>> > also, he didn't want to hide shader bugs that app should fix.
>> >
>> > It would be interesting to rush shaderdb w/ glsl_zero_init=true and
>> > see what happens, but I didn't get around to that yet.
>>
>> Here's what I get on i965. It's not a clear win.
>>
>> total instructions in shared programs: 5249030 -> 5249002 (-0.00%)
>> instructions in affected programs: 28936 -> 28908 (-0.10%)
>> helped: 66
>> HURT: 132
>>
>> total cycles in shared programs: 57966694 -> 57956306 (-0.02%)
>> cycles in affected programs: 1136118 -> 1125730 (-0.91%)
>> helped: 78
>> HURT: 106
>
> I suspect most of the help is because we're missing undef optimizations,
> such as CSE...while zero could be CSE'd.  (I have a patch, but it hurts
> things too...)

right, I was thinking that treating undef as zero in constant-folding
would have the same effect.. ofc it might make shader bugs less
obvious.

Btw, does anyone know what fglrx does?  Afaiu nv blob treats undef as
zero.  If fglrx does the same, I suppose that strengthens the argument
for "just do this unconditionally".

(but I'm still leaning towards "make it conditional" so far..)

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] glsl: add driconf to zero-init unintialized vars

2016-06-27 Thread Rob Clark

On Mon, Jun 27, 2016 at 7:13 AM, Alan Swanson <rei...@improbability.net> wrote:
> On 2016-06-25 13:37, Rob Clark wrote:
>>
>> Some games are sloppy.. perhaps because it is defined behavior for DX or
>> perhaps because nv blob driver defaults things to zero.
>>
>> So add driconf param to force uninitialized variables to default to zero.
>>
>> This issue was observed with rust, from steam store.  But has surfaced
>> elsewhere in the past.
>>
>> Signed-off-by: Rob Clark <robcl...@freedesktop.org>
>> ---
>> Note that I left out the drirc bit, since not entirely sure how to
>> identify this game.  (I don't actually have the game, just working off
>> of an apitrace)
>>
>> Possibly worth mentioning that for the shaders using uninitialized vars
>> having zero-initializers lets constant-propagation get rid of a whole
>> lot of instructions.  One shader I saw dropped to less than half of
>> it's original instruction count.
>
>
> If the default for uninitialised variables is undefined, then with the
> reported shader optimisations why bother with the (DRI) option when
> zeroing could still essentially be classed as undefined?
>
> Cuts the patch down to just the src/compiler/glsl/ast_to_hir.cpp change.

I did suggest that on #dri-devel, but Jason had a theoretical example
where it would hurt.. iirc something like:

  float maybe_undef;
  for (int i = 0; i < some_uniform_at_least_one; i++)
 maybe_undef = ...

also, he didn't want to hide shader bugs that app should fix.

It would be interesting to run shaderdb w/ glsl_zero_init=true and
see what happens, but I didn't get around to that yet.

I did have the idea to try and only inject the initializer when the
compiler warns about uninitialized vars, but in the presence of flow
control there seem like some cases that glsl doesn't catch.

BR,
-R


> --
> Alan.
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] vc4: add hash table look-up for exported dmabufs

2016-06-26 Thread Rob Clark

On Sat, Jun 25, 2016 at 11:33 PM, Eric Anholt  wrote:
> Rob Herring  writes:
>
>> It is necessary to reuse existing BOs when dmabufs are imported. There
>> are 2 cases that need to be handled. dmabufs can be created/exported and
>> imported by the same process and can be imported multiple times.
>> Copying other drivers, add a hash table to track exported BOs so the
>> BOs get reused.
>>
>> Cc: Eric Anholt 
>> Signed-off-by: Rob Herring 
>> ---
>> With this and the fd hashing to get a single screen, the flickery screen
>> is gone and Android is somewhat working. Several apps though hang, don't
>> render, and then exit. I also see CMA allocation errors, but not
>> correlating to the app problems.
>>
>> Also, flink names need similar hash table look-up as well. Maybe that's
>> a don't care for vc4? In any case, I don't have the setup to test that.
>>
>> Rob
>>
>>  src/gallium/drivers/vc4/vc4_bufmgr.c | 20 +++-
>>  src/gallium/drivers/vc4/vc4_bufmgr.h | 12 +++-
>>  src/gallium/drivers/vc4/vc4_screen.c | 15 +++
>>  src/gallium/drivers/vc4/vc4_screen.h |  3 +++
>>  4 files changed, 48 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c 
>> b/src/gallium/drivers/vc4/vc4_bufmgr.c
>> index 21e3bde..d91157b 100644
>> --- a/src/gallium/drivers/vc4/vc4_bufmgr.c
>> +++ b/src/gallium/drivers/vc4/vc4_bufmgr.c
>> @@ -28,6 +28,7 @@
>>  #include 
>>  #include 
>>
>> +#include "util/u_hash_table.h"
>>  #include "util/u_memory.h"
>>  #include "util/ralloc.h"
>>
>> @@ -329,10 +330,19 @@ vc4_bo_open_handle(struct vc4_screen *screen,
>> uint32_t winsys_stride,
>> uint32_t handle, uint32_t size)
>>  {
>> -struct vc4_bo *bo = CALLOC_STRUCT(vc4_bo);
>> +struct vc4_bo *bo;
>>
>>  assert(size);
>>
>> +pipe_mutex_lock(screen->bo_handles_mutex);
>> +
>> +bo = util_hash_table_get(screen->bo_handles, 
>> (void*)(uintptr_t)handle);
>> +if (bo) {
>> +pipe_reference(NULL, &bo->reference);
>> +goto done;
>> +}
>> +
>> +bo = CALLOC_STRUCT(vc4_bo);
>>  pipe_reference_init(&bo->reference, 1);
>>  bo->screen = screen;
>>  bo->handle = handle;
>> @@ -347,6 +357,10 @@ vc4_bo_open_handle(struct vc4_screen *screen,
>>  bo->map = malloc(bo->size);
>>  #endif
>>
>> +util_hash_table_set(screen->bo_handles, (void *)(uintptr_t)handle, 
>> bo);
>> +
>> +done:
>> +pipe_mutex_unlock(screen->bo_handles_mutex);
>>  return bo;
>>  }
>>
>> @@ -401,6 +415,10 @@ vc4_bo_get_dmabuf(struct vc4_bo *bo)
>>  }
>>  bo->private = false;
>>
>> +pipe_mutex_lock(bo->screen->bo_handles_mutex);
>> +util_hash_table_set(bo->screen->bo_handles, (void 
>> *)(uintptr_t)bo->handle, bo);
>> +pipe_mutex_unlock(bo->screen->bo_handles_mutex);
>> +
>>  return fd;
>>  }
>>
>> diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.h 
>> b/src/gallium/drivers/vc4/vc4_bufmgr.h
>> index b77506e..0896b30 100644
>> --- a/src/gallium/drivers/vc4/vc4_bufmgr.h
>> +++ b/src/gallium/drivers/vc4/vc4_bufmgr.h
>> @@ -25,6 +25,7 @@
>>  #define VC4_BUFMGR_H
>>
>>  #include 
>> +#include "util/u_hash_table.h"
>>  #include "util/u_inlines.h"
>>  #include "vc4_qir.h"
>>
>> @@ -87,11 +88,20 @@ vc4_bo_reference(struct vc4_bo *bo)
>>  static inline void
>>  vc4_bo_unreference(struct vc4_bo **bo)
>>  {
>> +struct vc4_screen *screen;
>>  if (!*bo)
>>  return;
>>
>> -if (pipe_reference(&(*bo)->reference, NULL))
>> +screen = (*bo)->screen;
>> +pipe_mutex_lock(screen->bo_handles_mutex);
>> +
>> +if (pipe_reference(&(*bo)->reference, NULL)) {
>>  vc4_bo_last_unreference(*bo);
>> +util_hash_table_remove(screen->bo_handles,
>> +   (void *)(uintptr_t)(*bo)->handle);
>
> I think you're use-after-freeing bo here.  Just stick it before
> last_unreference()?
>
>> +}
>> +
>> +pipe_mutex_unlock(screen->bo_handles_mutex);
>>  *bo = NULL;
>>  }
>
> Taking a mutex on every unref sucks -- it's a *really* hot path, and it
> kind of defeats the point of doing these pipe_reference atomics.  We
> should be able to skip the mutex in the bo->private case, since any
> flinked/dmabuf BO will be !private.  Think you could give that a shot?

or just move the mutex inside the if (pipe_reference(...)).. so you
only take it on final unref?

BR,
-R


> Thanks for debugging!  Note, I'm still on vacation, so I'll be slow at
> replying.
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org

Re: [Mesa-dev] [PATCH] nir: Fix copy_prop_src when src is an indirect access on a reg.

2016-06-26 Thread Rob Clark

On Sat, Jun 25, 2016 at 8:54 PM, Eric Anholt <e...@anholt.net> wrote:
> The intent was to continue down the indirect chain, not to call ourselves
> with unchanged input arguments.  Found by code inspection, and comparison
> to copy_prop_alu_src().
>
> We haven't hit this because callers of NIR's copy prop are doing so in
> SSA, before indirect variable dereferences have been lowered to registers.

Reviewed-by: Rob Clark <robdcl...@gmail.com>

> ---
>  src/compiler/nir/nir_opt_copy_propagate.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/compiler/nir/nir_opt_copy_propagate.c 
> b/src/compiler/nir/nir_opt_copy_propagate.c
> index adca7fa6eff2..c26e07fda712 100644
> --- a/src/compiler/nir/nir_opt_copy_propagate.c
> +++ b/src/compiler/nir/nir_opt_copy_propagate.c
> @@ -103,7 +103,7 @@ copy_prop_src(nir_src *src, nir_instr *parent_instr, 
> nir_if *parent_if)
>  {
> if (!src->is_ssa) {
>if (src->reg.indirect)
> - return copy_prop_src(src, parent_instr, parent_if);
> + return copy_prop_src(src->reg.indirect, parent_instr, parent_if);
>return false;
> }
>
> --
> 2.8.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] i965: don't drop const initializers in vector splitting

2016-06-25 Thread Rob Clark

On Sat, Jun 25, 2016 at 10:29 AM, Jason Ekstrand <ja...@jlekstrand.net> wrote:
>
> On Jun 25, 2016 5:39 AM, "Rob Clark" <robdcl...@gmail.com> wrote:
>>
>> From: Rob Clark <robcl...@freedesktop.org>
>>
>> Not entirely sure how we didn't hit this before, but dropping the const
>> initializer on the floor is obviously not correct.
>>
>> Signed-off-by: Rob Clark <robcl...@freedesktop.org>
>> ---
>> No idea why i965 even still uses this pass, vs nir scalarizing pass.
>> So might want to drop this.  But I figured fixing it first gives
>> something that can be cherry-picked to release branches, so this
>> patch should land before deleting the ir pass.
>>
>>  src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp | 12 
>>  1 file changed, 12 insertions(+)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
>> b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
>> index 5fe24de..7c58089 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
>> @@ -372,6 +372,18 @@ brw_do_vector_splitting(exec_list *instructions)
>>
>>   ralloc_free(name);
>>
>> + if (entry->var->constant_initializer) {
>> +ir_constant_data data = {0};
>> +assert(entry->var->data.has_initializer);
>> +if (entry->var->type->is_double()) {
>> +   data.d[i] = entry->var->constant_initializer->value.d[i];
>
> I think you want data.d[0] on the left-hand side.
>
>> +} else {
>> +   data.u[i] = entry->var->constant_initializer->value.u[i];
>
> Same here

oh, yes ofc..  I should have known only having a way to test it w/
zeros would bite me :-P
zero's would bite me :-P

fixed up locally

BR,
-R

>> +}
>> +entry->components[i]->data.has_initializer = true;
>> +entry->components[i]->constant_initializer =
>> new(entry->components[i]) ir_constant(type, &data);
>> + }
>> +
>>  entry->var->insert_before(entry->components[i]);
>>}
>>
>> --
>> 2.7.4
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] i965: don't drop const initializers in vector splitting

2016-06-25 Thread Rob Clark

From: Rob Clark <robcl...@freedesktop.org>

Not entirely sure how we didn't hit this before, but dropping the const
initializer on the floor is obviously not correct.

Signed-off-by: Rob Clark <robcl...@freedesktop.org>
---
No idea why i965 even still uses this pass, vs nir scalarizing pass.
So might want to drop this.  But I figured fixing it first gives
something that can be cherry-picked to release branches, so this
patch should land before deleting the ir pass.

 src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp | 12 
 1 file changed, 12 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
index 5fe24de..7c58089 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
@@ -372,6 +372,18 @@ brw_do_vector_splitting(exec_list *instructions)
 
  ralloc_free(name);
 
+ if (entry->var->constant_initializer) {
+ir_constant_data data = {0};
+assert(entry->var->data.has_initializer);
+if (entry->var->type->is_double()) {
+   data.d[i] = entry->var->constant_initializer->value.d[i];
+} else {
+   data.u[i] = entry->var->constant_initializer->value.u[i];
+}
+entry->components[i]->data.has_initializer = true;
+entry->components[i]->constant_initializer = 
new(entry->components[i]) ir_constant(type, &data);
+ }
+
 entry->var->insert_before(entry->components[i]);
   }
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] glsl: add driconf to zero-init unintialized vars

2016-06-25 Thread Rob Clark

From: Rob Clark <robcl...@freedesktop.org>

Some games are sloppy.. perhaps because it is defined behavior for DX or
perhaps because nv blob driver defaults things to zero.

So add driconf param to force uninitialized variables to default to zero.

This issue was observed with rust, from steam store.  But has surfaced
elsewhere in the past.

Signed-off-by: Rob Clark <robcl...@freedesktop.org>
---
Note that I left out the drirc bit, since not entirely sure how to
identify this game.  (I don't actually have the game, just working off
of an apitrace)

Possibly worth mentioning that for the shaders using uninitialized vars
having zero-initializers lets constant-propagation get rid of a whole
lot of instructions.  One shader I saw dropped to less than half of
its original instruction count.

Second patch in the series is just fixing an i965 bug that was exposed
by this patch.

 src/compiler/glsl/ast_to_hir.cpp| 9 +
 src/compiler/glsl/glsl_parser_extras.cpp| 1 +
 src/compiler/glsl/glsl_parser_extras.h  | 1 +
 src/gallium/include/state_tracker/st_api.h  | 1 +
 src/gallium/state_trackers/dri/dri_screen.c | 2 ++
 src/mesa/drivers/dri/common/xmlpool/t_options.h | 5 -
 src/mesa/drivers/dri/i965/brw_context.c | 2 ++
 src/mesa/drivers/dri/i965/intel_screen.c| 4 
 src/mesa/main/mtypes.h  | 5 +
 src/mesa/state_tracker/st_extensions.c  | 2 ++
 10 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index 0cfce68..d2c284f 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -4697,6 +4697,15 @@ ast_declarator_list::hir(exec_list *instructions,
   apply_layout_qualifier_to_variable(&this->type->qualifier, var, state,
  &loc);
 
+  if ((var->data.mode == ir_var_auto || var->data.mode == ir_var_temporary)
+  && (var->type->base_type >= GLSL_TYPE_UINT)
+  && (var->type->base_type <= GLSL_TYPE_BOOL)
+  && state->zero_init) {
+ const ir_constant_data data = {0};
+ var->data.has_initializer = true;
+ var->constant_initializer = new(var) ir_constant(var->type, &data);
+  }
+
   if (this->type->qualifier.flags.q.invariant) {
  if (!is_varying_var(var, state->stage)) {
 _mesa_glsl_error(&loc, state,
diff --git a/src/compiler/glsl/glsl_parser_extras.cpp 
b/src/compiler/glsl/glsl_parser_extras.cpp
index 09f7477..fc2859a 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -74,6 +74,7 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct 
gl_context *_ctx,
/* Set default language version and extensions */
this->language_version = 110;
this->forced_language_version = ctx->Const.ForceGLSLVersion;
+   this->zero_init = ctx->Const.GLSLZeroInit;
this->es_shader = false;
this->ARB_texture_rectangle_enable = true;
 
diff --git a/src/compiler/glsl/glsl_parser_extras.h 
b/src/compiler/glsl/glsl_parser_extras.h
index 8c43292..669b3d1 100644
--- a/src/compiler/glsl/glsl_parser_extras.h
+++ b/src/compiler/glsl/glsl_parser_extras.h
@@ -306,6 +306,7 @@ struct _mesa_glsl_parse_state {
bool es_shader;
unsigned language_version;
unsigned forced_language_version;
+   bool zero_init;
gl_shader_stage stage;
 
/**
diff --git a/src/gallium/include/state_tracker/st_api.h 
b/src/gallium/include/state_tracker/st_api.h
index 41daa47..21d5177 100644
--- a/src/gallium/include/state_tracker/st_api.h
+++ b/src/gallium/include/state_tracker/st_api.h
@@ -242,6 +242,7 @@ struct st_config_options
unsigned force_glsl_version;
boolean force_s3tc_enable;
boolean allow_glsl_extension_directive_midshader;
+   boolean glsl_zero_init;
 };
 
 /**
diff --git a/src/gallium/state_trackers/dri/dri_screen.c 
b/src/gallium/state_trackers/dri/dri_screen.c
index 2ac55c8..b16585a 100644
--- a/src/gallium/state_trackers/dri/dri_screen.c
+++ b/src/gallium/state_trackers/dri/dri_screen.c
@@ -74,6 +74,7 @@ const __DRIconfigOptionsExtension gallium_config_options = {
 
   DRI_CONF_SECTION_MISCELLANEOUS
  DRI_CONF_ALWAYS_HAVE_DEPTH_BUFFER("false")
+ DRI_CONF_GLSL_ZERO_INIT("false")
   DRI_CONF_SECTION_END
DRI_CONF_END
 };
@@ -98,6 +99,7 @@ dri_fill_st_options(struct st_config_options *options,
   driQueryOptionb(optionCache, "force_s3tc_enable");
options->allow_glsl_extension_directive_midshader =
   driQueryOptionb(optionCache, "allow_glsl_extension_directive_midshader");
+   options->glsl_zero_init = driQueryOptionb(optionCache, "glsl_zero_init");
 }
 
 static const __DRIconfig **
diff --git a/src/mesa/drivers/dri/common/xmlpool/t_options.h 
b/src/mesa/drivers/dri/

Re: [Mesa-dev] [RFC] Coding style scripts (Was Re: [PATCH 1/2] gallium: replace [0-9].f with [0-9].0f)

2016-06-22 Thread Rob Clark

On Wed, Jun 22, 2016 at 8:25 AM, Emil Velikov  wrote:
> Hi All,
>
> Seems like we have a few people who are keen on the idea of having some
> form of at least semi-automated way to handle coding style issues.
>
> Some options/ideas:
>  - Combine the emacs .dir-local.el + emacs -batch to do the checking:
> Pros: rules aren't duplicated in multiple places (like the second
> option). Cons: not everyone has emacs
>  - or, check-in a few (as needed) xa-indent style scripts based on indent.
> Pros: indent seems (imho) more widely spread. Cons: the style rules
> are duplicated.
>
> IMHO we don't have to 'enforce' one or the other throughout the tree.
> Having either one would be beneficial, but definitely not a
> requirement.
>
> Once we're happy with that, we could have a simple toplevel
> "check-all-style" script, which can be used by both developers and git
> hooks. And with time patchwork/other solution will be able to
> pre-emptively run these and provide feedback, at which time we'll
> toggle the git hooks to reject 'non-compliant' pushes (or even before
> the pw stuff is in place) ?
>
> There's a couple small catches
>  - I cannot convince emacs to honour .dir-locals.el in batch mode. Any takers 
> ?
>  - We might have a couple of initial "x: unify coding style" commits.
>
> Rob, seems like most (all?) of your mode lines in freedreno are busted
> (missing : after tab-width, typo(s) in tab-width). Considering there's
> a fdno .dir-locals.el are you ok with just nuking them altogether?

no problem

BR,
-R

> Thanks
> Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [RFC] New dma_buf -> EGLImage EGL extension - Final spec published!

2016-06-20 Thread Rob Clark

On Mon, Jun 20, 2016 at 8:37 AM, Pekka Paalanen <ppaala...@gmail.com> wrote:
> On Fri, 17 Jun 2016 11:44:34 -0400
> Rob Clark <robdcl...@gmail.com> wrote:
>
>> On Fri, Jun 17, 2016 at 9:31 AM, Pekka Paalanen <ppaala...@gmail.com> wrote:
>> > On Fri, 17 Jun 2016 08:26:04 -0400
>> > Rob Clark <robdcl...@gmail.com> wrote:
>> >
>> >> On Fri, Jun 17, 2016 at 3:59 AM, Pekka Paalanen <ppaala...@gmail.com> 
>> >> wrote:
>> >> > On Thu, 16 Jun 2016 10:40:51 -0400
>> >> > Rob Clark <robdcl...@gmail.com> wrote:
>> >> >
>> >> >> So, if we wanted to extend this to support the fourcc-modifiers that
>> >> >> we have on the kernel side for compressed/tiled/etc formats, what
>> >> >> would be the right approach?
>> >> >>
>> >> >> A new version of the existing extension or a new
>> >> >> EGL_EXT_image_dma_buf_import2 extension, or ??
>> >> >
>> >> > Hi Rob,
>> >> >
>> >> > there are actually several things it might be nice to add:
>> >> >
>> >> > - a fourth plane, to match what DRM AddFB2 supports
>> >> >
>> >> > - the 64-bit fb modifiers
>> >> >
>> >> > - queries for which pixel formats are supported by EGL, so a display
>> >> >   server can tell the applications that before the application goes and
>> >> >   tries with a random bunch of them, shooting in the dark
>> >> >
>> >> > - queries for which modifiers are supported for each pixel format, ditto
>> >> >
>> >> > I discussed these with Emil in the past, and it seems an appropriate
>> >> > approach might be the following.
>> >> >
>> >> > Adding the 4th plane can be done as revising the existing
>> >> > EGL_EXT_image_dma_buf_import extension. The plane count is tied to
>> >> > pixel formats (and modifiers?), so the user does not need to know
>> >> > specifically whether the EGL implementation could handle a 4th plane or
>> >> > not. It is implied by the pixel format.
>> >> >
>> >> > Adding the fb modifiers needs to be a new extension, so that users can
>> >> > tell if they are supported or not. This is to avoid the following false
>> >> > failure: if user assumes modifiers are always supported, it will (may?)
>> >> > provide zero modifiers explicitly. If EGL implementation does not
>> >> > handle modifiers this would be rejected as unrecognized attributes,
>> >> > while if the zero modifiers were not given explicitly, everything would
>> >> > just work.
>> >>
>> >> hmm, if we design it as "not passing modifier" == "zero modifier", and
>> >> "never explicitly pass a zero modifier" then modifiers could be added
>> >> without a new extension.  Although I agree that queries would need a
>> >> new extension.. so perhaps not worth being clever.
>> >
>> > Indeed.
>> >
>> >> > The queries obviously(?) need a new extension. It might make sense
>> >> > to bundle both modifier support and the queries in the same new
>> >> > extension.
>> >> >
>> >> > We have some rough old WIP code at
>> >> > https://git.collabora.com/cgit/user/lfrb/mesa.git/log/?h=T1410-modifiers
>> >> > https://git.collabora.com/cgit/user/lfrb/egl-specs.git/log/?h=T1410
>> >> >
>> >> >
>> >> >> On Mon, Feb 25, 2013 at 6:54 AM, Tom Cooksey <tom.cook...@arm.com> 
>> >> >> wrote:
>> >> >> > Hi All,
>> >> >> >
>> >> >> > The final spec has had enum values assigned and been published on 
>> >> >> > Khronos:
>> >> >> >
>> >> >> > http://www.khronos.org/registry/egl/extensions/EXT/EGL_EXT_image_dma_buf_import.txt
>> >> >> >
>> >> >> > Thanks to all who've provided input.
>> >> >
>> >> > May I also pull your attention to a detail with the existing spec and
>> >> > Mesa behaviour I am asking about in
>> >> > https://lists.freedesktop.org/archives/mesa-dev/2016-June/120249.html
>> >> > "What is EGL_EXT_image_dma_buf_import image orientation as a GL 
>> >> > texture

Re: [Mesa-dev] [RFC 2/7] pipe-loader-drm: Add common pipe_screen refcounting

2016-06-17 Thread Rob Clark

On Fri, Jun 17, 2016 at 2:23 PM, Emil Velikov  wrote:
> Hi Rob,
>
> On 17 June 2016 at 18:45, Rob Herring  wrote:
>
>>  struct pipe_screen {
>> +   int refcnt;
> Can you please use struct pipe_reference throughout and the respective
> pipe_reference API from src/gallium/auxiliary/util/u_inlines.h.

jfyi, the original per-driver implementation didn't use pipe_reference
since (iirc) there were some assumptions somewhere or other that
pipe_reference was the first struct member.  But yeah, now that we can
move it to the top of pipe_screen, we should do it properly

BR,
-R

> Thank you very much for doing this !
> Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [RFC] New dma_buf -> EGLImage EGL extension - Final spec published!

2016-06-17 Thread Rob Clark

On Fri, Jun 17, 2016 at 9:31 AM, Pekka Paalanen <ppaala...@gmail.com> wrote:
> On Fri, 17 Jun 2016 08:26:04 -0400
> Rob Clark <robdcl...@gmail.com> wrote:
>
>> On Fri, Jun 17, 2016 at 3:59 AM, Pekka Paalanen <ppaala...@gmail.com> wrote:
>> > On Thu, 16 Jun 2016 10:40:51 -0400
>> > Rob Clark <robdcl...@gmail.com> wrote:
>> >
>> >> So, if we wanted to extend this to support the fourcc-modifiers that
>> >> we have on the kernel side for compressed/tiled/etc formats, what
>> >> would be the right approach?
>> >>
>> >> A new version of the existing extension or a new
>> >> EGL_EXT_image_dma_buf_import2 extension, or ??
>> >
>> > Hi Rob,
>> >
>> > there are actually several things it might be nice to add:
>> >
>> > - a fourth plane, to match what DRM AddFB2 supports
>> >
>> > - the 64-bit fb modifiers
>> >
>> > - queries for which pixel formats are supported by EGL, so a display
>> >   server can tell the applications that before the application goes and
>> >   tries with a random bunch of them, shooting in the dark
>> >
>> > - queries for which modifiers are supported for each pixel format, ditto
>> >
>> > I discussed these with Emil in the past, and it seems an appropriate
>> > approach might be the following.
>> >
>> > Adding the 4th plane can be done as revising the existing
>> > EGL_EXT_image_dma_buf_import extension. The plane count is tied to
>> > pixel formats (and modifiers?), so the user does not need to know
>> > specifically whether the EGL implementation could handle a 4th plane or
>> > not. It is implied by the pixel format.
>> >
>> > Adding the fb modifiers needs to be a new extension, so that users can
>> > tell if they are supported or not. This is to avoid the following false
>> > failure: if user assumes modifiers are always supported, it will (may?)
>> > provide zero modifiers explicitly. If EGL implementation does not
>> > handle modifiers this would be rejected as unrecognized attributes,
>> > while if the zero modifiers were not given explicitly, everything would
>> > just work.
>>
>> hmm, if we design it as "not passing modifier" == "zero modifier", and
>> "never explicitly pass a zero modifier" then modifiers could be added
>> without a new extension.  Although I agree that queries would need a
>> new extension.. so perhaps not worth being clever.
>
> Indeed.
>
>> > The queries obviously(?) need a new extension. It might make sense
>> > to bundle both modifier support and the queries in the same new
>> > extension.
>> >
>> > We have some rough old WIP code at
>> > https://git.collabora.com/cgit/user/lfrb/mesa.git/log/?h=T1410-modifiers
>> > https://git.collabora.com/cgit/user/lfrb/egl-specs.git/log/?h=T1410
>> >
>> >
>> >> On Mon, Feb 25, 2013 at 6:54 AM, Tom Cooksey <tom.cook...@arm.com> wrote:
>> >> > Hi All,
>> >> >
>> >> > The final spec has had enum values assigned and been published on 
>> >> > Khronos:
>> >> >
>> >> > http://www.khronos.org/registry/egl/extensions/EXT/EGL_EXT_image_dma_buf_import.txt
>> >> >
>> >> > Thanks to all who've provided input.
>> >
>> > May I also pull your attention to a detail with the existing spec and
>> > Mesa behaviour I am asking about in
>> > https://lists.freedesktop.org/archives/mesa-dev/2016-June/120249.html
>> > "What is EGL_EXT_image_dma_buf_import image orientation as a GL texture?"
>> > Doing a dmabuf import seems to imply a y-flip AFAICT.
>>
>> I would have expected that *any* egl external image (dma-buf or
>> otherwise) should have native orientation rather than gl orientation.
>> It's somewhat useless otherwise.
>
> In that case importing dmabuf works differently than importing a
> wl_buffer (wl_drm), because for the latter, the y-invert flag is
> returned such that the orientation will match GL. And the direct
> scanout path goes through GBM since you have to import a wl_buffer, and
> I haven't looked what GBM does wrt. y-flip if anything.
>
>> I didn't read it carefully yet (would need caffeine first ;-)) but
>> EGL_KHR_image_base does say "This extension defines a new EGL resource
>> type that is suitable for sharing 2D arrays of image data between
>> client APIs" which to me implies native or

Re: [Mesa-dev] [RFC] New dma_buf -> EGLImage EGL extension - Final spec published!

2016-06-17 Thread Rob Clark

On Fri, Jun 17, 2016 at 3:59 AM, Pekka Paalanen <ppaala...@gmail.com> wrote:
> On Thu, 16 Jun 2016 10:40:51 -0400
> Rob Clark <robdcl...@gmail.com> wrote:
>
>> So, if we wanted to extend this to support the fourcc-modifiers that
>> we have on the kernel side for compressed/tiled/etc formats, what
>> would be the right approach?
>>
>> A new version of the existing extension or a new
>> EGL_EXT_image_dma_buf_import2 extension, or ??
>
> Hi Rob,
>
> there are actually several things it might be nice to add:
>
> - a fourth plane, to match what DRM AddFB2 supports
>
> - the 64-bit fb modifiers
>
> - queries for which pixel formats are supported by EGL, so a display
>   server can tell the applications that before the application goes and
>   tries with a random bunch of them, shooting in the dark
>
> - queries for which modifiers are supported for each pixel format, ditto
>
> I discussed these with Emil in the past, and it seems an appropriate
> approach might be the following.
>
> Adding the 4th plane can be done as revising the existing
> EGL_EXT_image_dma_buf_import extension. The plane count is tied to
> pixel formats (and modifiers?), so the user does not need to know
> specifically whether the EGL implementation could handle a 4th plane or
> not. It is implied by the pixel format.
>
> Adding the fb modifiers needs to be a new extension, so that users can
> tell if they are supported or not. This is to avoid the following false
> failure: if user assumes modifiers are always supported, it will (may?)
> provide zero modifiers explicitly. If EGL implementation does not
> handle modifiers this would be rejected as unrecognized attributes,
> while if the zero modifiers were not given explicitly, everything would
> just work.

hmm, if we design it as "not passing modifier" == "zero modifier", and
"never explicitly pass a zero modifier" then modifiers could be added
without a new extension.  Although I agree that queries would need a
new extension.. so perhaps not worth being clever.

> The queries obviously(?) need a new extension. It might make sense
> to bundle both modifier support and the queries in the same new
> extension.
>
> We have some rough old WIP code at
> https://git.collabora.com/cgit/user/lfrb/mesa.git/log/?h=T1410-modifiers
> https://git.collabora.com/cgit/user/lfrb/egl-specs.git/log/?h=T1410
>
>
>> On Mon, Feb 25, 2013 at 6:54 AM, Tom Cooksey <tom.cook...@arm.com> wrote:
>> > Hi All,
>> >
>> > The final spec has had enum values assigned and been published on Khronos:
>> >
>> > http://www.khronos.org/registry/egl/extensions/EXT/EGL_EXT_image_dma_buf_import.txt
>> >
>> > Thanks to all who've provided input.
>
> May I also pull your attention to a detail with the existing spec and
> Mesa behaviour I am asking about in
> https://lists.freedesktop.org/archives/mesa-dev/2016-June/120249.html
> "What is EGL_EXT_image_dma_buf_import image orientation as a GL texture?"
> Doing a dmabuf import seems to imply a y-flip AFAICT.

I would have expected that *any* egl external image (dma-buf or
otherwise) should have native orientation rather than gl orientation.
It's somewhat useless otherwise.

I didn't read it carefully yet (would need caffeine first ;-)) but
EGL_KHR_image_base does say "This extension defines a new EGL resource
type that is suitable for sharing 2D arrays of image data between
client APIs" which to me implies native orientation.  So that just
sounds like a mesa bug somehow?

Do you just get that w/ i965?  I know some linaro folks have been
using this extension to import buffers from video decoder with
freedreno/gallium and no one mentioned the video being upside down.

BR,
-R


>
> Thanks,
> pq
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] virgl and vc4 problem on Android

2016-06-16 Thread Rob Clark

On Thu, Jun 16, 2016 at 2:57 PM, Rob Herring <r...@kernel.org> wrote:
> On Thu, Jun 16, 2016 at 12:09 PM, Rob Clark <robdcl...@gmail.com> wrote:
>> On Thu, Jun 16, 2016 at 12:56 PM, Rob Herring <r...@kernel.org> wrote:
>>> On Thu, Jun 16, 2016 at 11:44 AM, Rob Clark <robdcl...@gmail.com> wrote:
>>>> On Wed, Jun 15, 2016 at 8:34 PM, Rob Herring <r...@kernel.org> wrote:
>>>>> In the process of adding RGBX (XB24) format to mesa for Android, I
>>>>> started seeing a new problem that makes the UI stop updating. It
>>>>> happens about when the splash screen is stopped and the lock screen is
>>>>> displayed. The display flickers on mouse movement, and it looks like
>>>>> the screen is flipping to old buffers (like the splash screen after
>>>>> its process exited). It is working fine for freedreno AFAICT, but I am
>>>>> running into a problem with virgl. With virgl, I get the following
>>>>> error:
>>>>>
>>>>> vrend_create_surface: context error reported 1 "surfaceflinger"
>>>>> Illegal resource 1435
>>>>> vrend_report_buffer_error: context error reported 1 "surfaceflinger"
>>>>> Illegal command buffer 329729
>>>>>
>>>>> The addition of the pixel format changes the eglconfig used for the
>>>>> splash screen. If I force the splash screen eglconfig to have an alpha
>>>>> or draw one frame of the splash screen and exit early or disable the
>>>>> splash screen, everything seems fine though I have hit the problem
>>>>> rarely navigating around. I suspect this has nothing to do with the
>>>>> pixel format other than different buffer sizes cause buffers to get
>>>>> reused differently.
>>>>>
>>>>> Now I've started working on getting RPi3 and vc4 working, and it
>>>>> appears to have a similar problem. I'm getting these errors though
>>>>> things go haywire before getting any error message:
>>>>>
>>>>> [   43.846569] [drm:vc4_submit_cl_ioctl] *ERROR* Failed to look up GEM BO 
>>>>> 0: 4
>>>>
>>>> at least in the vc4 case, I suspect you need a similar bit of winsys
>>>> magic to ensure the same pipe_screen is returned for any given drm
>>>> device fd.  (Or did someone already add that?)
>>>
>>> That problem should be gone with GBM gralloc, right?
>>
>> *maaaybe*..
>>
>> It, like the gralloc-drm-pipe approach, means we have a pipe_screen
>> (vs. the other drm-gralloc backends which were using libdrm_xyz
>> directly), so it was going through the logic to avoid duplicate
>> pipe_screen's (for the drivers which had that).
>>
>> Maybe w/ gbm, everything ends up sharing the same pipe_screen?  I'm
>> not really sure, since I guess both GL and gralloc are creating a gbm
>> device?
>>
>> I guess easy enough to put some debug print in vc4_screen_create() to
>> confirm.  But the sort of errors you are seeing make me suspicious.
>
> Uhh, well looks like that is a problem for vc4:
>
> 01-01 00:00:07.295   127   127 W VC4 : vc4_screen_create
> 01-01 00:00:07.334   127   127 W VC4 : vc4_screen_create
> 01-01 00:00:08.349   205   223 W VC4 : vc4_screen_create
> 01-01 00:00:08.352   205   223 W VC4 : vc4_screen_create
> 01-01 00:00:35.467   437   488 W VC4 : vc4_screen_create
> 01-01 00:00:35.477   437   488 W VC4 : vc4_screen_create
> 01-01 00:00:39.041   511   511 W VC4 : vc4_screen_create
> 01-01 00:00:43.385   511   798 W VC4 : vc4_screen_create
> 01-01 00:00:44.135   718   718 W VC4 : vc4_screen_create
> 01-01 00:00:44.202   718   923 W VC4 : vc4_screen_create
>
>> Possibly the "libdrm equivalent" part of vc4 needs to do more to avoid
>> re-importing the same handle multiple times?
>
> Maybe time for the common implementation.

yeah, probably

> This doesn't explain the virgl case though as I already fixed this
> problem. The log below is from virgl.

I haven't looked closely at virgl yet, but if it has some sort of bo
cache, perhaps it is allowing shared buffers into the cache??  Not
sure, but I'd be on the lookout for things like that..

Presumably it already has a hashtable to deal w/ multiple-imports of
the same flink name?

BR,
-R

>>>> In both virgl and vc4 case, you need to make sure that shared
>>>> (exported/imported) buffers don't end up in the bo cache.
>>>
>>> I've disabled the cache (in the gallium drv, right?) and still se

Re: [Mesa-dev] virgl and vc4 problem on Android

2016-06-16 Thread Rob Clark

On Thu, Jun 16, 2016 at 12:56 PM, Rob Herring <r...@kernel.org> wrote:
> On Thu, Jun 16, 2016 at 11:44 AM, Rob Clark <robdcl...@gmail.com> wrote:
>> On Wed, Jun 15, 2016 at 8:34 PM, Rob Herring <r...@kernel.org> wrote:
>>> In the process of adding RGBX (XB24) format to mesa for Android, I
>>> started seeing a new problem that makes the UI stop updating. It
>>> happens about when the splash screen is stopped and the lock screen is
>>> displayed. The display flickers on mouse movement, and it looks like
>>> the screen is flipping to old buffers (like the splash screen after
>>> its process exited). It is working fine for freedreno AFAICT, but I am
>>> running into a problem with virgl. With virgl, I get the following
>>> error:
>>>
>>> vrend_create_surface: context error reported 1 "surfaceflinger"
>>> Illegal resource 1435
>>> vrend_report_buffer_error: context error reported 1 "surfaceflinger"
>>> Illegal command buffer 329729
>>>
>>> The addition of the pixel format changes the eglconfig used for the
>>> splash screen. If I force the splash screen eglconfig to have an alpha
>>> or draw one frame of the splash screen and exit early or disable the
>>> splash screen, everything seems fine though I have hit the problem
>>> rarely navigating around. I suspect this has nothing to do with the
>>> pixel format other than different buffer sizes cause buffers to get
>>> reused differently.
>>>
>>> Now I've started working on getting RPi3 and vc4 working, and it
>>> appears to have a similar problem. I'm getting these errors though
>>> things go haywire before getting any error message:
>>>
>>> [   43.846569] [drm:vc4_submit_cl_ioctl] *ERROR* Failed to look up GEM BO 
>>> 0: 4
>>
>> at least in the vc4 case, I suspect you need a similar bit of winsys
>> magic to ensure the same pipe_screen is returned for any given drm
>> device fd.  (Or did someone already add that?)
>
> That problem should be gone with GBM gralloc, right?

*maaaybe*..

It, like the gralloc-drm-pipe approach, means we have a pipe_screen
(vs. the other drm-gralloc backends which were using libdrm_xyz
directly), so it was going through the logic to avoid duplicate
pipe_screen's (for the drivers which had that).

Maybe w/ gbm, everything ends up sharing the same pipe_screen?  I'm
not really sure, since I guess both GL and gralloc are creating a gbm
device?

I guess easy enough to put some debug print in vc4_screen_create() to
confirm.  But the sort of errors you are seeing make me suspicious.

Possibly the "libdrm equivalent" part of vc4 needs to do more to avoid
re-importing the same handle multiple times?

>> In both virgl and vc4 case, you need to make sure that shared
>> (exported/imported) buffers don't end up in the bo cache.
>
> I've disabled the cache (in the gallium drv, right?) and still see problems.
>
> I am seeing a double GEM_CLOSE. I'm not sure how that is happening.
> One of them must be hwc releasing an imported buffer, but it's all in
> the same thread.
>
> [7.024495] [drm] pid=1310, dev=0xe280, auth=0, handle=17, ret = 0,
> DRM_IOCTL_GEM_CLOSE
> [7.025379] [drm] pid=1310, dev=0xe280, auth=0, handle=23, ret = 0,
> DRM_IOCTL_PRIME_FD_TO_HANDLE
> [7.026663] [drm] pid=1310, dev=0xe280, auth=0, handle=10, ret = 0,
> DRM_IOCTL_GEM_CLOSE
> [7.027343] [drm] pid=1310, dev=0xe200, auth=1, handle=23, ret = 0,
> DRM_IOCTL_PRIME_FD_TO_HANDLE
> [7.035098] [drm] pid=1333, dev=0xe200, auth=1, handle=1, ret = 0,
> DRM_IOCTL_GEM_CLOSE
> [7.036093] [drm] pid=1310, dev=0xe280, auth=0, handle=17, ret =
> -22, DRM_IOCTL_GEM_CLOSE

sure would be nice if there was a dump_stack() that showed you the
userspace stack too ;-)

(but maybe dumb question, is pid unique per process or thread?)

BR,
-R

> Rob
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] virgl and vc4 problem on Android

2016-06-16 Thread Rob Clark

On Wed, Jun 15, 2016 at 8:34 PM, Rob Herring  wrote:
> In the process of adding RGBX (XB24) format to mesa for Android, I
> started seeing a new problem that makes the UI stop updating. It
> happens about when the splash screen is stopped and the lock screen is
> displayed. The display flickers on mouse movement, and it looks like
> the screen is flipping to old buffers (like the splash screen after
> its process exited). It is working fine for freedreno AFAICT, but I am
> running into a problem with virgl. With virgl, I get the following
> error:
>
> vrend_create_surface: context error reported 1 "surfaceflinger"
> Illegal resource 1435
> vrend_report_buffer_error: context error reported 1 "surfaceflinger"
> Illegal command buffer 329729
>
> The addition of the pixel format changes the eglconfig used for the
> splash screen. If I force the splash screen eglconfig to have an alpha
> or draw one frame of the splash screen and exit early or disable the
> splash screen, everything seems fine though I have hit the problem
> rarely navigating around. I suspect this has nothing to do with the
> pixel format other than different buffer sizes cause buffers to get
> reused differently.
>
> Now I've started working on getting RPi3 and vc4 working, and it
> appears to have a similar problem. I'm getting these errors though
> things go haywire before getting any error message:
>
> [   43.846569] [drm:vc4_submit_cl_ioctl] *ERROR* Failed to look up GEM BO 0: 4

at least in the vc4 case, I suspect you need a similar bit of winsys
magic to ensure the same pipe_screen is returned for any given drm
device fd.  (Or did someone already add that?)

In both virgl and vc4 case, you need to make sure that shared
(exported/imported) buffers don't end up in the bo cache.

That is at least what occurs to me off the top of my head.  Beyond
that, I'd have to look at code.  But it does sound a lot like the same
old handle <-> buffer confusion..

BR,
-R

> Here's a screenshot[1]. I've also seen an error for fbo size of 4KB
> when expected size is 5MB. The RPi is a bit different in that I can
> continue to navigate around, but apps are not really working and don't
> render completely. Returning to the home screen starts the flickering
> like the pic except the old contents are the app that ran last.
>
> Any suggestions on how to debug this? I've tried a few things like
> disabling the BO cache in vc4 gallium driver and logging BO
> create/free, but didn't see anything.
>
> Rob
>
> [1] https://goo.gl/photos/zqq3ksMVXLvbA6CK8
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [RFC] New dma_buf -> EGLImage EGL extension - Final spec published!

2016-06-16 Thread Rob Clark

So, if we wanted to extend this to support the fourcc-modifiers that
we have on the kernel side for compressed/tiled/etc formats, what
would be the right approach?

A new version of the existing extension or a new
EGL_EXT_image_dma_buf_import2 extension, or ??

BR,
-R

On Mon, Feb 25, 2013 at 6:54 AM, Tom Cooksey <tom.cook...@arm.com> wrote:
> Hi All,
>
> The final spec has had enum values assigned and been published on Khronos:
>
> http://www.khronos.org/registry/egl/extensions/EXT/EGL_EXT_image_dma_buf_import.txt
>
> Thanks to all who've provided input.
>
>
> Cheers,
>
> Tom
>
>
>
>> -Original Message-
>> From: mesa-dev-bounces+tom.cooksey=arm@lists.freedesktop.org 
>> [mailto:mesa-dev-
>> bounces+tom.cooksey=arm@lists.freedesktop.org] On Behalf Of Tom Cooksey
>> Sent: 04 October 2012 13:10
>> To: mesa-dev@lists.freedesktop.org; linaro-mm-...@lists.linaro.org; dri-
>> de...@lists.freedesktop.org; linux-me...@vger.kernel.org
>> Subject: [Mesa-dev] [RFC] New dma_buf -> EGLImage EGL extension - New draft!
>>
>> Hi All,
>>
>> After receiving a fair bit of feedback (thanks!), I've updated the
>> EGL_EXT_image_dma_buf_import spec
>> and expanded it to resolve a number of the issues. Please find the latest 
>> draft below and let
>> me
>> know any additional feedback you might have, either on the lists or by 
>> private e-mail - I
>> don't mind
>> which.
>>
>> I think the only remaining issue now is if we need a mechanism whereby an 
>> application can
>> query
>> which drm_fourcc.h formats EGL supports or if just failing with 
>> EGL_BAD_MATCH when the
>> application
>> has used one EGL doesn't support is sufficient. Any thoughts?
>>
>>
>> Cheers,
>>
>> Tom
>>
>>
>> 8<
>>
>>
>> Name
>>
>> EXT_image_dma_buf_import
>>
>> Name Strings
>>
>> EGL_EXT_image_dma_buf_import
>>
>> Contributors
>>
>> Jesse Barker
>> Rob Clark
>> Tom Cooksey
>>
>> Contacts
>>
>> Jesse Barker (jesse 'dot' barker 'at' linaro 'dot' org)
>> Tom Cooksey (tom 'dot' cooksey 'at' arm 'dot' com)
>>
>> Status
>>
>> DRAFT
>>
>> Version
>>
>> Version 4, October 04, 2012
>>
>> Number
>>
>> EGL Extension ???
>>
>> Dependencies
>>
>> EGL 1.2 is required.
>>
>> EGL_KHR_image_base is required.
>>
>> The EGL implementation must be running on a Linux kernel supporting the
>> dma_buf buffer sharing mechanism.
>>
>> This extension is written against the wording of the EGL 1.2 
>> Specification.
>>
>> Overview
>>
>> This extension allows creating an EGLImage from a Linux dma_buf file
>> descriptor or multiple file descriptors in the case of multi-plane YUV
>> images.
>>
>> New Types
>>
>> None
>>
>> New Procedures and Functions
>>
>> None
>>
>> New Tokens
>>
>> Accepted by the <target> parameter of eglCreateImageKHR:
>>
>> EGL_LINUX_DMA_BUF_EXT
>>
>> Accepted as an attribute in the <attrib_list> parameter of
>> eglCreateImageKHR:
>>
>> EGL_LINUX_DRM_FOURCC_EXT
>> EGL_DMA_BUF_PLANE0_FD_EXT
>> EGL_DMA_BUF_PLANE0_OFFSET_EXT
>> EGL_DMA_BUF_PLANE0_PITCH_EXT
>> EGL_DMA_BUF_PLANE1_FD_EXT
>> EGL_DMA_BUF_PLANE1_OFFSET_EXT
>> EGL_DMA_BUF_PLANE1_PITCH_EXT
>> EGL_DMA_BUF_PLANE2_FD_EXT
>> EGL_DMA_BUF_PLANE2_OFFSET_EXT
>> EGL_DMA_BUF_PLANE2_PITCH_EXT
>> EGL_YUV_COLOR_SPACE_HINT_EXT
>> EGL_SAMPLE_RANGE_HINT_EXT
>> EGL_YUV_CHROMA_HORIZONTAL_SITING_HINT_EXT
>> EGL_YUV_CHROMA_VERTICAL_SITING_HINT_EXT
>>
>> Accepted as the value for the EGL_YUV_COLOR_SPACE_HINT_EXT attribute:
>>
>> EGL_ITU_REC601_EXT
>> EGL_ITU_REC709_EXT
>> EGL_ITU_REC2020_EXT
>>
>> Accepted as the value for the EGL_SAMPLE_RANGE_HINT_EXT attribute:
>>
>> EGL_YUV_FULL_RANGE_EXT
>> EGL_YUV_NARROW_RANGE_EXT
>>
>> Accepted as the value for the EGL_YUV_CHROMA_HORIZONTAL_SITING_HINT_EXT &
>> EGL_YUV_CHROMA_VERTICAL_SITING_HINT_EXT attributes:
>>
>> EGL_YUV_CHROMA_SITING_0_EXT
>> EGL_YUV_CHROMA_SITING_0_5_EXT
>>
>

Re: [Mesa-dev] [PATCH 01/10] gallium: cleanup set_tess_state

2016-06-14 Thread Rob Clark

On Tue, Jun 14, 2016 at 12:30 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote:
> On 14.06.2016 18:02, Ilia Mirkin wrote:
>>
>> Can you explain the motivation behind this change? I'm adding a
>> ->set_window_rectangles thing which also takes multiple parameters.
>> What's the advantage of stuffing things into a struct first?
>
>
> FWIW, I tend to be mildly supportive of changes like this. At least, the
> other extreme where functions grow multiple bool or int parameters over time
> is much worse. But in this particular case, changing this around might be
> too eager.

I'd have to think about how it would work to deal w/ variants that
have params not wrapped in a struct.  It at least sounds annoying, and
I tended to think the benefits of using a struct were enough of a
justification to change this.  (Plus there are not many usages of this
API yet, so seemed like the perfect time to cleanup.)

> Perhaps teaching the script to deal with slightly more complicated cases
> will help elsewhere, too.

*maybe*, but I can't think of anything..  right now it is only the
sampler_view and stream_output_target state that I handle "manually"..
but those are also kind of different from the rest since they are
already refcnt'd.  And I figured it was easier to just deal w/ those
manually than implement a 3rd type of state (CSO vs Param) in
rsq_state.py..

BR,
-R

> Nicolai
>
>>
>>-ilia
>>
>> On Tue, Jun 14, 2016 at 11:57 AM, Rob Clark <robdcl...@gmail.com> wrote:
>>>
>>> From: Rob Clark <robcl...@freedesktop.org>
>>>
>>> The rest of the state APIs take state structs, rather than inline
>>> parameters (with the exception of a couple which just amount to a single
>>> uint).
>>>
>>> This makes the API more regular and simplifies autogeneration of the
>>> gallium state related APIs.
>>>
>>> Signed-off-by: Rob Clark <robcl...@freedesktop.org>
>>> ---
>>>   src/gallium/drivers/ddebug/dd_context.c   |  9 -
>>>   src/gallium/drivers/nouveau/nvc0/nvc0_state.c |  7 +++
>>>   src/gallium/drivers/r600/evergreen_state.c|  7 +++
>>>   src/gallium/drivers/radeonsi/si_state.c   |  7 +++
>>>   src/gallium/drivers/trace/tr_context.c|  9 -
>>>   src/gallium/include/pipe/p_context.h  |  4 ++--
>>>   src/gallium/include/pipe/p_state.h|  8 
>>>   src/mesa/state_tracker/st_atom_tess.c | 13 ++---
>>>   8 files changed, 37 insertions(+), 27 deletions(-)
>>>
>>> diff --git a/src/gallium/drivers/ddebug/dd_context.c
>>> b/src/gallium/drivers/ddebug/dd_context.c
>>> index 0f8ef18..06b7c91 100644
>>> --- a/src/gallium/drivers/ddebug/dd_context.c
>>> +++ b/src/gallium/drivers/ddebug/dd_context.c
>>> @@ -380,15 +380,14 @@ dd_context_set_viewport_states(struct pipe_context
>>> *_pipe,
>>>   }
>>>
>>>   static void dd_context_set_tess_state(struct pipe_context *_pipe,
>>> -  const float
>>> default_outer_level[4],
>>> -  const float
>>> default_inner_level[2])
>>> +  const struct pipe_tess_state
>>> *state)
>>>   {
>>>  struct dd_context *dctx = dd_context(_pipe);
>>>  struct pipe_context *pipe = dctx->pipe;
>>>
>>> -   memcpy(dctx->tess_default_levels, default_outer_level, sizeof(float)
>>> * 4);
>>> -   memcpy(dctx->tess_default_levels+4, default_inner_level,
>>> sizeof(float) * 2);
>>> -   pipe->set_tess_state(pipe, default_outer_level, default_inner_level);
>>> +   memcpy(dctx->tess_default_levels, state->default_outer_level,
>>> sizeof(float) * 4);
>>> +   memcpy(dctx->tess_default_levels+4, state->default_inner_level,
>>> sizeof(float) * 2);
>>> +   pipe->set_tess_state(pipe, state);
>>>   }
>>>
>>>
>>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
>>> b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
>>> index 92161ec..a9c1830 100644
>>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
>>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
>>> @@ -1001,13 +1001,12 @@ nvc0_set_viewport_states(struct pipe_context
>>> *pipe,
>>>
>>>   static void
>>>   nvc0_set_tess_state(struct pipe_context *pipe,
>>> -const float default_tess_outer[4],
>>> -const float

Re: [Mesa-dev] [PATCH 0/7] Fix ralloc/rzalloc usage v2

2016-06-14 Thread Rob Clark

I (and I expect Eric too) would appreciate it if you went ahead and
replaced the current use of non-"z" versions in code that you can't
test w/ the "z" versions.  That way we can switch over to non-zero'ing
on our own time, rather than getting a surprise next time we
pull/rebase

I think it's only a couple spots in freedreno, and pre-emptive r-b for
that change ;-)

BR,
-R

On Tue, Jun 14, 2016 at 11:07 AM, Ilia Mirkin  wrote:
> I assume you've only tested this with i965? ralloc is also used by
> st/mesa, freedreno, and vc4. Should probably try to coordinate with
> the responsible developers before making the big switch.
>
>   -ilia
>
> On Tue, Jun 14, 2016 at 10:58 AM, Juha-Pekka Heikkila
>  wrote:
>> Here is fixed version of this ralloc set. Now I got to run this on many
>> different machines thanks to Mark Janes. There didn't show up any
>> regressions on different gen hw. On my IVB I've been running also many
>> different traces with Apitrace while having Valgrind running on background
>> but Valgrind did seem to be happy with my changes.
>>
>> As a performance test I did shader-db compile runs 10 times and compare
>> timing results against what Mesa master does on my IVB. To my surprise this
>> does bring reasonable gain which also seem to be repeatable, on my IVB
>> shader compile time is around 5% faster with these changes.
>>
>> /Juha-Pekka
>>
>> Juha-Pekka Heikkila (7):
>>   glsl: Fix reading of uninitialized memory
>>   util: use rzalloc instead of ralloc in _mesa_hash_table_create()
>>   util: use rzalloc instead of ralloc in _mesa_set_create()
>>   nir: zero allocated memory where needed
>>   i965/vec4: zero allocated memory where needed
>>   i965/fs: fill allocated memory with zeros where needed
>>   util: Fix ralloc to use malloc instead of calloc
>>
>>  src/compiler/glsl/ast_to_hir.cpp   |  2 +-
>>  src/compiler/glsl/glcpp/glcpp-parse.y  |  4 +-
>>  src/compiler/glsl/link_uniform_blocks.cpp  |  2 +-
>>  src/compiler/glsl_types.cpp|  2 +-
>>  src/compiler/nir/nir.c |  6 +--
>>  src/compiler/nir/nir_opt_dce.c |  2 +-
>>  src/compiler/nir/nir_phi_builder.c |  2 +-
>>  src/compiler/nir/nir_search.c  |  2 +-
>>  src/compiler/nir/nir_to_ssa.c  |  2 +-
>>  src/compiler/nir/nir_worklist.c|  2 +-
>>  .../drivers/dri/i965/brw_fs_copy_propagation.cpp   |  2 +-
>>  .../dri/i965/brw_fs_dead_code_eliminate.cpp|  4 +-
>>  .../dri/i965/brw_vec4_dead_code_eliminate.cpp  |  4 +-
>>  src/util/hash_table.c  |  2 +-
>>  src/util/ralloc.c  | 49 
>> +++---
>>  src/util/ralloc.h  |  2 +-
>>  src/util/set.c |  2 +-
>>  17 files changed, 54 insertions(+), 37 deletions(-)
>>
>> --
>> 1.9.1
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

< 4 5 6 7 8 9 10 11 12 13 >

801 - 900 of 1947 matches

Mail list logo