date:20170509

On Tue, May 09, 2017 at 04:10:29PM -0700, Kenneth Graunke wrote:
> The BRW_NEW_CURBE_OFFSETS dirty bit is signalled when changing the
> partitioning of the Constant Buffer URB section between the various
> shader stages, on Gen4-5.
> 
> BRW_NEW_PUSH_CONSTANT_ALLOCATION is basically the same thing on Gen7+.
> 
> So, save a bit, and use the new name.

Nice, patches 3 and 4:

Reviewed-by: Topi Pohjolainen 

> ---
>  src/mesa/drivers/dri/i965/brw_clip_state.c   |  4 ++--
>  src/mesa/drivers/dri/i965/brw_context.h  |  5 +
>  src/mesa/drivers/dri/i965/brw_curbe.c| 10 +-
>  src/mesa/drivers/dri/i965/brw_gs_state.c |  2 +-
>  src/mesa/drivers/dri/i965/brw_state_upload.c |  1 -
>  src/mesa/drivers/dri/i965/brw_urb.c  |  2 +-
>  src/mesa/drivers/dri/i965/brw_vs_state.c |  4 ++--
>  src/mesa/drivers/dri/i965/brw_wm_state.c |  4 ++--
>  8 files changed, 14 insertions(+), 18 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c 
> b/src/mesa/drivers/dri/i965/brw_clip_state.c
> index d5fe2b547fa..35ccd2fe74f 100644
> --- a/src/mesa/drivers/dri/i965/brw_clip_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c
> @@ -86,7 +86,7 @@ brw_upload_clip_unit(struct brw_context *brw)
> clip->thread3.const_urb_entry_read_length =
>brw->clip.prog_data->curb_read_length;
>  
> -   /* BRW_NEW_CURBE_OFFSETS */
> +   /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
> clip->thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2;
> clip->thread3.dispatch_grf_start_reg = 1;
> clip->thread3.urb_entry_read_offset = 0;
> @@ -171,7 +171,7 @@ const struct brw_tracked_state brw_clip_unit = {
>.brw   = BRW_NEW_BATCH |
> BRW_NEW_BLORP |
> BRW_NEW_CLIP_PROG_DATA |
> -   BRW_NEW_CURBE_OFFSETS |
> +   BRW_NEW_PUSH_CONSTANT_ALLOCATION |
> BRW_NEW_PROGRAM_CACHE |
> BRW_NEW_URB_FENCE,
> },
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index 723c5d6926d..100bd74f214 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -173,7 +173,6 @@ enum brw_state_id {
> BRW_STATE_GEOMETRY_PROGRAM,
> BRW_STATE_TESS_PROGRAMS,
> BRW_STATE_VERTEX_PROGRAM,
> -   BRW_STATE_CURBE_OFFSETS,
> BRW_STATE_REDUCED_PRIMITIVE,
> BRW_STATE_PATCH_PRIMITIVE,
> BRW_STATE_PRIMITIVE,
> @@ -259,7 +258,6 @@ enum brw_state_id {
>  #define BRW_NEW_GEOMETRY_PROGRAM(1ull << BRW_STATE_GEOMETRY_PROGRAM)
>  #define BRW_NEW_TESS_PROGRAMS   (1ull << BRW_STATE_TESS_PROGRAMS)
>  #define BRW_NEW_VERTEX_PROGRAM  (1ull << BRW_STATE_VERTEX_PROGRAM)
> -#define BRW_NEW_CURBE_OFFSETS   (1ull << BRW_STATE_CURBE_OFFSETS)
>  #define BRW_NEW_REDUCED_PRIMITIVE   (1ull << BRW_STATE_REDUCED_PRIMITIVE)
>  #define BRW_NEW_PATCH_PRIMITIVE (1ull << BRW_STATE_PATCH_PRIMITIVE)
>  #define BRW_NEW_PRIMITIVE   (1ull << BRW_STATE_PRIMITIVE)
> @@ -955,8 +953,7 @@ struct brw_context
> } urb;
>  
>  
> -   /* BRW_NEW_CURBE_OFFSETS:
> -*/
> +   /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
> struct {
>GLuint wm_start;  /**< pos of first wm const in CURBE buffer */
>GLuint wm_size;   /**< number of float[4] consts, multiple of 16 */
> diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c 
> b/src/mesa/drivers/dri/i965/brw_curbe.c
> index 7d58efb622f..139a3bcdf86 100644
> --- a/src/mesa/drivers/dri/i965/brw_curbe.c
> +++ b/src/mesa/drivers/dri/i965/brw_curbe.c
> @@ -41,7 +41,7 @@
>   * quickly at thread setup time.  Each individual fixed function unit's state
>   * (brw_vs_state.c for example) tells the hardware which subset of the CURBE
>   * it wants in its register space, and we calculate those areas here under 
> the
> - * BRW_NEW_CURBE_OFFSETS state flag.  The brw_urb.c allocation will control
> + * BRW_NEW_PUSH_CONSTANT_ALLOCATION state flag.  The brw_urb.c allocation 
> will control
>   * how many CURBEs can be loaded into the hardware at once before a pipeline
>   * stall occurs at CMD_CONST_BUFFER time.
>   *
> @@ -135,7 +135,7 @@ static void calculate_curbe_offsets( struct brw_context 
> *brw )
>   brw->curbe.vs_start,
>   brw->curbe.vs_size );
>  
> -  brw->ctx.NewDriverState |= BRW_NEW_CURBE_OFFSETS;
> +  brw->ctx.NewDriverState |= BRW_NEW_PUSH_CONSTANT_ALLOCATION;
> }
>  }
>  
> @@ -196,7 +196,7 @@ static void
>  brw_upload_constant_buffer(struct brw_context *brw)
>  {
> struct gl_context *ctx = >ctx;
> -   /* BRW_NEW_CURBE_OFFSETS */
> +   /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
> const GLuint sz = brw->curbe.total_size;
> const GLuint bufsz = sz * 16 * sizeof(GLfloat);
> gl_constant_value *buf;
> @@ -216,7 +216,7 @@ brw_upload_constant_buffer(struct brw_context *brw)
> if (brw->curbe.wm_size) {
>

[Mesa-dev] Bug in 17.1.0-rc4 source packaging for swr?

2017-05-09 Thread Chuck Atkins

I just tried to build 17.0.4-rc4 from the tarball with swr enabled and got
errors about my python not having mako:

make[5]: Entering directory
'/tmp/atkins3/mesa/build/mesa-17.1.0-rc4_gcc-6.3.0_haswell/src/gallium/drivers/swr'
  GEN  rasterizer/jitter/gen_builder.hpp
Traceback (most recent call last):
  File
"../../../../../../mesa-17.1.0-rc4/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py",
line 24, in 
from gen_common import MakoTemplateWriter, ArgumentParser
  File
"/tmp/atkins3/mesa/mesa-17.1.0-rc4/src/gallium/drivers/swr/rasterizer/codegen/gen_common.py",
line 27, in 
from mako.template import Template
ImportError: No module named mako.template
Makefile:2424: recipe for target 'rasterizer/jitter/gen_builder.hpp' failed

As I understood it, mako should only be required when building out of git.
Unless this has changed, it seems there are some generated files missing
from the source tarball.

- Chuck
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] radv patches for 17.1

2017-05-09 Thread Dave Airlie

In order to pass CTS for 17.1 with llvm 4.0, radv needs some extra
patches cherry-picked.

09034aab64c7a6022a2c508658fead1442f08576
3c730639740f9b1243e95d06e6608cb54649be9a
a52470402515c46cd9f33a5d83dc8d2bc9f7bae9
83e58b036e1c34f26c99d04615df2b530f3045d9
3bf3f9866c1387872521242921bb00c7fb7c2834
efa19f5a542709cab7c6aa7f03af959f4394962f

These are also on
https://github.com/airlied/mesa/tree/radv-17.1-conform

Thanks,
Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH RFC 4/4] dri: Turn of a couple of glx extensions for gnome-shell on vmwgfx.

On 09/05/17 07:28 PM, Thomas Hellstrom wrote:
> On 05/09/2017 11:29 AM, Michel Dänzer wrote:
>> On 09/05/17 06:12 PM, Thomas Hellstrom wrote:
>>> On 05/09/2017 10:47 AM, Michel Dänzer wrote:
 On 09/05/17 05:32 PM, Thomas Hellstrom wrote:
> On 05/09/2017 10:05 AM, Michel Dänzer wrote:
>> On 05/05/17 11:02 PM, Thomas Hellstrom wrote:
>>> Increases performance on vmwgfx since we're avoiding full buffer damage 
>>> and
>>> since we can't sync to vertical retrace anyway.
>>>
>>> Signed-off-by: Thomas Hellstrom 
>>> ---
>>>  src/mesa/drivers/dri/common/drirc | 7 +++
>>>  1 file changed, 7 insertions(+)
>>>
>>> diff --git a/src/mesa/drivers/dri/common/drirc 
>>> b/src/mesa/drivers/dri/common/drirc
>>> index 14d7713..a8f2ccf 100644
>>> --- a/src/mesa/drivers/dri/common/drirc
>>> +++ b/src/mesa/drivers/dri/common/drirc
>>> @@ -137,4 +137,11 @@ TODO: document the other workarounds.
>>>  
>>>  
>>>  
>>> +
>>> +
>>> +
>>> +
>>> +
>>> +
>>> +
>>>  
>>>
>> Why restrict this to gnome-shell? Wouldn't any application using these
>> extensions be affected by the same issues?
> So far I've only looked into gnome-shell because we had a noticeable
> slowdown. The special thing with gnome-shell is that if
> GLX_EXT_buffer_age is available, it prefers a "swapbuffer with damage"
> path over copy_sub_buffer. Unfortunately the "swapbuffer with damage"
> path is implemented as ordinary swapbuffer on GLX. Not so on EGL. For
> real hardware this is a gain, I believe, since they end up page-flipping
> so I can't really claim gnome-shell is doing something wrong.
 I don't think it's directly related to "SwapBuffers with damage" or page
 flipping. The idea of GLX_EXT_buffer_age is that it lets the application
 know if and when the current back buffer was already used as a back
 buffer previously. Based on this, the application can know that it
 doesn't need to draw to some areas of the back buffer, because they
 already have the desired contents.

 Is the problem maybe that you need to read back the back buffer contents
 from the host if the application doesn't fully clear it?
>>> I don't think so. My understanding of the code
>>>
>>> https://urldefense.proofpoint.com/v2/url?u=https-3A__git.gnome.org_browse_mutter_tree_clutter_clutter_cogl_clutter-2Dstage-2Dcogl.c=DwIDaQ=uilaK90D4TOVoH58JNXRgQ=wnSlgOCqfpNS4d02vP68_E9q2BNMCwfD2OZ_6dCFVQQ=Vv2TIj9fJv0pacKZnnUx2Rt4X0PVVVwfADJnlVmGevM=EtclRrb_yCMinIRtc71MvU1RnMFslfRza8lwX3vc1cM=
>>>  
>>>
>>> is that when gnome-shell (based on what you write above) decides it
>>> doesn't need to draw some areas of the back buffer, it eventually ends
>>> up calling  cogl_onscreen_swap_buffers_with_damage(). And the cogl GLX
>>> backend ends up calling glxSwapBuffers(), in effect damaging the full
>>> back buffer.
>>>
>>> https://urldefense.proofpoint.com/v2/url?u=https-3A__git.gnome.org_browse_mutter_tree_cogl_cogl_winsys_cogl-2Dwinsys-2Dglx.c-23n2321=DwIDaQ=uilaK90D4TOVoH58JNXRgQ=wnSlgOCqfpNS4d02vP68_E9q2BNMCwfD2OZ_6dCFVQQ=Vv2TIj9fJv0pacKZnnUx2Rt4X0PVVVwfADJnlVmGevM=JtgpIf2My3zGO2lUWC7-3py5Hgpk5QOOxlUYOjhg-zE=
>>>  
>> Yes, this is as intended with GLX_EXT_buffer_age; if there was actual
>> "SwapBuffers with damage" functionality, GLX_EXT_buffer_age would be
>> kind of pointless, since the application could just only draw and swap
>> the areas which changed since last time.
>>
>> Without page flipping, the "undamaged" areas will be unnecessarily
>> blitted from back to front buffer, but GLX_EXT_buffer_age still provides
>> some savings (compared to drawing the whole buffer and calling
>> glXSwapBuffers) via the application not drawing those areas in the first
>> place.
>>
>> In summary, gnome-shell is using GLX_EXT_buffer_age exactly as intended,
>> I wouldn't expect other applications to use it any differently.
> 
> True, but OTOH I'm not sure other applications would provide such a
> performance benefit by switching to copy_sub_buffer()
> if it's *not* present.

Yes, good point, that occurred to me as well in the meantime.

> Also (although I'm not sure dri3 server-side implements this), but I guess
> even vmware could to true "page-flipping" in the sense of saving a copy, if
> rendering to a redirected window.

The xserver Present code currently only supports flipping for
unredirected fullscreen windows, everything else uses blits.


> For the OML_sync_control extension, some applications may actually notcd
> behave worse with this extension present, even if the time source is
> provided by the Xserver present extension "fake" backend.
> In the gnome-shell case it's a slowdown, though.
 The Present fake CRTC ticks at 1 Hz, so it'll presumably slow down any
 application which is otherwise

[Mesa-dev] [PATCH] mesa/i965: remove _CurrentFragmentProgram from gl_pipeline_object

This was added in b527dd65c830a as a work around because fixed function
fragment shaders were tracked in ctx->FragmentProgram._Current as
a gl_program rather than gl_shader_program.

However after my refactoring of the program and shader structs
at the end of 2016 which culminated in c505d6d85222, we no longer
need gl_shader_program to track the current program making
_CurrentFragmentProgram obsolete.
---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  3 +--
 src/mesa/main/api_validate.c |  2 +-
 src/mesa/main/mtypes.h   |  2 --
 src/mesa/main/pipelineobj.c  |  2 --
 src/mesa/main/shaderapi.c| 22 --
 src/mesa/main/state.c|  8 
 6 files changed, 2 insertions(+), 37 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 49383c7..cad9c1c 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -1434,23 +1434,22 @@ brw_upload_ubo_surfaces(struct brw_context *brw, struct 
gl_program *prog,
   }
}
 
if (prog->info.num_ubos || prog->info.num_ssbos)
   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
 }
 
 static void
 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
 {
-   struct gl_context *ctx = >ctx;
/* _NEW_PROGRAM */
-   struct gl_program *prog = ctx->_Shader->_CurrentFragmentProgram;
+   struct gl_program *prog = brw->fragment_program;
 
/* BRW_NEW_FS_PROG_DATA */
brw_upload_ubo_surfaces(brw, prog, >wm.base, brw->wm.base.prog_data);
 }
 
 const struct brw_tracked_state brw_wm_ubo_surfaces = {
.dirty = {
   .mesa = _NEW_PROGRAM,
   .brw = BRW_NEW_BATCH |
  BRW_NEW_BLORP |
diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
index 4694c36..cbb2361 100644
--- a/src/mesa/main/api_validate.c
+++ b/src/mesa/main/api_validate.c
@@ -92,21 +92,21 @@ check_blend_func_error(struct gl_context *ctx)
 
   /* The KHR_blend_equation_advanced spec says:
*
*"Advanced blending equations require the use of a fragment shader
* with a matching "blend_support" layout qualifier.  If the current
* blend equation is found in table X.1 or X.2, and the active
* fragment shader does not include the layout qualifier matching
* the blend equation or "blend_support_all_equations", the error
* INVALID_OPERATION is generated [...]"
*/
-  const struct gl_program *prog = ctx->_Shader->_CurrentFragmentProgram;
+  const struct gl_program *prog = ctx->FragmentProgram._Current;
   const GLbitfield blend_support = !prog ? 0 : prog->sh.fs.BlendSupport;
 
   if ((blend_support & ctx->Color._AdvancedBlendMode) == 0) {
  _mesa_error(ctx, GL_INVALID_OPERATION,
  "fragment shader does not allow advanced blending mode "
  "(%s)",
   _mesa_enum_to_string(ctx->Color.Blend[0].EquationRGB));
   }
}
 
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 0d594bf..dff4192 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2996,22 +2996,20 @@ struct gl_pipeline_object
 
/**
 * Programs used for rendering
 *
 * There is a separate program set for each shader stage.
 */
struct gl_program *CurrentProgram[MESA_SHADER_STAGES];
 
struct gl_shader_program *ReferencedPrograms[MESA_SHADER_STAGES];
 
-   struct gl_program *_CurrentFragmentProgram;
-
/**
 * Program used by glUniform calls.
 *
 * Explicitly set by \c glUseProgram and \c glActiveProgramEXT.
 */
struct gl_shader_program *ActiveProgram;
 
GLbitfield Flags;/**< Mask of GLSL_x flags */
 
GLboolean EverBound; /**< Has the pipeline object been 
created */
diff --git a/src/mesa/main/pipelineobj.c b/src/mesa/main/pipelineobj.c
index f7c911f..67a8fc8 100644
--- a/src/mesa/main/pipelineobj.c
+++ b/src/mesa/main/pipelineobj.c
@@ -51,22 +51,20 @@
 
 /**
  * Delete a pipeline object.
  */
 void
 _mesa_delete_pipeline_object(struct gl_context *ctx,
  struct gl_pipeline_object *obj)
 {
unsigned i;
 
-   _mesa_reference_program(ctx, >_CurrentFragmentProgram, NULL);
-
for (i = 0; i < MESA_SHADER_STAGES; i++) {
   _mesa_reference_program(ctx, >CurrentProgram[i], NULL);
   _mesa_reference_shader_program(ctx, >ReferencedPrograms[i], NULL);
}
 
_mesa_reference_shader_program(ctx, >ActiveProgram, NULL);
free(obj->Label);
ralloc_free(obj);
 }
 
diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index f91cc89..ef0941d 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -150,21 +150,20 @@ _mesa_init_shader_state(struct gl_context *ctx)
 
 /**
  * Free the per-context

[Mesa-dev] [PATCH V2 11/15] mesa: add KHR_no_error support for FramebufferTextureLayer

V2: use frame_buffer_texture_layer_no_error() helper
---
 src/mapi/glapi/gen/ARB_framebuffer_object.xml |  2 +-
 src/mesa/main/fbobject.c  | 15 +++
 src/mesa/main/fbobject.h  |  4 
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/mapi/glapi/gen/ARB_framebuffer_object.xml 
b/src/mapi/glapi/gen/ARB_framebuffer_object.xml
index ce5e45d..76114eb 100644
--- a/src/mapi/glapi/gen/ARB_framebuffer_object.xml
+++ b/src/mapi/glapi/gen/ARB_framebuffer_object.xml
@@ -239,21 +239,21 @@
 
 
 
 
 
 
 

 
 
-
+
 
 
 
 
 

 
 
 
 
diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 3338974..c209be0 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -3452,20 +3452,35 @@ frame_buffer_texture_layer_no_error(struct gl_context 
*ctx,
  layer = 0;
   }
}
 
_mesa_framebuffer_texture(ctx, fb, attachment, att, texObj, textarget,
  level, layer, GL_FALSE);
 }
 
 
 void GLAPIENTRY
+_mesa_FramebufferTextureLayer_no_error(GLenum target, GLenum attachment,
+   GLuint texture, GLint level,
+   GLint layer)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   /* Get the framebuffer object */
+   struct gl_framebuffer *fb = get_framebuffer_target(ctx, target);
+
+   frame_buffer_texture_layer_no_error(ctx, fb, attachment, texture, level,
+   layer, "glFramebufferTextureLayer");
+}
+
+
+void GLAPIENTRY
 _mesa_FramebufferTextureLayer(GLenum target, GLenum attachment,
   GLuint texture, GLint level, GLint layer)
 {
GET_CURRENT_CONTEXT(ctx);
struct gl_framebuffer *fb;
struct gl_texture_object *texObj;
GLenum textarget = 0;
 
const char *func = "glFramebufferTextureLayer";
 
diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h
index df1696d..7c32b87 100644
--- a/src/mesa/main/fbobject.h
+++ b/src/mesa/main/fbobject.h
@@ -234,20 +234,24 @@ _mesa_FramebufferTexture2D(GLenum target, GLenum 
attachment,
 extern void GLAPIENTRY
 _mesa_FramebufferTexture3D_no_error(GLenum target, GLenum attachment,
 GLenum textarget, GLuint texture,
 GLint level, GLint layer);
 extern void GLAPIENTRY
 _mesa_FramebufferTexture3D(GLenum target, GLenum attachment,
   GLenum textarget, GLuint texture,
   GLint level, GLint layer);
 
 extern void GLAPIENTRY
+_mesa_FramebufferTextureLayer_no_error(GLenum target, GLenum attachment,
+   GLuint texture, GLint level,
+   GLint layer);
+extern void GLAPIENTRY
 _mesa_FramebufferTextureLayer(GLenum target, GLenum attachment,
  GLuint texture, GLint level, GLint layer);
 
 extern void GLAPIENTRY
 _mesa_NamedFramebufferTextureLayer(GLuint framebuffer, GLenum attachment,
GLuint texture, GLint level, GLint layer);
 
 extern void GLAPIENTRY
 _mesa_FramebufferTexture(GLenum target, GLenum attachment,
  GLuint texture, GLint level);
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH V2 14/15] mesa: add KHR_no_error support for FramebufferTexture

V2: use the framebuffer_texture_no_error() helper
---
 src/mapi/glapi/gen/GL3x.xml |  2 +-
 src/mesa/main/fbobject.c| 14 ++
 src/mesa/main/fbobject.h|  3 +++
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/src/mapi/glapi/gen/GL3x.xml b/src/mapi/glapi/gen/GL3x.xml
index d2c768d..10c157e 100644
--- a/src/mapi/glapi/gen/GL3x.xml
+++ b/src/mapi/glapi/gen/GL3x.xml
@@ -600,21 +600,21 @@
 
 
   
 
   
 
 
 
   
 
-  
+  
 
 
 
 
   
 
 
 
 
 
diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index f4df151..6dcd547 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -3604,20 +3604,34 @@ framebuffer_texture_no_error(struct gl_context *ctx,
if (texObj) {
   check_layered_texture_target(ctx, texObj->Target, func, );
}
 
_mesa_framebuffer_texture(ctx, fb, attachment, att, texObj, 0, level,
  0, layered);
 }
 
 
 void GLAPIENTRY
+_mesa_FramebufferTexture_no_error(GLenum target, GLenum attachment,
+  GLuint texture, GLint level)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   /* Get the framebuffer object */
+   struct gl_framebuffer *fb = get_framebuffer_target(ctx, target);
+
+   framebuffer_texture_no_error(ctx, fb, attachment, texture, level,
+"FramebufferTexture");
+}
+
+
+void GLAPIENTRY
 _mesa_FramebufferTexture(GLenum target, GLenum attachment,
  GLuint texture, GLint level)
 {
GET_CURRENT_CONTEXT(ctx);
struct gl_framebuffer *fb;
struct gl_texture_object *texObj;
GLboolean layered = GL_FALSE;
 
const char *func = "FramebufferTexture";
 
diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h
index a2f9264..1d064f8 100644
--- a/src/mesa/main/fbobject.h
+++ b/src/mesa/main/fbobject.h
@@ -251,20 +251,23 @@ _mesa_FramebufferTextureLayer(GLenum target, GLenum 
attachment,
 extern void GLAPIENTRY
 _mesa_NamedFramebufferTextureLayer_no_error(GLuint framebuffer,
 GLenum attachment,
 GLuint texture, GLint level,
 GLint layer);
 extern void GLAPIENTRY
 _mesa_NamedFramebufferTextureLayer(GLuint framebuffer, GLenum attachment,
GLuint texture, GLint level, GLint layer);
 
 extern void GLAPIENTRY
+_mesa_FramebufferTexture_no_error(GLenum target, GLenum attachment,
+  GLuint texture, GLint level);
+extern void GLAPIENTRY
 _mesa_FramebufferTexture(GLenum target, GLenum attachment,
  GLuint texture, GLint level);
 
 extern void GLAPIENTRY
 _mesa_NamedFramebufferTexture(GLuint framebuffer, GLenum attachment,
   GLuint texture, GLint level);
 
 extern void GLAPIENTRY
 _mesa_FramebufferRenderbuffer(GLenum target, GLenum attachment,
  GLenum renderbuffertarget,
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH V2 15/15] mesa: add KHR_no_error support for NamedFramebufferTexture

V2: use framebuffer_texture_no_error() helper
---
 src/mapi/glapi/gen/ARB_direct_state_access.xml |  2 +-
 src/mesa/main/fbobject.c   | 13 +
 src/mesa/main/fbobject.h   |  3 +++
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml 
b/src/mapi/glapi/gen/ARB_direct_state_access.xml
index 2f3d60f..2f2ba20 100644
--- a/src/mapi/glapi/gen/ARB_direct_state_access.xml
+++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml
@@ -164,21 +164,21 @@
   
   

 

   
   
   

 
-   
+   
   
   
   
   

 

   
   
   
diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 6dcd547..baa4b58 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -3664,20 +3664,33 @@ _mesa_FramebufferTexture(GLenum target, GLenum 
attachment,
 
struct gl_renderbuffer_attachment *att =
   _mesa_get_and_validate_attachment(ctx, fb, attachment, func);
if (!att)
   return;
 
_mesa_framebuffer_texture(ctx, fb, attachment, att, texObj, 0, level,
  0, layered);
 }
 
+void GLAPIENTRY
+_mesa_NamedFramebufferTexture_no_error(GLuint framebuffer, GLenum attachment,
+   GLuint texture, GLint level)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   /* Get the framebuffer object */
+   struct gl_framebuffer *fb = _mesa_lookup_framebuffer(ctx, framebuffer);
+
+   framebuffer_texture_no_error(ctx, fb, attachment, texture, level,
+"NamedFramebufferTexture");
+}
+
 
 void GLAPIENTRY
 _mesa_NamedFramebufferTexture(GLuint framebuffer, GLenum attachment,
   GLuint texture, GLint level)
 {
GET_CURRENT_CONTEXT(ctx);
struct gl_framebuffer *fb;
struct gl_texture_object *texObj;
GLboolean layered = GL_FALSE;
 
diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h
index 1d064f8..1c9056d 100644
--- a/src/mesa/main/fbobject.h
+++ b/src/mesa/main/fbobject.h
@@ -258,20 +258,23 @@ _mesa_NamedFramebufferTextureLayer(GLuint framebuffer, 
GLenum attachment,
GLuint texture, GLint level, GLint layer);
 
 extern void GLAPIENTRY
 _mesa_FramebufferTexture_no_error(GLenum target, GLenum attachment,
   GLuint texture, GLint level);
 extern void GLAPIENTRY
 _mesa_FramebufferTexture(GLenum target, GLenum attachment,
  GLuint texture, GLint level);
 
 extern void GLAPIENTRY
+_mesa_NamedFramebufferTexture_no_error(GLuint framebuffer, GLenum attachment,
+   GLuint texture, GLint level);
+extern void GLAPIENTRY
 _mesa_NamedFramebufferTexture(GLuint framebuffer, GLenum attachment,
   GLuint texture, GLint level);
 
 extern void GLAPIENTRY
 _mesa_FramebufferRenderbuffer(GLenum target, GLenum attachment,
  GLenum renderbuffertarget,
  GLuint renderbuffer);
 
 extern void GLAPIENTRY
 _mesa_NamedFramebufferRenderbuffer(GLuint framebuffer, GLenum attachment,
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH V2 13/15] mesa: add framebuffer_texture_no_error() helper

To be used to add KHR_no_error support while sharing code between
the DSA and non-DSA OpenGL function.
---
 src/mesa/main/fbobject.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 7f16597..f4df151 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -3581,20 +3581,42 @@ _mesa_NamedFramebufferTextureLayer(GLuint framebuffer, 
GLenum attachment,
struct gl_renderbuffer_attachment *att =
   _mesa_get_and_validate_attachment(ctx, fb, attachment, func);
if (!att)
   return;
 
_mesa_framebuffer_texture(ctx, fb, attachment, att, texObj, textarget,
  level, layer, GL_FALSE);
 }
 
 
+static void
+framebuffer_texture_no_error(struct gl_context *ctx,
+ struct gl_framebuffer *fb, GLenum attachment,
+ GLuint texture, GLint level, const char *func)
+{
+   /* Get the texture object */
+   struct gl_texture_object *texObj =
+  get_texture_for_framebuffer(ctx, texture);
+
+   struct gl_renderbuffer_attachment *att =
+  get_attachment(ctx, fb, attachment, NULL);
+
+   GLboolean layered = GL_FALSE;
+   if (texObj) {
+  check_layered_texture_target(ctx, texObj->Target, func, );
+   }
+
+   _mesa_framebuffer_texture(ctx, fb, attachment, att, texObj, 0, level,
+ 0, layered);
+}
+
+
 void GLAPIENTRY
 _mesa_FramebufferTexture(GLenum target, GLenum attachment,
  GLuint texture, GLint level)
 {
GET_CURRENT_CONTEXT(ctx);
struct gl_framebuffer *fb;
struct gl_texture_object *texObj;
GLboolean layered = GL_FALSE;
 
const char *func = "FramebufferTexture";
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH V2 12/15] mesa: add KHR_no_error support for NamedFramebufferTextureLayer

v2: use frame_buffer_texture_layer_no_error() helper
---
 src/mapi/glapi/gen/ARB_direct_state_access.xml |  2 +-
 src/mesa/main/fbobject.c   | 17 +
 src/mesa/main/fbobject.h   |  5 +
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml 
b/src/mapi/glapi/gen/ARB_direct_state_access.xml
index 3cb486e..2f3d60f 100644
--- a/src/mapi/glapi/gen/ARB_direct_state_access.xml
+++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml
@@ -171,21 +171,21 @@
   

 

   
   
   
   

 
-   
+   
   
   
   
   
   

 

   
   
diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index c209be0..7f16597 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -3518,20 +3518,37 @@ _mesa_FramebufferTextureLayer(GLenum target, GLenum 
attachment,
   _mesa_get_and_validate_attachment(ctx, fb, attachment, func);
if (!att)
   return;
 
_mesa_framebuffer_texture(ctx, fb, attachment, att, texObj, textarget,
  level, layer, GL_FALSE);
 }
 
 
 void GLAPIENTRY
+_mesa_NamedFramebufferTextureLayer_no_error(GLuint framebuffer,
+GLenum attachment,
+GLuint texture, GLint level,
+GLint layer)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   /* Get the framebuffer object */
+   struct gl_framebuffer *fb = _mesa_lookup_framebuffer(ctx, framebuffer);
+
+   frame_buffer_texture_layer_no_error(ctx, fb, attachment, texture, level,
+   layer,
+   "glNamedFramebufferTextureLayer");
+}
+
+
+void GLAPIENTRY
 _mesa_NamedFramebufferTextureLayer(GLuint framebuffer, GLenum attachment,
GLuint texture, GLint level, GLint layer)
 {
GET_CURRENT_CONTEXT(ctx);
struct gl_framebuffer *fb;
struct gl_texture_object *texObj;
GLenum textarget = 0;
 
const char *func = "glNamedFramebufferTextureLayer";
 
diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h
index 7c32b87..a2f9264 100644
--- a/src/mesa/main/fbobject.h
+++ b/src/mesa/main/fbobject.h
@@ -242,20 +242,25 @@ _mesa_FramebufferTexture3D(GLenum target, GLenum 
attachment,
 
 extern void GLAPIENTRY
 _mesa_FramebufferTextureLayer_no_error(GLenum target, GLenum attachment,
GLuint texture, GLint level,
GLint layer);
 extern void GLAPIENTRY
 _mesa_FramebufferTextureLayer(GLenum target, GLenum attachment,
  GLuint texture, GLint level, GLint layer);
 
 extern void GLAPIENTRY
+_mesa_NamedFramebufferTextureLayer_no_error(GLuint framebuffer,
+GLenum attachment,
+GLuint texture, GLint level,
+GLint layer);
+extern void GLAPIENTRY
 _mesa_NamedFramebufferTextureLayer(GLuint framebuffer, GLenum attachment,
GLuint texture, GLint level, GLint layer);
 
 extern void GLAPIENTRY
 _mesa_FramebufferTexture(GLenum target, GLenum attachment,
  GLuint texture, GLint level);
 
 extern void GLAPIENTRY
 _mesa_NamedFramebufferTexture(GLuint framebuffer, GLenum attachment,
   GLuint texture, GLint level);
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH V2 10/16] mesa: add frame_buffer_texture_layer_no_error() helper

To be used to add KHR_no_error support while sharing code between
the DSA and non-DSA OpenGL function.
---
 src/mesa/main/fbobject.c | 28 
 1 file changed, 28 insertions(+)

diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index ba01d0c..3338974 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -3423,20 +3423,48 @@ _mesa_FramebufferTexture3D_no_error(GLenum target, 
GLenum attachment,
 void GLAPIENTRY
 _mesa_FramebufferTexture3D(GLenum target, GLenum attachment,
GLenum textarget, GLuint texture,
GLint level, GLint layer)
 {
framebuffer_texture_with_dims(3, target, attachment, textarget, texture,
  level, layer, "glFramebufferTexture3D");
 }
 
 
+static void
+frame_buffer_texture_layer_no_error(struct gl_context *ctx,
+struct gl_framebuffer *fb,
+GLenum attachment, GLuint texture,
+GLint level, GLint layer,
+const char *func)
+{
+   /* Get the texture object */
+   struct gl_texture_object *texObj =
+  get_texture_for_framebuffer(ctx, texture);
+
+   struct gl_renderbuffer_attachment *att =
+  get_attachment(ctx, fb, attachment, NULL);
+
+   GLenum textarget = 0;
+   if (texObj) {
+  if (texObj->Target == GL_TEXTURE_CUBE_MAP) {
+ assert(layer >= 0 && layer < 6);
+ textarget = GL_TEXTURE_CUBE_MAP_POSITIVE_X + layer;
+ layer = 0;
+  }
+   }
+
+   _mesa_framebuffer_texture(ctx, fb, attachment, att, texObj, textarget,
+ level, layer, GL_FALSE);
+}
+
+
 void GLAPIENTRY
 _mesa_FramebufferTextureLayer(GLenum target, GLenum attachment,
   GLuint texture, GLint level, GLint layer)
 {
GET_CURRENT_CONTEXT(ctx);
struct gl_framebuffer *fb;
struct gl_texture_object *texObj;
GLenum textarget = 0;
 
const char *func = "glFramebufferTextureLayer";
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] mesa: use u_bit_scan() in update_program_texture_state()

I made this same change a while back but found I couldn't really measure 
any performance increase in practice. I suspect it's because we already
limit the max loops with (1 << s) <= prog[i]->SamplersUsed and there is 
generally no big gaps in used samples.


Did you manage to measure any improvement here?

On 10/05/17 07:33, Samuel Pitoiset wrote:

The check in update_single_program_texture() can also be
removed.

Signed-off-by: Samuel Pitoiset 
---
  src/mesa/main/texstate.c | 13 +
  1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/src/mesa/main/texstate.c b/src/mesa/main/texstate.c
index 830b230b5d..c11a2e95fe 100644
--- a/src/mesa/main/texstate.c
+++ b/src/mesa/main/texstate.c
@@ -620,9 +620,6 @@ update_single_program_texture(struct gl_context *ctx, 
struct gl_program *prog,
 struct gl_sampler_object *sampler;
 int unit;
  
-   if (!(prog->SamplersUsed & (1 << s)))

-  return NULL;
-
 unit = prog->SamplerUnits[s];
 texUnit = >Texture.Unit[unit];
  
@@ -676,16 +673,16 @@ update_program_texture_state(struct gl_context *ctx, struct gl_program **prog,

 int i;
  
 for (i = 0; i < MESA_SHADER_STAGES; i++) {

+  GLbitfield mask;
int s;
  
if (!prog[i])

   continue;
  
-  /* We can't only do the shifting trick as the loop condition because if

-   * sampler 31 is active, the next iteration tries to shift by 32, which 
is
-   * undefined.
-   */
-  for (s = 0; s < MAX_SAMPLERS && (1 << s) <= prog[i]->SamplersUsed; s++) {
+  mask = prog[i]->SamplersUsed;
+
+  while (mask) {
+ const int s = u_bit_scan();
   struct gl_texture_object *texObj;
  
   texObj = update_single_program_texture(ctx, prog[i], s);



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 10/13] mesa: add KHR_no_error support for FramebufferTextureLayer

Timothy Arceri  writes:

> ---
>  src/mapi/glapi/gen/ARB_framebuffer_object.xml |  2 +-
>  src/mesa/main/fbobject.c  | 31 
> +++
>  src/mesa/main/fbobject.h  |  4 
>  3 files changed, 36 insertions(+), 1 deletion(-)
>
> diff --git a/src/mapi/glapi/gen/ARB_framebuffer_object.xml 
> b/src/mapi/glapi/gen/ARB_framebuffer_object.xml
> index ce5e45d..76114eb 100644
> --- a/src/mapi/glapi/gen/ARB_framebuffer_object.xml
> +++ b/src/mapi/glapi/gen/ARB_framebuffer_object.xml
> @@ -239,21 +239,21 @@
>  
>  
>  
>  
>  
>  
>  
>   
>  
>  
> -
> +
>  
>  
>  
>  
>  
>   
>  
>  
>  
>  
> diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
> index ba01d0c..08e7347 100644
> --- a/src/mesa/main/fbobject.c
> +++ b/src/mesa/main/fbobject.c
> @@ -3424,20 +3424,51 @@ void GLAPIENTRY
>  _mesa_FramebufferTexture3D(GLenum target, GLenum attachment,
> GLenum textarget, GLuint texture,
> GLint level, GLint layer)
>  {
> framebuffer_texture_with_dims(3, target, attachment, textarget, texture,
>   level, layer, "glFramebufferTexture3D");
>  }
>  
>  
>  void GLAPIENTRY
> +_mesa_FramebufferTextureLayer_no_error(GLenum target, GLenum attachment,
> +   GLuint texture, GLint level,
> +   GLint layer)
> +{
> +   GET_CURRENT_CONTEXT(ctx);
> +
> +   /* Get the framebuffer object */
> +   struct gl_framebuffer *fb = get_framebuffer_target(ctx, target);
> +
> +   /* Get the texture object */
> +   struct gl_texture_object *texObj =
> +  get_texture_for_framebuffer(ctx, texture);
> +
> +   struct gl_renderbuffer_attachment *att =
> +  get_attachment(ctx, fb, attachment, NULL);
> +
> +   GLenum textarget = 0;
> +   if (texObj) {
> +  if (texObj->Target == GL_TEXTURE_CUBE_MAP) {
> + assert(layer >= 0 && layer < 6);
> + textarget = GL_TEXTURE_CUBE_MAP_POSITIVE_X + layer;
> + layer = 0;
> +  }
> +   }
> +
> +   _mesa_framebuffer_texture(ctx, fb, attachment, att, texObj, textarget,
> + level, layer, GL_FALSE);
> +}

I've got stuck on reading this series a couple of times because this
felt like too much code duplication.  I understand that you're
replicating what the current error-validating code does, but it seems
like for no_error we can do a lot better, quite easily.

What I'd like for these last 4 patches is to have two patches, each of
which implements a Named and non-Named no_error function by doing the
appropriate lookup of the fb and then calling a helper function that
does the rest of their current bodies.

Assuming patch 4.5 is to be squashed into 4, then other than the ';'
comment patches 1-9 are:

Reviewed-by: Eric Anholt 


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/5] genxml: Add alias for MOCS.

Use an alias for this field on 3DSTATE_INDEX_BUFFER on gen6+, so we can set
the same value as the defines.

Signed-off-by: Rafael Antognolli 
---
 src/intel/genxml/gen6.xml  | 1 +
 src/intel/genxml/gen7.xml  | 1 +
 src/intel/genxml/gen75.xml | 1 +
 src/intel/genxml/gen8.xml  | 1 +
 src/intel/genxml/gen9.xml  | 1 +
 5 files changed, 5 insertions(+)

diff --git a/src/intel/genxml/gen6.xml b/src/intel/genxml/gen6.xml
index 5aa19a58..1294929 100644
--- a/src/intel/genxml/gen6.xml
+++ b/src/intel/genxml/gen6.xml
@@ -1047,6 +1047,7 @@
 
 
 
+
 
 
   
diff --git a/src/intel/genxml/gen7.xml b/src/intel/genxml/gen7.xml
index fd70414..c98327a 100644
--- a/src/intel/genxml/gen7.xml
+++ b/src/intel/genxml/gen7.xml
@@ -1220,6 +1220,7 @@
 
 
 
+
 
 
   
diff --git a/src/intel/genxml/gen75.xml b/src/intel/genxml/gen75.xml
index ac2dbc3..11f1462 100644
--- a/src/intel/genxml/gen75.xml
+++ b/src/intel/genxml/gen75.xml
@@ -1506,6 +1506,7 @@
 
 
 
+
 
   
   
diff --git a/src/intel/genxml/gen8.xml b/src/intel/genxml/gen8.xml
index 1392466..541a788 100644
--- a/src/intel/genxml/gen8.xml
+++ b/src/intel/genxml/gen8.xml
@@ -1583,6 +1583,7 @@
   
 
 
+
 
 
   
diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml
index 445a366..a2c2020 100644
--- a/src/intel/genxml/gen9.xml
+++ b/src/intel/genxml/gen9.xml
@@ -1696,6 +1696,7 @@
   
 
 
+
 
 
   
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/5] i965: Port brw_cs_state tracked state to genxml.

Emit the respective commands using genxml code.

Signed-off-by: Rafael Antognolli 
---
 src/mesa/drivers/dri/i965/brw_state.h |   1 -
 src/mesa/drivers/dri/i965/gen7_cs_state.c | 162 --
 src/mesa/drivers/dri/i965/genX_state_upload.c | 146 ++-
 3 files changed, 145 insertions(+), 164 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index 4727e2a..90f7677 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -100,7 +100,6 @@ extern const struct brw_tracked_state brw_psp_urb_cbs;
 
 extern const struct brw_tracked_state brw_indices;
 extern const struct brw_tracked_state brw_index_buffer;
-extern const struct brw_tracked_state brw_cs_state;
 extern const struct brw_tracked_state gen7_cs_push_constants;
 extern const struct brw_tracked_state gen6_binding_table_pointers;
 extern const struct brw_tracked_state gen6_gs_binding_table;
diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c 
b/src/mesa/drivers/dri/i965/gen7_cs_state.c
index 48b3d87..cbb6bb1 100644
--- a/src/mesa/drivers/dri/i965/gen7_cs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c
@@ -33,168 +33,6 @@
 #include "compiler/glsl/ir_uniform.h"
 #include "main/shaderapi.h"
 
-static void
-brw_upload_cs_state(struct brw_context *brw)
-{
-   if (!brw->cs.base.prog_data)
-  return;
-
-   uint32_t offset;
-   uint32_t *desc = (uint32_t*) brw_state_batch(brw, 8 * 4, 64, );
-   struct brw_stage_state *stage_state = >cs.base;
-   struct brw_stage_prog_data *prog_data = stage_state->prog_data;
-   struct brw_cs_prog_data *cs_prog_data = brw_cs_prog_data(prog_data);
-   const struct gen_device_info *devinfo = >screen->devinfo;
-
-   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
-  brw_emit_buffer_surface_state(
- brw, _state->surf_offset[
- prog_data->binding_table.shader_time_start],
- brw->shader_time.bo, 0, ISL_FORMAT_RAW,
- brw->shader_time.bo->size, 1, true);
-   }
-
-   uint32_t *bind = brw_state_batch(brw, prog_data->binding_table.size_bytes,
-32, _state->bind_bo_offset);
-
-   uint32_t dwords = brw->gen < 8 ? 8 : 9;
-   BEGIN_BATCH(dwords);
-   OUT_BATCH(MEDIA_VFE_STATE << 16 | (dwords - 2));
-
-   if (prog_data->total_scratch) {
-  if (brw->gen >= 8) {
- /* Broadwell's Per Thread Scratch Space is in the range [0, 11]
-  * where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
-  */
- OUT_RELOC64(stage_state->scratch_bo,
- I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- ffs(stage_state->per_thread_scratch) - 11);
-  } else if (brw->is_haswell) {
- /* Haswell's Per Thread Scratch Space is in the range [0, 10]
-  * where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M.
-  */
- OUT_RELOC(stage_state->scratch_bo,
-   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-   ffs(stage_state->per_thread_scratch) - 12);
-  } else {
- /* Earlier platforms use the range [0, 11] to mean [1kB, 12kB]
-  * where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB.
-  */
- OUT_RELOC(stage_state->scratch_bo,
-   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-   stage_state->per_thread_scratch / 1024 - 1);
-  }
-   } else {
-  OUT_BATCH(0);
-  if (brw->gen >= 8)
- OUT_BATCH(0);
-   }
-
-   const uint32_t vfe_num_urb_entries = brw->gen >= 8 ? 2 : 0;
-   const uint32_t vfe_gpgpu_mode =
-  brw->gen == 7 ? SET_FIELD(1, GEN7_MEDIA_VFE_STATE_GPGPU_MODE) : 0;
-   const uint32_t subslices = MAX2(brw->screen->subslice_total, 1);
-   OUT_BATCH(SET_FIELD(devinfo->max_cs_threads * subslices - 1,
-   MEDIA_VFE_STATE_MAX_THREADS) |
- SET_FIELD(vfe_num_urb_entries, MEDIA_VFE_STATE_URB_ENTRIES) |
- SET_FIELD(1, MEDIA_VFE_STATE_RESET_GTW_TIMER) |
- SET_FIELD(1, MEDIA_VFE_STATE_BYPASS_GTW) |
- vfe_gpgpu_mode);
-
-   OUT_BATCH(0);
-   const uint32_t vfe_urb_allocation = brw->gen >= 8 ? 2 : 0;
-
-   /* We are uploading duplicated copies of push constant uniforms for each
-* thread. Although the local id data needs to vary per thread, it won't
-* change for other uniform data. Unfortunately this duplication is
-* required for gen7. As of Haswell, this duplication can be avoided, but
-* this older mechanism with duplicated data continues to work.
-*
-* FINISHME: As of Haswell, we could make use of the
-* INTERFACE_DESCRIPTOR_DATA "Cross-Thread Constant Data Read Length" field
-* to only store one copy of uniform data.
-*
-* FINISHME: Broadwell adds a new alternative "Indirect Payload Storage"
-* which is described in the GPGPU_WALKER command and in the Broadwell PRM
-* Volume 7: 3D Media GPGPU, under Media GPGPU

[Mesa-dev] [PATCH 1/5] i965/genxml: Mostly style fixes for emit_vertices code.

Several issues were caught on review after the original patch landed.
This commit fixes them.

v2:
   - Fix padding (Topi)
   - Remove .DestinationElementOffset change from this patch (Topi)

Signed-off-by: Rafael Antognolli 
Reviewed-by: Topi Pohjolainen 
---
 src/mesa/drivers/dri/i965/genX_state_upload.c | 42 +++
 1 file changed, 17 insertions(+), 25 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
b/src/mesa/drivers/dri/i965/genX_state_upload.c
index f3a7e72..ef62c5b 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -361,7 +361,7 @@ is_passthru_format(uint32_t format)
 }
 
 UNUSED static int
-genX(uploads_needed)(uint32_t format)
+uploads_needed(uint32_t format)
 {
if (!is_passthru_format(format))
   return 1;
@@ -443,8 +443,8 @@ genX(emit_vertices)(struct brw_context *brw)
 
 #if GEN_GEN >= 8
struct gl_context *ctx = >ctx;
-   bool uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL ||
-  ctx->Polygon.BackMode != GL_FILL);
+   const bool uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL ||
+ctx->Polygon.BackMode != GL_FILL);
 
if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) {
   unsigned vue = brw->vb.nr_enabled;
@@ -513,7 +513,7 @@ genX(emit_vertices)(struct brw_context *brw)
   struct brw_vertex_element *input = brw->vb.enabled[i];
   uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
 
-  if (genX(uploads_needed(format)) > 1)
+  if (uploads_needed(format) > 1)
  nr_elements++;
}
 #endif
@@ -526,7 +526,8 @@ genX(emit_vertices)(struct brw_context *brw)
 * a VE loads from them.
 */
if (nr_elements == 0) {
-  dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS), 1 + 
GENX(VERTEX_ELEMENT_STATE_length));
+  dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS),
+   1 + GENX(VERTEX_ELEMENT_STATE_length));
   struct GENX(VERTEX_ELEMENT_STATE) elem = {
  .Valid = true,
  .SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT,
@@ -547,11 +548,6 @@ genX(emit_vertices)(struct brw_context *brw)
   uses_draw_params + vs_prog_data->uses_drawid;
 
if (nr_buffers) {
-#if GEN_GEN >= 6
-  assert(nr_buffers <= 33);
-#else
-  assert(nr_buffers <= 17);
-#endif
   assert(nr_buffers <= (GEN_GEN >= 6 ? 33 : 17));
 
   dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_BUFFERS),
@@ -564,11 +560,12 @@ genX(emit_vertices)(struct brw_context *brw)
   * half-float and 8 and 16-bit integer formats.  This means that the
   * vertex element may poke over the end of the buffer by 2 bytes.
   */
- unsigned padding =
+ const unsigned padding =
 (GEN_GEN <= 7 && !brw->is_baytrail && !brw->is_haswell) * 2;
+ const unsigned end = buffer->offset + buffer->size + padding;
  dw = genX(emit_vertex_buffer_state)(brw, dw, i, buffer->bo,
  buffer->offset,
- buffer->offset + buffer->size + 
padding,
+ end,
  buffer->stride,
  buffer->step_rate);
   }
@@ -597,7 +594,7 @@ genX(emit_vertices)(struct brw_context *brw)
 */
 #if GEN_GEN >= 6
assert(nr_elements <= 34);
-   struct brw_vertex_element *gen6_edgeflag_input = NULL;
+   const struct brw_vertex_element *gen6_edgeflag_input = NULL;
 #else
assert(nr_elements <= 18);
 #endif
@@ -606,13 +603,13 @@ genX(emit_vertices)(struct brw_context *brw)
 1 + GENX(VERTEX_ELEMENT_STATE_length) * nr_elements);
unsigned i;
for (i = 0; i < brw->vb.nr_enabled; i++) {
-  struct brw_vertex_element *input = brw->vb.enabled[i];
+  const struct brw_vertex_element *input = brw->vb.enabled[i];
   uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
   uint32_t comp0 = VFCOMP_STORE_SRC;
   uint32_t comp1 = VFCOMP_STORE_SRC;
   uint32_t comp2 = VFCOMP_STORE_SRC;
   uint32_t comp3 = VFCOMP_STORE_SRC;
-  unsigned num_uploads = 1;
+  const unsigned num_uploads = GEN_GEN < 8 ? uploads_needed(format) : 1;
 
 #if GEN_GEN >= 8
   /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE):
@@ -639,21 +636,16 @@ genX(emit_vertices)(struct brw_context *brw)
   }
 #endif
 
-#if GEN_GEN < 8
-  num_uploads = genX(uploads_needed(format));
-#endif
-
   for (unsigned c = 0; c < num_uploads; c++) {
- uint32_t upload_format = GEN_GEN >= 8 ? format :
+ const uint32_t upload_format = GEN_GEN >= 8 ? format :
 downsize_format_if_needed(format, c);
  /* If we need more that one upload, the offset stride would be 128

[Mesa-dev] [PATCH 5/5] i965: Port 3DSTATE_VF_TOPOLOGY on gen8+ to genxml.

With this last state ported, we can get rid of gen8_draw_upload.c.

Signed-off-by: Rafael Antognolli 
---
 src/mesa/drivers/dri/i965/Makefile.sources|  1 -
 src/mesa/drivers/dri/i965/brw_state.h |  1 -
 src/mesa/drivers/dri/i965/gen8_draw_upload.c  | 53 ---
 src/mesa/drivers/dri/i965/genX_state_upload.c | 22 ++-
 4 files changed, 21 insertions(+), 56 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index 9e567cb..0221a97 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -92,7 +92,6 @@ i965_FILES = \
gen7_urb.c \
gen7_wm_surface_state.c \
gen8_depth_state.c \
-   gen8_draw_upload.c \
gen8_multisample_state.c \
gen8_surface_state.c \
hsw_queryobj.c \
diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index a9c3bf2..2682286 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -113,7 +113,6 @@ extern const struct brw_tracked_state gen7_l3_state;
 extern const struct brw_tracked_state gen7_push_constant_space;
 extern const struct brw_tracked_state gen7_urb;
 extern const struct brw_tracked_state gen8_pma_fix;
-extern const struct brw_tracked_state gen8_vf_topology;
 extern const struct brw_tracked_state brw_cs_work_groups_surface;
 
 static inline bool
diff --git a/src/mesa/drivers/dri/i965/gen8_draw_upload.c 
b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
index 01c3ac3..e69de29 100644
--- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
@@ -1,53 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "main/bufferobj.h"
-#include "main/context.h"
-#include "main/enums.h"
-#include "main/macros.h"
-
-#include "brw_draw.h"
-#include "brw_defines.h"
-#include "brw_context.h"
-#include "brw_state.h"
-
-#include "intel_batchbuffer.h"
-#include "intel_buffer_objects.h"
-
-static void
-gen8_emit_vf_topology(struct brw_context *brw)
-{
-   BEGIN_BATCH(2);
-   OUT_BATCH(_3DSTATE_VF_TOPOLOGY << 16 | (2 - 2));
-   OUT_BATCH(brw->primitive);
-   ADVANCE_BATCH();
-}
-
-const struct brw_tracked_state gen8_vf_topology = {
-   .dirty = {
-  .mesa = 0,
-  .brw = BRW_NEW_BLORP |
- BRW_NEW_PRIMITIVE,
-   },
-   .emit = gen8_emit_vf_topology,
-};
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 96cad92..eef8fa7 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -3991,7 +3991,27 @@ static const struct brw_tracked_state genX(ps_blend) = {
},
.emit = genX(upload_ps_blend)
 };
+#endif
+
+/* -- */
+
+#if GEN_GEN >= 8
+static void
+genX(emit_vf_topology)(struct brw_context *brw)
+{
+   brw_batch_emit(brw, GENX(3DSTATE_VF_TOPOLOGY), vftopo) {
+  vftopo.PrimitiveTopologyType = brw->primitive;
+   }
+}
 
+static const struct brw_tracked_state genX(vf_topology) = {
+   .dirty = {
+  .mesa = 0,
+  .brw = BRW_NEW_BLORP |
+ BRW_NEW_PRIMITIVE,
+   },
+   .emit = genX(emit_vf_topology),
+};
 #endif
 
 /* -- */
@@ -4329,7 +4349,7 @@ genX(init_atoms)(struct brw_context *brw)
 
   (drawing_rect),
 
-  _vf_topology,
+  (vf_topology),
 
   _indices,
   (index_buffer),
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/5] i965: Port 3DSTATE_INDEX_BUFFER to genxml.

Also make the brw_get_index_type() function not shift its return, since that
is genxml's job now.

Signed-off-by: Rafael Antognolli 
---
 src/mesa/drivers/dri/i965/brw_context.h   |  6 ++--
 src/mesa/drivers/dri/i965/brw_draw_upload.c   | 39 -
 src/mesa/drivers/dri/i965/brw_state.h |  1 -
 src/mesa/drivers/dri/i965/gen8_draw_upload.c  | 27 --
 src/mesa/drivers/dri/i965/genX_state_upload.c | 41 ---
 5 files changed, 40 insertions(+), 74 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 723c5d6..f833c3a 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1402,10 +1402,10 @@ unsigned brw_get_vertex_surface_type(struct brw_context 
*brw,
 static inline unsigned
 brw_get_index_type(unsigned index_size)
 {
-   /* The hw needs 0x, 0x0100, and 0x0200 for ubyte, ushort,
-* and uint, respectively.
+   /* The hw needs 0x00, 0x01, and 0x02 for ubyte, ushort, and uint,
+* respectively.
 */
-   return (index_size >> 1) << 8;
+   return index_size >> 1;
 }
 
 void brw_prepare_vertices(struct brw_context *brw);
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c 
b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 8b30151..2ec9a01 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -791,42 +791,3 @@ const struct brw_tracked_state brw_indices = {
},
.emit = brw_upload_indices,
 };
-
-static void
-brw_emit_index_buffer(struct brw_context *brw)
-{
-   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
-   GLuint cut_index_setting;
-
-   if (index_buffer == NULL)
-  return;
-
-   if (brw->prim_restart.enable_cut_index && !brw->is_haswell) {
-  cut_index_setting = BRW_CUT_INDEX_ENABLE;
-   } else {
-  cut_index_setting = 0;
-   }
-
-   BEGIN_BATCH(3);
-   OUT_BATCH(CMD_INDEX_BUFFER << 16 |
- cut_index_setting |
- brw_get_index_type(index_buffer->index_size) |
- 1);
-   OUT_RELOC(brw->ib.bo,
- I915_GEM_DOMAIN_VERTEX, 0,
- 0);
-   OUT_RELOC(brw->ib.bo,
- I915_GEM_DOMAIN_VERTEX, 0,
-brw->ib.size - 1);
-   ADVANCE_BATCH();
-}
-
-const struct brw_tracked_state brw_index_buffer = {
-   .dirty = {
-  .mesa = 0,
-  .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_INDEX_BUFFER,
-   },
-   .emit = brw_emit_index_buffer,
-};
diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index 90f7677..a9c3bf2 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -112,7 +112,6 @@ extern const struct brw_tracked_state gen7_depthbuffer;
 extern const struct brw_tracked_state gen7_l3_state;
 extern const struct brw_tracked_state gen7_push_constant_space;
 extern const struct brw_tracked_state gen7_urb;
-extern const struct brw_tracked_state gen8_index_buffer;
 extern const struct brw_tracked_state gen8_pma_fix;
 extern const struct brw_tracked_state gen8_vf_topology;
 extern const struct brw_tracked_state brw_cs_work_groups_surface;
diff --git a/src/mesa/drivers/dri/i965/gen8_draw_upload.c 
b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
index 8db160b..01c3ac3 100644
--- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
@@ -35,33 +35,6 @@
 #include "intel_buffer_objects.h"
 
 static void
-gen8_emit_index_buffer(struct brw_context *brw)
-{
-   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
-   uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
-
-   if (index_buffer == NULL)
-  return;
-
-   BEGIN_BATCH(5);
-   OUT_BATCH(CMD_INDEX_BUFFER << 16 | (5 - 2));
-   OUT_BATCH(brw_get_index_type(index_buffer->index_size) | mocs_wb);
-   OUT_RELOC64(brw->ib.bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
-   OUT_BATCH(brw->ib.size);
-   ADVANCE_BATCH();
-}
-
-const struct brw_tracked_state gen8_index_buffer = {
-   .dirty = {
-  .mesa = 0,
-  .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_INDEX_BUFFER,
-   },
-   .emit = gen8_emit_index_buffer,
-};
-
-static void
 gen8_emit_vf_topology(struct brw_context *brw)
 {
BEGIN_BATCH(2);
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
b/src/mesa/drivers/dri/i965/genX_state_upload.c
index ac5c879..96cad92 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -830,6 +830,39 @@ static const struct brw_tracked_state genX(vertices) = {
.emit = genX(emit_vertices),
 };
 
+static void
+genX(emit_index_buffer)(struct brw_context *brw)
+{
+   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
+
+   if (index_buffer == NULL)
+  return;
+
+   brw_batch_emit(brw, GENX(3DSTATE_INDEX_BUFFER), ib) {
+#if GEN_GEN <

Re: [Mesa-dev] [PATCH 07/13] mesa: add error version of get_texture_for_framebuffer()

Timothy Arceri  writes:

> This is a step towards KHR_no_error support.
> ---
>  src/mesa/main/fbobject.c | 26 ++
>  1 file changed, 18 insertions(+), 8 deletions(-)
>
> diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
> index fb8fbe9..85744e3 100644
> --- a/src/mesa/main/fbobject.c
> +++ b/src/mesa/main/fbobject.c
> @@ -2903,33 +2903,43 @@ reuse_framebuffer_texture_attachment(struct 
> gl_framebuffer *fb,
> _mesa_reference_renderbuffer(_att->Renderbuffer, 
> src_att->Renderbuffer);
> dst_att->Type = src_att->Type;
> dst_att->Complete = src_att->Complete;
> dst_att->TextureLevel = src_att->TextureLevel;
> dst_att->CubeMapFace = src_att->CubeMapFace;
> dst_att->Zoffset = src_att->Zoffset;
> dst_att->Layered = src_att->Layered;
>  }
>  
>  
> +static struct gl_texture_object *
> +get_texture_for_framebuffer(struct gl_context *ctx, GLuint texture)
> +{
> +   if (!texture)
> +  return NULL;
> +
> +   return _mesa_lookup_texture(ctx, texture);;
> +}

Stray ';'


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 18/21] anv: Implement support for exporting semaphores as FENCE_FD

On Tue, May 9, 2017 at 4:49 PM, Chad Versace 
wrote:

> I learned a lot by reviewing this patch. Before reviewing it, some parts
> of the external_semaphore spec still confused me. But reviewing this
> forced me to really learn the behavior of external semaphores.
>
> Other than some small security nits, the only real issue I found was
> the error behavior of vkQueueSubmit. Maybe I'm wrong and your error
> behavior is fine, though. Let's discuss.
>
> A little complaint against the current spec... The patch was hard to
> review because the 1.0.49 has incorrect language regarding external
> semaphores. I had to go review the unreleased spec to get to the truth.
> Anyway, on with review...
>
> On Fri 14 Apr 2017, Jason Ekstrand wrote:
> > ---
> >  src/intel/vulkan/anv_batch_chain.c | 96 ++
> ++--
> >  src/intel/vulkan/anv_device.c  | 25 ++
> >  src/intel/vulkan/anv_gem.c | 36 ++
> >  src/intel/vulkan/anv_private.h | 24 +++---
> >  src/intel/vulkan/anv_queue.c   | 73 +++--
> >  5 files changed, 240 insertions(+), 14 deletions(-)
>
> > diff --git a/src/intel/vulkan/anv_batch_chain.c
> b/src/intel/vulkan/anv_batch_chain.c
> > index 0529f22..ec37c81 100644
> > --- a/src/intel/vulkan/anv_batch_chain.c
> > +++ b/src/intel/vulkan/anv_batch_chain.c
> > @@ -1387,6 +1387,23 @@ setup_execbuf_for_cmd_buffer(struct anv_execbuf
> *execbuf,
> > return VK_SUCCESS;
> >  }
> >
> > +static void
> > +setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_device
> *device)
> > +{
> > +   anv_execbuf_add_bo(execbuf, >trivial_batch_bo, NULL, 0,
> > +  >alloc);
> > +
> > +   execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
> > +  .buffers_ptr = (uintptr_t) execbuf->objects,
> > +  .buffer_count = execbuf->bo_count,
> > +  .batch_start_offset = 0,
> > +  .batch_len = 8, /* GEN8_MI_BATCH_BUFFER_END and NOOP */
>
> Instead of hard-coding a magic number here, I think
> .batch_len = device->trivial_batch_bo.next - device->trivial_batch_bo.
> start,
> is better. With that, the comment never becomes stale.
>
> > +  .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER,
> > +  .rsvd1 = device->context_id,
> > +  .rsvd2 = 0,
> > +   };
> > +}
> > +
> >  VkResult
> >  anv_cmd_buffer_execbuf(struct anv_device *device,
> > struct anv_cmd_buffer *cmd_buffer,
> > @@ -1398,11 +1415,13 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
> > struct anv_execbuf execbuf;
> > anv_execbuf_init();
> >
> > +   int in_fence = -1;
> > VkResult result = VK_SUCCESS;
> > for (uint32_t i = 0; i < num_in_semaphores; i++) {
> >ANV_FROM_HANDLE(anv_semaphore, semaphore, in_semaphores[i]);
> > -  assert(semaphore->temporary.type == ANV_SEMAPHORE_TYPE_NONE);
> > -  struct anv_semaphore_impl *impl = >permanent;
> > +  struct anv_semaphore_impl *impl =
> > + semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
> > + >temporary : >permanent;
> >
> >switch (impl->type) {
> >case ANV_SEMAPHORE_TYPE_BO:
> > @@ -1411,13 +1430,42 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
> >   if (result != VK_SUCCESS)
> >  return result;
> >   break;
> > +
> > +  case ANV_SEMAPHORE_TYPE_SYNC_FILE:
> > + if (in_fence == -1) {
> > +in_fence = impl->fd;
> > + } else {
> > +int merge = anv_gem_sync_file_merge(device, in_fence,
> impl->fd);
> > +if (merge == -1)
> > +   return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHX);
>
> Because we immediately close the semaphore's fd, the spec does not allow
> us to return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHX here. We must either
> defer closing the fd until immediately before VK_SUCCESS, or return
> VK_ERROR_DEVICE_LOST.
>
> From the 1.0.49 spec:
>
> If vkQueueSubmit fails, it may return VK_ERROR_OUT_OF_HOST_MEMORY or
> VK_ERROR_OUT_OF_DEVICE_MEMORY.  If it does, the implementation must
> ensure that the state and contents of any resources or
> synchronization primitives referenced by the submitted command
> buffers and any semaphores referenced by pSubmits is unaffected by
> the call or its failure. If vkQueueSubmit fails in such a way that
> the implementation can not make that guarantee, the implementation
> must return VK_ERROR_DEVICE_LOST
>

Yes, I wrote that text. :-)  anv_QueueSubmit already turns all errors into
VK_DEVICE_LOST so we can return as descriptive an error as we want here.
Really, we should probably use the sync_file_info ioctl on import to see if
it's a sync file and fail then.


>
> > +
> > +close(impl->fd);
> > +close(in_fence);
> > +in_fence = merge;
> > + }
> > +
> > + impl->fd = -1;
> > + break;
> > +
> >default:
> >   break;
> >}

Re: [Mesa-dev] [PATCH 18/21] anv: Implement support for exporting semaphores as FENCE_FD

2017-05-09 Thread Chad Versace

I learned a lot by reviewing this patch. Before reviewing it, some parts
of the external_semaphore spec still confused me. But reviewing this
forced me to really learn the behavior of external semaphores.

Other than some small security nits, the only real issue I found was
the error behavior of vkQueueSubmit. Maybe I'm wrong and your error
behavior is fine, though. Let's discuss.

A little complaint against the current spec... The patch was hard to
review because the 1.0.49 has incorrect language regarding external
semaphores. I had to go review the unreleased spec to get to the truth.
Anyway, on with review...

On Fri 14 Apr 2017, Jason Ekstrand wrote:
> ---
>  src/intel/vulkan/anv_batch_chain.c | 96 
> --
>  src/intel/vulkan/anv_device.c  | 25 ++
>  src/intel/vulkan/anv_gem.c | 36 ++
>  src/intel/vulkan/anv_private.h | 24 +++---
>  src/intel/vulkan/anv_queue.c   | 73 +++--
>  5 files changed, 240 insertions(+), 14 deletions(-)

> diff --git a/src/intel/vulkan/anv_batch_chain.c 
> b/src/intel/vulkan/anv_batch_chain.c
> index 0529f22..ec37c81 100644
> --- a/src/intel/vulkan/anv_batch_chain.c
> +++ b/src/intel/vulkan/anv_batch_chain.c
> @@ -1387,6 +1387,23 @@ setup_execbuf_for_cmd_buffer(struct anv_execbuf 
> *execbuf,
> return VK_SUCCESS;
>  }
>  
> +static void
> +setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_device *device)
> +{
> +   anv_execbuf_add_bo(execbuf, >trivial_batch_bo, NULL, 0,
> +  >alloc);
> +
> +   execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
> +  .buffers_ptr = (uintptr_t) execbuf->objects,
> +  .buffer_count = execbuf->bo_count,
> +  .batch_start_offset = 0,
> +  .batch_len = 8, /* GEN8_MI_BATCH_BUFFER_END and NOOP */

Instead of hard-coding a magic number here, I think
.batch_len = device->trivial_batch_bo.next - device->trivial_batch_bo.start,
is better. With that, the comment never becomes stale.

> +  .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER,
> +  .rsvd1 = device->context_id,
> +  .rsvd2 = 0,
> +   };
> +}
> +
>  VkResult
>  anv_cmd_buffer_execbuf(struct anv_device *device,
> struct anv_cmd_buffer *cmd_buffer,
> @@ -1398,11 +1415,13 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
> struct anv_execbuf execbuf;
> anv_execbuf_init();
>  
> +   int in_fence = -1;
> VkResult result = VK_SUCCESS;
> for (uint32_t i = 0; i < num_in_semaphores; i++) {
>ANV_FROM_HANDLE(anv_semaphore, semaphore, in_semaphores[i]);
> -  assert(semaphore->temporary.type == ANV_SEMAPHORE_TYPE_NONE);
> -  struct anv_semaphore_impl *impl = >permanent;
> +  struct anv_semaphore_impl *impl =
> + semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
> + >temporary : >permanent;
>  
>switch (impl->type) {
>case ANV_SEMAPHORE_TYPE_BO:
> @@ -1411,13 +1430,42 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
>   if (result != VK_SUCCESS)
>  return result;
>   break;
> +
> +  case ANV_SEMAPHORE_TYPE_SYNC_FILE:
> + if (in_fence == -1) {
> +in_fence = impl->fd;
> + } else {
> +int merge = anv_gem_sync_file_merge(device, in_fence, impl->fd);
> +if (merge == -1)
> +   return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHX);

Because we immediately close the semaphore's fd, the spec does not allow
us to return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHX here. We must either
defer closing the fd until immediately before VK_SUCCESS, or return
VK_ERROR_DEVICE_LOST.

From the 1.0.49 spec:

If vkQueueSubmit fails, it may return VK_ERROR_OUT_OF_HOST_MEMORY or
VK_ERROR_OUT_OF_DEVICE_MEMORY.  If it does, the implementation must
ensure that the state and contents of any resources or
synchronization primitives referenced by the submitted command
buffers and any semaphores referenced by pSubmits is unaffected by
the call or its failure. If vkQueueSubmit fails in such a way that
the implementation can not make that guarantee, the implementation
must return VK_ERROR_DEVICE_LOST

> +
> +close(impl->fd);
> +close(in_fence);
> +in_fence = merge;
> + }
> +
> + impl->fd = -1;
> + break;
> +
>default:
>   break;
>}
> +
> +  /* Waiting on a semaphore with temporary state implicitly resets it 
> back
> +   * to the permanent state.
> +   */
> +  if (semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE) {
> + assert(semaphore->temporary.type == ANV_SEMAPHORE_TYPE_SYNC_FILE);
> + semaphore->temporary.type = ANV_SEMAPHORE_TYPE_NONE;
> +  }
> }
>  
> +   bool need_out_fence = false;
> for (uint32_t i = 0; i < num_out_semaphores; i++) {
>ANV_FROM_HANDLE(anv_semaphore, semaphore, out_semaphores[i]);
> +  /* Out

Re: [Mesa-dev] [PATCH 2/4] i965: Only #if...#endif a single function or related section at a time.

On Tue, May 09, 2017 at 04:10:27PM -0700, Kenneth Graunke wrote:
> Previously we guarded large swathes of code with #if GEN ... #endif
> blocks.  This made it difficult to see which generations include what.
> 
> This patch splits up the #if..#endif sections so they surround a small
> section of code - usually a single function/atom, or sometimes a group
> of related functions.  It should make the code easier to work on.

Reviewed-by: Rafael Antognolli 

> ---
>  src/mesa/drivers/dri/i965/genX_state_upload.c | 41 
> +--
>  1 file changed, 38 insertions(+), 3 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
> b/src/mesa/drivers/dri/i965/genX_state_upload.c
> index ebfcdd46e04..f3a7e72a373 100644
> --- a/src/mesa/drivers/dri/i965/genX_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
> @@ -1088,9 +1088,11 @@ genX(calculate_attr_overrides)(const struct 
> brw_context *brw,
>  */
> *urb_entry_read_length = DIV_ROUND_UP(max_source_attr + 1, 2);
>  }
> +#endif
>  
>  /* -- */
>  
> +#if GEN_GEN >= 6
>  static void
>  genX(upload_depth_stencil_state)(struct brw_context *brw)
>  {
> @@ -1181,9 +1183,11 @@ static const struct brw_tracked_state 
> genX(depth_stencil_state) = {
> },
> .emit = genX(upload_depth_stencil_state),
>  };
> +#endif
>  
>  /* -- */
>  
> +#if GEN_GEN >= 6
>  static void
>  genX(upload_clip_state)(struct brw_context *brw)
>  {
> @@ -1311,9 +1315,11 @@ static const struct brw_tracked_state genX(clip_state) 
> = {
> },
> .emit = genX(upload_clip_state),
>  };
> +#endif
>  
>  /* -- */
>  
> +#if GEN_GEN >= 6
>  static void
>  genX(upload_sf)(struct brw_context *brw)
>  {
> @@ -1502,9 +1508,11 @@ static const struct brw_tracked_state genX(sf_state) = 
> {
> },
> .emit = genX(upload_sf),
>  };
> +#endif
>  
>  /* -- */
>  
> +#if GEN_GEN >= 6
>  static void
>  genX(upload_wm)(struct brw_context *brw)
>  {
> @@ -1683,6 +1691,7 @@ static const struct brw_tracked_state genX(wm_state) = {
> },
> .emit = genX(upload_wm),
>  };
> +#endif
>  
>  /* -- */
>  
> @@ -1709,7 +1718,7 @@ static const struct brw_tracked_state genX(wm_state) = {
> pkt.StatisticsEnable = true;   \
> pkt.Enable   = true;
>  
> -
> +#if GEN_GEN >= 6
>  static void
>  genX(upload_vs_state)(struct brw_context *brw)
>  {
> @@ -1801,9 +1810,11 @@ static const struct brw_tracked_state genX(vs_state) = 
> {
> },
> .emit = genX(upload_vs_state),
>  };
> +#endif
>  
>  /* -- */
>  
> +#if GEN_GEN >= 6
>  static void
>  brw_calculate_guardband_size(const struct gen_device_info *devinfo,
>   uint32_t fb_width, uint32_t fb_height,
> @@ -2002,7 +2013,11 @@ static const struct brw_tracked_state 
> genX(sf_clip_viewport) = {
> },
> .emit = genX(upload_sf_clip_viewport),
>  };
> +#endif
> +
> +/* -- */
>  
> +#if GEN_GEN >= 6
>  static void
>  genX(upload_gs_state)(struct brw_context *brw)
>  {
> @@ -2174,12 +2189,14 @@ static const struct brw_tracked_state genX(gs_state) 
> = {
> },
> .emit = genX(upload_gs_state),
>  };
> +#endif
>  
>  /* -- */
>  
>  #define blend_factor(x) brw_translate_blend_factor(x)
>  #define blend_eqn(x) brw_translate_blend_equation(x)
>  
> +#if GEN_GEN >= 6
>  static void
>  genX(upload_blend_state)(struct brw_context *brw)
>  {
> @@ -2389,9 +2406,11 @@ static const struct brw_tracked_state 
> genX(blend_state) = {
> },
> .emit = genX(upload_blend_state),
>  };
> +#endif
>  
>  /* -- */
>  
> +#if GEN_GEN >= 6
>  static void
>  genX(upload_scissor_state)(struct brw_context *brw)
>  {
> @@ -2473,6 +2492,9 @@ static const struct brw_tracked_state 
> genX(scissor_state) = {
> },
> .emit = genX(upload_scissor_state),
>  };
> +#endif
> +
> +/* -- */
>  
>  #if GEN_GEN >= 7
>  UNUSED static const uint32_t push_constant_opcodes[] = {
> @@ -2513,6 +2535,7 @@ upload_constant_state(struct brw_context *brw,
>  }
>  #endif
>  
> +#if GEN_GEN >= 6
>  static void
>  genX(upload_vs_push_constants)(struct brw_context *brw)
>  {
> @@ -2611,9 +2634,11 @@ static const struct brw_tracked_state 
> genX(wm_push_constants) = {
> },
> .emit = genX(upload_wm_push_constants),
>  };
> +#endif
>  
>  /*

Re: [Mesa-dev] [PATCH 1/4] i965: Turn brw_get_line_width_float() into brw_get_line_width().

On Tue, May 09, 2017 at 04:10:26PM -0700, Kenneth Graunke wrote:
> Drop the old brw_get_line_width() helper which return the unsigned
> fixed-point encoding of the line width - it's been dead since the
> conversion to GENXML (which does the encoding for us).
> 
> Then rename brw_get_line_width_float() to the shorter name.

Hmm... I should have looked for other users of this function before creating
the _float() version.

Reviewed-by: Rafael Antognolli 

> ---
>  src/mesa/drivers/dri/i965/brw_util.h  | 10 +-
>  src/mesa/drivers/dri/i965/genX_state_upload.c |  6 +++---
>  2 files changed, 4 insertions(+), 12 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_util.h 
> b/src/mesa/drivers/dri/i965/brw_util.h
> index 7395d345a57..814286007de 100644
> --- a/src/mesa/drivers/dri/i965/brw_util.h
> +++ b/src/mesa/drivers/dri/i965/brw_util.h
> @@ -41,7 +41,7 @@ extern GLuint brw_translate_blend_equation( GLenum mode );
>  extern GLenum brw_fix_xRGB_alpha(GLenum function);
>  
>  static inline float
> -brw_get_line_width_float(struct brw_context *brw)
> +brw_get_line_width(struct brw_context *brw)
>  {
> /* From the OpenGL 4.4 spec:
>  *
> @@ -72,12 +72,4 @@ brw_get_line_width_float(struct brw_context *brw)
> return line_width;
>  }
>  
> -static inline uint32_t
> -brw_get_line_width(struct brw_context *brw)
> -{
> -   float line_width = brw_get_line_width_float(brw);
> -
> -   return U_FIXED(line_width, 7);
> -}
> -
>  #endif
> diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
> b/src/mesa/drivers/dri/i965/genX_state_upload.c
> index 77b9f93b5e3..ebfcdd46e04 100644
> --- a/src/mesa/drivers/dri/i965/genX_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
> @@ -1405,11 +1405,11 @@ genX(upload_sf)(struct brw_context *brw)
>/* _NEW_LINE */
>  #if GEN_GEN == 8
>if (brw->is_cherryview)
> - sf.CHVLineWidth = brw_get_line_width_float(brw);
> + sf.CHVLineWidth = brw_get_line_width(brw);
>else
> - sf.LineWidth = brw_get_line_width_float(brw);
> + sf.LineWidth = brw_get_line_width(brw);
>  #else
> -  sf.LineWidth = brw_get_line_width_float(brw);
> +  sf.LineWidth = brw_get_line_width(brw);
>  #endif
>  
>if (ctx->Line.SmoothFlag) {
> -- 
> 2.12.2
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/4] i965: Switch BRW_NEW_CURBE_OFFSETS to BRW_NEW_PUSH_CONSTANT_ALLOCATION.

The BRW_NEW_CURBE_OFFSETS dirty bit is signalled when changing the
partitioning of the Constant Buffer URB section between the various
shader stages, on Gen4-5.

BRW_NEW_PUSH_CONSTANT_ALLOCATION is basically the same thing on Gen7+.

So, save a bit, and use the new name.
---
 src/mesa/drivers/dri/i965/brw_clip_state.c   |  4 ++--
 src/mesa/drivers/dri/i965/brw_context.h  |  5 +
 src/mesa/drivers/dri/i965/brw_curbe.c| 10 +-
 src/mesa/drivers/dri/i965/brw_gs_state.c |  2 +-
 src/mesa/drivers/dri/i965/brw_state_upload.c |  1 -
 src/mesa/drivers/dri/i965/brw_urb.c  |  2 +-
 src/mesa/drivers/dri/i965/brw_vs_state.c |  4 ++--
 src/mesa/drivers/dri/i965/brw_wm_state.c |  4 ++--
 8 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c 
b/src/mesa/drivers/dri/i965/brw_clip_state.c
index d5fe2b547fa..35ccd2fe74f 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_state.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_state.c
@@ -86,7 +86,7 @@ brw_upload_clip_unit(struct brw_context *brw)
clip->thread3.const_urb_entry_read_length =
   brw->clip.prog_data->curb_read_length;
 
-   /* BRW_NEW_CURBE_OFFSETS */
+   /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
clip->thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2;
clip->thread3.dispatch_grf_start_reg = 1;
clip->thread3.urb_entry_read_offset = 0;
@@ -171,7 +171,7 @@ const struct brw_tracked_state brw_clip_unit = {
   .brw   = BRW_NEW_BATCH |
BRW_NEW_BLORP |
BRW_NEW_CLIP_PROG_DATA |
-   BRW_NEW_CURBE_OFFSETS |
+   BRW_NEW_PUSH_CONSTANT_ALLOCATION |
BRW_NEW_PROGRAM_CACHE |
BRW_NEW_URB_FENCE,
},
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 723c5d6926d..100bd74f214 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -173,7 +173,6 @@ enum brw_state_id {
BRW_STATE_GEOMETRY_PROGRAM,
BRW_STATE_TESS_PROGRAMS,
BRW_STATE_VERTEX_PROGRAM,
-   BRW_STATE_CURBE_OFFSETS,
BRW_STATE_REDUCED_PRIMITIVE,
BRW_STATE_PATCH_PRIMITIVE,
BRW_STATE_PRIMITIVE,
@@ -259,7 +258,6 @@ enum brw_state_id {
 #define BRW_NEW_GEOMETRY_PROGRAM(1ull << BRW_STATE_GEOMETRY_PROGRAM)
 #define BRW_NEW_TESS_PROGRAMS   (1ull << BRW_STATE_TESS_PROGRAMS)
 #define BRW_NEW_VERTEX_PROGRAM  (1ull << BRW_STATE_VERTEX_PROGRAM)
-#define BRW_NEW_CURBE_OFFSETS   (1ull << BRW_STATE_CURBE_OFFSETS)
 #define BRW_NEW_REDUCED_PRIMITIVE   (1ull << BRW_STATE_REDUCED_PRIMITIVE)
 #define BRW_NEW_PATCH_PRIMITIVE (1ull << BRW_STATE_PATCH_PRIMITIVE)
 #define BRW_NEW_PRIMITIVE   (1ull << BRW_STATE_PRIMITIVE)
@@ -955,8 +953,7 @@ struct brw_context
} urb;
 
 
-   /* BRW_NEW_CURBE_OFFSETS:
-*/
+   /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
struct {
   GLuint wm_start;  /**< pos of first wm const in CURBE buffer */
   GLuint wm_size;   /**< number of float[4] consts, multiple of 16 */
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c 
b/src/mesa/drivers/dri/i965/brw_curbe.c
index 7d58efb622f..139a3bcdf86 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -41,7 +41,7 @@
  * quickly at thread setup time.  Each individual fixed function unit's state
  * (brw_vs_state.c for example) tells the hardware which subset of the CURBE
  * it wants in its register space, and we calculate those areas here under the
- * BRW_NEW_CURBE_OFFSETS state flag.  The brw_urb.c allocation will control
+ * BRW_NEW_PUSH_CONSTANT_ALLOCATION state flag.  The brw_urb.c allocation will 
control
  * how many CURBEs can be loaded into the hardware at once before a pipeline
  * stall occurs at CMD_CONST_BUFFER time.
  *
@@ -135,7 +135,7 @@ static void calculate_curbe_offsets( struct brw_context 
*brw )
  brw->curbe.vs_start,
  brw->curbe.vs_size );
 
-  brw->ctx.NewDriverState |= BRW_NEW_CURBE_OFFSETS;
+  brw->ctx.NewDriverState |= BRW_NEW_PUSH_CONSTANT_ALLOCATION;
}
 }
 
@@ -196,7 +196,7 @@ static void
 brw_upload_constant_buffer(struct brw_context *brw)
 {
struct gl_context *ctx = >ctx;
-   /* BRW_NEW_CURBE_OFFSETS */
+   /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
const GLuint sz = brw->curbe.total_size;
const GLuint bufsz = sz * 16 * sizeof(GLfloat);
gl_constant_value *buf;
@@ -216,7 +216,7 @@ brw_upload_constant_buffer(struct brw_context *brw)
if (brw->curbe.wm_size) {
   _mesa_load_state_parameters(ctx, brw->fragment_program->Parameters);
 
-  /* BRW_NEW_CURBE_OFFSETS */
+  /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
   GLuint offset = brw->curbe.wm_start * 16;
 
   /* BRW_NEW_FS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
@@ -338,7 +338,7 @@ const struct brw_tracked_state brw_constant_buffer = {
   .mesa = _NEW_PROGRAM_CONSTANTS,

[Mesa-dev] [PATCH 3/4] i965: Drop BRW_NEW_PUSH_CONSTANT_ALLOCATION from Gen6 code.

Gen6 doesn't have a configurable push constant region.  This is only
used on Gen7+.
---
 src/mesa/drivers/dri/i965/genX_state_upload.c | 12 +++-
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
b/src/mesa/drivers/dri/i965/genX_state_upload.c
index f3a7e72a373..6619d4d7ab1 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -1685,9 +1685,7 @@ static const struct brw_tracked_state genX(wm_state) = {
(GEN_GEN < 7 ? _NEW_PROGRAM_CONSTANTS : 0),
   .brw   = BRW_NEW_BLORP |
BRW_NEW_FS_PROG_DATA |
-   (GEN_GEN < 7 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION |
-  BRW_NEW_BATCH
-: BRW_NEW_CONTEXT),
+   (GEN_GEN < 7 ? BRW_NEW_BATCH : BRW_NEW_CONTEXT),
},
.emit = genX(upload_wm),
 };
@@ -1804,9 +1802,7 @@ static const struct brw_tracked_state genX(vs_state) = {
BRW_NEW_BLORP |
BRW_NEW_CONTEXT |
BRW_NEW_VS_PROG_DATA |
-   (GEN_GEN < 7 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION |
-  BRW_NEW_VERTEX_PROGRAM
-: 0),
+   (GEN_GEN < 7 ? BRW_NEW_VERTEX_PROGRAM : 0),
},
.emit = genX(upload_vs_state),
 };
@@ -2183,9 +2179,7 @@ static const struct brw_tracked_state genX(gs_state) = {
BRW_NEW_CONTEXT |
BRW_NEW_GEOMETRY_PROGRAM |
BRW_NEW_GS_PROG_DATA |
-   (GEN_GEN < 7 ? BRW_NEW_FF_GS_PROG_DATA |
-  BRW_NEW_PUSH_CONSTANT_ALLOCATION
-: 0),
+   (GEN_GEN < 7 ? BRW_NEW_FF_GS_PROG_DATA : 0),
},
.emit = genX(upload_gs_state),
 };
-- 
2.12.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/4] i965: Turn brw_get_line_width_float() into brw_get_line_width().

Drop the old brw_get_line_width() helper which return the unsigned
fixed-point encoding of the line width - it's been dead since the
conversion to GENXML (which does the encoding for us).

Then rename brw_get_line_width_float() to the shorter name.
---
 src/mesa/drivers/dri/i965/brw_util.h  | 10 +-
 src/mesa/drivers/dri/i965/genX_state_upload.c |  6 +++---
 2 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_util.h 
b/src/mesa/drivers/dri/i965/brw_util.h
index 7395d345a57..814286007de 100644
--- a/src/mesa/drivers/dri/i965/brw_util.h
+++ b/src/mesa/drivers/dri/i965/brw_util.h
@@ -41,7 +41,7 @@ extern GLuint brw_translate_blend_equation( GLenum mode );
 extern GLenum brw_fix_xRGB_alpha(GLenum function);
 
 static inline float
-brw_get_line_width_float(struct brw_context *brw)
+brw_get_line_width(struct brw_context *brw)
 {
/* From the OpenGL 4.4 spec:
 *
@@ -72,12 +72,4 @@ brw_get_line_width_float(struct brw_context *brw)
return line_width;
 }
 
-static inline uint32_t
-brw_get_line_width(struct brw_context *brw)
-{
-   float line_width = brw_get_line_width_float(brw);
-
-   return U_FIXED(line_width, 7);
-}
-
 #endif
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 77b9f93b5e3..ebfcdd46e04 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -1405,11 +1405,11 @@ genX(upload_sf)(struct brw_context *brw)
   /* _NEW_LINE */
 #if GEN_GEN == 8
   if (brw->is_cherryview)
- sf.CHVLineWidth = brw_get_line_width_float(brw);
+ sf.CHVLineWidth = brw_get_line_width(brw);
   else
- sf.LineWidth = brw_get_line_width_float(brw);
+ sf.LineWidth = brw_get_line_width(brw);
 #else
-  sf.LineWidth = brw_get_line_width_float(brw);
+  sf.LineWidth = brw_get_line_width(brw);
 #endif
 
   if (ctx->Line.SmoothFlag) {
-- 
2.12.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/4] i965: Only #if...#endif a single function or related section at a time.

Previously we guarded large swathes of code with #if GEN ... #endif
blocks.  This made it difficult to see which generations include what.

This patch splits up the #if..#endif sections so they surround a small
section of code - usually a single function/atom, or sometimes a group
of related functions.  It should make the code easier to work on.
---
 src/mesa/drivers/dri/i965/genX_state_upload.c | 41 +--
 1 file changed, 38 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
b/src/mesa/drivers/dri/i965/genX_state_upload.c
index ebfcdd46e04..f3a7e72a373 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -1088,9 +1088,11 @@ genX(calculate_attr_overrides)(const struct brw_context 
*brw,
 */
*urb_entry_read_length = DIV_ROUND_UP(max_source_attr + 1, 2);
 }
+#endif
 
 /* -- */
 
+#if GEN_GEN >= 6
 static void
 genX(upload_depth_stencil_state)(struct brw_context *brw)
 {
@@ -1181,9 +1183,11 @@ static const struct brw_tracked_state 
genX(depth_stencil_state) = {
},
.emit = genX(upload_depth_stencil_state),
 };
+#endif
 
 /* -- */
 
+#if GEN_GEN >= 6
 static void
 genX(upload_clip_state)(struct brw_context *brw)
 {
@@ -1311,9 +1315,11 @@ static const struct brw_tracked_state genX(clip_state) = 
{
},
.emit = genX(upload_clip_state),
 };
+#endif
 
 /* -- */
 
+#if GEN_GEN >= 6
 static void
 genX(upload_sf)(struct brw_context *brw)
 {
@@ -1502,9 +1508,11 @@ static const struct brw_tracked_state genX(sf_state) = {
},
.emit = genX(upload_sf),
 };
+#endif
 
 /* -- */
 
+#if GEN_GEN >= 6
 static void
 genX(upload_wm)(struct brw_context *brw)
 {
@@ -1683,6 +1691,7 @@ static const struct brw_tracked_state genX(wm_state) = {
},
.emit = genX(upload_wm),
 };
+#endif
 
 /* -- */
 
@@ -1709,7 +1718,7 @@ static const struct brw_tracked_state genX(wm_state) = {
pkt.StatisticsEnable = true;   \
pkt.Enable   = true;
 
-
+#if GEN_GEN >= 6
 static void
 genX(upload_vs_state)(struct brw_context *brw)
 {
@@ -1801,9 +1810,11 @@ static const struct brw_tracked_state genX(vs_state) = {
},
.emit = genX(upload_vs_state),
 };
+#endif
 
 /* -- */
 
+#if GEN_GEN >= 6
 static void
 brw_calculate_guardband_size(const struct gen_device_info *devinfo,
  uint32_t fb_width, uint32_t fb_height,
@@ -2002,7 +2013,11 @@ static const struct brw_tracked_state 
genX(sf_clip_viewport) = {
},
.emit = genX(upload_sf_clip_viewport),
 };
+#endif
+
+/* -- */
 
+#if GEN_GEN >= 6
 static void
 genX(upload_gs_state)(struct brw_context *brw)
 {
@@ -2174,12 +2189,14 @@ static const struct brw_tracked_state genX(gs_state) = {
},
.emit = genX(upload_gs_state),
 };
+#endif
 
 /* -- */
 
 #define blend_factor(x) brw_translate_blend_factor(x)
 #define blend_eqn(x) brw_translate_blend_equation(x)
 
+#if GEN_GEN >= 6
 static void
 genX(upload_blend_state)(struct brw_context *brw)
 {
@@ -2389,9 +2406,11 @@ static const struct brw_tracked_state genX(blend_state) 
= {
},
.emit = genX(upload_blend_state),
 };
+#endif
 
 /* -- */
 
+#if GEN_GEN >= 6
 static void
 genX(upload_scissor_state)(struct brw_context *brw)
 {
@@ -2473,6 +2492,9 @@ static const struct brw_tracked_state genX(scissor_state) 
= {
},
.emit = genX(upload_scissor_state),
 };
+#endif
+
+/* -- */
 
 #if GEN_GEN >= 7
 UNUSED static const uint32_t push_constant_opcodes[] = {
@@ -2513,6 +2535,7 @@ upload_constant_state(struct brw_context *brw,
 }
 #endif
 
+#if GEN_GEN >= 6
 static void
 genX(upload_vs_push_constants)(struct brw_context *brw)
 {
@@ -2611,9 +2634,11 @@ static const struct brw_tracked_state 
genX(wm_push_constants) = {
},
.emit = genX(upload_wm_push_constants),
 };
+#endif
 
 /* -- */
 
+#if GEN_GEN >= 6
 static unsigned
 genX(determine_sample_mask)(struct brw_context *brw)
 {
@@ -2646,8 +2671,6 @@ genX(determine_sample_mask)(struct brw_context *brw)
}
 }
 
-/* -- */
-
 static void
 genX(emit_3dstate_multisample2)(struct brw_context *brw,
 unsigned num_samples)
@@ -2701,9

Re: [Mesa-dev] [PATCH v2 1/2] i965: Use helper function for modifier -> tiling

2017-05-09 Thread Dave Airlie

On 6 May 2017 at 02:35, Daniel Stone  wrote:
> Hi Emil,
>
> On 4 May 2017 at 15:14, Emil Velikov  wrote:
>> On 4 May 2017 at 14:43, Daniel Stone  wrote:
>>> IOW, there is no change as far as I can see, but perhaps for the
>>> meantime, we could use an unreachable() at the bottom of
>>> modifier_to_tiling(). Would that help?
>>>
>> Yes the default statement and return I915_TILING_NONE in
>> modifier_to_tiling() seem unreachable.
>>
>> unreachable or assert - I am leaning slightly towards the latter since
>> it will catch fallouts in the future.
>> I'll leave the decision to you and the Intel devs.
>>
>> In either case:
>> Reviewed-by: Emil Velikov 
>
> Cool, I've pushed these both now with an assert - thanks!

unreachable() is causing compiler errors here.

Need something inside the ().

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] mesa: use u_bit_scan() in update_program_texture_state()

2017-05-09 Thread Samuel Pitoiset

The check in update_single_program_texture() can also be
removed.

Signed-off-by: Samuel Pitoiset 
---
 src/mesa/main/texstate.c | 13 +
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/src/mesa/main/texstate.c b/src/mesa/main/texstate.c
index 830b230b5d..c11a2e95fe 100644
--- a/src/mesa/main/texstate.c
+++ b/src/mesa/main/texstate.c
@@ -620,9 +620,6 @@ update_single_program_texture(struct gl_context *ctx, 
struct gl_program *prog,
struct gl_sampler_object *sampler;
int unit;
 
-   if (!(prog->SamplersUsed & (1 << s)))
-  return NULL;
-
unit = prog->SamplerUnits[s];
texUnit = >Texture.Unit[unit];
 
@@ -676,16 +673,16 @@ update_program_texture_state(struct gl_context *ctx, 
struct gl_program **prog,
int i;
 
for (i = 0; i < MESA_SHADER_STAGES; i++) {
+  GLbitfield mask;
   int s;
 
   if (!prog[i])
  continue;
 
-  /* We can't only do the shifting trick as the loop condition because if
-   * sampler 31 is active, the next iteration tries to shift by 32, which 
is
-   * undefined.
-   */
-  for (s = 0; s < MAX_SAMPLERS && (1 << s) <= prog[i]->SamplersUsed; s++) {
+  mask = prog[i]->SamplersUsed;
+
+  while (mask) {
+ const int s = u_bit_scan();
  struct gl_texture_object *texObj;
 
  texObj = update_single_program_texture(ctx, prog[i], s);
-- 
2.12.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] renderonly: Initialize fields of struct winsys_handle.

2017-05-09 Thread Christian Gmeiner

2017-05-09 20:24 GMT+02:00 Eric Anholt :
> vc4 was rejecting renderonly's import, because the offset field was
> nonzero.

Reviewed-by: Christian Gmeiner 

greets
--
Christian Gmeiner, MSc

https://www.youtube.com/user/AloryOFFICIAL
https://soundcloud.com/christian-gmeiner
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] nir: Embed the shader_info in the nir_shader again

On Monday, May 8, 2017 10:54:25 AM PDT Jason Ekstrand wrote:
> Commit e1af20f18a86f52a9640faf2d4ff8a71b0a4fa9b changed the shader_info
> from being embedded into being just a pointer.  The idea was that
> sharing the shader_info between NIR and GLSL would be easier if it were
> a pointer pointing to the same shader_info struct.  This, however, has
> caused a few problems:
> 
>  1) There are many things which generate NIR without GLSL.  This means
> we have to support both NIR shaders which come from GLSL and ones
> that don't and need to have an info elsewhere.
> 
>  2) The solution to (1) raises all sorts of ownership issues which have
> to be resolved with ralloc_parent checks.
> 
>  3) Ever since 00620782c92100d77c660f9783504c6d80fa1d58, we've been
> using nir_gather_info to fill out the final shader_info.  Thanks to
> cloning and the above ownership issues, the nir_shader::info may not
> point back to the gl_shader anymore and so we have to do a copy of
> the shader_info from NIR back to GLSL anyway.
> 
> All of these issues go away if we just embed the shader_info in the
> nir_shader.  There's a little downside of having to copy it back after
> calling nir_gather_info but, as explained above, we have to do that
> anyway.

  4) The i965 passthrough TCS code alters the NIR shader info for the
 TCS and TES, thinking each shader specialization had its own copy.
 This could have unintended consequences...

Thanks for doing this.  It seemed like a decent plan at the time, but
I think just copying the info is better - various layers of compilation
may want to alter it as we go (turn system values to/from
inputs/outputs, add bonus things to get copied through, ...)

Reviewed-by: Kenneth Graunke 

signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] renderonly: Initialize fields of struct winsys_handle.

vc4 was rejecting renderonly's import, because the offset field was
nonzero.
---
 src/gallium/auxiliary/renderonly/renderonly.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/auxiliary/renderonly/renderonly.c 
b/src/gallium/auxiliary/renderonly/renderonly.c
index 2fe10090163e..d3ed214f4e43 100644
--- a/src/gallium/auxiliary/renderonly/renderonly.c
+++ b/src/gallium/auxiliary/renderonly/renderonly.c
@@ -117,6 +117,7 @@ renderonly_create_kms_dumb_buffer_for_resource(struct 
pipe_resource *rsc,
}
 
/* import dumb buffer */
+   memset(, 0, sizeof(handle));
handle.type = DRM_API_HANDLE_TYPE_FD;
handle.handle = prime_fd;
handle.stride = create_dumb.pitch;
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] How does FakeGLX work since Mesa 10?

2017-05-09 Thread Tom Hudson

I'm trying to upgrade a Mesa 10 installation to Mesa 17. There have been
plenty of changes, but only one breakage that's left me perplexed.

https://cgit.freedesktop.org/mesa/mesa/commit/?id=c00b250c8061d042d9905e61b9077462ee91008b

In this CL, Mesa stopped maintaining a __GLXcontext for FakeGLX; there's
just an XMesaContext.

Fake_glXGetCurrentContext() casts the XMesaContext to a __GLXContext and
returns it. Perhaps the reasonable assumption is that a context is an
opaque object.

However, glXGetCurrentDisplay() doesn't respect that assumption; it assumes
it is getting a valid __GLXContext and returns __GLXContext::currentDpy,
the first pointer.

The first pointer in an XMesaContext is *not* a pointer to a Display, and
so any code that relies on this code path seems doomed to disappointment.

This is hitting us in open-source projects based on Ogre, but seems like a
breakage in the public API that other users would have run into in the 3
years since that commit?

Is this really a bug (which presumably I should file), or am I missing
something?
Does anyone have suggestions for working around it?

Tom
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v3 0/4] Disable glthread if libX11 isn't thread-safe

2017-05-09 Thread gregory hainaut

On Tue, 09 May 2017 09:31:18 -0700
Eric Anholt  wrote:

> Gregory Hainaut  writes:
> 
> > On 5/8/17, Emil Velikov  wrote:  
>  [...]  
> >
> > Hello Emil,
> >
> > Yes you're right. And potentially it can be back-ported to stable
> > branch. Besides it would allow to know which applications aren't X
> > thread safe. And potentially app owners can fix the issue too.
> >
> > By the way, I don't have commit access so feel free to push the series :)
> >
> > On 5/8/17, Eric Anholt  wrote:  
>  [...]  
>  [...]  
>  [...]  
>  [...]  
>  [...]  
> >
> > Hello Eric,
> >
> > Indeed I saw this behavior on Nouveau. I got a crash on
> > GetBuffersWithFormat on DRI2 but it was fine on DRI3. I still kept the
> > check on GLX/DRI3 because I don't know if we have a strong guarantee
> > that X11 is never used.  
> 
> I would be surprised if there was a path that hit X through GL calls as
> opposed to the window system, with DRI3.  GetBuffersWithFormat was the
> problem.

Tbh, I'm a noob. If you're sure, we can always return true for GLX/DRI3. 
Otherwise
I will put a comment that it is likely fine. Your call.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] i965: Use isl for hiz and stencil

On Tue, May 9, 2017 at 9:43 AM, Pohjolainen, Topi <
topi.pohjolai...@gmail.com> wrote:

> On Tue, May 09, 2017 at 08:45:55AM -0700, Jason Ekstrand wrote:
> > On Mon, May 8, 2017 at 11:10 PM, Pohjolainen, Topi <
> > topi.pohjolai...@gmail.com> wrote:
> >
> > > On Mon, May 08, 2017 at 04:51:35PM -0700, Jason Ekstrand wrote:
> > > > On Wed, May 3, 2017 at 2:22 AM, Topi Pohjolainen <
> > > topi.pohjolai...@gmail.com
> > > > > wrote:
> > > >
> > > > > Patches 1-17 are revision that
> > > > >
> > > > >   - rework hiz on gen6 to use on-demand offset calculator allowing
> > > > > one to drop dependency to miptree structure and
> > > > >   - rework all auxiliary surfaces to be created against isl
> directly.
> > > > >
> > > > > Patches 18 and 19 introduce new surface layout in ISL. This is
> called
> > > > > back-to-back and similar to layout ALL_SLICES_AT_EACH_LOD found in
> > > > > i965 for gen6 hiz and stencil. This layout stacks slices for each
> level
> > > > > after one and other, or back to back. All slices ate each lod is
> almost
> > > > > the same except that it places levels one and two side-by-side
> trying
> > > > > to preserve space. Back-to-back wastes a little more memory but
> aligns
> > > > > each level on page boundary simplifying driver logic.
> > > > >
> > > >
> > > > My primary gripe here is that you seem to have half-added
> back-to-back to
> > > > ISL.  If this layout is a long-term thing, then we should add a new
> > > > ISL_DIM_LAYOUT_GEN6_BACK_TO_BACK layout and plumb your offset
> function
> > > > through isl_surf_get_image_offset_sa.  Is this intended to be a
> permanent
> > > > solution?  I think eventually, I'd like us to go with one surf per
> > > miplevel
> > > > (which is almost the same) but I can see how this is easier at the
> > > moment.
> > > > However, I think this works sufficiently well that I'm ok with doing
> the
> > > > back-to-back thing for a while.
> > >
> > > I thought about adding new layout type but couldn't decide which way is
> > > better. It is easy to buy your arguments in favor, and I'm happy to
> give it
> > > a go.
> > > If miptree per level is your number one choice, then lets go with that.
> >
> >
> > I said "one surf per miplevel".  I see no reason why we need N miptrees.
>
> Ah, right, my mistake. We need a little more than isl_surf instance though,
> at least we need offset per level (unless we calculate that on-demand).
> Of course we could use the current level/slice table but I'm hoping to get
> rid
> of that.
>

Yeah, something similar to anv_surface is probably needed.  Though I would
be inclined to call it brw_per_lod_surf or something more descriptive like
that.


> >
> >
> > > I just
> > > need to check a few things first about the actual solution. I would see
> > > something in these lines:
> > >
> > > 1) Add a dynamically allocated array of miptrees into miptree. This
> would
> > >contain miptree instance per level.
> > >
> > > 2) Still uses one buffer object containing space for all levels. The
> > > instances
> > >in the array would either have their ::bo pointer zero or pointing
> to
> > > the
> > >parent ::bo. In both cases ::offset would point the start of the
> level.
> > >
> >
> > Yes
> >
> >
> > > 3) Instances in the array are not reference counted and therefore
> deleted
> > >simply by deallocating the malloced chunk underneath.
> > >
> >
> > If we have one isl_surf per miplevel and not a miptree per level, then I
> > don't think this is an issue.
> >
> >
> > > 4) Add similar dynamically allocated array of intel_miptree_aux_buffer
> > >instances for hiz. Here also use one ::bo which would need to added
> to
> > >miptree I think cause there ins't one in miptree. Or perhaps add the
> > >array of aux buffers to aux buffer?
> > >
> >
> > Looking at intel_miptree_aux_buffer, I think what we would end up with is
> > an array of aux_buffers
> >
> >
> > > 5) ISL doesn't need to know about this and hence we would add the total
> > > space
> > >calculator along with ::offset initialization in i965
> (brw_tex_layout,
> > >I think).
> > >
> >
> > That's fine.  We already do that in Vulkan with anv_surface.  ::offset
> > calculation can be done easily enough by just adding sizes.
> >
> >
> > > 6) In Vulkan <-> GL interop, we'd pass single level arrays only as ISL
> > > didn't
> > >know about back-2-back. Or we simply don't care about gen6 as Vulkan
> > >doesn't support it anyhow?
> > >
> >
> > Yeah, we don't care about gen6.
> >
> > --Jason
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [RFC] intel/isl: Rewrite gen7_choose_image_alignment_el

The Ivy Bridge PRM provides a nice table that handles most of the
alignment cases in one place.  For standard color buffers we have a
little freedom of choice but for most depth, stencil and compressed it's
hard-coded.  Chad's original functions split halign and valign apart and
implemented them almost entirely based on restrictions and not the
table.  This makes things way more confusing than they need to be.  This
commit gets rid of the split and makes us implement the exact table
up-front.  If our surface isn't one of the ones in the table we then to
on to make real choices.

The only functional change here is that we now return a vertical
alignment of 8 for stencil because that's what the table says to do.
That change should probably go in it's own patch.

If people like this change, I'm happy to do the same refactoring for
gen6, gen8, and gen9.

Cc: Chad Versace 
Cc: Topi Pohjolainen 
Cc: Nanley Chery 
---
 src/intel/isl/isl_gen7.c | 178 ---
 1 file changed, 76 insertions(+), 102 deletions(-)

diff --git a/src/intel/isl/isl_gen7.c b/src/intel/isl/isl_gen7.c
index 18687b5..7607fde 100644
--- a/src/intel/isl/isl_gen7.c
+++ b/src/intel/isl/isl_gen7.c
@@ -289,123 +289,97 @@ isl_gen6_filter_tiling(const struct isl_device *dev,
   *flags &= ~ISL_TILING_Y0_BIT;
 }
 
-/**
- * Choose horizontal subimage alignment, in units of surface elements.
- */
-static uint32_t
-gen7_choose_halign_el(const struct isl_device *dev,
-  const struct isl_surf_init_info *restrict info)
+void
+isl_gen7_choose_image_alignment_el(const struct isl_device *dev,
+   const struct isl_surf_init_info *restrict 
info,
+   enum isl_tiling tiling,
+   enum isl_dim_layout dim_layout,
+   enum isl_msaa_layout msaa_layout,
+   struct isl_extent3d *image_align_el)
 {
-   if (isl_format_is_compressed(info->format))
-  return 1;
+   assert(ISL_DEV_GEN(dev) == 7);
 
-   /* From the Ivybridge PRM (2012-05-31), Volume 4, Part 1, Section 2.12.1,
-* RENDER_SURFACE_STATE Surface Hoizontal Alignment:
+   /* Handled by isl_choose_image_alignment_el */
+   assert(info->format != ISL_FORMAT_HIZ);
+
+   /* IVB+ does not support combined depthstencil. */
+   assert(!isl_surf_usage_is_depth_and_stencil(info->usage));
+
+   /* From the Ivy Bridge PRM, Vol. 2, Part 2, Section 6.18.4.4,
+* "Alignment unit size", the alignment parameters are summarized in the
+* following table:
 *
-*- This field is intended to be set to HALIGN_8 only if the surface
-*  was rendered as a depth buffer with Z16 format or a stencil buffer,
-*  since these surfaces support only alignment of 8. Use of HALIGN_8
-*  for other surfaces is supported, but uses more memory.
+* Surface Defined By | Surface Format  | Align Width | Align Height
+*+-+-+--
+*   DEPTH_BUFFER |   D16_UNORM |  8  |  4
+*| other   |  4  |  4
+*+-+-+--
+*   STENCIL_BUFFER   |  N/A|  8  |  8
+*+-+-+--
+*   SURFACE_STATE| BC*, ETC*, EAC* |  4  |  4
+*|  FXT1   |  8  |  4
+*|   all others|   HALIGN|   VALIGN
+*---
 */
-   if (isl_surf_info_is_z16(info) ||
-   isl_surf_usage_is_stencil(info->usage))
-  return 8;
-
-   return 4;
-}
+   if (isl_surf_usage_is_depth(info->usage)) {
+  if (info->format == ISL_FORMAT_R16_UNORM) {
+ return isl_extent3d(8, 4, 1);
+  } else {
+ return isl_extent3d(4, 4, 1);
+  }
+   } else if (isl_surf_usage_is_stencil(info->usage)) {
+  return isl_extent3d(8, 8, 1);
+   } else if (isl_format_is_compressed(info->format)) {
+  /* Compressed formats all have alignment equal to block size. */
+  return isl_extent3d(1, 1, 1);
+   }
 
-/**
- * Choose vertical subimage alignment, in units of surface elements.
- */
-static uint32_t
-gen7_choose_valign_el(const struct isl_device *dev,
-  const struct isl_surf_init_info *restrict info,
-  enum isl_tiling tiling)
-{
-   MAYBE_UNUSED bool require_valign2 = false;
-   bool require_valign4 = false;
+   /* Everything after this point is in the "set by Surface Horizontal or
+* Vertical Alignment" case.  Now it's just a matter of applying
+* restrictions.
+*/
 
-   if (isl_format_is_compressed(info->format))
-

[Mesa-dev] [PATCH] freedreno: fix clang error in fd_get_compute_param

2017-05-09 Thread Rob Herring

With commit 10c17f23b752 ("freedreno: core compute state support"),
Android builds fail with the following error:

external/mesa3d/src/gallium/drivers/freedreno/freedreno_screen.c:610:17: error: 
format string is not a string literal (potentially insecure) 
[-Werror,-Wformat-security]
sprintf(ret, ir);
 ^~

Signed-off-by: Rob Herring 
---
 src/gallium/drivers/freedreno/freedreno_screen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c 
b/src/gallium/drivers/freedreno/freedreno_screen.c
index 052565dcbdc9..57010d64587f 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -589,7 +589,7 @@ fd_get_compute_param(struct pipe_screen *pscreen, enum 
pipe_shader_ir ir_type,
enum pipe_compute_cap param, void *ret)
 {
struct fd_screen *screen = fd_screen(pscreen);
-   const char *ir = "ir3";
+   const char * const ir = "ir3";
 
if (!has_compute(screen))
return 0;
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] i965: Use isl for hiz and stencil

On Tue, May 09, 2017 at 08:45:55AM -0700, Jason Ekstrand wrote:
> On Mon, May 8, 2017 at 11:10 PM, Pohjolainen, Topi <
> topi.pohjolai...@gmail.com> wrote:
> 
> > On Mon, May 08, 2017 at 04:51:35PM -0700, Jason Ekstrand wrote:
> > > On Wed, May 3, 2017 at 2:22 AM, Topi Pohjolainen <
> > topi.pohjolai...@gmail.com
> > > > wrote:
> > >
> > > > Patches 1-17 are revision that
> > > >
> > > >   - rework hiz on gen6 to use on-demand offset calculator allowing
> > > > one to drop dependency to miptree structure and
> > > >   - rework all auxiliary surfaces to be created against isl directly.
> > > >
> > > > Patches 18 and 19 introduce new surface layout in ISL. This is called
> > > > back-to-back and similar to layout ALL_SLICES_AT_EACH_LOD found in
> > > > i965 for gen6 hiz and stencil. This layout stacks slices for each level
> > > > after one and other, or back to back. All slices ate each lod is almost
> > > > the same except that it places levels one and two side-by-side trying
> > > > to preserve space. Back-to-back wastes a little more memory but aligns
> > > > each level on page boundary simplifying driver logic.
> > > >
> > >
> > > My primary gripe here is that you seem to have half-added back-to-back to
> > > ISL.  If this layout is a long-term thing, then we should add a new
> > > ISL_DIM_LAYOUT_GEN6_BACK_TO_BACK layout and plumb your offset function
> > > through isl_surf_get_image_offset_sa.  Is this intended to be a permanent
> > > solution?  I think eventually, I'd like us to go with one surf per
> > miplevel
> > > (which is almost the same) but I can see how this is easier at the
> > moment.
> > > However, I think this works sufficiently well that I'm ok with doing the
> > > back-to-back thing for a while.
> >
> > I thought about adding new layout type but couldn't decide which way is
> > better. It is easy to buy your arguments in favor, and I'm happy to give it
> > a go.
> > If miptree per level is your number one choice, then lets go with that.
> 
> 
> I said "one surf per miplevel".  I see no reason why we need N miptrees.

Ah, right, my mistake. We need a little more than isl_surf instance though,
at least we need offset per level (unless we calculate that on-demand).
Of course we could use the current level/slice table but I'm hoping to get rid
of that.

> 
> 
> > I just
> > need to check a few things first about the actual solution. I would see
> > something in these lines:
> >
> > 1) Add a dynamically allocated array of miptrees into miptree. This would
> >contain miptree instance per level.
> >
> > 2) Still uses one buffer object containing space for all levels. The
> > instances
> >in the array would either have their ::bo pointer zero or pointing to
> > the
> >parent ::bo. In both cases ::offset would point the start of the level.
> >
> 
> Yes
> 
> 
> > 3) Instances in the array are not reference counted and therefore deleted
> >simply by deallocating the malloced chunk underneath.
> >
> 
> If we have one isl_surf per miplevel and not a miptree per level, then I
> don't think this is an issue.
> 
> 
> > 4) Add similar dynamically allocated array of intel_miptree_aux_buffer
> >instances for hiz. Here also use one ::bo which would need to added to
> >miptree I think cause there ins't one in miptree. Or perhaps add the
> >array of aux buffers to aux buffer?
> >
> 
> Looking at intel_miptree_aux_buffer, I think what we would end up with is
> an array of aux_buffers
> 
> 
> > 5) ISL doesn't need to know about this and hence we would add the total
> > space
> >calculator along with ::offset initialization in i965 (brw_tex_layout,
> >I think).
> >
> 
> That's fine.  We already do that in Vulkan with anv_surface.  ::offset
> calculation can be done easily enough by just adding sizes.
> 
> 
> > 6) In Vulkan <-> GL interop, we'd pass single level arrays only as ISL
> > didn't
> >know about back-2-back. Or we simply don't care about gen6 as Vulkan
> >doesn't support it anyhow?
> >
> 
> Yeah, we don't care about gen6.
> 
> --Jason
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] i965: Use isl for hiz and stencil

On Tue, May 09, 2017 at 08:48:08AM -0700, Jason Ekstrand wrote:
> All of the below being said, I'm fine with landing things with a
> BACK_TO_BACK layout and then making the switch to one isl_surf per miplevel
> later if that's an easier path.

I can give the "array of ISL surfaces" approach a spin to see how it looks,
I'm curious to see if few things become a little cleaner.

> 
> On Tue, May 9, 2017 at 8:45 AM, Jason Ekstrand  wrote:
> 
> > On Mon, May 8, 2017 at 11:10 PM, Pohjolainen, Topi <
> > topi.pohjolai...@gmail.com> wrote:
> >
> >> On Mon, May 08, 2017 at 04:51:35PM -0700, Jason Ekstrand wrote:
> >> > On Wed, May 3, 2017 at 2:22 AM, Topi Pohjolainen <
> >> topi.pohjolai...@gmail.com
> >> > > wrote:
> >> >
> >> > > Patches 1-17 are revision that
> >> > >
> >> > >   - rework hiz on gen6 to use on-demand offset calculator allowing
> >> > > one to drop dependency to miptree structure and
> >> > >   - rework all auxiliary surfaces to be created against isl directly.
> >> > >
> >> > > Patches 18 and 19 introduce new surface layout in ISL. This is called
> >> > > back-to-back and similar to layout ALL_SLICES_AT_EACH_LOD found in
> >> > > i965 for gen6 hiz and stencil. This layout stacks slices for each
> >> level
> >> > > after one and other, or back to back. All slices ate each lod is
> >> almost
> >> > > the same except that it places levels one and two side-by-side trying
> >> > > to preserve space. Back-to-back wastes a little more memory but aligns
> >> > > each level on page boundary simplifying driver logic.
> >> > >
> >> >
> >> > My primary gripe here is that you seem to have half-added back-to-back
> >> to
> >> > ISL.  If this layout is a long-term thing, then we should add a new
> >> > ISL_DIM_LAYOUT_GEN6_BACK_TO_BACK layout and plumb your offset function
> >> > through isl_surf_get_image_offset_sa.  Is this intended to be a
> >> permanent
> >> > solution?  I think eventually, I'd like us to go with one surf per
> >> miplevel
> >> > (which is almost the same) but I can see how this is easier at the
> >> moment.
> >> > However, I think this works sufficiently well that I'm ok with doing the
> >> > back-to-back thing for a while.
> >>
> >> I thought about adding new layout type but couldn't decide which way is
> >> better. It is easy to buy your arguments in favor, and I'm happy to give
> >> it
> >> a go.
> >> If miptree per level is your number one choice, then lets go with that.
> >
> >
> > I said "one surf per miplevel".  I see no reason why we need N miptrees.
> >
> >
> >> I just
> >> need to check a few things first about the actual solution. I would see
> >> something in these lines:
> >>
> >> 1) Add a dynamically allocated array of miptrees into miptree. This would
> >>contain miptree instance per level.
> >>
> >> 2) Still uses one buffer object containing space for all levels. The
> >> instances
> >>in the array would either have their ::bo pointer zero or pointing to
> >> the
> >>parent ::bo. In both cases ::offset would point the start of the level.
> >>
> >
> > Yes
> >
> >
> >> 3) Instances in the array are not reference counted and therefore deleted
> >>simply by deallocating the malloced chunk underneath.
> >>
> >
> > If we have one isl_surf per miplevel and not a miptree per level, then I
> > don't think this is an issue.
> >
> >
> >> 4) Add similar dynamically allocated array of intel_miptree_aux_buffer
> >>instances for hiz. Here also use one ::bo which would need to added to
> >>miptree I think cause there ins't one in miptree. Or perhaps add the
> >>array of aux buffers to aux buffer?
> >>
> >
> > Looking at intel_miptree_aux_buffer, I think what we would end up with is
> > an array of aux_buffers
> >
> >
> >> 5) ISL doesn't need to know about this and hence we would add the total
> >> space
> >>calculator along with ::offset initialization in i965 (brw_tex_layout,
> >>I think).
> >>
> >
> > That's fine.  We already do that in Vulkan with anv_surface.  ::offset
> > calculation can be done easily enough by just adding sizes.
> >
> >
> >> 6) In Vulkan <-> GL interop, we'd pass single level arrays only as ISL
> >> didn't
> >>know about back-2-back. Or we simply don't care about gen6 as Vulkan
> >>doesn't support it anyhow?
> >>
> >
> > Yeah, we don't care about gen6.
> >
> > --Jason
> >
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v3 0/4] Disable glthread if libX11 isn't thread-safe

Gregory Hainaut  writes:

> On 5/8/17, Emil Velikov  wrote:
>> Having a look at Xlib might be good indeed.
>>
>> Then again, the solution you've proposed looks perfectly reasonable,
>> IMHO. It handles the problem _now_ and should also work when/if we
>> address Xlib.
>> I'll take another look today/tomorrow, but I think the series is
>> perfectly fine to land as-is.
>>
>> Thanks
>> Emil
>
> Hello Emil,
>
> Yes you're right. And potentially it can be back-ported to stable
> branch. Besides it would allow to know which applications aren't X
> thread safe. And potentially app owners can fix the issue too.
>
> By the way, I don't have commit access so feel free to push the series :)
>
> On 5/8/17, Eric Anholt  wrote:
>> gregory hainaut  writes:
>>
>>> On Fri, 5 May 2017 17:45:01 +0200
>>> Axel Davy  wrote:
>>>
 Hi,

 There should be very few X11 calls while rendering (basically only at
 the beginning or end of a frame).

 Why not just always run these calls in the main thread (and wait for
 glthread work to finish) ?

 That's basically what we do for gallium nine.

 Yours,

 Axel
>>>
>>> Hello Axel,
>>>
>>> Yes it is another possibility. It would requires to track gl calls that
>>> end up in X11.
>>> I'm not sure if there is an easy way to list all those gl functions. There
>>> are at least the
>>> draw calls and maybe the clear operations. Besides I'm afraid that we will
>>> need to handle
>>> various corner cases of the OpenGL API. It is doable but likely more
>>> complicated.
>>
>> General GL calls (draws, clears) won't call X11 except for DRI2's
>> GetBuffers.  If you're in DRI3, I believe you won't need to worry about
>> that at all.
>>
>> I think this patch is a good start, though.
>>
>
> Hello Eric,
>
> Indeed I saw this behavior on Nouveau. I got a crash on
> GetBuffersWithFormat on DRI2 but it was fine on DRI3. I still kept the
> check on GLX/DRI3 because I don't know if we have a strong guarantee
> that X11 is never used.

I would be surprised if there was a path that hit X through GL calls as
opposed to the window system, with DRI3.  GetBuffersWithFormat was the
problem.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [v2 37/39] intel/isl/gen7/hack: Use stencil vertical alignment of 8 instead of 4

Woah, wall of e-mails. :-)

On Tue, May 9, 2017 at 5:02 AM, Pohjolainen, Topi <
topi.pohjolai...@gmail.com> wrote:

> On Tue, May 09, 2017 at 02:44:35PM +0300, Pohjolainen, Topi wrote:
> > On Tue, May 09, 2017 at 02:36:47PM +0300, Pohjolainen, Topi wrote:
> > > On Tue, May 09, 2017 at 02:29:13PM +0300, Pohjolainen, Topi wrote:
> > > > On Tue, May 09, 2017 at 08:25:28AM +0300, Pohjolainen, Topi wrote:
> > > > > On Mon, May 08, 2017 at 03:31:55PM -0700, Jason Ekstrand wrote:
> > > > > > On Wed, May 3, 2017 at 2:22 AM, Topi Pohjolainen <
> topi.pohjolai...@gmail.com
> > > > > > > wrote:
> > > > > >
> > > > > > > Signed-off-by: Topi Pohjolainen 
> > > > > > > ---
> > > > > > >  src/intel/isl/isl_gen7.c | 6 +-
> > > > > > >  1 file changed, 5 insertions(+), 1 deletion(-)
> > > > > > >
> > > > > > > diff --git a/src/intel/isl/isl_gen7.c
> b/src/intel/isl/isl_gen7.c
> > > > > > > index 18687b5..cf5b377 100644
> > > > > > > --- a/src/intel/isl/isl_gen7.c
> > > > > > > +++ b/src/intel/isl/isl_gen7.c
> > > > > > > @@ -375,7 +375,11 @@ gen7_choose_valign_el(const struct
> isl_device *dev,
> > > > > > > * FINISHME(chadv): Decide to set valign=4 or valign=8
> after isl's
> > > > > > > API
> > > > > > > * is more polished.
> > > > > > > */
> > > > > > > -  require_valign4 = true;
> > > > > > > +
> > > > > > > +  /* Using valign 4 upsets all
> depthstencil-render-miplevels on IVB
> > > > > > > and
> > > > > > > +   * HSW. Use alignment 8 instead.
> > > > > > > +   */
> > > > > > > +  return 8;
> > > > > > >
> > > > > >
> > > > > > I'm confused.  This seems to contradict the documentation which
> says that
> > > > > > valign for depth is hard-coded to 4 in the hardware.
> > > > >
> > > > > Just as I am. It took me a while to actually try increasing the
> alignment - it
> > > > > was just after I compared what the current logic in i965 does. The
> thing here
> > > > > is not the alignment itself but the amount of space that gets
> allocated. I'll
> > > > > get back with more details shortly.
> > > >
> > > > In intel_miptree_set_alignment() we have:
> > > >
> > > >} else if (mt->format == MESA_FORMAT_S_UINT8) {
> > > >   mt->halign = 8;
> > > >   mt->valign = brw->gen >= 7 ? 8 : 4;
> > > >} else {
> > > >
> > > > There are a few refactors it seems but I'll try to find the original
> commit.
> > >
> > > Well that looks to go a long way back to a file named
> intel_tex_layout.c.
> > > Some documentation is still left:
> > >
> > >/**
> > > * +---
> ---+
> > > * || alignment unit height
> ("j") |
> > > * | Surface Property
>  |-|
> > > * || 915 | 965 | ILK | SNB
> | IVB |
> > > * +---
> ---+
> > > * | BC1-5 compressed format (DXTn/S3TC)|  4  |  4  |  4  |  4
> |  4  |
> > > * | FXT1  compressed format|  4  |  4  |  4  |  4
> |  4  |
> > > * | Depth Buffer   |  2  |  2  |  2  |  4
> |  4  |
> > > * | Separate Stencil Buffer| N/A | N/A | N/A |  4
> |  8  |
> > > * | Multisampled (4x or 8x) render target  | N/A | N/A | N/A |  4
> |  4  |
> > > * | All Others |  2  |  2  |  2  |  *
> |  *  |
> > > * +---
> ---+
> > > *
> > > * Where "*" means either VALIGN_2 or VALIGN_4 depending on the
> setting of
> > > * the SURFACE_STATE "Surface Vertical Alignment" field.
> > > */
> > >
> > > Original included also a reference to bspec:
> > >
> > > * From the "Alignment Unit Size" section of various specs, namely:
> > > * - Gen3 Spec: "Memory Data Formats" Volume, Section
> 1.20.1.4
> > > * - i965 and G45 PRMs: Volume 1, Section
> 6.17.3.4.
> > > * - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section
> 7.18.3.4
> > > * - BSpec (for Ivybridge and slight variations in separate stencil)
> > >
> > >
> >
> > Original commit:
> >
> > commit e7e81714f3ecf67a975d35e74bdb7fd15d924e4d
> > Author: Kenneth Graunke 
> > Date:   Mon Nov 7 15:58:43 2011 -0800
> >
> > i965: Implement the actual tables for texture alignment units [v2]
> >
> > I implemented functions for horizontal/vertical alignment units
> separately
> > because I find it easier to read that way...especially with all the
> > corner-cases.
> >
> > [chad] Corrected the vertical alignment calculation by checking for
> > depthstencil formats.
> >
> > v2:
> >- Fix typos in intel_horizontal_texture_alignment_unit():
> >  s/height/width/ and s/VALIGN/HALIGN.
> >- Remove special case for compressed formats in
> >

[Mesa-dev] [Bug 100928] exported opengl function in generated dll was mangled when building with msvc 2015 Update 3

2017-05-09 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=100928

George Kyriazis  changed:

   What|Removed |Added

 Resolution|--- |NOTABUG
 Status|NEW |RESOLVED
 CC||george.kyria...@intel.com

--- Comment #4 from George Kyriazis  ---
Alternatively, you could use wglCreateContextAttribsARB().

Closing as not a bug.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] i965: Use isl for hiz and stencil

All of the below being said, I'm fine with landing things with a
BACK_TO_BACK layout and then making the switch to one isl_surf per miplevel
later if that's an easier path.

On Tue, May 9, 2017 at 8:45 AM, Jason Ekstrand  wrote:

> On Mon, May 8, 2017 at 11:10 PM, Pohjolainen, Topi <
> topi.pohjolai...@gmail.com> wrote:
>
>> On Mon, May 08, 2017 at 04:51:35PM -0700, Jason Ekstrand wrote:
>> > On Wed, May 3, 2017 at 2:22 AM, Topi Pohjolainen <
>> topi.pohjolai...@gmail.com
>> > > wrote:
>> >
>> > > Patches 1-17 are revision that
>> > >
>> > >   - rework hiz on gen6 to use on-demand offset calculator allowing
>> > > one to drop dependency to miptree structure and
>> > >   - rework all auxiliary surfaces to be created against isl directly.
>> > >
>> > > Patches 18 and 19 introduce new surface layout in ISL. This is called
>> > > back-to-back and similar to layout ALL_SLICES_AT_EACH_LOD found in
>> > > i965 for gen6 hiz and stencil. This layout stacks slices for each
>> level
>> > > after one and other, or back to back. All slices ate each lod is
>> almost
>> > > the same except that it places levels one and two side-by-side trying
>> > > to preserve space. Back-to-back wastes a little more memory but aligns
>> > > each level on page boundary simplifying driver logic.
>> > >
>> >
>> > My primary gripe here is that you seem to have half-added back-to-back
>> to
>> > ISL.  If this layout is a long-term thing, then we should add a new
>> > ISL_DIM_LAYOUT_GEN6_BACK_TO_BACK layout and plumb your offset function
>> > through isl_surf_get_image_offset_sa.  Is this intended to be a
>> permanent
>> > solution?  I think eventually, I'd like us to go with one surf per
>> miplevel
>> > (which is almost the same) but I can see how this is easier at the
>> moment.
>> > However, I think this works sufficiently well that I'm ok with doing the
>> > back-to-back thing for a while.
>>
>> I thought about adding new layout type but couldn't decide which way is
>> better. It is easy to buy your arguments in favor, and I'm happy to give
>> it
>> a go.
>> If miptree per level is your number one choice, then lets go with that.
>
>
> I said "one surf per miplevel".  I see no reason why we need N miptrees.
>
>
>> I just
>> need to check a few things first about the actual solution. I would see
>> something in these lines:
>>
>> 1) Add a dynamically allocated array of miptrees into miptree. This would
>>contain miptree instance per level.
>>
>> 2) Still uses one buffer object containing space for all levels. The
>> instances
>>in the array would either have their ::bo pointer zero or pointing to
>> the
>>parent ::bo. In both cases ::offset would point the start of the level.
>>
>
> Yes
>
>
>> 3) Instances in the array are not reference counted and therefore deleted
>>simply by deallocating the malloced chunk underneath.
>>
>
> If we have one isl_surf per miplevel and not a miptree per level, then I
> don't think this is an issue.
>
>
>> 4) Add similar dynamically allocated array of intel_miptree_aux_buffer
>>instances for hiz. Here also use one ::bo which would need to added to
>>miptree I think cause there ins't one in miptree. Or perhaps add the
>>array of aux buffers to aux buffer?
>>
>
> Looking at intel_miptree_aux_buffer, I think what we would end up with is
> an array of aux_buffers
>
>
>> 5) ISL doesn't need to know about this and hence we would add the total
>> space
>>calculator along with ::offset initialization in i965 (brw_tex_layout,
>>I think).
>>
>
> That's fine.  We already do that in Vulkan with anv_surface.  ::offset
> calculation can be done easily enough by just adding sizes.
>
>
>> 6) In Vulkan <-> GL interop, we'd pass single level arrays only as ISL
>> didn't
>>know about back-2-back. Or we simply don't care about gen6 as Vulkan
>>doesn't support it anyhow?
>>
>
> Yeah, we don't care about gen6.
>
> --Jason
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] i965: Use isl for hiz and stencil

On Mon, May 8, 2017 at 11:10 PM, Pohjolainen, Topi <
topi.pohjolai...@gmail.com> wrote:

> On Mon, May 08, 2017 at 04:51:35PM -0700, Jason Ekstrand wrote:
> > On Wed, May 3, 2017 at 2:22 AM, Topi Pohjolainen <
> topi.pohjolai...@gmail.com
> > > wrote:
> >
> > > Patches 1-17 are revision that
> > >
> > >   - rework hiz on gen6 to use on-demand offset calculator allowing
> > > one to drop dependency to miptree structure and
> > >   - rework all auxiliary surfaces to be created against isl directly.
> > >
> > > Patches 18 and 19 introduce new surface layout in ISL. This is called
> > > back-to-back and similar to layout ALL_SLICES_AT_EACH_LOD found in
> > > i965 for gen6 hiz and stencil. This layout stacks slices for each level
> > > after one and other, or back to back. All slices ate each lod is almost
> > > the same except that it places levels one and two side-by-side trying
> > > to preserve space. Back-to-back wastes a little more memory but aligns
> > > each level on page boundary simplifying driver logic.
> > >
> >
> > My primary gripe here is that you seem to have half-added back-to-back to
> > ISL.  If this layout is a long-term thing, then we should add a new
> > ISL_DIM_LAYOUT_GEN6_BACK_TO_BACK layout and plumb your offset function
> > through isl_surf_get_image_offset_sa.  Is this intended to be a permanent
> > solution?  I think eventually, I'd like us to go with one surf per
> miplevel
> > (which is almost the same) but I can see how this is easier at the
> moment.
> > However, I think this works sufficiently well that I'm ok with doing the
> > back-to-back thing for a while.
>
> I thought about adding new layout type but couldn't decide which way is
> better. It is easy to buy your arguments in favor, and I'm happy to give it
> a go.
> If miptree per level is your number one choice, then lets go with that.


I said "one surf per miplevel".  I see no reason why we need N miptrees.


> I just
> need to check a few things first about the actual solution. I would see
> something in these lines:
>
> 1) Add a dynamically allocated array of miptrees into miptree. This would
>contain miptree instance per level.
>
> 2) Still uses one buffer object containing space for all levels. The
> instances
>in the array would either have their ::bo pointer zero or pointing to
> the
>parent ::bo. In both cases ::offset would point the start of the level.
>

Yes


> 3) Instances in the array are not reference counted and therefore deleted
>simply by deallocating the malloced chunk underneath.
>

If we have one isl_surf per miplevel and not a miptree per level, then I
don't think this is an issue.


> 4) Add similar dynamically allocated array of intel_miptree_aux_buffer
>instances for hiz. Here also use one ::bo which would need to added to
>miptree I think cause there ins't one in miptree. Or perhaps add the
>array of aux buffers to aux buffer?
>

Looking at intel_miptree_aux_buffer, I think what we would end up with is
an array of aux_buffers


> 5) ISL doesn't need to know about this and hence we would add the total
> space
>calculator along with ::offset initialization in i965 (brw_tex_layout,
>I think).
>

That's fine.  We already do that in Vulkan with anv_surface.  ::offset
calculation can be done easily enough by just adding sizes.


> 6) In Vulkan <-> GL interop, we'd pass single level arrays only as ISL
> didn't
>know about back-2-back. Or we simply don't care about gen6 as Vulkan
>doesn't support it anyhow?
>

Yeah, we don't care about gen6.

--Jason
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 11/11] radeonsi: dump compute descriptor lists

2017-05-09 Thread Marek Olšák

Reviewed-by: Marek Olšák 

Marek

On May 8, 2017 5:39 PM, "Nicolai Hähnle"  wrote:

From: Nicolai Hähnle 

---
 src/gallium/drivers/radeonsi/si_debug.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/
radeonsi/si_debug.c
index d39b303..d08a8fc 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -490,20 +490,29 @@ static void si_dump_descriptors(struct si_context
*sctx,
 static void si_dump_gfx_descriptors(struct si_context *sctx,
const struct si_shader_ctx_state *state,
FILE *f)
 {
if (!state->cso || !state->current)
return;

si_dump_descriptors(sctx, state->cso->type, >cso->info, f);
 }

+static void si_dump_compute_descriptors(struct si_context *sctx, FILE *f)
+{
+   if (!sctx->cs_shader_state.program ||
+   sctx->cs_shader_state.program != sctx->cs_shader_state.emitted_
program)
+   return;
+
+   si_dump_descriptors(sctx, PIPE_SHADER_COMPUTE, NULL, f);
+}
+
 struct si_shader_inst {
char text[160];  /* one disasm line */
unsigned offset; /* instruction offset */
unsigned size;   /* instruction size = 4 or 8 */
 };

 /* Split a disassembly string into lines and add them to the array pointed
  * to by "instructions". */
 static void si_add_split_disasm(const char *disasm,
uint64_t start_addr,
@@ -793,20 +802,21 @@ static void si_dump_debug_state(struct pipe_context
*ctx, FILE *f,
si_dump_command("Wave information", "umr -O bits
-wa", f);
}

si_dump_descriptor_list(>descriptors[SI_DESCS_RW_
BUFFERS],
"", "RW buffers",
SI_NUM_RW_BUFFERS, f);
si_dump_gfx_descriptors(sctx, >vs_shader, f);
si_dump_gfx_descriptors(sctx, >tcs_shader, f);
si_dump_gfx_descriptors(sctx, >tes_shader, f);
si_dump_gfx_descriptors(sctx, >gs_shader, f);
si_dump_gfx_descriptors(sctx, >ps_shader, f);
+   si_dump_compute_descriptors(sctx, f);
}

if (flags & PIPE_DUMP_LAST_COMMAND_BUFFER) {
si_dump_bo_list(sctx, >last_gfx, f);
si_dump_last_ib(sctx, f);

fprintf(f, "Done.\n");

/* dump only once */
radeon_clear_saved_cs(>last_gfx);
--
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [v2 37/39] intel/isl/gen7/hack: Use stencil vertical alignment of 8 instead of 4

On Tue, May 09, 2017 at 02:44:35PM +0300, Pohjolainen, Topi wrote:
> On Tue, May 09, 2017 at 02:36:47PM +0300, Pohjolainen, Topi wrote:
> > On Tue, May 09, 2017 at 02:29:13PM +0300, Pohjolainen, Topi wrote:
> > > On Tue, May 09, 2017 at 08:25:28AM +0300, Pohjolainen, Topi wrote:
> > > > On Mon, May 08, 2017 at 03:31:55PM -0700, Jason Ekstrand wrote:
> > > > > On Wed, May 3, 2017 at 2:22 AM, Topi Pohjolainen 
> > > > >  > > > > > wrote:
> > > > > 
> > > > > > Signed-off-by: Topi Pohjolainen 
> > > > > > ---
> > > > > >  src/intel/isl/isl_gen7.c | 6 +-
> > > > > >  1 file changed, 5 insertions(+), 1 deletion(-)
> > > > > >
> > > > > > diff --git a/src/intel/isl/isl_gen7.c b/src/intel/isl/isl_gen7.c
> > > > > > index 18687b5..cf5b377 100644
> > > > > > --- a/src/intel/isl/isl_gen7.c
> > > > > > +++ b/src/intel/isl/isl_gen7.c
> > > > > > @@ -375,7 +375,11 @@ gen7_choose_valign_el(const struct isl_device 
> > > > > > *dev,
> > > > > > * FINISHME(chadv): Decide to set valign=4 or valign=8 after 
> > > > > > isl's
> > > > > > API
> > > > > > * is more polished.
> > > > > > */
> > > > > > -  require_valign4 = true;
> > > > > > +
> > > > > > +  /* Using valign 4 upsets all depthstencil-render-miplevels 
> > > > > > on IVB
> > > > > > and
> > > > > > +   * HSW. Use alignment 8 instead.
> > > > > > +   */
> > > > > > +  return 8;
> > > > > >
> > > > > 
> > > > > I'm confused.  This seems to contradict the documentation which says 
> > > > > that
> > > > > valign for depth is hard-coded to 4 in the hardware.
> > > > 
> > > > Just as I am. It took me a while to actually try increasing the 
> > > > alignment - it
> > > > was just after I compared what the current logic in i965 does. The 
> > > > thing here
> > > > is not the alignment itself but the amount of space that gets 
> > > > allocated. I'll
> > > > get back with more details shortly.
> > > 
> > > In intel_miptree_set_alignment() we have:
> > > 
> > >} else if (mt->format == MESA_FORMAT_S_UINT8) {
> > >   mt->halign = 8;
> > >   mt->valign = brw->gen >= 7 ? 8 : 4;
> > >} else {
> > > 
> > > There are a few refactors it seems but I'll try to find the original 
> > > commit.
> > 
> > Well that looks to go a long way back to a file named intel_tex_layout.c.
> > Some documentation is still left:
> > 
> >/**
> > * 
> > +--+
> > * || alignment unit height 
> > ("j") |
> > * | Surface Property   
> > |-|
> > * || 915 | 965 | ILK | SNB | 
> > IVB |
> > * 
> > +--+
> > * | BC1-5 compressed format (DXTn/S3TC)|  4  |  4  |  4  |  4  |  4 
> >  |
> > * | FXT1  compressed format|  4  |  4  |  4  |  4  |  4 
> >  |
> > * | Depth Buffer   |  2  |  2  |  2  |  4  |  4 
> >  |
> > * | Separate Stencil Buffer| N/A | N/A | N/A |  4  |  8 
> >  |
> > * | Multisampled (4x or 8x) render target  | N/A | N/A | N/A |  4  |  4 
> >  |
> > * | All Others |  2  |  2  |  2  |  *  |  * 
> >  |
> > * 
> > +--+
> > *
> > * Where "*" means either VALIGN_2 or VALIGN_4 depending on the setting 
> > of
> > * the SURFACE_STATE "Surface Vertical Alignment" field.
> > */
> > 
> > Original included also a reference to bspec:
> > 
> > * From the "Alignment Unit Size" section of various specs, namely:
> > * - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4
> > * - i965 and G45 PRMs: Volume 1, Section 6.17.3.4.
> > * - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4
> > * - BSpec (for Ivybridge and slight variations in separate stencil)
> > 
> > 
> 
> Original commit:
> 
> commit e7e81714f3ecf67a975d35e74bdb7fd15d924e4d
> Author: Kenneth Graunke 
> Date:   Mon Nov 7 15:58:43 2011 -0800
> 
> i965: Implement the actual tables for texture alignment units [v2]
> 
> I implemented functions for horizontal/vertical alignment units separately
> because I find it easier to read that way...especially with all the
> corner-cases.
> 
> [chad] Corrected the vertical alignment calculation by checking for
> depthstencil formats.
> 
> v2:
>- Fix typos in intel_horizontal_texture_alignment_unit():
>  s/height/width/ and s/VALIGN/HALIGN.
>- Remove special case for compressed formats in
>  intel_get_texture_alignment unit(). Compressed formats are already
>  handled in the halign and valign functions.
>- Replace check

Re: [Mesa-dev] Sampling DRM_FORMAT_YUYV in GLSL

Hi Daniel,

Hi Volker,

On 9 May 2017 at 12:32, Volker Vogelhuber
wrote:

On 09.05.2017 12:59, Philipp Zabel wrote:

You create two separate EGLImages, calling eglCreateImage once for each
plane. See for example:

https://cgit.freedesktop.org/gstreamer/gst-plugins-bad/tree/gst-libs/gst/gl/gstglupload.c#n646

https://cgit.freedesktop.org/wayland/weston/tree/libweston/gl-renderer.c#n1536

That assumes I have two planes available. But as mentioned the planes
are created internally by dri2_create_image_dma_buf during my call of
eglCreateImage.
So from the API point of view I only have one FD from the V4L2 buffer that
is of type
FOURCC('Y','U','Y','V'). Passing that FD to eglCreateImage returns only one
EGLImage.
Although understanding gstreamer code is always a bit hard, I think in their
case they have
a mulit plane V4L2 buffer, where I only have a single plane buffer.

What Philipp is suggesting is that you import the same plane to an
EGLImage twice: once as DRM_FORMAT_RG8 and once as
DRM_FORMAT_ARGB. This gives you two EGLImages, which you bind to
two separate texture units / samplers: you sample the Y channel from
the DRM_FORMAT_RG8 image (ignoring the other channel), and you sample
the U and V channels from the DRM_FORMAT_ARGB image (ignoring the
other two channels). This is what GStreamer and Weston do when
confronted with such a buffer, and it does work.

That's why I wonder if there is a way to retrieve the "second" EGLImage
after eglCreateImage
has been called for the single V4L2 DMABUF file descriptor.

There is not a way to do this.

Ah, OK, now I got it. Thanks! I guess the DRM_FORMAT_ARGB
width parameter has to be half the original width, as I only have 16bit
per pixel.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Sampling DRM_FORMAT_YUYV in GLSL

2017-05-09 Thread Daniel Stone

Hi Volker,

On 9 May 2017 at 12:32, Volker Vogelhuber
 wrote:
> On 09.05.2017 12:59, Philipp Zabel wrote:
>> You create two separate EGLImages, calling eglCreateImage once for each
>> plane. See for example:
>>
>> https://cgit.freedesktop.org/gstreamer/gst-plugins-bad/tree/gst-libs/gst/gl/gstglupload.c#n646
>>
>> or
>>
>> https://cgit.freedesktop.org/wayland/weston/tree/libweston/gl-renderer.c#n1536
>
> That assumes I have two planes available. But as mentioned the planes
> are created internally by dri2_create_image_dma_buf during my call of
> eglCreateImage.
> So from the API point of view I only have one FD from the V4L2 buffer that
> is of type
> FOURCC('Y','U','Y','V'). Passing that FD to eglCreateImage returns only one
> EGLImage.
> Although understanding gstreamer code is always a bit hard, I think in their
> case they have
> a mulit plane V4L2 buffer, where I only have a single plane buffer.

What Philipp is suggesting is that you import the same plane to an
EGLImage twice: once as DRM_FORMAT_RG8 and once as
DRM_FORMAT_ARGB. This gives you two EGLImages, which you bind to
two separate texture units / samplers: you sample the Y channel from
the DRM_FORMAT_RG8 image (ignoring the other channel), and you sample
the U and V channels from the DRM_FORMAT_ARGB image (ignoring the
other two channels). This is what GStreamer and Weston do when
confronted with such a buffer, and it does work.

> That's why I wonder if there is a way to retrieve the "second" EGLImage
> after eglCreateImage
> has been called for the single V4L2 DMABUF file descriptor.

There is not a way to do this.

Cheers,
Daniel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [v2 37/39] intel/isl/gen7/hack: Use stencil vertical alignment of 8 instead of 4

On Tue, May 09, 2017 at 02:36:47PM +0300, Pohjolainen, Topi wrote:
> On Tue, May 09, 2017 at 02:29:13PM +0300, Pohjolainen, Topi wrote:
> > On Tue, May 09, 2017 at 08:25:28AM +0300, Pohjolainen, Topi wrote:
> > > On Mon, May 08, 2017 at 03:31:55PM -0700, Jason Ekstrand wrote:
> > > > On Wed, May 3, 2017 at 2:22 AM, Topi Pohjolainen 
> > > >  > > > > wrote:
> > > > 
> > > > > Signed-off-by: Topi Pohjolainen 
> > > > > ---
> > > > >  src/intel/isl/isl_gen7.c | 6 +-
> > > > >  1 file changed, 5 insertions(+), 1 deletion(-)
> > > > >
> > > > > diff --git a/src/intel/isl/isl_gen7.c b/src/intel/isl/isl_gen7.c
> > > > > index 18687b5..cf5b377 100644
> > > > > --- a/src/intel/isl/isl_gen7.c
> > > > > +++ b/src/intel/isl/isl_gen7.c
> > > > > @@ -375,7 +375,11 @@ gen7_choose_valign_el(const struct isl_device 
> > > > > *dev,
> > > > > * FINISHME(chadv): Decide to set valign=4 or valign=8 after 
> > > > > isl's
> > > > > API
> > > > > * is more polished.
> > > > > */
> > > > > -  require_valign4 = true;
> > > > > +
> > > > > +  /* Using valign 4 upsets all depthstencil-render-miplevels on 
> > > > > IVB
> > > > > and
> > > > > +   * HSW. Use alignment 8 instead.
> > > > > +   */
> > > > > +  return 8;
> > > > >
> > > > 
> > > > I'm confused.  This seems to contradict the documentation which says 
> > > > that
> > > > valign for depth is hard-coded to 4 in the hardware.
> > > 
> > > Just as I am. It took me a while to actually try increasing the alignment 
> > > - it
> > > was just after I compared what the current logic in i965 does. The thing 
> > > here
> > > is not the alignment itself but the amount of space that gets allocated. 
> > > I'll
> > > get back with more details shortly.
> > 
> > In intel_miptree_set_alignment() we have:
> > 
> >} else if (mt->format == MESA_FORMAT_S_UINT8) {
> >   mt->halign = 8;
> >   mt->valign = brw->gen >= 7 ? 8 : 4;
> >} else {
> > 
> > There are a few refactors it seems but I'll try to find the original commit.
> 
> Well that looks to go a long way back to a file named intel_tex_layout.c.
> Some documentation is still left:
> 
>/**
> * +--+
> * || alignment unit height ("j") |
> * | Surface Property   |-|
> * || 915 | 965 | ILK | SNB | IVB |
> * +--+
> * | BC1-5 compressed format (DXTn/S3TC)|  4  |  4  |  4  |  4  |  4  |
> * | FXT1  compressed format|  4  |  4  |  4  |  4  |  4  |
> * | Depth Buffer   |  2  |  2  |  2  |  4  |  4  |
> * | Separate Stencil Buffer| N/A | N/A | N/A |  4  |  8  |
> * | Multisampled (4x or 8x) render target  | N/A | N/A | N/A |  4  |  4  |
> * | All Others |  2  |  2  |  2  |  *  |  *  |
> * +--+
> *
> * Where "*" means either VALIGN_2 or VALIGN_4 depending on the setting of
> * the SURFACE_STATE "Surface Vertical Alignment" field.
> */
> 
> Original included also a reference to bspec:
> 
> * From the "Alignment Unit Size" section of various specs, namely:
> * - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4
> * - i965 and G45 PRMs: Volume 1, Section 6.17.3.4.
> * - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4
> * - BSpec (for Ivybridge and slight variations in separate stencil)
> 
> 

Original commit:

commit e7e81714f3ecf67a975d35e74bdb7fd15d924e4d
Author: Kenneth Graunke 
Date:   Mon Nov 7 15:58:43 2011 -0800

i965: Implement the actual tables for texture alignment units [v2]

I implemented functions for horizontal/vertical alignment units separately
because I find it easier to read that way...especially with all the
corner-cases.

[chad] Corrected the vertical alignment calculation by checking for
depthstencil formats.

v2:
   - Fix typos in intel_horizontal_texture_alignment_unit():
 s/height/width/ and s/VALIGN/HALIGN.
   - Remove special case for compressed formats in
 intel_get_texture_alignment unit(). Compressed formats are already
 handled in the halign and valign functions.
   - Replace check ``_mesa_is_depth_format(...) ||
 _mesa_is_depthstencil_format(...)`` with explcitit checks against
 GL_DEPTH_COMPONENT and GL_DEPTH_STENCIL.

Reviewed-by: Eric Anholt 
Signed-off-by: Kenneth Graunke 
Signed-off-by: Chad Versace

Re: [Mesa-dev] [v2 37/39] intel/isl/gen7/hack: Use stencil vertical alignment of 8 instead of 4

On Tue, May 09, 2017 at 02:29:13PM +0300, Pohjolainen, Topi wrote:
> On Tue, May 09, 2017 at 08:25:28AM +0300, Pohjolainen, Topi wrote:
> > On Mon, May 08, 2017 at 03:31:55PM -0700, Jason Ekstrand wrote:
> > > On Wed, May 3, 2017 at 2:22 AM, Topi Pohjolainen 
> > >  > > > wrote:
> > > 
> > > > Signed-off-by: Topi Pohjolainen 
> > > > ---
> > > >  src/intel/isl/isl_gen7.c | 6 +-
> > > >  1 file changed, 5 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/src/intel/isl/isl_gen7.c b/src/intel/isl/isl_gen7.c
> > > > index 18687b5..cf5b377 100644
> > > > --- a/src/intel/isl/isl_gen7.c
> > > > +++ b/src/intel/isl/isl_gen7.c
> > > > @@ -375,7 +375,11 @@ gen7_choose_valign_el(const struct isl_device *dev,
> > > > * FINISHME(chadv): Decide to set valign=4 or valign=8 after 
> > > > isl's
> > > > API
> > > > * is more polished.
> > > > */
> > > > -  require_valign4 = true;
> > > > +
> > > > +  /* Using valign 4 upsets all depthstencil-render-miplevels on IVB
> > > > and
> > > > +   * HSW. Use alignment 8 instead.
> > > > +   */
> > > > +  return 8;
> > > >
> > > 
> > > I'm confused.  This seems to contradict the documentation which says that
> > > valign for depth is hard-coded to 4 in the hardware.
> > 
> > Just as I am. It took me a while to actually try increasing the alignment - 
> > it
> > was just after I compared what the current logic in i965 does. The thing 
> > here
> > is not the alignment itself but the amount of space that gets allocated. 
> > I'll
> > get back with more details shortly.
> 
> In intel_miptree_set_alignment() we have:
> 
>} else if (mt->format == MESA_FORMAT_S_UINT8) {
>   mt->halign = 8;
>   mt->valign = brw->gen >= 7 ? 8 : 4;
>} else {
> 
> There are a few refactors it seems but I'll try to find the original commit.

Well that looks to go a long way back to a file named intel_tex_layout.c.
Some documentation is still left:

   /**
* +--+
* || alignment unit height ("j") |
* | Surface Property   |-|
* || 915 | 965 | ILK | SNB | IVB |
* +--+
* | BC1-5 compressed format (DXTn/S3TC)|  4  |  4  |  4  |  4  |  4  |
* | FXT1  compressed format|  4  |  4  |  4  |  4  |  4  |
* | Depth Buffer   |  2  |  2  |  2  |  4  |  4  |
* | Separate Stencil Buffer| N/A | N/A | N/A |  4  |  8  |
* | Multisampled (4x or 8x) render target  | N/A | N/A | N/A |  4  |  4  |
* | All Others |  2  |  2  |  2  |  *  |  *  |
* +--+
*
* Where "*" means either VALIGN_2 or VALIGN_4 depending on the setting of
* the SURFACE_STATE "Surface Vertical Alignment" field.
*/

Original included also a reference to bspec:

* From the "Alignment Unit Size" section of various specs, namely:
* - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4
* - i965 and G45 PRMs: Volume 1, Section 6.17.3.4.
* - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4
* - BSpec (for Ivybridge and slight variations in separate stencil)


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Sampling DRM_FORMAT_YUYV in GLSL



On 09.05.2017 12:59, Philipp Zabel wrote:

On Tue, 2017-05-09 at 12:48 +0200, Volker Vogelhuber wrote:
[...]

Ok thanks for the clarification. There is only one missing part for the
GL_TEXTURE_2D case. The second EGLImage is created internally when
calling eglCreateImage with EGL_LINUX_DMA_BUF_EXT, so I only
have one return value I can bind to a texture using
glEGLImageTargetTexture2DOES.
Is there any "get" function to access the ARGB eglimage to bind it to
another
texture?

You create two separate EGLImages, calling eglCreateImage once for each
plane. See for example:

https://cgit.freedesktop.org/gstreamer/gst-plugins-bad/tree/gst-libs/gst/gl/gstglupload.c#n646

or

https://cgit.freedesktop.org/wayland/weston/tree/libweston/gl-renderer.c#n1536

That assumes I have two planes available. But as mentioned the planes
are created internally by dri2_create_image_dma_buf during my call of 
eglCreateImage.
So from the API point of view I only have one FD from the V4L2 buffer 
that is of type
FOURCC('Y','U','Y','V'). Passing that FD to eglCreateImage returns only 
one EGLImage.
Although understanding gstreamer code is always a bit hard, I think in 
their case they have

a mulit plane V4L2 buffer, where I only have a single plane buffer.
That's why I wonder if there is a way to retrieve the "second" EGLImage 
after eglCreateImage

has been called for the single V4L2 DMABUF file descriptor.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [v2 37/39] intel/isl/gen7/hack: Use stencil vertical alignment of 8 instead of 4

On Tue, May 09, 2017 at 08:25:28AM +0300, Pohjolainen, Topi wrote:
> On Mon, May 08, 2017 at 03:31:55PM -0700, Jason Ekstrand wrote:
> > On Wed, May 3, 2017 at 2:22 AM, Topi Pohjolainen  > > wrote:
> > 
> > > Signed-off-by: Topi Pohjolainen 
> > > ---
> > >  src/intel/isl/isl_gen7.c | 6 +-
> > >  1 file changed, 5 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/src/intel/isl/isl_gen7.c b/src/intel/isl/isl_gen7.c
> > > index 18687b5..cf5b377 100644
> > > --- a/src/intel/isl/isl_gen7.c
> > > +++ b/src/intel/isl/isl_gen7.c
> > > @@ -375,7 +375,11 @@ gen7_choose_valign_el(const struct isl_device *dev,
> > > * FINISHME(chadv): Decide to set valign=4 or valign=8 after isl's
> > > API
> > > * is more polished.
> > > */
> > > -  require_valign4 = true;
> > > +
> > > +  /* Using valign 4 upsets all depthstencil-render-miplevels on IVB
> > > and
> > > +   * HSW. Use alignment 8 instead.
> > > +   */
> > > +  return 8;
> > >
> > 
> > I'm confused.  This seems to contradict the documentation which says that
> > valign for depth is hard-coded to 4 in the hardware.
> 
> Just as I am. It took me a while to actually try increasing the alignment - it
> was just after I compared what the current logic in i965 does. The thing here
> is not the alignment itself but the amount of space that gets allocated. I'll
> get back with more details shortly.

In intel_miptree_set_alignment() we have:

   } else if (mt->format == MESA_FORMAT_S_UINT8) {
  mt->halign = 8;
  mt->valign = brw->gen >= 7 ? 8 : 4;
   } else {

There are a few refactors it seems but I'll try to find the original commit.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Sampling DRM_FORMAT_YUYV in GLSL

2017-05-09 Thread Philipp Zabel

On Tue, 2017-05-09 at 12:48 +0200, Volker Vogelhuber wrote:
[...]
> Ok thanks for the clarification. There is only one missing part for the
> GL_TEXTURE_2D case. The second EGLImage is created internally when
> calling eglCreateImage with EGL_LINUX_DMA_BUF_EXT, so I only
> have one return value I can bind to a texture using 
> glEGLImageTargetTexture2DOES.
> Is there any "get" function to access the ARGB eglimage to bind it to 
> another
> texture?

You create two separate EGLImages, calling eglCreateImage once for each
plane. See for example:

https://cgit.freedesktop.org/gstreamer/gst-plugins-bad/tree/gst-libs/gst/gl/gstglupload.c#n646

or

https://cgit.freedesktop.org/wayland/weston/tree/libweston/gl-renderer.c#n1536

regards
Philipp

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Sampling DRM_FORMAT_YUYV in GLSL




On 09.05.2017 11:42, Philipp Zabel wrote:

On Tue, 2017-05-09 at 12:31 +0300, Tapani Pälli wrote:

On 05/09/2017 12:29 PM, Tapani Pälli wrote:


On 05/09/2017 12:14 PM, Volker Vogelhuber wrote:

Hi,

first sorry, for missing the subject in my mail to the mailing list,
then thanks
for the hint with the "ext_image_dma_buf_import-sample_yuv".
Unfortunately
things don't become clearer. In the samples as far as I can see, there
is also
only one sampler defined in the shader. So how are the different
planes accessed
in the shader? In the example only a simple copy is done:
gl_FragColor = texture2D(sampler, texcoords);
In the comment in intel_screen.c it says:

/* For YUYV buffers, we set up two overlapping DRI images and treat
  * them as planar buffers in the compositors.  Plane 0 is GR88 and
  * samples YU or YV pairs and places Y into the R component, while
  * plane 1 is ARGB and samples YUYV clusters and places pairs and
  * places U into the G component and V into A.  This lets the
  * texture sampler interpolate the Y components correctly when
  * sampling from plane 0, and interpolate U and V correctly when
  * sampling from plane 1. */

So how are the pixels transfered from the YUYV memory to the vec4 in
the shader?
Do I have to calculate the chroma values by interpolating myself based
on the
current texture coordinate? Or is it done automatically in some way?
  From my
experience the texture call only returns the values from plane0 which
leads me to the
suspicion that I have to interpolate myself. But why is there then the
plane 1 which
don't seem to be accessible in the shader?

BTW: I'm using the OES_EGL_image extension not the
OES_EGL_image_external, so my
sampler is sampler2D not samplerExternalOES but that shouldn't make a
difference should it?

IMO that is a big difference as samplerExternalOES does the YUV2RGB
conversion and returns RGB values for you.

I have to add that "this is how I think it works", I haven't tried this
myself :)

This is correct. The OES_EGL_image_external extension states:

"Sampling an external texture will return an RGBA vector in the same
 colorspace as the source image.  If the source image is stored in YUV
 (or some other basis) then the YUV values will be transformed to RGB
 values. (But these RGB values will be in the same colorspace as the
 original image."

Whereas when using a GL_TEXTURE_2D target, you have to do the conversion
yourself, sampling from to separate textures specified from two
EGLImages (one RG88 for Y, and one ARGB for UV, as described above).



Ok thanks for the clarification. There is only one missing part for the
GL_TEXTURE_2D case. The second EGLImage is created internally when
calling eglCreateImage with EGL_LINUX_DMA_BUF_EXT, so I only
have one return value I can bind to a texture using 
glEGLImageTargetTexture2DOES.
Is there any "get" function to access the ARGB eglimage to bind it to 
another

texture?

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] i965: avoid fence fd dup in EGL layer

2017-05-09 Thread Tapani Pälli




On 05/05/2017 10:14 AM, Randy Xu wrote:

Follow up "i965: Solve Android native fence fd double close"
The _EGLSync.SyncFd is not neccesary to keep after pass to
dri driver.

Test: Run Vulkan and GLES stress test and no crash.


Yep I've verified as well that no crashes happen. Lots of tearing though 
but that happens independent of this patch so I'm assuming it's a bug 
elsewhere.




---
  src/egl/drivers/dri2/egl_dri2.c  | 10 ++
  src/mesa/drivers/dri/i965/brw_sync.c |  2 +-
  2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index 0be7132..9ef35d3 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -2637,6 +2637,7 @@ dri2_create_sync(_EGLDriver *drv, _EGLDisplay *dpy,
   free(dri2_sync);
   return NULL;
}
+  dri2_sync->base.SyncFd = EGL_NO_NATIVE_FENCE_FD_ANDROID;
break;
 }
  
@@ -2678,24 +2679,25 @@ dri2_dup_native_fence_fd(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync)


please rename this to dri2_get_native_fence_fd


  {
 struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
 struct dri2_egl_sync *dri2_sync = dri2_egl_sync(sync);
+   EGLint SyncFd = sync->SyncFd;
  
 assert(sync->Type == EGL_SYNC_NATIVE_FENCE_ANDROID);
  
-   if (sync->SyncFd == EGL_NO_NATIVE_FENCE_FD_ANDROID) {

+   if (SyncFd == EGL_NO_NATIVE_FENCE_FD_ANDROID) {
/* try to retrieve the actual native fence fd.. if rendering is
 * not flushed this will just return -1, aka NO_NATIVE_FENCE_FD:
 */
-  sync->SyncFd = dri2_dpy->fence->get_fence_fd(dri2_dpy->dri_screen,
+  SyncFd = dri2_dpy->fence->get_fence_fd(dri2_dpy->dri_screen,
 dri2_sync->fence);


code indentation went wrong here


 }
  
-   if (sync->SyncFd == EGL_NO_NATIVE_FENCE_FD_ANDROID) {

+   if (SyncFd == EGL_NO_NATIVE_FENCE_FD_ANDROID) {
/* if native fence fd still not created, return an error: */
_eglError(EGL_BAD_PARAMETER, "eglDupNativeFenceFDANDROID");
return EGL_NO_NATIVE_FENCE_FD_ANDROID;
 }
  
-   return dup(sync->SyncFd);

+   return SyncFd;
  }
  
  static EGLint

diff --git a/src/mesa/drivers/dri/i965/brw_sync.c 
b/src/mesa/drivers/dri/i965/brw_sync.c
index a8356c3..5b78503 100644
--- a/src/mesa/drivers/dri/i965/brw_sync.c
+++ b/src/mesa/drivers/dri/i965/brw_sync.c
@@ -470,7 +470,7 @@ brw_dri_create_fence_fd(__DRIcontext *dri_ctx, int fd)
   goto fail;
 } else {
/* Import the sync fd as an in-fence. */
-  fence->sync_fd = dup(fd);
+  fence->sync_fd = fd;
 }
  
 assert(fence->sync_fd != -1);



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v3 0/4] Disable glthread if libX11 isn't thread-safe

2017-05-09 Thread Gregory Hainaut

On 5/8/17, Emil Velikov  wrote:
> Having a look at Xlib might be good indeed.
>
> Then again, the solution you've proposed looks perfectly reasonable,
> IMHO. It handles the problem _now_ and should also work when/if we
> address Xlib.
> I'll take another look today/tomorrow, but I think the series is
> perfectly fine to land as-is.
>
> Thanks
> Emil

Hello Emil,

Yes you're right. And potentially it can be back-ported to stable
branch. Besides it would allow to know which applications aren't X
thread safe. And potentially app owners can fix the issue too.

By the way, I don't have commit access so feel free to push the series :)

On 5/8/17, Eric Anholt  wrote:
> gregory hainaut  writes:
>
>> On Fri, 5 May 2017 17:45:01 +0200
>> Axel Davy  wrote:
>>
>>> Hi,
>>>
>>> There should be very few X11 calls while rendering (basically only at
>>> the beginning or end of a frame).
>>>
>>> Why not just always run these calls in the main thread (and wait for
>>> glthread work to finish) ?
>>>
>>> That's basically what we do for gallium nine.
>>>
>>> Yours,
>>>
>>> Axel
>>
>> Hello Axel,
>>
>> Yes it is another possibility. It would requires to track gl calls that
>> end up in X11.
>> I'm not sure if there is an easy way to list all those gl functions. There
>> are at least the
>> draw calls and maybe the clear operations. Besides I'm afraid that we will
>> need to handle
>> various corner cases of the OpenGL API. It is doable but likely more
>> complicated.
>
> General GL calls (draws, clears) won't call X11 except for DRI2's
> GetBuffers.  If you're in DRI3, I believe you won't need to worry about
> that at all.
>
> I think this patch is a good start, though.
>

Hello Eric,

Indeed I saw this behavior on Nouveau. I got a crash on
GetBuffersWithFormat on DRI2 but it was fine on DRI3. I still kept the
check on GLX/DRI3 because I don't know if we have a strong guarantee
that X11 is never used.

Cheers,
Gregory
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH RFC 4/4] dri: Turn of a couple of glx extensions for gnome-shell on vmwgfx.

2017-05-09 Thread Thomas Hellstrom

On 05/09/2017 11:29 AM, Michel Dänzer wrote:
> On 09/05/17 06:12 PM, Thomas Hellstrom wrote:
>> On 05/09/2017 10:47 AM, Michel Dänzer wrote:
>>> On 09/05/17 05:32 PM, Thomas Hellstrom wrote:
 On 05/09/2017 10:05 AM, Michel Dänzer wrote:
> On 05/05/17 11:02 PM, Thomas Hellstrom wrote:
>> Increases performance on vmwgfx since we're avoiding full buffer damage 
>> and
>> since we can't sync to vertical retrace anyway.
>>
>> Signed-off-by: Thomas Hellstrom 
>> ---
>>  src/mesa/drivers/dri/common/drirc | 7 +++
>>  1 file changed, 7 insertions(+)
>>
>> diff --git a/src/mesa/drivers/dri/common/drirc 
>> b/src/mesa/drivers/dri/common/drirc
>> index 14d7713..a8f2ccf 100644
>> --- a/src/mesa/drivers/dri/common/drirc
>> +++ b/src/mesa/drivers/dri/common/drirc
>> @@ -137,4 +137,11 @@ TODO: document the other workarounds.
>>  
>>  
>>  
>> +
>> +
>> +
>> +
>> +
>> +
>> +
>>  
>>
> Why restrict this to gnome-shell? Wouldn't any application using these
> extensions be affected by the same issues?
 So far I've only looked into gnome-shell because we had a noticeable
 slowdown. The special thing with gnome-shell is that if
 GLX_EXT_buffer_age is available, it prefers a "swapbuffer with damage"
 path over copy_sub_buffer. Unfortunately the "swapbuffer with damage"
 path is implemented as ordinary swapbuffer on GLX. Not so on EGL. For
 real hardware this is a gain, I believe, since they end up page-flipping
 so I can't really claim gnome-shell is doing something wrong.
>>> I don't think it's directly related to "SwapBuffers with damage" or page
>>> flipping. The idea of GLX_EXT_buffer_age is that it lets the application
>>> know if and when the current back buffer was already used as a back
>>> buffer previously. Based on this, the application can know that it
>>> doesn't need to draw to some areas of the back buffer, because they
>>> already have the desired contents.
>>>
>>> Is the problem maybe that you need to read back the back buffer contents
>>> from the host if the application doesn't fully clear it?
>> I don't think so. My understanding of the code
>>
>> https://urldefense.proofpoint.com/v2/url?u=https-3A__git.gnome.org_browse_mutter_tree_clutter_clutter_cogl_clutter-2Dstage-2Dcogl.c=DwIDaQ=uilaK90D4TOVoH58JNXRgQ=wnSlgOCqfpNS4d02vP68_E9q2BNMCwfD2OZ_6dCFVQQ=Vv2TIj9fJv0pacKZnnUx2Rt4X0PVVVwfADJnlVmGevM=EtclRrb_yCMinIRtc71MvU1RnMFslfRza8lwX3vc1cM=
>>  
>>
>> is that when gnome-shell (based on what you write above) decides it
>> doesn't need to draw some areas of the back buffer, it eventually ends
>> up calling  cogl_onscreen_swap_buffers_with_damage(). And the cogl GLX
>> backend ends up calling glxSwapBuffers(), in effect damaging the full
>> back buffer.
>>
>> https://urldefense.proofpoint.com/v2/url?u=https-3A__git.gnome.org_browse_mutter_tree_cogl_cogl_winsys_cogl-2Dwinsys-2Dglx.c-23n2321=DwIDaQ=uilaK90D4TOVoH58JNXRgQ=wnSlgOCqfpNS4d02vP68_E9q2BNMCwfD2OZ_6dCFVQQ=Vv2TIj9fJv0pacKZnnUx2Rt4X0PVVVwfADJnlVmGevM=JtgpIf2My3zGO2lUWC7-3py5Hgpk5QOOxlUYOjhg-zE=
>>  
> Yes, this is as intended with GLX_EXT_buffer_age; if there was actual
> "SwapBuffers with damage" functionality, GLX_EXT_buffer_age would be
> kind of pointless, since the application could just only draw and swap
> the areas which changed since last time.
>
> Without page flipping, the "undamaged" areas will be unnecessarily
> blitted from back to front buffer, but GLX_EXT_buffer_age still provides
> some savings (compared to drawing the whole buffer and calling
> glXSwapBuffers) via the application not drawing those areas in the first
> place.
>
> In summary, gnome-shell is using GLX_EXT_buffer_age exactly as intended,
> I wouldn't expect other applications to use it any differently.

True, but OTOH I'm not sure other applications would provide such a
performance benefit by switching to copy_sub_buffer()
if it's *not* present. Also (although I'm not sure dri3 server-side
implements this), but I guess even vmware could to true
"page-flipping" in the sense of saving a copy, if rendering to a
redirected window.


>
 For the OML_sync_control extension, some applications may actually notcd
 behave worse with this extension present, even if the time source is
 provided by the Xserver present extension "fake" backend.
 In the gnome-shell case it's a slowdown, though.
>>> The Present fake CRTC ticks at 1 Hz, so it'll presumably slow down any
>>> application which is otherwise usable. :)
>>>
>>>
>> But the 1Hz clock is only when switched away or blanked, right?
>> Otherwise it should be a 60Hz clock. At least glxgears runs faithfully
>> at what it believes is 60Hz on Xwayland/glamor/dri3 and Xorg/vmwgfx/dri3
>> (yet to be pushed).
> Oh, I see present_fake_screen_init sets the interval to 1/60 s if the
>

Re: [Mesa-dev] [PATCH 1/3] egl/x11: Honor the EGL_PLATFORM_X11_SCREEN_EXT attribute

On 3 May 2017 at 16:57, Adam Jackson  wrote:
> Signed-off-by: Adam Jackson 
> ---
>  src/egl/drivers/dri2/platform_x11.c |  2 +-
>  src/egl/main/egldisplay.c   | 19 ---
>  src/egl/main/egldisplay.h   |  1 +
>  3 files changed, 14 insertions(+), 8 deletions(-)
>
> diff --git a/src/egl/drivers/dri2/platform_x11.c 
> b/src/egl/drivers/dri2/platform_x11.c
> index 2f1086e28f..c78656a5be 100644
> --- a/src/egl/drivers/dri2/platform_x11.c
> +++ b/src/egl/drivers/dri2/platform_x11.c
> @@ -1191,7 +1191,7 @@ dri2_get_xcb_connection(_EGLDriver *drv, _EGLDisplay 
> *disp,
>  struct dri2_egl_display *dri2_dpy)
>  {
> xcb_screen_iterator_t s;
> -   int screen = 0;
> +   int screen = (uintptr_t)disp->Options.Platform;
> const char *msg;
>
Seems like we don't propagate the screen to the DRI module. See the
createNewScreen* calls in dri2_create_screen().
Not a huge deal breaker, but can we fix that?

Pardon for missing it last round :-\

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] bin/get-fixes-pick-list.sh: don't warn if more than one, go over them

On 8 May 2017 at 14:49, Andres Gomez  wrote:
> On Mon, 2017-05-08 at 13:41 +0100, Emil Velikov wrote:
>> On 8 May 2017 at 11:52, Andres Gomez  wrote:
>> > On Mon, 2017-05-08 at 10:56 +0100, Emil Velikov wrote:
>> > > On 6 May 2017 at 15:09, Andres Gomez  wrote:
>> > > > If an identified commit was having more than one fix, we would warn
>> > > > about that and only treat the first.
>> > > >
>> > > > Now, we don't warn but treat all of them.
>> > > >
>> > >
>> > > Having read through this a couple of times, I'm not sure I fully parse 
>> > > it.
>> > >
>> > > Are you saying that you want to flesh out all the commits when there's
>> > > multiple Fixes tags?
>> >
>> > I'm not sure I'm understanding this previous sentence ...
>> >
>>
>> What is the exact intent of the patch?
>>
>> > > AFIACT that is not a wise idea, since a handful are corner cases like:
>> > >  - Fixes:\n $sha, Fixes: $piglit
>> > >  - Fixes: $sha.*Fixes:
>> > > and others.
>> >
>> > I'm unsure I get which is the problem you see in those 2 examples. At
>> > least, not if it is not a problem that was already happening with the
>> > cases in which only one "Fixes" tag was found.
>> >
>> > Maybe you can elaborate on that?
>> >
>>
>> The current WARNING approach was deliberately introduced. If commit
>> has multiple tags that means:
>>  - It might need to be split - depending on how severe the issue is
>>  - something has gone wrong - typo, miss-nomination, etc and would
>> require a few seconds more than the ordinary nominations.
>>
>> AFAICT your goal is to have the script produce a dummy list that one
>> can copy/paste, but we do not want that here.
>
> Not really.
>
> The goal of the patch is not producing as output commits that only fix
> commits that have not made it for the branch.
>
Right, thanks for the correction. This is exactly why I asked for
clarification ;-)


> The only cases in which I see this could cause a regression are these:
>   - Fixes:\n $sha, Fixes: whatever
>   - Fixes: whatever.*$sha, Fixes:
> whatever
>
> Which, in my eyes, are not any different to these already problematic
> cases:
>   - Fixes:\n $sha
>   - Fixes: whatever.*$sha
>
> Those need to be faced but I still don't see an easy way to solve them.
>
Indeed they are not that different. Note that the original script
_will_ flag the former while the updated one will not.

> In other words, I will try to follow up with another patch(es) to
> tackle those but my conclusion is that these scripts are only helpers
> and the need to go over the commit list was needed before and is needed
> after this patch.
>
Yes, they are - which is why I struggle to see why one will make them
ignore potential uses patches :-\
As said before - there is less than 5 instances flagged up every few
weeks. And with the cherry-ignore additions (thanks for those) noise
will be kept to minimum.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Sampling DRM_FORMAT_YUYV in GLSL

2017-05-09 Thread Philipp Zabel

On Tue, 2017-05-09 at 12:31 +0300, Tapani Pälli wrote:
> 
> On 05/09/2017 12:29 PM, Tapani Pälli wrote:
> > 
> > 
> > On 05/09/2017 12:14 PM, Volker Vogelhuber wrote:
> >> Hi,
> >>
> >> first sorry, for missing the subject in my mail to the mailing list, 
> >> then thanks
> >> for the hint with the "ext_image_dma_buf_import-sample_yuv". 
> >> Unfortunately
> >> things don't become clearer. In the samples as far as I can see, there 
> >> is also
> >> only one sampler defined in the shader. So how are the different 
> >> planes accessed
> >> in the shader? In the example only a simple copy is done:
> >> gl_FragColor = texture2D(sampler, texcoords);
> >> In the comment in intel_screen.c it says:
> >>
> >> /* For YUYV buffers, we set up two overlapping DRI images and treat
> >>  * them as planar buffers in the compositors.  Plane 0 is GR88 and
> >>  * samples YU or YV pairs and places Y into the R component, while
> >>  * plane 1 is ARGB and samples YUYV clusters and places pairs and
> >>  * places U into the G component and V into A.  This lets the
> >>  * texture sampler interpolate the Y components correctly when
> >>  * sampling from plane 0, and interpolate U and V correctly when
> >>  * sampling from plane 1. */
> >>
> >> So how are the pixels transfered from the YUYV memory to the vec4 in 
> >> the shader?
> >> Do I have to calculate the chroma values by interpolating myself based 
> >> on the
> >> current texture coordinate? Or is it done automatically in some way? 
> >>  From my
> >> experience the texture call only returns the values from plane0 which 
> >> leads me to the
> >> suspicion that I have to interpolate myself. But why is there then the 
> >> plane 1 which
> >> don't seem to be accessible in the shader?
> >>
> >> BTW: I'm using the OES_EGL_image extension not the 
> >> OES_EGL_image_external, so my
> >> sampler is sampler2D not samplerExternalOES but that shouldn't make a 
> >> difference should it?
> > 
> > IMO that is a big difference as samplerExternalOES does the YUV2RGB 
> > conversion and returns RGB values for you.
> 
> I have to add that "this is how I think it works", I haven't tried this 
> myself :)

This is correct. The OES_EGL_image_external extension states:

   "Sampling an external texture will return an RGBA vector in the same
colorspace as the source image.  If the source image is stored in YUV
(or some other basis) then the YUV values will be transformed to RGB
values. (But these RGB values will be in the same colorspace as the
original image."

Whereas when using a GL_TEXTURE_2D target, you have to do the conversion
yourself, sampling from to separate textures specified from two
EGLImages (one RG88 for Y, and one ARGB for UV, as described above).

regards
Philipp

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] bin/*py: honor editorconfig formatting

On 5 May 2017 at 14:09, Andres Gomez  wrote:

"Replace the two stray tabs with respective space."

> Signed-off-by: Andres Gomez 
Reviewed-by: Emil Velikov 

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 1/2] bin: use tabs for coding style on *.sh files

On 5 May 2017 at 15:49, Andres Gomez  wrote:
> v2: Instead of changing *.sh, adapt the editorconfig file (Emil).
>
Thank you.

> Signed-off-by: Andres Gomez 
Reviewed-by: Emil Velikov 

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Sampling DRM_FORMAT_YUYV in GLSL

2017-05-09 Thread Tapani Pälli




On 05/09/2017 12:29 PM, Tapani Pälli wrote:



On 05/09/2017 12:14 PM, Volker Vogelhuber wrote:

Hi,

first sorry, for missing the subject in my mail to the mailing list, 
then thanks
for the hint with the "ext_image_dma_buf_import-sample_yuv". 
Unfortunately
things don't become clearer. In the samples as far as I can see, there 
is also
only one sampler defined in the shader. So how are the different 
planes accessed

in the shader? In the example only a simple copy is done:
gl_FragColor = texture2D(sampler, texcoords);
In the comment in intel_screen.c it says:

/* For YUYV buffers, we set up two overlapping DRI images and treat
 * them as planar buffers in the compositors.  Plane 0 is GR88 and
 * samples YU or YV pairs and places Y into the R component, while
 * plane 1 is ARGB and samples YUYV clusters and places pairs and
 * places U into the G component and V into A.  This lets the
 * texture sampler interpolate the Y components correctly when
 * sampling from plane 0, and interpolate U and V correctly when
 * sampling from plane 1. */

So how are the pixels transfered from the YUYV memory to the vec4 in 
the shader?
Do I have to calculate the chroma values by interpolating myself based 
on the
current texture coordinate? Or is it done automatically in some way? 
 From my
experience the texture call only returns the values from plane0 which 
leads me to the
suspicion that I have to interpolate myself. But why is there then the 
plane 1 which

don't seem to be accessible in the shader?

BTW: I'm using the OES_EGL_image extension not the 
OES_EGL_image_external, so my
sampler is sampler2D not samplerExternalOES but that shouldn't make a 
difference should it?


IMO that is a big difference as samplerExternalOES does the YUV2RGB 
conversion and returns RGB values for you.


I have to add that "this is how I think it works", I haven't tried this 
myself :)



And I use "texture" instead of "texture2D" because I'm on OpenGL 3.3, 
but that does not seem

to cause any differences.

On 09.05.2017 06:37, Tapani Pälli wrote:

Hi;

Take a look at Piglit test "ext_image_dma_buf_import-sample_yuv", 
(https://cgit.freedesktop.org/piglit). Test creates EGLImage from dma 
buf, binds to a texture and then samples this in a shader with 
samplerExternalOES type from GL_OES_EGL_image_external extension.



On 05/09/2017 01:57 AM, Volker Vogelhuber wrote:

I'm currently trying to render a V4L2 image with OpenGL
on an Intel Apollo Lake using Linux 4.10 and Mesa 13.
Supprisingly I noticed that importing a DMABUF with format
DRM_FORMAT_YUYV into OpenGL using
   eglCreateImage/glEGLImageTargetTexture2DOES
worked out of the box. But I noticed that there seem to be a split into
two textures when importing the buffer. At least the nplanes
variable in the __DRIimage's planar_format is set to 2 and in the
intel_screen.c there is a comment for __DRI_IMAGE_FOURCC_YUYV:
"For YUYV buffers, we set up two overlapping DRI images
   and treat them as planar buffers in the compositors."
So far so good, but how do I access the second texture in the GLSL
shader. I only created one texture object before calling
glEGLImageTargetTexture2DOES with the EGLImage I got from
eglCreateImage. And using the GLSL function texture( source, 
texCoord ) on this

texture samples only the Y and U channel, what seems obvious
as the first plane's texture is created as GL_RG texture.
Can anyone explain to me, how one accesses the second plane.
And if there is an example somewhere how the two planes
have to be sampled to convert the values back to RGB, it would
be nice if someone can send a link to such an example.

Thanks
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Sampling DRM_FORMAT_YUYV in GLSL

2017-05-09 Thread Tapani Pälli




On 05/09/2017 12:14 PM, Volker Vogelhuber wrote:

Hi,

first sorry, for missing the subject in my mail to the mailing list, 
then thanks

for the hint with the "ext_image_dma_buf_import-sample_yuv". Unfortunately
things don't become clearer. In the samples as far as I can see, there 
is also
only one sampler defined in the shader. So how are the different planes 
accessed

in the shader? In the example only a simple copy is done:
gl_FragColor = texture2D(sampler, texcoords);
In the comment in intel_screen.c it says:

/* For YUYV buffers, we set up two overlapping DRI images and treat
 * them as planar buffers in the compositors.  Plane 0 is GR88 and
 * samples YU or YV pairs and places Y into the R component, while
 * plane 1 is ARGB and samples YUYV clusters and places pairs and
 * places U into the G component and V into A.  This lets the
 * texture sampler interpolate the Y components correctly when
 * sampling from plane 0, and interpolate U and V correctly when
 * sampling from plane 1. */

So how are the pixels transfered from the YUYV memory to the vec4 in the 
shader?
Do I have to calculate the chroma values by interpolating myself based 
on the
current texture coordinate? Or is it done automatically in some way? 
 From my
experience the texture call only returns the values from plane0 which 
leads me to the
suspicion that I have to interpolate myself. But why is there then the 
plane 1 which

don't seem to be accessible in the shader?

BTW: I'm using the OES_EGL_image extension not the 
OES_EGL_image_external, so my
sampler is sampler2D not samplerExternalOES but that shouldn't make a 
difference should it?


IMO that is a big difference as samplerExternalOES does the YUV2RGB 
conversion and returns RGB values for you.


And I use "texture" instead of "texture2D" because I'm on OpenGL 3.3, 
but that does not seem

to cause any differences.

On 09.05.2017 06:37, Tapani Pälli wrote:

Hi;

Take a look at Piglit test "ext_image_dma_buf_import-sample_yuv", 
(https://cgit.freedesktop.org/piglit). Test creates EGLImage from dma 
buf, binds to a texture and then samples this in a shader with 
samplerExternalOES type from GL_OES_EGL_image_external extension.



On 05/09/2017 01:57 AM, Volker Vogelhuber wrote:

I'm currently trying to render a V4L2 image with OpenGL
on an Intel Apollo Lake using Linux 4.10 and Mesa 13.
Supprisingly I noticed that importing a DMABUF with format
DRM_FORMAT_YUYV into OpenGL using
   eglCreateImage/glEGLImageTargetTexture2DOES
worked out of the box. But I noticed that there seem to be a split into
two textures when importing the buffer. At least the nplanes
variable in the __DRIimage's planar_format is set to 2 and in the
intel_screen.c there is a comment for __DRI_IMAGE_FOURCC_YUYV:
"For YUYV buffers, we set up two overlapping DRI images
   and treat them as planar buffers in the compositors."
So far so good, but how do I access the second texture in the GLSL
shader. I only created one texture object before calling
glEGLImageTargetTexture2DOES with the EGLImage I got from
eglCreateImage. And using the GLSL function texture( source, texCoord 
) on this

texture samples only the Y and U channel, what seems obvious
as the first plane's texture is created as GL_RG texture.
Can anyone explain to me, how one accesses the second plane.
And if there is an example somewhere how the two planes
have to be sampled to convert the values back to RGB, it would
be nice if someone can send a link to such an example.

Thanks
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH RFC 4/4] dri: Turn of a couple of glx extensions for gnome-shell on vmwgfx.

On 09/05/17 06:12 PM, Thomas Hellstrom wrote:
> On 05/09/2017 10:47 AM, Michel Dänzer wrote:
>> On 09/05/17 05:32 PM, Thomas Hellstrom wrote:
>>> On 05/09/2017 10:05 AM, Michel Dänzer wrote:
 On 05/05/17 11:02 PM, Thomas Hellstrom wrote:
> Increases performance on vmwgfx since we're avoiding full buffer damage 
> and
> since we can't sync to vertical retrace anyway.
>
> Signed-off-by: Thomas Hellstrom 
> ---
>  src/mesa/drivers/dri/common/drirc | 7 +++
>  1 file changed, 7 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/common/drirc 
> b/src/mesa/drivers/dri/common/drirc
> index 14d7713..a8f2ccf 100644
> --- a/src/mesa/drivers/dri/common/drirc
> +++ b/src/mesa/drivers/dri/common/drirc
> @@ -137,4 +137,11 @@ TODO: document the other workarounds.
>  
>  
>  
> +
> +
> +
> +
> +
> +
> +
>  
>
 Why restrict this to gnome-shell? Wouldn't any application using these
 extensions be affected by the same issues?
>>> So far I've only looked into gnome-shell because we had a noticeable
>>> slowdown. The special thing with gnome-shell is that if
>>> GLX_EXT_buffer_age is available, it prefers a "swapbuffer with damage"
>>> path over copy_sub_buffer. Unfortunately the "swapbuffer with damage"
>>> path is implemented as ordinary swapbuffer on GLX. Not so on EGL. For
>>> real hardware this is a gain, I believe, since they end up page-flipping
>>> so I can't really claim gnome-shell is doing something wrong.
>> I don't think it's directly related to "SwapBuffers with damage" or page
>> flipping. The idea of GLX_EXT_buffer_age is that it lets the application
>> know if and when the current back buffer was already used as a back
>> buffer previously. Based on this, the application can know that it
>> doesn't need to draw to some areas of the back buffer, because they
>> already have the desired contents.
>>
>> Is the problem maybe that you need to read back the back buffer contents
>> from the host if the application doesn't fully clear it?
> 
> I don't think so. My understanding of the code
> 
> https://git.gnome.org/browse/mutter/tree/clutter/clutter/cogl/clutter-stage-cogl.c
> 
> is that when gnome-shell (based on what you write above) decides it
> doesn't need to draw some areas of the back buffer, it eventually ends
> up calling  cogl_onscreen_swap_buffers_with_damage(). And the cogl GLX
> backend ends up calling glxSwapBuffers(), in effect damaging the full
> back buffer.
> 
> https://git.gnome.org/browse/mutter/tree/cogl/cogl/winsys/cogl-winsys-glx.c#n2321

Yes, this is as intended with GLX_EXT_buffer_age; if there was actual
"SwapBuffers with damage" functionality, GLX_EXT_buffer_age would be
kind of pointless, since the application could just only draw and swap
the areas which changed since last time.

Without page flipping, the "undamaged" areas will be unnecessarily
blitted from back to front buffer, but GLX_EXT_buffer_age still provides
some savings (compared to drawing the whole buffer and calling
glXSwapBuffers) via the application not drawing those areas in the first
place.

In summary, gnome-shell is using GLX_EXT_buffer_age exactly as intended,
I wouldn't expect other applications to use it any differently.


>>> For the OML_sync_control extension, some applications may actually notcd
>>> behave worse with this extension present, even if the time source is
>>> provided by the Xserver present extension "fake" backend.
>>> In the gnome-shell case it's a slowdown, though.
>> The Present fake CRTC ticks at 1 Hz, so it'll presumably slow down any
>> application which is otherwise usable. :)
>>
>>
> But the 1Hz clock is only when switched away or blanked, right?
> Otherwise it should be a 60Hz clock. At least glxgears runs faithfully
> at what it believes is 60Hz on Xwayland/glamor/dri3 and Xorg/vmwgfx/dri3
> (yet to be pushed).

Oh, I see present_fake_screen_init sets the interval to 1/60 s if the
driver doesn't provide a get_crtc hook.

Then I'm unsure again what the problem is, though — the presentation
timing should be basically the same as on real hardware?


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Sampling DRM_FORMAT_YUYV in GLSL


Hi,

first sorry, for missing the subject in my mail to the mailing list, 
then thanks

for the hint with the "ext_image_dma_buf_import-sample_yuv". Unfortunately
things don't become clearer. In the samples as far as I can see, there 
is also
only one sampler defined in the shader. So how are the different planes 
accessed

in the shader? In the example only a simple copy is done:
gl_FragColor = texture2D(sampler, texcoords);
In the comment in intel_screen.c it says:

/* For YUYV buffers, we set up two overlapping DRI images and treat
* them as planar buffers in the compositors.  Plane 0 is GR88 and
* samples YU or YV pairs and places Y into the R component, while
* plane 1 is ARGB and samples YUYV clusters and places pairs and
* places U into the G component and V into A.  This lets the
* texture sampler interpolate the Y components correctly when
* sampling from plane 0, and interpolate U and V correctly when
* sampling from plane 1. */

So how are the pixels transfered from the YUYV memory to the vec4 in the 
shader?
Do I have to calculate the chroma values by interpolating myself based 
on the

current texture coordinate? Or is it done automatically in some way? From my
experience the texture call only returns the values from plane0 which 
leads me to the
suspicion that I have to interpolate myself. But why is there then the 
plane 1 which

don't seem to be accessible in the shader?

BTW: I'm using the OES_EGL_image extension not the 
OES_EGL_image_external, so my
sampler is sampler2D not samplerExternalOES but that shouldn't make a 
difference should it?
And I use "texture" instead of "texture2D" because I'm on OpenGL 3.3, 
but that does not seem

to cause any differences.

On 09.05.2017 06:37, Tapani Pälli wrote:

Hi;

Take a look at Piglit test "ext_image_dma_buf_import-sample_yuv", 
(https://cgit.freedesktop.org/piglit). Test creates EGLImage from dma 
buf, binds to a texture and then samples this in a shader with 
samplerExternalOES type from GL_OES_EGL_image_external extension.



On 05/09/2017 01:57 AM, Volker Vogelhuber wrote:

I'm currently trying to render a V4L2 image with OpenGL
on an Intel Apollo Lake using Linux 4.10 and Mesa 13.
Supprisingly I noticed that importing a DMABUF with format
DRM_FORMAT_YUYV into OpenGL using
   eglCreateImage/glEGLImageTargetTexture2DOES
worked out of the box. But I noticed that there seem to be a split into
two textures when importing the buffer. At least the nplanes
variable in the __DRIimage's planar_format is set to 2 and in the
intel_screen.c there is a comment for __DRI_IMAGE_FOURCC_YUYV:
"For YUYV buffers, we set up two overlapping DRI images
   and treat them as planar buffers in the compositors."
So far so good, but how do I access the second texture in the GLSL
shader. I only created one texture object before calling
glEGLImageTargetTexture2DOES with the EGLImage I got from
eglCreateImage. And using the GLSL function texture( source, texCoord 
) on this

texture samples only the Y and U channel, what seems obvious
as the first plane's texture is created as GL_RG texture.
Can anyone explain to me, how one accesses the second plane.
And if there is an example somewhere how the two planes
have to be sampled to convert the values back to RGB, it would
be nice if someone can send a link to such an example.

Thanks
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH RFC 4/4] dri: Turn of a couple of glx extensions for gnome-shell on vmwgfx.

2017-05-09 Thread Thomas Hellstrom

On 05/09/2017 10:47 AM, Michel Dänzer wrote:
> On 09/05/17 05:32 PM, Thomas Hellstrom wrote:
>> On 05/09/2017 10:05 AM, Michel Dänzer wrote:
>>> On 05/05/17 11:02 PM, Thomas Hellstrom wrote:
 Increases performance on vmwgfx since we're avoiding full buffer damage and
 since we can't sync to vertical retrace anyway.

 Signed-off-by: Thomas Hellstrom 
 ---
  src/mesa/drivers/dri/common/drirc | 7 +++
  1 file changed, 7 insertions(+)

 diff --git a/src/mesa/drivers/dri/common/drirc 
 b/src/mesa/drivers/dri/common/drirc
 index 14d7713..a8f2ccf 100644
 --- a/src/mesa/drivers/dri/common/drirc
 +++ b/src/mesa/drivers/dri/common/drirc
 @@ -137,4 +137,11 @@ TODO: document the other workarounds.
  
  
  
 +
 +
 +
 +
 +
 +
 +
  

>>> Why restrict this to gnome-shell? Wouldn't any application using these
>>> extensions be affected by the same issues?
>> So far I've only looked into gnome-shell because we had a noticeable
>> slowdown. The special thing with gnome-shell is that if
>> GLX_EXT_buffer_age is available, it prefers a "swapbuffer with damage"
>> path over copy_sub_buffer. Unfortunately the "swapbuffer with damage"
>> path is implemented as ordinary swapbuffer on GLX. Not so on EGL. For
>> real hardware this is a gain, I believe, since they end up page-flipping
>> so I can't really claim gnome-shell is doing something wrong.
> I don't think it's directly related to "SwapBuffers with damage" or page
> flipping. The idea of GLX_EXT_buffer_age is that it lets the application
> know if and when the current back buffer was already used as a back
> buffer previously. Based on this, the application can know that it
> doesn't need to draw to some areas of the back buffer, because they
> already have the desired contents.
>
> Is the problem maybe that you need to read back the back buffer contents
> from the host if the application doesn't fully clear it?

I don't think so. My understanding of the code

https://git.gnome.org/browse/mutter/tree/clutter/clutter/cogl/clutter-stage-cogl.c

is that when gnome-shell (based on what you write above) decides it
doesn't need to draw some areas of the back buffer, it eventually ends
up calling  cogl_onscreen_swap_buffers_with_damage(). And the cogl GLX
backend ends up calling glxSwapBuffers(), in effect damaging the full
back buffer.

https://git.gnome.org/browse/mutter/tree/cogl/cogl/winsys/cogl-winsys-glx.c#n2321

>
>> For the OML_sync_control extension, some applications may actually notcd
>> behave worse with this extension present, even if the time source is
>> provided by the Xserver present extension "fake" backend.
>> In the gnome-shell case it's a slowdown, though.
> The Present fake CRTC ticks at 1 Hz, so it'll presumably slow down any
> application which is otherwise usable. :)
>
>
But the 1Hz clock is only when switched away or blanked, right?
Otherwise it should be a 60Hz clock. At least glxgears runs faithfully
at what it believes is 60Hz on Xwayland/glamor/dri3 and Xorg/vmwgfx/dri3
(yet to be pushed).

Thanks,

Thomas



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] i965: Drop INTEL_DEBUG=stats.

2017-05-09 Thread Samuel Iglesias Gonsálvez

Series is:

Reviewed-by: Samuel Iglesias Gonsálvez 

On Tue, 2017-05-09 at 01:48 -0700, Kenneth Graunke wrote:
> For whatever reason, we had an INTEL_DEBUG=stats option that enabled
> various statistics counters on Gen4-5 systems.  It's been around
> forever, though I can't think of a single time that it's been useful.
> 
> On Gen6+, we enable statistics all the time because they're necessary
> to support various query object targets.  Turning them off would
> break
> those queries.
> 
> Gen4-5 don't support those queries, so the statistics counters
> generally
> aren't useful; we disabled them by default.  This patch disables them
> altogether.
> ---
>  docs/envvars.html  | 1 -
>  src/intel/common/gen_debug.c   | 1 -
>  src/intel/common/gen_debug.h   | 2 +-
>  src/mesa/drivers/dri/i965/brw_cc.c | 2 +-
>  src/mesa/drivers/dri/i965/brw_clip_state.c | 3 ---
>  src/mesa/drivers/dri/i965/brw_gs_state.c   | 3 ---
>  src/mesa/drivers/dri/i965/brw_sf_state.c   | 3 ---
>  src/mesa/drivers/dri/i965/brw_vs_state.c   | 4 
>  src/mesa/drivers/dri/i965/brw_wm_state.c   | 2 +-
>  9 files changed, 3 insertions(+), 18 deletions(-)
> 
> diff --git a/docs/envvars.html b/docs/envvars.html
> index 05afd2d5529..e075c20536a 100644
> --- a/docs/envvars.html
> +++ b/docs/envvars.html
> @@ -195,7 +195,6 @@ See the Xlib software
> driver page for details.
> spill_fs - force spilling of all registers in the scalar
> backend (useful to debug spilling code)
> spill_vec4 - force spilling of all registers in the vec4
> backend (useful to debug spilling code)
> state - emit messages about state flag tracking
> -   stats - enable statistics counters. you probably actually
> want perfmon or intel_gpu_top instead.
> sync - after sending each batch, emit a message and wait for
> that batch to finish rendering
> tcs - dump shader assembly for tessellation control
> shaders
> tes - dump shader assembly for tessellation evaluation
> shaders
> diff --git a/src/intel/common/gen_debug.c
> b/src/intel/common/gen_debug.c
> index be6fcdb3bdc..f5702f009bc 100644
> --- a/src/intel/common/gen_debug.c
> +++ b/src/intel/common/gen_debug.c
> @@ -57,7 +57,6 @@ static const struct debug_control debug_control[] =
> {
> { "vert",DEBUG_VERTS },
> { "dri", DEBUG_DRI },
> { "sf",  DEBUG_SF },
> -   { "stats",   DEBUG_STATS },
> { "wm",  DEBUG_WM },
> { "urb", DEBUG_URB },
> { "vs",  DEBUG_VS },
> diff --git a/src/intel/common/gen_debug.h
> b/src/intel/common/gen_debug.h
> index c0b74ea2afe..f7f59c9b5d8 100644
> --- a/src/intel/common/gen_debug.h
> +++ b/src/intel/common/gen_debug.h
> @@ -57,7 +57,7 @@ extern uint64_t INTEL_DEBUG;
>  #define DEBUG_VERTS   (1ull << 13)
>  #define DEBUG_DRI (1ull << 14)
>  #define DEBUG_SF  (1ull << 15)
> -#define DEBUG_STATS   (1ull << 16)
> +/* Hole - feel free to reuse  (1ull << 16) */
>  #define DEBUG_WM  (1ull << 17)
>  #define DEBUG_URB (1ull << 18)
>  #define DEBUG_VS  (1ull << 19)
> diff --git a/src/mesa/drivers/dri/i965/brw_cc.c
> b/src/mesa/drivers/dri/i965/brw_cc.c
> index 21b01f3bb18..62e81253cc9 100644
> --- a/src/mesa/drivers/dri/i965/brw_cc.c
> +++ b/src/mesa/drivers/dri/i965/brw_cc.c
> @@ -226,7 +226,7 @@ static void upload_cc_unit(struct brw_context
> *brw)
>    cc->cc2.depth_write_enable = brw_depth_writes_enabled(brw);
> }
>  
> -   if (brw->stats_wm || unlikely(INTEL_DEBUG & DEBUG_STATS))
> +   if (brw->stats_wm)
>    cc->cc5.statistics_enable = 1;
>  
> /* BRW_NEW_CC_VP */
> diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c
> b/src/mesa/drivers/dri/i965/brw_clip_state.c
> index 5e084a9961d..d5fe2b547fa 100644
> --- a/src/mesa/drivers/dri/i965/brw_clip_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c
> @@ -114,9 +114,6 @@ brw_upload_clip_unit(struct brw_context *brw)
>    clip->thread4.max_threads = 1 - 1;
> }
>  
> -   if (unlikely(INTEL_DEBUG & DEBUG_STATS))
> -  clip->thread4.stats_enable = 1;
> -
> /* _NEW_TRANSFORM */
> if (brw->gen == 5 || brw->is_g4x)
>    clip->clip5.userclip_enable_flags = ctx-
> >Transform.ClipPlanesEnabled;
> diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c
> b/src/mesa/drivers/dri/i965/brw_gs_state.c
> index ed9ae44bcdb..72ad044f6c7 100644
> --- a/src/mesa/drivers/dri/i965/brw_gs_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c
> @@ -80,9 +80,6 @@ brw_upload_gs_unit(struct brw_context *brw)
> if (brw->gen == 5)
>    gs->thread4.rendering_enable = 1;
>  
> -   if (unlikely(INTEL_DEBUG & DEBUG_STATS))
> -  gs->thread4.stats_enable = 1;
> -
> /* BRW_NEW_VIEWPORT_COUNT */
> gs->gs6.max_vp_index = brw->clip.viewport_count - 1;
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c
>

[Mesa-dev] [PATCH 2/2] i965: Drop INTEL_DEBUG=stats.

For whatever reason, we had an INTEL_DEBUG=stats option that enabled
various statistics counters on Gen4-5 systems.  It's been around
forever, though I can't think of a single time that it's been useful.

On Gen6+, we enable statistics all the time because they're necessary
to support various query object targets.  Turning them off would break
those queries.

Gen4-5 don't support those queries, so the statistics counters generally
aren't useful; we disabled them by default.  This patch disables them
altogether.
---
 docs/envvars.html  | 1 -
 src/intel/common/gen_debug.c   | 1 -
 src/intel/common/gen_debug.h   | 2 +-
 src/mesa/drivers/dri/i965/brw_cc.c | 2 +-
 src/mesa/drivers/dri/i965/brw_clip_state.c | 3 ---
 src/mesa/drivers/dri/i965/brw_gs_state.c   | 3 ---
 src/mesa/drivers/dri/i965/brw_sf_state.c   | 3 ---
 src/mesa/drivers/dri/i965/brw_vs_state.c   | 4 
 src/mesa/drivers/dri/i965/brw_wm_state.c   | 2 +-
 9 files changed, 3 insertions(+), 18 deletions(-)

diff --git a/docs/envvars.html b/docs/envvars.html
index 05afd2d5529..e075c20536a 100644
--- a/docs/envvars.html
+++ b/docs/envvars.html
@@ -195,7 +195,6 @@ See the Xlib software driver 
page for details.
spill_fs - force spilling of all registers in the scalar backend 
(useful to debug spilling code)
spill_vec4 - force spilling of all registers in the vec4 backend 
(useful to debug spilling code)
state - emit messages about state flag tracking
-   stats - enable statistics counters. you probably actually want perfmon 
or intel_gpu_top instead.
sync - after sending each batch, emit a message and wait for that batch 
to finish rendering
tcs - dump shader assembly for tessellation control shaders
tes - dump shader assembly for tessellation evaluation shaders
diff --git a/src/intel/common/gen_debug.c b/src/intel/common/gen_debug.c
index be6fcdb3bdc..f5702f009bc 100644
--- a/src/intel/common/gen_debug.c
+++ b/src/intel/common/gen_debug.c
@@ -57,7 +57,6 @@ static const struct debug_control debug_control[] = {
{ "vert",DEBUG_VERTS },
{ "dri", DEBUG_DRI },
{ "sf",  DEBUG_SF },
-   { "stats",   DEBUG_STATS },
{ "wm",  DEBUG_WM },
{ "urb", DEBUG_URB },
{ "vs",  DEBUG_VS },
diff --git a/src/intel/common/gen_debug.h b/src/intel/common/gen_debug.h
index c0b74ea2afe..f7f59c9b5d8 100644
--- a/src/intel/common/gen_debug.h
+++ b/src/intel/common/gen_debug.h
@@ -57,7 +57,7 @@ extern uint64_t INTEL_DEBUG;
 #define DEBUG_VERTS   (1ull << 13)
 #define DEBUG_DRI (1ull << 14)
 #define DEBUG_SF  (1ull << 15)
-#define DEBUG_STATS   (1ull << 16)
+/* Hole - feel free to reuse  (1ull << 16) */
 #define DEBUG_WM  (1ull << 17)
 #define DEBUG_URB (1ull << 18)
 #define DEBUG_VS  (1ull << 19)
diff --git a/src/mesa/drivers/dri/i965/brw_cc.c 
b/src/mesa/drivers/dri/i965/brw_cc.c
index 21b01f3bb18..62e81253cc9 100644
--- a/src/mesa/drivers/dri/i965/brw_cc.c
+++ b/src/mesa/drivers/dri/i965/brw_cc.c
@@ -226,7 +226,7 @@ static void upload_cc_unit(struct brw_context *brw)
   cc->cc2.depth_write_enable = brw_depth_writes_enabled(brw);
}
 
-   if (brw->stats_wm || unlikely(INTEL_DEBUG & DEBUG_STATS))
+   if (brw->stats_wm)
   cc->cc5.statistics_enable = 1;
 
/* BRW_NEW_CC_VP */
diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c 
b/src/mesa/drivers/dri/i965/brw_clip_state.c
index 5e084a9961d..d5fe2b547fa 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_state.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_state.c
@@ -114,9 +114,6 @@ brw_upload_clip_unit(struct brw_context *brw)
   clip->thread4.max_threads = 1 - 1;
}
 
-   if (unlikely(INTEL_DEBUG & DEBUG_STATS))
-  clip->thread4.stats_enable = 1;
-
/* _NEW_TRANSFORM */
if (brw->gen == 5 || brw->is_g4x)
   clip->clip5.userclip_enable_flags = ctx->Transform.ClipPlanesEnabled;
diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c 
b/src/mesa/drivers/dri/i965/brw_gs_state.c
index ed9ae44bcdb..72ad044f6c7 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_state.c
@@ -80,9 +80,6 @@ brw_upload_gs_unit(struct brw_context *brw)
if (brw->gen == 5)
   gs->thread4.rendering_enable = 1;
 
-   if (unlikely(INTEL_DEBUG & DEBUG_STATS))
-  gs->thread4.stats_enable = 1;
-
/* BRW_NEW_VIEWPORT_COUNT */
gs->gs6.max_vp_index = brw->clip.viewport_count - 1;
 
diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c 
b/src/mesa/drivers/dri/i965/brw_sf_state.c
index d50ceb12133..4ba57c30dfd 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_state.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_state.c
@@ -174,9 +174,6 @@ static void upload_sf_unit( struct brw_context *brw )
sf->thread4.max_threads = MIN2(chipset_max_threads,
  brw->urb.nr_sf_entries) - 1;
 
-   if

[Mesa-dev] [PATCH 1/2] i965: Disable ARB_pipeline_statistics_query on Gen4-5.

We apparently enabled this on all platforms in Mesa 10.6.  However, it
was only ever implemented for Gen6+.  The Gen4-5 query code goes up in
flames with an "Unrecognized query target" unreachable() error if you
even attempt to use any of the new functionality.

This wasn't caught because the Piglit tests require OpenGL 3.0, which
Gen4-5 cannot support.  The extension spec does say 3.0 is required,
though I'm not sure why - it seems like 2.1 would work fine.

We could implement it anyway, but it's a little bit of a pain due to the
lack of hardware contexts (so we have to snapshot around batches).

Given that it's been 100% broken for two years and I haven't seen a bug
report about it, I'm not terribly inclined to care.  So, let it go.
---
 src/mesa/drivers/dri/i965/intel_extensions.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index 0133fa10068..26c074638e9 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -65,7 +65,6 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.ARB_map_buffer_range = true;
ctx->Extensions.ARB_occlusion_query = true;
ctx->Extensions.ARB_occlusion_query2 = true;
-   ctx->Extensions.ARB_pipeline_statistics_query = true;
ctx->Extensions.ARB_point_sprite = true;
ctx->Extensions.ARB_seamless_cube_map = true;
ctx->Extensions.ARB_shader_bit_encoding = true;
@@ -172,6 +171,7 @@ intelInitExtensions(struct gl_context *ctx)
   ctx->Extensions.ARB_enhanced_layouts = true;
   ctx->Extensions.ARB_ES3_compatibility = true;
   ctx->Extensions.ARB_fragment_layer_viewport = true;
+  ctx->Extensions.ARB_pipeline_statistics_query = true;
   ctx->Extensions.ARB_sample_shading = true;
   ctx->Extensions.ARB_shading_language_420pack = true;
   ctx->Extensions.ARB_texture_buffer_object = true;
-- 
2.12.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH RFC 4/4] dri: Turn of a couple of glx extensions for gnome-shell on vmwgfx.

On 09/05/17 05:32 PM, Thomas Hellstrom wrote:
> On 05/09/2017 10:05 AM, Michel Dänzer wrote:
>> On 05/05/17 11:02 PM, Thomas Hellstrom wrote:
>>> Increases performance on vmwgfx since we're avoiding full buffer damage and
>>> since we can't sync to vertical retrace anyway.
>>>
>>> Signed-off-by: Thomas Hellstrom 
>>> ---
>>>  src/mesa/drivers/dri/common/drirc | 7 +++
>>>  1 file changed, 7 insertions(+)
>>>
>>> diff --git a/src/mesa/drivers/dri/common/drirc 
>>> b/src/mesa/drivers/dri/common/drirc
>>> index 14d7713..a8f2ccf 100644
>>> --- a/src/mesa/drivers/dri/common/drirc
>>> +++ b/src/mesa/drivers/dri/common/drirc
>>> @@ -137,4 +137,11 @@ TODO: document the other workarounds.
>>>  
>>>  
>>>  
>>> +
>>> +
>>> +
>>> +
>>> +
>>> +
>>> +
>>>  
>>>
>> Why restrict this to gnome-shell? Wouldn't any application using these
>> extensions be affected by the same issues?
> 
> So far I've only looked into gnome-shell because we had a noticeable
> slowdown. The special thing with gnome-shell is that if
> GLX_EXT_buffer_age is available, it prefers a "swapbuffer with damage"
> path over copy_sub_buffer. Unfortunately the "swapbuffer with damage"
> path is implemented as ordinary swapbuffer on GLX. Not so on EGL. For
> real hardware this is a gain, I believe, since they end up page-flipping
> so I can't really claim gnome-shell is doing something wrong.

I don't think it's directly related to "SwapBuffers with damage" or page
flipping. The idea of GLX_EXT_buffer_age is that it lets the application
know if and when the current back buffer was already used as a back
buffer previously. Based on this, the application can know that it
doesn't need to draw to some areas of the back buffer, because they
already have the desired contents.

Is the problem maybe that you need to read back the back buffer contents
from the host if the application doesn't fully clear it?


> For the OML_sync_control extension, some applications may actually not
> behave worse with this extension present, even if the time source is
> provided by the Xserver present extension "fake" backend.
> In the gnome-shell case it's a slowdown, though.

The Present fake CRTC ticks at 1 Hz, so it'll presumably slow down any
application which is otherwise usable. :)


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH RFC 4/4] dri: Turn of a couple of glx extensions for gnome-shell on vmwgfx.

2017-05-09 Thread Thomas Hellstrom

Hi, Michel,

On 05/09/2017 10:05 AM, Michel Dänzer wrote:
> On 05/05/17 11:02 PM, Thomas Hellstrom wrote:
>> Increases performance on vmwgfx since we're avoiding full buffer damage and
>> since we can't sync to vertical retrace anyway.
>>
>> Signed-off-by: Thomas Hellstrom 
>> ---
>>  src/mesa/drivers/dri/common/drirc | 7 +++
>>  1 file changed, 7 insertions(+)
>>
>> diff --git a/src/mesa/drivers/dri/common/drirc 
>> b/src/mesa/drivers/dri/common/drirc
>> index 14d7713..a8f2ccf 100644
>> --- a/src/mesa/drivers/dri/common/drirc
>> +++ b/src/mesa/drivers/dri/common/drirc
>> @@ -137,4 +137,11 @@ TODO: document the other workarounds.
>>  
>>  
>>  
>> +
>> +
>> +
>> +
>> +
>> +
>> +
>>  
>>
> Why restrict this to gnome-shell? Wouldn't any application using these
> extensions be affected by the same issues?
>
>

So far I've only looked into gnome-shell because we had a noticeable
slowdown. The special thing with gnome-shell is that if
GLX_EXT_buffer_age is available, it prefers a "swapbuffer with damage"
path over copy_sub_buffer. Unfortunately the "swapbuffer with damage"
path is implemented as ordinary swapbuffer on GLX. Not so on EGL. For
real hardware this is a gain, I believe, since they end up page-flipping
so I can't really claim gnome-shell is doing something wrong. I have yet
to investigate other compositors, but at this point I can't claim other
compositors and applications are doing the same thing and if we, in that
case, break some actually wanted behaviour by disabling EXT_buffer_age.

For the OML_sync_control extension, some applications may actually not
behave worse with this extension present, even if the time source is
provided by the Xserver present extension "fake" backend.
In the gnome-shell case it's a slowdown, though.

/Thomas

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH RFC 4/4] dri: Turn of a couple of glx extensions for gnome-shell on vmwgfx.

On 05/05/17 11:02 PM, Thomas Hellstrom wrote:
> Increases performance on vmwgfx since we're avoiding full buffer damage and
> since we can't sync to vertical retrace anyway.
> 
> Signed-off-by: Thomas Hellstrom 
> ---
>  src/mesa/drivers/dri/common/drirc | 7 +++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/common/drirc 
> b/src/mesa/drivers/dri/common/drirc
> index 14d7713..a8f2ccf 100644
> --- a/src/mesa/drivers/dri/common/drirc
> +++ b/src/mesa/drivers/dri/common/drirc
> @@ -137,4 +137,11 @@ TODO: document the other workarounds.
>  
>  
>  
> +
> +
> +
> +
> +
> +
> +
>  
> 

Why restrict this to gnome-shell? Wouldn't any application using these
extensions be affected by the same issues?


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] mesa: add KHR_no_error support for glUseProgram

2017-05-09 Thread Nicolai Hähnle


On 09.05.2017 03:17, Timothy Arceri wrote:

On 08/05/17 19:36, Nicolai Hähnle wrote:

On 08.05.2017 02:25, Timothy Arceri wrote:

V3: use always_inline attribute (Suggested by Nicolai)

Cc: Nicolai Hähnle 
---
 src/mapi/glapi/gen/gl_API.xml |  2 +-
 src/mesa/main/shaderapi.c | 75
+--
 src/mesa/main/shaderapi.h |  2 ++
 3 files changed, 54 insertions(+), 25 deletions(-)

diff --git a/src/mapi/glapi/gen/gl_API.xml
b/src/mapi/glapi/gen/gl_API.xml
index a304ac0..50d60f5 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -5493,21 +5493,21 @@
 

 
 
 
 
 
 
 

-
+
 
 
 

 
 
 
 
 
 
diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index eb75a3e..f91cc89 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -1841,20 +1841,70 @@ _mesa_ShaderSource(GLuint shaderObj, GLsizei
count,
   free(source);
   source = replacement;
}
 #endif /* ENABLE_SHADER_CACHE */

shader_source(sh, source);

free(offsets);
 }

+/* The ARB_separate_shader_object spec says:
+ *
+ * "The executable code for an individual shader stage is taken
from
+ * the current program for that stage.  If there is a current
program
+ * object established by UseProgram, that program is considered
current
+ * for all stages.  Otherwise, if there is a bound program pipeline
+ * object (section 2.14.PPO), the program bound to the appropriate
+ * stage of the pipeline object is considered current."
+ */
+static ALWAYS_INLINE void
+reference_pipeline_and_use_program(struct gl_context *ctx,
+   struct gl_shader_program *shProg,
+   bool no_error)
+{
+   if (shProg) {
+  /* Attach shader state to the binding point */
+  _mesa_reference_pipeline_object(ctx, >_Shader,
>Shader);
+  /* Update the program */
+  _mesa_use_shader_program(ctx, shProg);
+   } else {
+  /* Must be done first: detach the progam */
+  _mesa_use_shader_program(ctx, shProg);
+  /* Unattach shader_state binding point */
+  _mesa_reference_pipeline_object(ctx, >_Shader,
+  ctx->Pipeline.Default);
+  /* If a pipeline was bound, rebind it */
+  if (ctx->Pipeline.Current) {
+ if (no_error)
+
_mesa_BindProgramPipeline_no_error(ctx->Pipeline.Current->Name);
+ else
+_mesa_BindProgramPipeline(ctx->Pipeline.Current->Name);
+  }
+   }
+}
+
+  \
+
+void GLAPIENTRY
+_mesa_UseProgram_no_error(GLuint program)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_shader_program *shProg = NULL;
+
+   if (program) {
+  shProg = _mesa_lookup_shader_program(ctx, program);
+   }
+
+   reference_pipeline_and_use_program(ctx, shProg, true);
+}
+


Unless reference_pipeline_and_use_program has a separate use, I'd
actually feel more comfortable if the *entire* body of
_mesa_UseProgram was moved into the ALWAYS_INLINE function. This is
better for maintainability because it would guarantee that any future
change will automatically update both variants of the functions.


That would just result in some ugly code and would not actually reduce
the number of lines of code. The code takes different paths so nothing
would be updated automatically.

I'd really rather not do that, the no error path is just a single
look-up call, this is a pattern that is going to become very common as
more no_error support is added, I'd rather we be aware that we should
check the no_error variants in future when updating/reviewing api
changes rather than adding a bunch of ugly and hard to follow inline
functions everywhere.


The example below really doesn't look that ugly to me, but I admit it's 
a bit of personal taste. I do still prefer the inline variant, but I'm 
not going to insist on it.


Cheers,
Nicolai



For example this is what the body of the function will look like:

   GET_CURRENT_CONTEXT(ctx);
   struct gl_shader_program *shProg = NULL;

   if (no_error) {
  if (program) {
 shProg = _mesa_lookup_shader_program(ctx, program);
   } else {

  if (_mesa_is_xfb_active_and_unpaused(ctx)) {
 _mesa_error(ctx, GL_INVALID_OPERATION,
 "glUseProgram(transform feedback active)");
 return;
  }

  if (program) {
 shProg = _mesa_lookup_shader_program_err(ctx, program,
"glUseProgram");
 if (!shProg) {
return;
 }
 if (!shProg->data->LinkStatus) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"glUseProgram(program %u not linked)", program);
return;
 }

 if (ctx->_Shader->Flags & GLSL_USE_PROG) {
print_shader_info(*shProg);
 }
  }
   }

[Mesa-dev] [PATCH] r600/eg: add support for tracing IBs after a hang.

2017-05-09 Thread Dave Airlie

From: Dave Airlie 

This is a poor man's version of radeonsi ddebug stuff, this
should get hooked into that infrastructure, and grow more stuff,
but for now, just create R600_TRACE var that points to a file
that you want to dump the last IB to.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/Makefile.am |   5 +
 src/gallium/drivers/r600/Makefile.sources|   4 +
 src/gallium/drivers/r600/eg_debug.c  | 359 +++
 src/gallium/drivers/r600/egd_tables.py   | 310 +++
 src/gallium/drivers/r600/evergreen_compute.c |  10 +-
 src/gallium/drivers/r600/evergreen_state.c   |  29 +++
 src/gallium/drivers/r600/evergreend.h|  15 +-
 src/gallium/drivers/r600/r600_hw_context.c   |  39 +++
 src/gallium/drivers/r600/r600_pipe.c |   7 +
 src/gallium/drivers/r600/r600_pipe.h |  11 +
 src/gallium/drivers/r600/r600_state_common.c |   3 +
 11 files changed, 785 insertions(+), 7 deletions(-)
 create mode 100644 src/gallium/drivers/r600/eg_debug.c
 create mode 100644 src/gallium/drivers/r600/egd_tables.py

diff --git a/src/gallium/drivers/r600/Makefile.am 
b/src/gallium/drivers/r600/Makefile.am
index 21762d8..44fd51d 100644
--- a/src/gallium/drivers/r600/Makefile.am
+++ b/src/gallium/drivers/r600/Makefile.am
@@ -1,6 +1,11 @@
 include Makefile.sources
 include $(top_srcdir)/src/gallium/Automake.inc
 
+egd_tables.h: $(srcdir)/egd_tables.py $(srcdir)/evergreend.h
+   $(AM_V_at)$(MKDIR_P) $(@D)
+   $(AM_V_GEN) $(PYTHON2) $(srcdir)/egd_tables.py $(srcdir)/evergreend.h > 
$@
+
+BUILT_SOURCES = $(R600_GENERATED_FILES)
 AM_CFLAGS = \
$(GALLIUM_DRIVER_CFLAGS) \
$(RADEON_CFLAGS) \
diff --git a/src/gallium/drivers/r600/Makefile.sources 
b/src/gallium/drivers/r600/Makefile.sources
index 8bf8083..2f20652 100644
--- a/src/gallium/drivers/r600/Makefile.sources
+++ b/src/gallium/drivers/r600/Makefile.sources
@@ -2,6 +2,7 @@ C_SOURCES = \
compute_memory_pool.c \
compute_memory_pool.h \
eg_asm.c \
+   eg_debug.c \
eg_sq.h \
evergreen_compute.c \
evergreen_compute.h \
@@ -64,3 +65,6 @@ CXX_SOURCES = \
sb/sb_shader.h \
sb/sb_ssa_builder.cpp \
sb/sb_valtable.cpp
+
+R600_GENERATED_FILES = \
+   egd_tables.h
\ No newline at end of file
diff --git a/src/gallium/drivers/r600/eg_debug.c 
b/src/gallium/drivers/r600/eg_debug.c
new file mode 100644
index 000..32a4f23
--- /dev/null
+++ b/src/gallium/drivers/r600/eg_debug.c
@@ -0,0 +1,359 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *  Marek Olšák 
+ */
+#include "r600_pipe.h"
+#include "evergreend.h"
+
+#include "egd_tables.h"
+
+#define AC_IS_TRACE_POINT(x)(((x) & 0xcafe) == 0xcafe)
+#define AC_GET_TRACE_POINT_ID(x)((x) & 0x)
+
+/* Parsed IBs are difficult to read without colors. Use "less -R file" to
+ * read them, or use "aha -b -f file" to convert them to html.
+ */
+#define COLOR_RESET"\033[0m"
+#define COLOR_RED  "\033[31m"
+#define COLOR_GREEN"\033[1;32m"
+#define COLOR_YELLOW   "\033[1;33m"
+#define COLOR_CYAN "\033[1;36m"
+
+#define INDENT_PKT 8
+
+typedef void *(*ac_debug_addr_callback)(void *data, uint64_t addr);
+static void print_spaces(FILE *f, unsigned num)
+{
+   fprintf(f, "%*s", num, "");
+}
+
+static void print_value(FILE *file, uint32_t value, int bits)
+{
+   /* Guess if it's int or float */
+   if (value <= (1 << 15)) {
+   if (value <= 9)
+   fprintf(file, "%u\n", value);
+   else
+   fprintf(file, "%u (0x%0*x)\n", value, bits / 4, value);
+   } else {
+   float f = uif(value);
+
+   if (fabs(f) < 10 && f*10 == floor(f*10))
+

Re: [Mesa-dev] [v2 36/39] i965: Add isl based miptree creator

On Mon, May 08, 2017 at 04:23:40PM -0700, Jason Ekstrand wrote:
> On Wed, May 3, 2017 at 2:22 AM, Topi Pohjolainen  > wrote:
> 
> > Signed-off-by: Topi Pohjolainen 
> > ---
> >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 91
> > +++
> >  1 file changed, 91 insertions(+)
> >
> > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > index 4ddcb13..ced1e0e 100644
> > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > @@ -575,6 +575,97 @@ intel_lower_compressed_format(struct brw_context
> > *brw, mesa_format format)
> > }
> >  }
> >
> > +static bool
> > +init_mapping_table(GLenum target, unsigned first_level, unsigned
> > last_level,
> > +   unsigned depth0, struct intel_mipmap_level *table)
> > +{
> > +   unsigned level = first_level;
> > +   for ( ; level <= last_level; level++) {
> > +  const unsigned d = target == GL_TEXTURE_3D ? depth0 >> level :
> > depth0;
> > +
> > +  table[level].slice = calloc(d, sizeof(*table[0].slice));
> > +  if (!table[level].slice)
> > + goto fail;
> >
> 
> It's nice to allocate the table, but I see nothing that actually sets it up.

Right, in case of ISL based we need only needed for one purpose:

intel_mipmap_level::intel_mipmap_slice::map.

This is set on-demand by intel_miptree_attach_map() later on.

> 
> 
> > +   }
> > +
> > +   return true;
> > +
> > +fail:
> > +   for (unsigned i = first_level; i < level; i++)
> > +  free(table[level].slice);
> > +
> > +   return false;
> > +}
> > +
> > +static struct intel_mipmap_tree *
> > +make_surface(struct brw_context *brw, GLenum target, mesa_format format,
> > + unsigned first_level, unsigned last_level,
> > + unsigned width0, unsigned height0, unsigned depth0,
> > + unsigned num_samples, enum isl_tiling isl_tiling,
> > + isl_surf_usage_flags_t isl_usage_flags, uint32_t alloc_flags)
> > +{
> > +   struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
> > +   if (!mt)
> > +  return NULL;
> > +
> > +   if (target == GL_TEXTURE_CUBE_MAP ||
> > +   target == GL_TEXTURE_CUBE_MAP_ARRAY)
> > +  isl_usage_flags |= ISL_SURF_USAGE_CUBE_BIT;
> > +
> > +   DBG("%s: %s %s %ux %u:%u:%u %d..%d <-- %p\n",
> > +__func__,
> > +   _mesa_enum_to_string(target),
> > +   _mesa_get_format_name(format),
> > +   num_samples, width0, height0, depth0,
> > +   first_level, last_level, mt);
> > +
> > +   if (!init_mapping_table(target, first_level, last_level, depth0,
> > +   mt->level))
> > +  goto fail;
> > +
> > +   struct isl_surf_init_info init_info = {
> > +  .dim = get_isl_surf_dim(target),
> > +  .format = translate_tex_format(brw, format, false),
> > +  .width = width0,
> > +  .height = height0,
> > +  .depth = target == GL_TEXTURE_3D ? depth0 : 1,
> > +  .levels = last_level + 1,
> > +  .array_len = target == GL_TEXTURE_3D ? 1 : depth0,
> > +  .samples = MAX2(num_samples, 1),
> > +  .usage = isl_usage_flags,
> > +  .tiling_flags = 1u << isl_tiling
> > +   };
> > +
> > +   if (!isl_surf_init_s(>isl_dev, >surf, _info))
> > +  goto fail;
> >
> 
> This leaks the mapping table

Oops, so it does.

> 
> 
> > +
> > +   assert(mt->surf.size % mt->surf.row_pitch == 0);
> > +
> > +   unsigned pitch = mt->surf.row_pitch;
> > +   mt->bo = brw_bo_alloc_tiled(brw->bufmgr, "isl-miptree",
> > +   mt->surf.row_pitch,
> > +   mt->surf.size / mt->surf.row_pitch,
> > +   1, isl_tiling_to_bufmgr_tiling(
> > isl_tiling),
> > +   , alloc_flags);
> > +   if (!mt->bo)
> > +  goto fail;
> >
> 
> As does this.

Thanks for catching these two.

> 
> 
> > +
> > +   assert(pitch == mt->surf.row_pitch);
> > +
> > +   mt->target = target;
> > +   mt->format = format;
> > +   mt->refcount = 1;
> > +
> > +   exec_list_make_empty(>hiz_map);
> > +   exec_list_make_empty(>color_resolve_map);
> > +
> > +   return mt;
> > +
> > +fail:
> > +   free(mt);
> > +   return NULL;
> > +}
> > +
> >  static struct intel_mipmap_tree *
> >  miptree_create(struct brw_context *brw,
> > GLenum target,
> > --
> > 2.9.3
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] i965: Use isl for hiz and stencil