[Mesa-dev] [PATCH] mesa/state_tracker: explicitely handle case ir_intrinsic_begin_fragment_shader_ordering in visit_generic_intrinsic()

2018-08-28 Thread kevin . rogovin
From: Kevin Rogovin 

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 7b96947c60..48a7b030ce 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -4068,6 +4068,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
case ir_intrinsic_generic_atomic_comp_swap:
case ir_intrinsic_begin_invocation_interlock:
case ir_intrinsic_end_invocation_interlock:
+   case ir_intrinsic_begin_fragment_shader_ordering:
   unreachable("Invalid intrinsic");
}
 }
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] i965: Add INTEL_fragment_shader_ordering support.

2018-08-27 Thread kevin . rogovin
From: Kevin Rogovin 

Adds suppport for INTEL_fragment_shader_ordering. We achieve
the fragment ordering by using the same instruction as for
beginInvocationInterlockARB() which is by issuing a memory
fence via sendc.

Signed-off-by: Kevin Rogovin 
---
 docs/relnotes/18.3.0.html| 1 +
 src/intel/compiler/brw_fs_nir.cpp| 1 +
 src/mesa/drivers/dri/i965/intel_extensions.c | 1 +
 3 files changed, 3 insertions(+)

diff --git a/docs/relnotes/18.3.0.html b/docs/relnotes/18.3.0.html
index afcb044817..71fb41ca86 100644
--- a/docs/relnotes/18.3.0.html
+++ b/docs/relnotes/18.3.0.html
@@ -59,6 +59,7 @@ Note: some of the new features are only available with 
certain drivers.
 GL_EXT_vertex_attrib_64bit on i965, nvc0, radeonsi.
 GL_EXT_window_rectangles on radeonsi.
 GL_KHR_texture_compression_astc_sliced_3d on radeonsi.
+GL_INTEL_fragment_shader_ordering on i965.
 GL_NV_fragment_shader_interlock on i965.
 
 
diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index 9c9df5ac09..62bff2a323 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -4836,6 +4836,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , 
nir_intrinsic_instr *instr
   break;
}
 
+   case nir_intrinsic_begin_fragment_shader_ordering:
case nir_intrinsic_begin_invocation_interlock: {
   const fs_builder ubld = bld.group(8, 0);
   const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index 0b137664b0..1ea8594c34 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -247,6 +247,7 @@ intelInitExtensions(struct gl_context *ctx)
   ctx->Extensions.OES_primitive_bounding_box = true;
   ctx->Extensions.OES_texture_buffer = true;
   ctx->Extensions.ARB_fragment_shader_interlock = true;
+  ctx->Extensions.INTEL_fragment_shader_ordering = true;
 
   if (can_do_pipelined_register_writes(brw->screen)) {
  ctx->Extensions.ARB_draw_indirect = true;
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] mesa: Add GL/GLSL plumbing for INTEL_fragment_shader_ordering.

2018-08-27 Thread kevin . rogovin
From: Kevin Rogovin 

   This extension provides new GLSL built-in function
   beginFragmentShaderOrderingIntel() that guarantees
   (taking wording of GL_INTEL_fragment_shader_ordering
   extension) that any memory transactions issued by
   shader invocations from previous primitives mapped to
   same xy window coordinates (and same sample when
   per-sample shading is active), complete and are visible
   to the shader invocation that called
   beginFragmentShaderOrderingINTEL().

   One advantage of INTEL_fragment_shader_ordering over
   ARB_fragment_shader_interlock is that it provides a
   function that operates as a memory barrie (instead
   of a defining a critcial section) that can be called
   under arbitary control flow from any function (in
   contrast the begin/end of ARB_fragment_shader_interlock
   may only be called once, from main(), under no control
   flow.

Signed-off-by: Kevin Rogovin 
---
 src/compiler/glsl/builtin_functions.cpp  | 17 +
 src/compiler/glsl/glsl_parser_extras.cpp |  1 +
 src/compiler/glsl/glsl_parser_extras.h   |  2 ++
 src/compiler/glsl/glsl_to_nir.cpp|  6 ++
 src/compiler/glsl/ir.h   |  1 +
 src/compiler/nir/nir_intrinsics.py   |  1 +
 src/mesa/main/extensions_table.h |  1 +
 src/mesa/main/mtypes.h   |  1 +
 8 files changed, 30 insertions(+)

diff --git a/src/compiler/glsl/builtin_functions.cpp 
b/src/compiler/glsl/builtin_functions.cpp
index b601880686..5650365d1d 100644
--- a/src/compiler/glsl/builtin_functions.cpp
+++ b/src/compiler/glsl/builtin_functions.cpp
@@ -525,6 +525,12 @@ supports_nv_fragment_shader_interlock(const 
_mesa_glsl_parse_state *state)
return state->NV_fragment_shader_interlock_enable;
 }
 
+static bool
+supports_intel_fragment_shader_ordering(const _mesa_glsl_parse_state *state)
+{
+   return state->INTEL_fragment_shader_ordering_enable;
+}
+
 static bool
 shader_clock(const _mesa_glsl_parse_state *state)
 {
@@ -1305,6 +1311,11 @@ builtin_builder::create_intrinsics()
supports_arb_fragment_shader_interlock,
ir_intrinsic_end_invocation_interlock), NULL);
 
+   add_function("__intrinsic_begin_fragment_shader_ordering",
+_invocation_interlock_intrinsic(
+   supports_intel_fragment_shader_ordering,
+   ir_intrinsic_begin_fragment_shader_ordering), NULL);
+
add_function("__intrinsic_shader_clock",
 _shader_clock_intrinsic(shader_clock,
 glsl_type::uvec2_type),
@@ -3419,6 +3430,12 @@ builtin_builder::create_builtins()
supports_nv_fragment_shader_interlock),
 NULL);
 
+   add_function("beginFragmentShaderOrderingINTEL",
+_invocation_interlock(
+   "__intrinsic_begin_fragment_shader_ordering",
+   supports_intel_fragment_shader_ordering),
+NULL);
+
add_function("anyInvocationARB",
 _vote("__intrinsic_vote_any", vote),
 NULL);
diff --git a/src/compiler/glsl/glsl_parser_extras.cpp 
b/src/compiler/glsl/glsl_parser_extras.cpp
index 0a7d0d78b1..21d4122444 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -725,6 +725,7 @@ static const _mesa_glsl_extension 
_mesa_glsl_supported_extensions[] = {
EXT_AEP(EXT_texture_buffer),
EXT_AEP(EXT_texture_cube_map_array),
EXT(INTEL_conservative_rasterization),
+   EXT(INTEL_fragment_shader_ordering),
EXT(INTEL_shader_atomic_float_minmax),
EXT(MESA_shader_integer_functions),
EXT(NV_fragment_shader_interlock),
diff --git a/src/compiler/glsl/glsl_parser_extras.h 
b/src/compiler/glsl/glsl_parser_extras.h
index 2c8353214a..e03b34d7d6 100644
--- a/src/compiler/glsl/glsl_parser_extras.h
+++ b/src/compiler/glsl/glsl_parser_extras.h
@@ -812,6 +812,8 @@ struct _mesa_glsl_parse_state {
bool EXT_texture_cube_map_array_warn;
bool INTEL_conservative_rasterization_enable;
bool INTEL_conservative_rasterization_warn;
+   bool INTEL_fragment_shader_ordering_enable;
+   bool INTEL_fragment_shader_ordering_warn;
bool INTEL_shader_atomic_float_minmax_enable;
bool INTEL_shader_atomic_float_minmax_warn;
bool MESA_shader_integer_functions_enable;
diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index a53000f47e..efbb2317ac 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -742,6 +742,9 @@ nir_visitor::visit(ir_call *ir)
   case ir_intrinsic_end_invocation_interlock:
  op = nir_intrinsic_end_invocation_interlock;
  break;
+  case ir_intrinsic_begin_fragment_shader_ordering:
+ op = nir_intrinsic_begin_fragment_shader_ordering;
+ break;
   case ir_intrinsic_group_memory_barrier:
  op = nir_intrinsic_grou

[Mesa-dev] [PATCH 0/2] Implement INTEL_fragment_shader_ordering

2018-08-27 Thread kevin . rogovin
From: Kevin Rogovin 

INTEL_fragment_shader_ordering provides the ability for shaders
to issue a call to gaurnantee memory write operation ordering
of overlapping pixels or samples. In contrast to
ARB_fragment_shader_interlock, INTEL_fragment_shader_ordering
instead of defining a critical region (which must be in main() and
under no flow control) provides a single function that acts like
a memory barrier that can be called under any control flow.

Kevin Rogovin (2):
  mesa: Add GL/GLSL plumbing for INTEL_fragment_shader_ordering.
  i965: Add INTEL_fragment_shader_ordering support.

 docs/relnotes/18.3.0.html|  1 +
 src/compiler/glsl/builtin_functions.cpp  | 17 +
 src/compiler/glsl/glsl_parser_extras.cpp |  1 +
 src/compiler/glsl/glsl_parser_extras.h   |  2 ++
 src/compiler/glsl/glsl_to_nir.cpp|  6 ++
 src/compiler/glsl/ir.h   |  1 +
 src/compiler/nir/nir_intrinsics.py   |  1 +
 src/intel/compiler/brw_fs_nir.cpp|  1 +
 src/mesa/drivers/dri/i965/intel_extensions.c |  1 +
 src/mesa/main/extensions_table.h |  1 +
 src/mesa/main/mtypes.h   |  1 +
 11 files changed, 33 insertions(+)

-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] docs/relnotes: Mark NV_fragment_shader_interlock support in i965

2018-08-24 Thread kevin . rogovin
From: Kevin Rogovin 

---
 docs/relnotes/18.3.0.html | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/relnotes/18.3.0.html b/docs/relnotes/18.3.0.html
index 594b0624a5..afcb044817 100644
--- a/docs/relnotes/18.3.0.html
+++ b/docs/relnotes/18.3.0.html
@@ -59,6 +59,7 @@ Note: some of the new features are only available with 
certain drivers.
 GL_EXT_vertex_attrib_64bit on i965, nvc0, radeonsi.
 GL_EXT_window_rectangles on radeonsi.
 GL_KHR_texture_compression_astc_sliced_3d on radeonsi.
+GL_NV_fragment_shader_interlock on i965.
 
 
 Bug fixes
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] docs/relnotes: Mark NV_fragment_shader_interlock support in i965

2018-08-24 Thread kevin . rogovin
From: Kevin Rogovin 

---
 docs/relnotes/18.3.0.html | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/relnotes/18.3.0.html b/docs/relnotes/18.3.0.html
index 594b0624a5..afcb044817 100644
--- a/docs/relnotes/18.3.0.html
+++ b/docs/relnotes/18.3.0.html
@@ -59,6 +59,7 @@ Note: some of the new features are only available with 
certain drivers.
 GL_EXT_vertex_attrib_64bit on i965, nvc0, radeonsi.
 GL_EXT_window_rectangles on radeonsi.
 GL_KHR_texture_compression_astc_sliced_3d on radeonsi.
+GL_NV_fragment_shader_interlock on i965.
 
 
 Bug fixes
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Add NV_fragment_shader_interlock support.

2018-08-23 Thread Kevin Rogovin
Thankyou for pushing; I will post the one liner for the release notes
shortly.

Best Regards,
-Kevin Rogovin

On Fri, 24 Aug 2018 at 3.44, Jason Ekstrand  wrote:

> Your first version has already landed; Ken pushed it:
>
>
> https://cgit.freedesktop.org/mesa/mesa/commit/src?id=7ec308d978019dd9aa0e7a756b5919e34431196d
>
> Mind making a patch with just the release notes update?
>
> Thanks!
>
> --Jason
>
> On August 22, 2018 22:57:42 Kevin Rogovin  wrote:
>
>> Hi,
>>
>>  My request for an account was NAK'd by the i965 maintainer. As such, I
>> will post a v2 with the update to release notes requested and I hope
>> Plamena can push that for me.
>>
>> Best Regards,
>>  -Kevin Rogovin
>>
>> On Tue, Aug 21, 2018 at 12:39 PM Emil Velikov 
>> wrote:
>>
>>> Hi Kevin,
>>>
>>> On 20 August 2018 at 15:11, Kevin Rogovin 
>>> wrote:
>>> > Hi Plamena,
>>> >
>>> >  Thankyou for the fast review. Can you push it or have someone push
>>> it? (I
>>> > do not have push rights)
>>> >
>>> You have contributed a number of good patches in Mesa, always
>>> listening to feedback.
>>> I would welcome if you're given commit access to the repository.
>>>
>>> https://www.mesa3d.org/repository.html#developer
>>> https://bugs.freedesktop.org/show_bug.cgi?id=99601
>>>
>>> That said, can you please add a note about the new extension in the
>>> release notes.
>>>
>>> Thanks
>>> Emil
>>>
>> ___
>>
> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] Add NV_fragment_shader_interlock support.

2018-08-22 Thread Kevin Rogovin
From: Kevin Rogovin 

The main purpose for having NV_fragment_shader_interlock
extension is because that extension is also for GLES31 while
the ARB extension is for GL only.

v2: Add to review notes (requested by Emil Velikov)

Reviewed-by: Plamena Manolova 
---
 docs/relnotes/18.3.0.html|  1 +
 src/compiler/glsl/builtin_functions.cpp  | 18 ++
 src/compiler/glsl/glsl_parser.yy |  6 --
 src/compiler/glsl/glsl_parser_extras.cpp |  1 +
 src/compiler/glsl/glsl_parser_extras.h   |  2 ++
 src/mesa/main/extensions_table.h |  1 +
 6 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/docs/relnotes/18.3.0.html b/docs/relnotes/18.3.0.html
index ac2cc1e893..9989897515 100644
--- a/docs/relnotes/18.3.0.html
+++ b/docs/relnotes/18.3.0.html
@@ -53,6 +53,7 @@ Note: some of the new features are only available with 
certain drivers.
 
 GL_AMD_framebuffer_multisample_advanced on radeonsi.
 GL_EXT_window_rectangles on radeonsi.
+GL_NV_fragment_shader_interlock on i965.
 
 
 Bug fixes
diff --git a/src/compiler/glsl/builtin_functions.cpp 
b/src/compiler/glsl/builtin_functions.cpp
index 7119903795..e7b78c0158 100644
--- a/src/compiler/glsl/builtin_functions.cpp
+++ b/src/compiler/glsl/builtin_functions.cpp
@@ -519,6 +519,12 @@ supports_arb_fragment_shader_interlock(const 
_mesa_glsl_parse_state *state)
return state->ARB_fragment_shader_interlock_enable;
 }
 
+static bool
+supports_nv_fragment_shader_interlock(const _mesa_glsl_parse_state *state)
+{
+   return state->NV_fragment_shader_interlock_enable;
+}
+
 static bool
 shader_clock(const _mesa_glsl_parse_state *state)
 {
@@ -3331,6 +3337,18 @@ builtin_builder::create_builtins()
supports_arb_fragment_shader_interlock),
 NULL);
 
+   add_function("beginInvocationInterlockNV",
+_invocation_interlock(
+   "__intrinsic_begin_invocation_interlock",
+   supports_nv_fragment_shader_interlock),
+NULL);
+
+   add_function("endInvocationInterlockNV",
+_invocation_interlock(
+   "__intrinsic_end_invocation_interlock",
+   supports_nv_fragment_shader_interlock),
+NULL);
+
add_function("anyInvocationARB",
 _vote("__intrinsic_vote_any", vote),
 NULL);
diff --git a/src/compiler/glsl/glsl_parser.yy b/src/compiler/glsl/glsl_parser.yy
index cb7376995d..bc2571b684 100644
--- a/src/compiler/glsl/glsl_parser.yy
+++ b/src/compiler/glsl/glsl_parser.yy
@@ -1450,10 +1450,12 @@ layout_qualifier_id:
   "only valid in fragment shader input layout 
declaration.");
   } else if (pixel_interlock_ordered + pixel_interlock_unordered +
  sample_interlock_ordered + sample_interlock_unordered > 0 &&
- !state->ARB_fragment_shader_interlock_enable) {
+ !state->ARB_fragment_shader_interlock_enable &&
+ !state->NV_fragment_shader_interlock_enable) {
  _mesa_glsl_error(& @1, state,
   "interlock layout qualifier present, but the "
-  "GL_ARB_fragment_shader_interlock extension is not "
+  "GL_ARB_fragment_shader_interlock or "
+  "GL_NV_fragment_shader_interlock extension is not "
   "enabled.");
   } else {
  $$.flags.q.pixel_interlock_ordered = pixel_interlock_ordered;
diff --git a/src/compiler/glsl/glsl_parser_extras.cpp 
b/src/compiler/glsl/glsl_parser_extras.cpp
index 6d92f24ea2..393942b62c 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -724,6 +724,7 @@ static const _mesa_glsl_extension 
_mesa_glsl_supported_extensions[] = {
EXT_AEP(EXT_texture_cube_map_array),
EXT(INTEL_conservative_rasterization),
EXT(MESA_shader_integer_functions),
+   EXT(NV_fragment_shader_interlock),
EXT(NV_image_formats),
 };
 
diff --git a/src/compiler/glsl/glsl_parser_extras.h 
b/src/compiler/glsl/glsl_parser_extras.h
index 59a173418b..3b17b54f0a 100644
--- a/src/compiler/glsl/glsl_parser_extras.h
+++ b/src/compiler/glsl/glsl_parser_extras.h
@@ -810,6 +810,8 @@ struct _mesa_glsl_parse_state {
bool INTEL_conservative_rasterization_warn;
bool MESA_shader_integer_functions_enable;
bool MESA_shader_integer_functions_warn;
+   bool NV_fragment_shader_interlock_enable;
+   bool NV_fragment_shader_interlock_warn;
bool NV_image_formats_enable;
bool NV_image_formats_warn;
/*@}*/
diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index af5ce118da..746e821886 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -346,6 +346,7 @@ EXT(NV_draw_

Re: [Mesa-dev] [PATCH] Add NV_fragment_shader_interlock support.

2018-08-22 Thread Kevin Rogovin
Hi,

 My request for an account was NAK'd by the i965 maintainer. As such, I
will post a v2 with the update to release notes requested and I hope
Plamena can push that for me.

Best Regards,
 -Kevin Rogovin

On Tue, Aug 21, 2018 at 12:39 PM Emil Velikov 
wrote:

> Hi Kevin,
>
> On 20 August 2018 at 15:11, Kevin Rogovin  wrote:
> > Hi Plamena,
> >
> >  Thankyou for the fast review. Can you push it or have someone push it?
> (I
> > do not have push rights)
> >
> You have contributed a number of good patches in Mesa, always
> listening to feedback.
> I would welcome if you're given commit access to the repository.
>
> https://www.mesa3d.org/repository.html#developer
> https://bugs.freedesktop.org/show_bug.cgi?id=99601
>
> That said, can you please add a note about the new extension in the
> release notes.
>
> Thanks
> Emil
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Add NV_fragment_shader_interlock support.

2018-08-21 Thread Kevin Rogovin
Hi Plamena,

 Thankyou for the fast review. Can you push it or have someone push it? (I
do not have push rights)

-Keviv

On Mon, Aug 20, 2018 at 2:50 PM, Manolova, Plamena <
plamena.manol...@intel.com> wrote:

> Hi Kevin,
> This looks good to me :)
>
> Reviewed-by: Plamena Manolova 
>
> On Wed, Aug 15, 2018 at 2:29 PM,  wrote:
>
>> From: Kevin Rogovin 
>>
>> The main purpose for having NV_fragment_shader_interlock
>> extension is because that extension is also for GLES31 while
>> the ARB extension is for GL only.
>> ---
>>  src/compiler/glsl/builtin_functions.cpp  | 18 ++
>>  src/compiler/glsl/glsl_parser.yy |  6 --
>>  src/compiler/glsl/glsl_parser_extras.cpp |  1 +
>>  src/compiler/glsl/glsl_parser_extras.h   |  2 ++
>>  src/mesa/main/extensions_table.h |  1 +
>>  5 files changed, 26 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/compiler/glsl/builtin_functions.cpp
>> b/src/compiler/glsl/builtin_functions.cpp
>> index 7119903795..e7b78c0158 100644
>> --- a/src/compiler/glsl/builtin_functions.cpp
>> +++ b/src/compiler/glsl/builtin_functions.cpp
>> @@ -519,6 +519,12 @@ supports_arb_fragment_shader_interlock(const
>> _mesa_glsl_parse_state *state)
>> return state->ARB_fragment_shader_interlock_enable;
>>  }
>>
>> +static bool
>> +supports_nv_fragment_shader_interlock(const _mesa_glsl_parse_state
>> *state)
>> +{
>> +   return state->NV_fragment_shader_interlock_enable;
>> +}
>> +
>>  static bool
>>  shader_clock(const _mesa_glsl_parse_state *state)
>>  {
>> @@ -3331,6 +3337,18 @@ builtin_builder::create_builtins()
>> supports_arb_fragment_shader_interlock),
>>  NULL);
>>
>> +   add_function("beginInvocationInterlockNV",
>> +_invocation_interlock(
>> +   "__intrinsic_begin_invocation_interlock",
>> +   supports_nv_fragment_shader_interlock),
>> +NULL);
>> +
>> +   add_function("endInvocationInterlockNV",
>> +_invocation_interlock(
>> +   "__intrinsic_end_invocation_interlock",
>> +   supports_nv_fragment_shader_interlock),
>> +NULL);
>> +
>> add_function("anyInvocationARB",
>>  _vote("__intrinsic_vote_any", vote),
>>  NULL);
>> diff --git a/src/compiler/glsl/glsl_parser.yy
>> b/src/compiler/glsl/glsl_parser.yy
>> index cb7376995d..bc2571b684 100644
>> --- a/src/compiler/glsl/glsl_parser.yy
>> +++ b/src/compiler/glsl/glsl_parser.yy
>> @@ -1450,10 +1450,12 @@ layout_qualifier_id:
>>"only valid in fragment shader input layout
>> declaration.");
>>} else if (pixel_interlock_ordered + pixel_interlock_unordered +
>>   sample_interlock_ordered + sample_interlock_unordered >
>> 0 &&
>> - !state->ARB_fragment_shader_interlock_enable) {
>> + !state->ARB_fragment_shader_interlock_enable &&
>> + !state->NV_fragment_shader_interlock_enable) {
>>   _mesa_glsl_error(& @1, state,
>>"interlock layout qualifier present, but the "
>> -  "GL_ARB_fragment_shader_interlock extension
>> is not "
>> +  "GL_ARB_fragment_shader_interlock or "
>> +  "GL_NV_fragment_shader_interlock extension is
>> not "
>>"enabled.");
>>} else {
>>   $$.flags.q.pixel_interlock_ordered = pixel_interlock_ordered;
>> diff --git a/src/compiler/glsl/glsl_parser_extras.cpp
>> b/src/compiler/glsl/glsl_parser_extras.cpp
>> index 6d92f24ea2..393942b62c 100644
>> --- a/src/compiler/glsl/glsl_parser_extras.cpp
>> +++ b/src/compiler/glsl/glsl_parser_extras.cpp
>> @@ -724,6 +724,7 @@ static const _mesa_glsl_extension
>> _mesa_glsl_supported_extensions[] = {
>> EXT_AEP(EXT_texture_cube_map_array),
>> EXT(INTEL_conservative_rasterization),
>> EXT(MESA_shader_integer_functions),
>> +   EXT(NV_fragment_shader_interlock),
>> EXT(NV_image_formats),
>>  };
>>
>> diff --git a/src/compiler/glsl/glsl_parser_extras.h
>> b/src/compiler/glsl/glsl_parser_extras.h
>> index 59a173418b..3b17b54f0a 100644
>> --- a/src/compiler/glsl/glsl_p

Re: [Mesa-dev] [PATCH] Add NV_fragment_shader_interlock support.

2018-08-20 Thread Kevin Rogovin
Hi Plamena,

 Thankyou for the fast review. Can you push it or have someone push it? (I
do not have push rights)

-Keviv

On Mon, Aug 20, 2018, 2:50 PM Manolova, Plamena 
wrote:

> Hi Kevin,
> This looks good to me :)
>
> Reviewed-by: Plamena Manolova 
>
> On Wed, Aug 15, 2018 at 2:29 PM,  wrote:
>
>> From: Kevin Rogovin 
>>
>> The main purpose for having NV_fragment_shader_interlock
>> extension is because that extension is also for GLES31 while
>> the ARB extension is for GL only.
>> ---
>>  src/compiler/glsl/builtin_functions.cpp  | 18 ++
>>  src/compiler/glsl/glsl_parser.yy |  6 --
>>  src/compiler/glsl/glsl_parser_extras.cpp |  1 +
>>  src/compiler/glsl/glsl_parser_extras.h   |  2 ++
>>  src/mesa/main/extensions_table.h |  1 +
>>  5 files changed, 26 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/compiler/glsl/builtin_functions.cpp
>> b/src/compiler/glsl/builtin_functions.cpp
>> index 7119903795..e7b78c0158 100644
>> --- a/src/compiler/glsl/builtin_functions.cpp
>> +++ b/src/compiler/glsl/builtin_functions.cpp
>> @@ -519,6 +519,12 @@ supports_arb_fragment_shader_interlock(const
>> _mesa_glsl_parse_state *state)
>> return state->ARB_fragment_shader_interlock_enable;
>>  }
>>
>> +static bool
>> +supports_nv_fragment_shader_interlock(const _mesa_glsl_parse_state
>> *state)
>> +{
>> +   return state->NV_fragment_shader_interlock_enable;
>> +}
>> +
>>  static bool
>>  shader_clock(const _mesa_glsl_parse_state *state)
>>  {
>> @@ -3331,6 +3337,18 @@ builtin_builder::create_builtins()
>> supports_arb_fragment_shader_interlock),
>>  NULL);
>>
>> +   add_function("beginInvocationInterlockNV",
>> +_invocation_interlock(
>> +   "__intrinsic_begin_invocation_interlock",
>> +   supports_nv_fragment_shader_interlock),
>> +NULL);
>> +
>> +   add_function("endInvocationInterlockNV",
>> +_invocation_interlock(
>> +   "__intrinsic_end_invocation_interlock",
>> +   supports_nv_fragment_shader_interlock),
>> +NULL);
>> +
>> add_function("anyInvocationARB",
>>  _vote("__intrinsic_vote_any", vote),
>>  NULL);
>> diff --git a/src/compiler/glsl/glsl_parser.yy
>> b/src/compiler/glsl/glsl_parser.yy
>> index cb7376995d..bc2571b684 100644
>> --- a/src/compiler/glsl/glsl_parser.yy
>> +++ b/src/compiler/glsl/glsl_parser.yy
>> @@ -1450,10 +1450,12 @@ layout_qualifier_id:
>>"only valid in fragment shader input layout
>> declaration.");
>>} else if (pixel_interlock_ordered + pixel_interlock_unordered +
>>   sample_interlock_ordered + sample_interlock_unordered >
>> 0 &&
>> - !state->ARB_fragment_shader_interlock_enable) {
>> + !state->ARB_fragment_shader_interlock_enable &&
>> + !state->NV_fragment_shader_interlock_enable) {
>>   _mesa_glsl_error(& @1, state,
>>"interlock layout qualifier present, but the "
>> -  "GL_ARB_fragment_shader_interlock extension is
>> not "
>> +  "GL_ARB_fragment_shader_interlock or "
>> +  "GL_NV_fragment_shader_interlock extension is
>> not "
>>"enabled.");
>>} else {
>>   $$.flags.q.pixel_interlock_ordered = pixel_interlock_ordered;
>> diff --git a/src/compiler/glsl/glsl_parser_extras.cpp
>> b/src/compiler/glsl/glsl_parser_extras.cpp
>> index 6d92f24ea2..393942b62c 100644
>> --- a/src/compiler/glsl/glsl_parser_extras.cpp
>> +++ b/src/compiler/glsl/glsl_parser_extras.cpp
>> @@ -724,6 +724,7 @@ static const _mesa_glsl_extension
>> _mesa_glsl_supported_extensions[] = {
>> EXT_AEP(EXT_texture_cube_map_array),
>> EXT(INTEL_conservative_rasterization),
>> EXT(MESA_shader_integer_functions),
>> +   EXT(NV_fragment_shader_interlock),
>> EXT(NV_image_formats),
>>  };
>>
>> diff --git a/src/compiler/glsl/glsl_parser_extras.h
>> b/src/compiler/glsl/glsl_parser_extras.h
>> index 59a173418b..3b17b54f0a 100644
>> --- a/src/compiler/glsl/glsl_parser_extras.h
>> +++ b/s

[Mesa-dev] [PATCH] Add NV_fragment_shader_interlock support.

2018-08-15 Thread kevin . rogovin
From: Kevin Rogovin 

The main purpose for having NV_fragment_shader_interlock
extension is because that extension is also for GLES31 while
the ARB extension is for GL only.
---
 src/compiler/glsl/builtin_functions.cpp  | 18 ++
 src/compiler/glsl/glsl_parser.yy |  6 --
 src/compiler/glsl/glsl_parser_extras.cpp |  1 +
 src/compiler/glsl/glsl_parser_extras.h   |  2 ++
 src/mesa/main/extensions_table.h |  1 +
 5 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl/builtin_functions.cpp 
b/src/compiler/glsl/builtin_functions.cpp
index 7119903795..e7b78c0158 100644
--- a/src/compiler/glsl/builtin_functions.cpp
+++ b/src/compiler/glsl/builtin_functions.cpp
@@ -519,6 +519,12 @@ supports_arb_fragment_shader_interlock(const 
_mesa_glsl_parse_state *state)
return state->ARB_fragment_shader_interlock_enable;
 }
 
+static bool
+supports_nv_fragment_shader_interlock(const _mesa_glsl_parse_state *state)
+{
+   return state->NV_fragment_shader_interlock_enable;
+}
+
 static bool
 shader_clock(const _mesa_glsl_parse_state *state)
 {
@@ -3331,6 +3337,18 @@ builtin_builder::create_builtins()
supports_arb_fragment_shader_interlock),
 NULL);
 
+   add_function("beginInvocationInterlockNV",
+_invocation_interlock(
+   "__intrinsic_begin_invocation_interlock",
+   supports_nv_fragment_shader_interlock),
+NULL);
+
+   add_function("endInvocationInterlockNV",
+_invocation_interlock(
+   "__intrinsic_end_invocation_interlock",
+   supports_nv_fragment_shader_interlock),
+NULL);
+
add_function("anyInvocationARB",
 _vote("__intrinsic_vote_any", vote),
 NULL);
diff --git a/src/compiler/glsl/glsl_parser.yy b/src/compiler/glsl/glsl_parser.yy
index cb7376995d..bc2571b684 100644
--- a/src/compiler/glsl/glsl_parser.yy
+++ b/src/compiler/glsl/glsl_parser.yy
@@ -1450,10 +1450,12 @@ layout_qualifier_id:
   "only valid in fragment shader input layout 
declaration.");
   } else if (pixel_interlock_ordered + pixel_interlock_unordered +
  sample_interlock_ordered + sample_interlock_unordered > 0 &&
- !state->ARB_fragment_shader_interlock_enable) {
+ !state->ARB_fragment_shader_interlock_enable &&
+ !state->NV_fragment_shader_interlock_enable) {
  _mesa_glsl_error(& @1, state,
   "interlock layout qualifier present, but the "
-  "GL_ARB_fragment_shader_interlock extension is not "
+  "GL_ARB_fragment_shader_interlock or "
+  "GL_NV_fragment_shader_interlock extension is not "
   "enabled.");
   } else {
  $$.flags.q.pixel_interlock_ordered = pixel_interlock_ordered;
diff --git a/src/compiler/glsl/glsl_parser_extras.cpp 
b/src/compiler/glsl/glsl_parser_extras.cpp
index 6d92f24ea2..393942b62c 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -724,6 +724,7 @@ static const _mesa_glsl_extension 
_mesa_glsl_supported_extensions[] = {
EXT_AEP(EXT_texture_cube_map_array),
EXT(INTEL_conservative_rasterization),
EXT(MESA_shader_integer_functions),
+   EXT(NV_fragment_shader_interlock),
EXT(NV_image_formats),
 };
 
diff --git a/src/compiler/glsl/glsl_parser_extras.h 
b/src/compiler/glsl/glsl_parser_extras.h
index 59a173418b..3b17b54f0a 100644
--- a/src/compiler/glsl/glsl_parser_extras.h
+++ b/src/compiler/glsl/glsl_parser_extras.h
@@ -810,6 +810,8 @@ struct _mesa_glsl_parse_state {
bool INTEL_conservative_rasterization_warn;
bool MESA_shader_integer_functions_enable;
bool MESA_shader_integer_functions_warn;
+   bool NV_fragment_shader_interlock_enable;
+   bool NV_fragment_shader_interlock_warn;
bool NV_image_formats_enable;
bool NV_image_formats_warn;
/*@}*/
diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index af5ce118da..746e821886 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -346,6 +346,7 @@ EXT(NV_draw_buffers , dummy_true
 EXT(NV_fbo_color_attachments, dummy_true   
  ,  x ,  x ,  x , ES2, 2010)
 EXT(NV_fill_rectangle   , NV_fill_rectangle
  , GLL, GLC,  x ,  x , 2015)
 EXT(NV_fog_distance , NV_fog_distance  
  , GLL,  x ,  x ,  x , 2001)
+EXT(NV_fragment_shader_interlock, ARB_fragment_shader_interlock
  , GLL, GLC,  x ,  31, 2015)
 EXT(NV_image_

[Mesa-dev] [PATCH v4 2/3] i965: prevent using auxilary buffers when an astc5x5 texture is present

2018-03-08 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

If ASTC5x5 textures are present, resolve all textures that the sampler
accesses so that auxilary buffer is unneeded when the astc5x5 workaround
is needed and also program the sampler state to not use the auxilary
buffer as well.

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_compute.c  |  4 +-
 src/mesa/drivers/dri/i965/brw_context.h  |  5 +-
 src/mesa/drivers/dri/i965/brw_draw.c | 73 +---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 15 -
 4 files changed, 83 insertions(+), 14 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_compute.c 
b/src/mesa/drivers/dri/i965/brw_compute.c
index 5ce899bcbc..131ff77348 100644
--- a/src/mesa/drivers/dri/i965/brw_compute.c
+++ b/src/mesa/drivers/dri/i965/brw_compute.c
@@ -177,7 +177,9 @@ brw_dispatch_compute_common(struct gl_context *ctx)
 
brw_validate_textures(brw);
 
-   brw_predraw_resolve_inputs(brw, false, NULL);
+   enum brw_astc5x5_wa_mode_t astc5x5_wa_mode =
+  brw_predraw_resolve_inputs(brw, false, NULL);
+   gen9_set_astc5x5_wa_mode(brw, astc5x5_wa_mode);
 
/* Flush the batch if the batch/state buffers are nearly full.  We can
 * grow them if needed, but this is not free, so we'd like to avoid it.
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 656d13277c..0ccf808d95 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1334,8 +1334,9 @@ void intel_update_renderbuffers(__DRIcontext *context,
 __DRIdrawable *drawable);
 void intel_prepare_render(struct brw_context *brw);
 
-void brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering,
-bool *draw_aux_buffer_disabled);
+enum brw_astc5x5_wa_mode_t
+brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering,
+   bool *draw_aux_buffer_disabled);
 
 void intel_resolve_for_dri2_flush(struct brw_context *brw,
   __DRIdrawable *drawable);
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index 299e7f929e..8d69d311eb 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -392,8 +392,10 @@ mark_textures_used_for_txf(BITSET_WORD *used_for_txf,
  *
  * Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each
  * enabled depth texture, and flush the render cache for any dirty textures.
+ * In addition, if the ASTC5x5 workaround is needed and if ASTC5x5 textures
+ * are present, resolve textures so that auxilary buffers are not needed.
  */
-void
+enum brw_astc5x5_wa_mode_t
 brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering,
bool *draw_aux_buffer_disabled)
 {
@@ -412,8 +414,33 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool 
rendering,
   mark_textures_used_for_txf(used_for_txf, ctx->ComputeProgram._Current);
}
 
-   /* Resolve depth buffer and render cache of each enabled texture. */
int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
+   bool disable_aux = false;
+   bool texture_astc5x5_present = false;
+   bool texture_with_auxilary_present = false;
+
+   if (gen9_astc5x5_wa_required(brw)) {
+  /* walk through all the texture units to see if an ASTC5x5 and/or
+   * a texture with an auxilary buffer is to be accessed.
+   */
+  for (int i = 0; i <= maxEnabledUnit; i++) {
+ if (!ctx->Texture.Unit[i]._Current)
+continue;
+ tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
+ if (!tex_obj)
+continue;
+ if (tex_obj->mt && tex_obj->mt->aux_usage != ISL_AUX_USAGE_NONE) {
+texture_with_auxilary_present = true;
+ }
+ if (tex_obj->_Format == MESA_FORMAT_RGBA_ASTC_5x5 ||
+ tex_obj->_Format == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5) {
+texture_astc5x5_present = true;
+ }
+  }
+  disable_aux = texture_astc5x5_present;
+   }
+
+   /* Resolve depth buffer and render cache of each enabled texture. */
for (int i = 0; i <= maxEnabledUnit; i++) {
   if (!ctx->Texture.Unit[i]._Current)
 continue;
@@ -445,9 +472,16 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool 
rendering,
  "for sampling");
   }
 
-  intel_miptree_prepare_texture(brw, tex_obj->mt, view_format,
-min_level, num_levels,
-min_layer, num_layers);
+  if (!disable_aux) {
+ intel_miptree_prepare_texture(brw, tex_obj->mt, view_format,
+   min_level, num_levels,
+

[Mesa-dev] [PATCH v4 0/3] i965: ASTC5x5 workaround

2018-03-08 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

This patch series implements a needed workaround for Gen9 for ASTC5x5
sampler reads. The crux of the work around is to make sure that the
sampler does not read an ASTC5x5 texture and a surface with an auxilary
buffer without having a texture cache invalidate and command streamer
stall between such accesses.

With this patch series applied to the (current) master branch of mesa,
carchase works on my SKL GT4.

v4:
  Reduce additions to brw_context to a single enum.
  (requested by Jason Ekstrand)

  Rather than having a field in brw_context stating
  if the workaround is needed, have a function.
  (requested by Jason Ekstrand)

  Do not restore disable_aux argument to
  intel_miptree_prepare_texture().
  (requested by Jason Ekstrand)

  Place all resolve logic for astc5x5 workaround
  in brw_predraw_resolve_inputs().
  (requested by Jason Ekstrand)

  Changes to behavior of work around internals to that
  mode must be NONE if the workaround is not needed.

  Some patch squashing for changes in how resolve logic
  for astc5x5 workaround is implemented.

  Have blorp detect internally if astc5x5 or auxilary
  buffers are read from src.
  (requested by Jason Ekstrand)

v3:
  Rebase against current master; this required an additional
  patch to restore the disable_aux argument to
  intel_miptree_prepare_texture().

  Minor polishing of patches in mid-series for cleanness. 

v2:
  Rename workaround functions from brw_ to gen9_
  (suggested/requested by Topi Pohjolainen).

  Place texture resolve to avoid using auxilary surface
  when ASTC5x5 is detected in brw_predraw_resolve_inputs()
  instead of another dedicated function; doing so allows
  one to avoid walking the textures again.
  (suggested/requested by Topi Pohjolainen).

  Emit command streamer stall in addition to texture
  invalidate.
  (original short-coming caught by Jason Ekstrand)

  Place workaround function in (new) dedicated file.

  Minor patch re-ordering to accomodate changes.


Kevin Rogovin (3):
  i965: define astx5x5 workaround infrastructure
  i965: prevent using auxilary buffers when an astc5x5 texture is
present
  i965: ASTC5x5 workaround logic for blorp

 src/intel/blorp/blorp.c  | 16 ++
 src/intel/blorp/blorp.h  |  6 ++
 src/mesa/drivers/dri/i965/Makefile.sources   |  1 +
 src/mesa/drivers/dri/i965/brw_compute.c  |  4 +-
 src/mesa/drivers/dri/i965/brw_context.c  |  2 +
 src/mesa/drivers/dri/i965/brw_context.h  | 30 +-
 src/mesa/drivers/dri/i965/brw_draw.c | 73 +---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 15 -
 src/mesa/drivers/dri/i965/gen9_astc5x5_wa.c  | 24 
 src/mesa/drivers/dri/i965/genX_blorp_exec.c  |  9 +++
 src/mesa/drivers/dri/i965/meson.build|  1 +
 11 files changed, 167 insertions(+), 14 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/gen9_astc5x5_wa.c

-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4 3/3] i965: ASTC5x5 workaround logic for blorp

2018-03-08 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/blorp/blorp.c | 16 
 src/intel/blorp/blorp.h |  6 ++
 src/mesa/drivers/dri/i965/genX_blorp_exec.c |  9 +
 3 files changed, 31 insertions(+)

diff --git a/src/intel/blorp/blorp.c b/src/intel/blorp/blorp.c
index e348cafb2e..1bd51a4f06 100644
--- a/src/intel/blorp/blorp.c
+++ b/src/intel/blorp/blorp.c
@@ -357,3 +357,19 @@ blorp_hiz_op(struct blorp_batch *batch, struct blorp_surf 
*surf,
   batch->blorp->exec(batch, );
}
 }
+
+bool
+blorp_params_src_has_astc5x5(const struct blorp_params *params)
+{
+   return params->src.enabled &&
+  (params->src.surf.format == ISL_FORMAT_ASTC_LDR_2D_5X5_U8SRGB ||
+   params->src.surf.format == ISL_FORMAT_ASTC_LDR_2D_5X5_FLT16 ||
+   params->src.surf.format == ISL_FORMAT_ASTC_HDR_2D_5X5_FLT16);
+}
+
+bool
+blorp_params_src_has_aux(const struct blorp_params *params)
+{
+   return params->src.enabled &&
+  params->src.aux_usage != ISL_AUX_USAGE_NONE;
+}
diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
index 4626f2f83c..fe602c6ccc 100644
--- a/src/intel/blorp/blorp.h
+++ b/src/intel/blorp/blorp.h
@@ -223,6 +223,12 @@ blorp_hiz_op(struct blorp_batch *batch, struct blorp_surf 
*surf,
  uint32_t level, uint32_t start_layer, uint32_t num_layers,
  enum isl_aux_op op);
 
+bool
+blorp_params_src_has_astc5x5(const struct blorp_params *params);
+
+bool
+blorp_params_src_has_aux(const struct blorp_params *params);
+
 #ifdef __cplusplus
 } /* end extern "C" */
 #endif /* __cplusplus */
diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c 
b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
index 062171af60..74fda5c810 100644
--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
+++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
@@ -230,6 +230,15 @@ genX(blorp_exec)(struct blorp_batch *batch,
struct gl_context *ctx = >ctx;
bool check_aperture_failed_once = false;
 
+   if (gen9_astc5x5_wa_required(brw)) {
+  if (blorp_params_src_has_astc5x5(params)) {
+ assert(!blorp_params_src_has_aux(params));
+ gen9_set_astc5x5_wa_mode(brw, BRW_ASTC5x5_WA_MODE_HAS_ASTC5x5);
+  } else if (blorp_params_src_has_aux(params)) {
+ gen9_set_astc5x5_wa_mode(brw, BRW_ASTC5x5_WA_MODE_HAS_AUX);
+  }
+   }
+
/* Flush the sampler and render caches.  We definitely need to flush the
 * sampler cache so that we get updated contents from the render cache for
 * the glBlitFramebuffer() source.  Also, we are sometimes warned in the
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4 1/3] i965: define astx5x5 workaround infrastructure

2018-03-08 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Gen9 GPU's suffer from a HW bug where the GPU will hang if
the GPU accesses a texture with a an auxilary buffer and
an ASTC5x5 texture without having a pipeline cs stall (and
texture cache flush) between such accesses. This patch
creates the infrastucture to track such potential texture
accesses and to issue the required pipeline stalls.

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/Makefile.sources  |  1 +
 src/mesa/drivers/dri/i965/brw_context.c |  2 ++
 src/mesa/drivers/dri/i965/brw_context.h | 25 +
 src/mesa/drivers/dri/i965/gen9_astc5x5_wa.c | 24 
 src/mesa/drivers/dri/i965/meson.build   |  1 +
 5 files changed, 53 insertions(+)
 create mode 100644 src/mesa/drivers/dri/i965/gen9_astc5x5_wa.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index 2f349aa07a..20f7dee8f1 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -78,6 +78,7 @@ i965_FILES = \
gen7_urb.c \
gen8_depth_state.c \
gen8_multisample_state.c \
+gen9_astc5x5_wa.c \
hsw_queryobj.c \
hsw_sol.c \
intel_batchbuffer.c \
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index ea1c78d1fe..40679d6728 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -1058,6 +1058,8 @@ brwCreateContext(gl_api api,
if (ctx->Extensions.INTEL_performance_query)
   brw_init_performance_queries(brw);
 
+   brw->astc5x5_wa_mode = BRW_ASTC5x5_WA_MODE_NONE;
+
vbo_use_buffer_objects(ctx);
vbo_always_unmap_buffers(ctx);
 
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 050b656e3d..656d13277c 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -166,6 +166,12 @@ enum brw_cache_id {
BRW_MAX_CACHE
 };
 
+enum brw_astc5x5_wa_mode_t {
+   BRW_ASTC5x5_WA_MODE_NONE,
+   BRW_ASTC5x5_WA_MODE_HAS_ASTC5x5,
+   BRW_ASTC5x5_WA_MODE_HAS_AUX,
+};
+
 enum brw_state_id {
/* brw_cache_ids must come first - see brw_program_cache.c */
BRW_STATE_URB_FENCE = BRW_MAX_CACHE,
@@ -1296,6 +1302,14 @@ struct brw_context
 */
enum isl_aux_usage draw_aux_usage[MAX_DRAW_BUFFERS];
 
+   /* Certain GEN's have a hardware bug where the sampler hangs if it attempts
+* to access auxilary buffers and an ASTC5x5 compressed buffer. The
+* workaround is to make sure that the texture cache is cleared between
+* such accesses and that such accesses have a command streamer stall
+* between them.
+*/
+   enum brw_astc5x5_wa_mode_t astc5x5_wa_mode;
+
__DRIcontext *driContext;
struct intel_screen *screen;
 };
@@ -1729,6 +1743,17 @@ void brw_query_internal_format(struct gl_context *ctx, 
GLenum target,
GLenum internalFormat, GLenum pname,
GLint *params);
 
+
+static inline
+bool gen9_astc5x5_wa_required(struct brw_context *brw)
+{
+   return brw->screen->devinfo.gen == 9;
+}
+
+/* gen9_astc5x5_wa.c */
+void gen9_set_astc5x5_wa_mode(struct brw_context *brw,
+ enum brw_astc5x5_wa_mode_t mode);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/mesa/drivers/dri/i965/gen9_astc5x5_wa.c 
b/src/mesa/drivers/dri/i965/gen9_astc5x5_wa.c
new file mode 100644
index 00..b060d43bd4
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen9_astc5x5_wa.c
@@ -0,0 +1,24 @@
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "intel_mipmap_tree.h"
+
+void
+gen9_set_astc5x5_wa_mode(struct brw_context *brw,
+ enum brw_astc5x5_wa_mode_t mode)
+{
+   assert(mode == BRW_ASTC5x5_WA_MODE_NONE ||
+  gen9_astc5x5_wa_required(brw));
+
+   if (mode == BRW_ASTC5x5_WA_MODE_NONE ||
+   brw->astc5x5_wa_mode == mode) {
+  return;
+   }
+
+   if (brw->astc5x5_wa_mode != BRW_ASTC5x5_WA_MODE_NONE) {
+  const uint32_t flags = PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+  brw_emit_pipe_control_flush(brw, flags);
+   }
+
+   brw->astc5x5_wa_mode = mode;
+}
diff --git a/src/mesa/drivers/dri/i965/meson.build 
b/src/mesa/drivers/dri/i965/meson.build
index e6866147d9..55a5f15c1e 100644
--- a/src/mesa/drivers/dri/i965/meson.build
+++ b/src/mesa/drivers/dri/i965/meson.build
@@ -97,6 +97,7 @@ files_i965 = files(
   'gen7_urb.c',
   'gen8_depth_state.c',
   'gen8_multisample_state.c',
+  'gen9_astc5x5_wa.c',
   'hsw_queryobj.c',
   'hsw_sol.c',
   'intel_batchbuffer.c',
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 6/7] i965: use ASTC5x5 workaround in brw_dispatch_compute_common()

2018-02-27 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_compute.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_compute.c 
b/src/mesa/drivers/dri/i965/brw_compute.c
index 5ce899b..ec96687 100644
--- a/src/mesa/drivers/dri/i965/brw_compute.c
+++ b/src/mesa/drivers/dri/i965/brw_compute.c
@@ -179,6 +179,12 @@ brw_dispatch_compute_common(struct gl_context *ctx)
 
brw_predraw_resolve_inputs(brw, false, NULL);
 
+   /* if necessary, perform astc5x5 workarounds to make sure sampling
+* from astc5x5 and textures with an auxilary surface have a command
+* streamer stall and texture invalidate between them.
+*/
+   gen9_astc5x5_perform_wa(brw);
+
/* Flush the batch if the batch/state buffers are nearly full.  We can
 * grow them if needed, but this is not free, so we'd like to avoid it.
 */
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 3/7] i965: set ASTC5x5 workaround texture type tracking on texture validate

2018-02-27 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/intel_tex_validate.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_tex_validate.c 
b/src/mesa/drivers/dri/i965/intel_tex_validate.c
index eaa60ba..2bf6c65 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_validate.c
@@ -179,6 +179,8 @@ brw_validate_textures(struct brw_context *brw)
struct gl_context *ctx = >ctx;
const int max_enabled_unit = ctx->Texture._MaxEnabledTexImageUnit;
 
+   brw->astc5x5_wa.texture_astc5x5_present = false;
+   brw->astc5x5_wa.texture_with_auxilary_present = false;
for (int unit = 0; unit <= max_enabled_unit; unit++) {
   struct gl_texture_object *tex_obj = ctx->Texture.Unit[unit]._Current;
 
@@ -194,5 +196,18 @@ brw_validate_textures(struct brw_context *brw)
 
   intel_update_max_level(tex_obj, sampler);
   intel_finalize_mipmap_tree(brw, tex_obj);
+
+  /* ASTC5x5 workaround needs to know if textures in use have
+   * auxilary in buffers and/or a texture in use is ASTC5x5
+   */
+  struct intel_texture_object *tex = intel_texture_object(tex_obj);
+  struct intel_mipmap_tree *mt = tex->mt;
+  if (mt && mt->aux_usage != ISL_AUX_USAGE_NONE) {
+ brw->astc5x5_wa.texture_with_auxilary_present = true;
+  }
+  if (tex->_Format == MESA_FORMAT_RGBA_ASTC_5x5 ||
+  tex->_Format == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5) {
+ brw->astc5x5_wa.texture_astc5x5_present = true;
+  }
}
 }
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 2/7] i965: restore diable_aux argument to intel_miptree_prepare_texture()

2018-02-27 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_draw.c  | 9 ++---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 5 +++--
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 3 ++-
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index 299e7f9..0241035 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -447,7 +447,8 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool 
rendering,
 
   intel_miptree_prepare_texture(brw, tex_obj->mt, view_format,
 min_level, num_levels,
-min_layer, num_layers);
+min_layer, num_layers,
+false);
 
   /* If any programs are using it with texelFetch, we may need to also do
* a prepare with an sRGB format to ensure texelFetch works "properly".
@@ -458,7 +459,8 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool 
rendering,
  if (txf_format != view_format) {
 intel_miptree_prepare_texture(brw, tex_obj->mt, txf_format,
   min_level, num_levels,
-  min_layer, num_layers);
+  min_layer, num_layers,
+  false);
  }
   }
 
@@ -530,7 +532,8 @@ brw_predraw_resolve_framebuffer(struct brw_context *brw,
  if (irb) {
 intel_miptree_prepare_texture(brw, irb->mt, irb->mt->surf.format,
   irb->mt_level, 1,
-  irb->mt_layer, irb->layer_count);
+  irb->mt_layer, irb->layer_count,
+  false);
  }
   }
}
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index c6213b2..dbd9f7a 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -2648,9 +2648,10 @@ intel_miptree_prepare_texture(struct brw_context *brw,
   struct intel_mipmap_tree *mt,
   enum isl_format view_format,
   uint32_t start_level, uint32_t num_levels,
-  uint32_t start_layer, uint32_t num_layers)
+  uint32_t start_layer, uint32_t num_layers,
+  bool disable_aux)
 {
-   enum isl_aux_usage aux_usage =
+   enum isl_aux_usage aux_usage = (disable_aux) ? ISL_AUX_USAGE_NONE :
   intel_miptree_texture_aux_usage(brw, mt, view_format);
bool clear_supported = aux_usage != ISL_AUX_USAGE_NONE;
 
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index 07c8580..ee72309 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -642,7 +642,8 @@ intel_miptree_prepare_texture(struct brw_context *brw,
   struct intel_mipmap_tree *mt,
   enum isl_format view_format,
   uint32_t start_level, uint32_t num_levels,
-  uint32_t start_layer, uint32_t num_layers);
+  uint32_t start_layer, uint32_t num_layers,
+  bool disable_aux);
 void
 intel_miptree_prepare_image(struct brw_context *brw,
 struct intel_mipmap_tree *mt);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 0/7] i965: ASTC5x5 workaround

2018-02-27 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

This patch series implements a needed workaround for Gen9 for ASTC5x5
sampler reads. The crux of the work around is to make sure that the
sampler does not read an ASTC5x5 texture and a surface with an auxilary
buffer without having a texture cache invalidate and command streamer
stall between such accesses.

With this patch series applied to the (current) master branch of mesa,
carchase works on my SKL GT4.

v3:
  Rebase against current master; this required an additional
  patch to restore the disable_aux argument to
  intel_miptree_prepare_texture().

  Minor polishing of patches in mid-series for cleanness. 

v2:
  Rename workaround functions from brw_ to gen9_
  (suggested/requested by Topi Pohjolainen).

  Place texture resolve to avoid using auxilary surface
  when ASTC5x5 is detected in brw_predraw_resolve_inputs()
  instead of another dedicated function; doing so allows
  one to avoid walking the textures again.
  (suggested/requested by Topi Pohjolainen).

  Emit command streamer stall in addition to texture
  invalidate.
  (original short-coming caught by Jason Ekstrand)

  Place workaround function in (new) dedicated file.

  Minor patch re-ordering to accomodate changes.


Kevin Rogovin (7):
  i965: define astx5x5 workaround infrastructure
  i965: restore diable_aux argument to intel_miptree_prepare_texture()
  i965: set ASTC5x5 workaround texture type tracking on texture validate
  i965: prevent using auxilary buffers when an astc5x5 texture is
present
  i965: use ASTC5x5 workaround in brw_prepare_drawing()
  i965: use ASTC5x5 workaround in brw_dispatch_compute_common()
  i965: ASTC5x5 workaround logic for blorp

 src/mesa/drivers/dri/i965/Makefile.sources   |  1 +
 src/mesa/drivers/dri/i965/brw_compute.c  |  6 
 src/mesa/drivers/dri/i965/brw_context.c  |  6 
 src/mesa/drivers/dri/i965/brw_context.h  | 24 
 src/mesa/drivers/dri/i965/brw_draw.c | 31 ++--
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  7 +++--
 src/mesa/drivers/dri/i965/gen9_astc5x5_wa.c  | 36 
 src/mesa/drivers/dri/i965/genX_blorp_exec.c  |  5 
 src/mesa/drivers/dri/i965/intel_batchbuffer.c|  1 +
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c|  5 ++--
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h|  3 +-
 src/mesa/drivers/dri/i965/intel_tex_image.c  | 16 ---
 src/mesa/drivers/dri/i965/intel_tex_validate.c   | 15 ++
 src/mesa/drivers/dri/i965/meson.build|  1 +
 14 files changed, 145 insertions(+), 12 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/gen9_astc5x5_wa.c

-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 1/7] i965: define astx5x5 workaround infrastructure

2018-02-27 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Gen9 GPU's suffer from a HW bug where the GPU will hang if
the GPU accesses a texture with a an auxilary buffer and
an ASTC5x5 texture without having a pipeline cs stall (and
texture cache flush) between such accesses. This patch
creates the infrastucture to track such potential texture
accesses and to issue the required pipeline stalls.

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/Makefile.sources|  1 +
 src/mesa/drivers/dri/i965/brw_context.c   |  6 +
 src/mesa/drivers/dri/i965/brw_context.h   | 24 ++
 src/mesa/drivers/dri/i965/gen9_astc5x5_wa.c   | 36 +++
 src/mesa/drivers/dri/i965/intel_batchbuffer.c |  1 +
 src/mesa/drivers/dri/i965/meson.build |  1 +
 6 files changed, 69 insertions(+)
 create mode 100644 src/mesa/drivers/dri/i965/gen9_astc5x5_wa.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index 2f349aa..20f7dee 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -78,6 +78,7 @@ i965_FILES = \
gen7_urb.c \
gen8_depth_state.c \
gen8_multisample_state.c \
+gen9_astc5x5_wa.c \
hsw_queryobj.c \
hsw_sol.c \
intel_batchbuffer.c \
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index ea1c78d..a6e95b2 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -1058,6 +1058,12 @@ brwCreateContext(gl_api api,
if (ctx->Extensions.INTEL_performance_query)
   brw_init_performance_queries(brw);
 
+   brw->astc5x5_wa.required = (devinfo->gen == 9);
+   brw->astc5x5_wa.mode = BRW_ASTC5x5_WA_MODE_NONE;
+   brw->astc5x5_wa.texture_astc5x5_present = false;
+   brw->astc5x5_wa.texture_with_auxilary_present = false;
+   brw->astc5x5_wa.blorp_sampling_from_astc5x5 = false;
+
vbo_use_buffer_objects(ctx);
vbo_always_unmap_buffers(ctx);
 
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 050b656..5fe0b29 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -166,6 +166,12 @@ enum brw_cache_id {
BRW_MAX_CACHE
 };
 
+enum brw_astc5x5_wa_mode_t {
+   BRW_ASTC5x5_WA_MODE_NONE,
+   BRW_ASTC5x5_WA_MODE_HAS_ASTC5x5,
+   BRW_ASTC5x5_WA_MODE_HAS_AUX,
+};
+
 enum brw_state_id {
/* brw_cache_ids must come first - see brw_program_cache.c */
BRW_STATE_URB_FENCE = BRW_MAX_CACHE,
@@ -1296,6 +1302,19 @@ struct brw_context
 */
enum isl_aux_usage draw_aux_usage[MAX_DRAW_BUFFERS];
 
+   /* Certain GEN's have a hardware bug where the sampler hangs if it attempts
+* to access auxilary buffers and an ASTC5x5 compressed buffer. The 
workaround
+* is to make sure that the texture cache is cleared between such accesses
+* and that such accesses have a command streamer stall between them.
+*/
+   struct {
+  bool required;
+  enum brw_astc5x5_wa_mode_t mode;
+  bool texture_astc5x5_present;
+  bool texture_with_auxilary_present;
+  bool blorp_sampling_from_astc5x5;
+   } astc5x5_wa;
+
__DRIcontext *driContext;
struct intel_screen *screen;
 };
@@ -1729,6 +1748,11 @@ void brw_query_internal_format(struct gl_context *ctx, 
GLenum target,
GLenum internalFormat, GLenum pname,
GLint *params);
 
+/* gen9_astc5x5_wa.c */
+void gen9_set_astc5x5_wa_mode(struct brw_context *brw,
+ enum brw_astc5x5_wa_mode_t mode);
+void gen9_astc5x5_perform_wa(struct brw_context *brw);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/mesa/drivers/dri/i965/gen9_astc5x5_wa.c 
b/src/mesa/drivers/dri/i965/gen9_astc5x5_wa.c
new file mode 100644
index 000..d3efd05
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen9_astc5x5_wa.c
@@ -0,0 +1,36 @@
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "intel_mipmap_tree.h"
+
+void
+gen9_set_astc5x5_wa_mode(struct brw_context *brw,
+ enum brw_astc5x5_wa_mode_t mode)
+{
+   if (!brw->astc5x5_wa.required ||
+   mode == BRW_ASTC5x5_WA_MODE_NONE ||
+   brw->astc5x5_wa.mode == mode) {
+  return;
+   }
+
+   if (brw->astc5x5_wa.mode != BRW_ASTC5x5_WA_MODE_NONE) {
+  const uint32_t flags = PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+  brw_emit_pipe_control_flush(brw, flags);
+   }
+
+   brw->astc5x5_wa.mode = mode;
+}
+
+void
+gen9_astc5x5_perform_wa(struct brw_context *brw)
+{
+   if (!brw->astc5x5_wa.required) {
+  return;
+   }
+
+   if (brw->astc5x5_wa.texture_astc5x5_present) {
+  gen9_set_astc5x5_wa_mode(brw, BRW_ASTC5x5_WA_MODE_HAS_ASTC5x5);
+   } else if (brw->astc5x5_wa.texture_with_

[Mesa-dev] [PATCH v3 5/7] i965: use ASTC5x5 workaround in brw_prepare_drawing()

2018-02-27 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_draw.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index 34f4de3..f891e91 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -759,6 +759,12 @@ brw_prepare_drawing(struct gl_context *ctx,
brw_predraw_resolve_inputs(brw, true, draw_aux_buffer_disabled);
brw_predraw_resolve_framebuffer(brw, draw_aux_buffer_disabled);
 
+   /* if necessary, perform astc5x5 workarounds to make sure sampling
+* from astc5x5 and textures with an auxilary surface have a command
+* streamer stall and texture invalidate between them.
+*/
+   gen9_astc5x5_perform_wa(brw);
+
/* Bind all inputs, derive varying and size information:
 */
brw_merge_inputs(brw, arrays);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 4/7] i965: prevent using auxilary buffers when an astc5x5 texture is present

2018-02-27 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_draw.c | 20 ++--
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  7 +--
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index 0241035..34f4de3 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -392,6 +392,8 @@ mark_textures_used_for_txf(BITSET_WORD *used_for_txf,
  *
  * Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each
  * enabled depth texture, and flush the render cache for any dirty textures.
+ * In addition, if the ASTC5x5 workaround is needed and if ASTC5x5 textures
+ * are present, resolve textures so that auxilary buffers are not needed.
  */
 void
 brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering,
@@ -445,10 +447,14 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool 
rendering,
  "for sampling");
   }
 
+  const bool astc_disables_aux = (brw->astc5x5_wa.required &&
+ brw->astc5x5_wa.texture_astc5x5_present &&
+ tex_obj->mt->aux_usage != ISL_AUX_USAGE_NONE);
+
   intel_miptree_prepare_texture(brw, tex_obj->mt, view_format,
 min_level, num_levels,
 min_layer, num_layers,
-false);
+astc_disables_aux);
 
   /* If any programs are using it with texelFetch, we may need to also do
* a prepare with an sRGB format to ensure texelFetch works "properly".
@@ -460,7 +466,7 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool 
rendering,
 intel_miptree_prepare_texture(brw, tex_obj->mt, txf_format,
   min_level, num_levels,
   min_layer, num_layers,
-  false);
+  astc_disables_aux);
  }
   }
 
@@ -496,6 +502,16 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool 
rendering,
  }
   }
}
+
+   /* if an ASTC5x5 is present when we need the workaround, then
+* the above has made sure that the textures have been resolved
+* so that they do not need an auxilary buffer, so we clear
+* texture_with_auxilary_present
+*/
+   if (brw->astc5x5_wa.required &&
+   brw->astc5x5_wa.texture_astc5x5_present) {
+  brw->astc5x5_wa.texture_with_auxilary_present = false;
+   }
 }
 
 static void
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 23bf5a2..505e2bf 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -441,7 +441,8 @@ swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
 }
 
-static void brw_update_texture_surface(struct gl_context *ctx,
+static void
+brw_update_texture_surface(struct gl_context *ctx,
unsigned unit,
uint32_t *surf_offset,
bool for_gather,
@@ -581,7 +582,9 @@ static void brw_update_texture_surface(struct gl_context 
*ctx,
   obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
  view.usage |= ISL_SURF_USAGE_CUBE_BIT;
 
-  enum isl_aux_usage aux_usage =
+  bool disable_aux = brw->astc5x5_wa.required &&
+ brw->astc5x5_wa.texture_astc5x5_present;
+  enum isl_aux_usage aux_usage = (disable_aux) ? ISL_AUX_USAGE_NONE :
  intel_miptree_texture_aux_usage(brw, mt, format);
 
   brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 7/7] i965: ASTC5x5 workaround logic for blorp

2018-02-27 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/genX_blorp_exec.c |  5 +
 src/mesa/drivers/dri/i965/intel_tex_image.c | 16 
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c 
b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
index 062171a..b7c35e9 100644
--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
+++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
@@ -230,6 +230,11 @@ genX(blorp_exec)(struct blorp_batch *batch,
struct gl_context *ctx = >ctx;
bool check_aperture_failed_once = false;
 
+   if (brw->astc5x5_wa.blorp_sampling_from_astc5x5) {
+  gen9_set_astc5x5_wa_mode(brw, BRW_ASTC5x5_WA_MODE_HAS_ASTC5x5);
+   } else {
+  gen9_set_astc5x5_wa_mode(brw, BRW_ASTC5x5_WA_MODE_HAS_AUX);
+   }
/* Flush the sampler and render caches.  We definitely need to flush the
 * sampler cache so that we get updated contents from the render cache for
 * the glBlitFramebuffer() source.  Also, we are sometimes warned in the
diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c 
b/src/mesa/drivers/dri/i965/intel_tex_image.c
index e25bc9a..38967fb 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -824,10 +824,18 @@ intel_get_tex_sub_image(struct gl_context *ctx,
DBG("%s\n", __func__);
 
if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
-  if (intel_gettexsubimage_blorp(brw, texImage,
- xoffset, yoffset, zoffset,
- width, height, depth, format, type,
- pixels, >Pack))
+  bool blorp_success;
+
+  brw->astc5x5_wa.blorp_sampling_from_astc5x5 =
+ (texImage->TexFormat == MESA_FORMAT_RGBA_ASTC_5x5 ||
+  texImage->TexFormat == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5);
+  blorp_success = intel_gettexsubimage_blorp(brw, texImage,
+ xoffset, yoffset, zoffset,
+ width, height, depth,
+ format, type, pixels,
+ >Pack);
+  brw->astc5x5_wa.blorp_sampling_from_astc5x5 = false;
+  if (blorp_success)
  return;
 
   perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4 3/3] i965: if DEBUG_CHECK_OOB is up, check that noise padding for each bo used in batchbuffer is correct

2018-01-30 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 22 +-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 86d88a701f..749a384def 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -1023,11 +1023,31 @@ _intel_batchbuffer_flush_fence(struct brw_context *brw,
 
ret = submit_batch(brw, in_fence_fd, out_fence_fd);
 
-   if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
+   if (unlikely(INTEL_DEBUG & (DEBUG_SYNC | DEBUG_CHECK_OOB))) {
   fprintf(stderr, "waiting for idle\n");
   brw_bo_wait_rendering(brw->batch.batch.bo);
}
 
+   if (unlikely(INTEL_DEBUG & DEBUG_CHECK_OOB)) {
+  bool detected_out_of_bounds_write = false;
+
+  for (int i = 0; i < brw->batch.exec_count; i++) {
+ struct brw_bo *bo = brw->batch.exec_bos[i];
+
+ if (!brw_bo_padding_is_good(bo)) {
+detected_out_of_bounds_write = true;
+fprintf(stderr,
+"Detected buffer out-of-bounds write from brw_bo %p "
+"(GEM %u, label = \"%s\")\n",
+bo, bo->gem_handle, bo->name);
+ }
+  }
+
+  if (unlikely(detected_out_of_bounds_write)) {
+ abort();
+  }
+   }
+
/* Start a new batch buffer. */
brw_new_batch(brw);
 
-- 
2.16.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4 1/3] intel/common:add debug flag for adding and checking padding on BO's

2018-01-30 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/common/gen_debug.c | 1 +
 src/intel/common/gen_debug.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/intel/common/gen_debug.c b/src/intel/common/gen_debug.c
index a978f2f581..27ddbd0d6b 100644
--- a/src/intel/common/gen_debug.c
+++ b/src/intel/common/gen_debug.c
@@ -85,6 +85,7 @@ static const struct debug_control debug_control[] = {
{ "nohiz",   DEBUG_NO_HIZ },
{ "color",   DEBUG_COLOR },
{ "reemit",  DEBUG_REEMIT },
+   { "check_oob",   DEBUG_CHECK_OOB },
{ NULL,0 }
 };
 
diff --git a/src/intel/common/gen_debug.h b/src/intel/common/gen_debug.h
index da5b5a569d..6c9915fdfb 100644
--- a/src/intel/common/gen_debug.h
+++ b/src/intel/common/gen_debug.h
@@ -83,6 +83,7 @@ extern uint64_t INTEL_DEBUG;
 #define DEBUG_NO_HIZ  (1ull << 39)
 #define DEBUG_COLOR   (1ull << 40)
 #define DEBUG_REEMIT  (1ull << 41)
+#define DEBUG_CHECK_OOB   (1ull << 42)
 
 #ifdef HAVE_ANDROID_PLATFORM
 #define LOG_TAG "INTEL-MESA"
-- 
2.16.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4 2/3] i965: add noise padding to buffer object and function to check if noise is correct

2018-01-30 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 101 -
 src/mesa/drivers/dri/i965/brw_bufmgr.h |  13 +
 2 files changed, 113 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index fb180289a0..5c609b2332 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -220,6 +220,27 @@ bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)
   >cache_bucket[index] : NULL;
 }
 
+/* Our goal is not to have noise good enough for cryto,
+ * but instead values that are unique-ish enough that
+ * it is incredibly unlikely that a buffer overwrite
+ * will produce the exact same values.
+ */
+static uint8_t
+next_noise_value(uint8_t prev_noise)
+{
+   uint32_t v = prev_noise;
+   return (v * 103u + 227u) & 0xFF;
+}
+
+static void
+fill_noise_buffer(uint8_t *dst, uint8_t start, uint32_t length)
+{
+   for(uint32_t i = 0; i < length; ++i) {
+  dst[i] = start;
+  start = next_noise_value(start);
+   }
+}
+
 int
 brw_bo_busy(struct brw_bo *bo)
 {
@@ -367,7 +388,18 @@ retry:
   bo->size = bo_size;
   bo->idle = true;
 
-  struct drm_i915_gem_create create = { .size = bo_size };
+  bo->padding_size = 0;
+  if (unlikely(INTEL_DEBUG & DEBUG_CHECK_OOB)) {
+ /* TODO: we want to make sure that the padding forces
+  * the BO to take another page on the (PP)GTT; 4KB
+  * may or may not be the page size for the BO. Indeed,
+  * depending on GPU, kernel version and GEM size, the
+  * page size can be one of 4KB, 64KB or 2M.
+  */
+ bo->padding_size = 4096;
+  }
+
+  struct drm_i915_gem_create create = { .size = bo_size + bo->padding_size 
};
 
   /* All new BOs we get from the kernel are zeroed, so we don't need to
* worry about that here.
@@ -378,6 +410,25 @@ retry:
  goto err;
   }
 
+  if (unlikely(bo->padding_size > 0)) {
+ uint8_t *noise_values;
+ struct drm_i915_gem_mmap mmap_arg = {
+.handle = create.handle,
+.offset = bo->size,
+.size = bo->padding_size,
+.flags = 0,
+ };
+
+ ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, _arg);
+ if (ret != 0)
+goto err_free;
+
+ noise_values = (uint8_t*) (uintptr_t) mmap_arg.addr_ptr;
+ fill_noise_buffer(noise_values, create.handle & 0xFF,
+   bo->padding_size);
+ drm_munmap(noise_values, bo->padding_size);
+  }
+
   bo->gem_handle = create.handle;
 
   bo->bufmgr = bufmgr;
@@ -424,6 +475,53 @@ err:
return NULL;
 }
 
+bool
+brw_bo_padding_is_good(struct brw_bo *bo)
+{
+   if (bo->padding_size > 0) {
+  struct drm_i915_gem_mmap mmap_arg = {
+ .handle = bo->gem_handle,
+ .offset = bo->size,
+ .size = bo->padding_size,
+ .flags = 0,
+  };
+  uint8_t *mapped;
+  int ret;
+  uint8_t expected_value;
+
+  /* We cannot use brw_bo_map() because it maps precisely the range
+   * [0, bo->size) and we wish to map the range of the padding which
+   * is [bo->size, bo->size + bo->pading_size)
+   */
+  ret = drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, _arg);
+  if (ret != 0) {
+ DBG("Unable to map mapping buffer %d (%s) buffer for pad checking.\n",
+ bo->gem_handle, bo->name);
+ return false;
+  }
+
+  mapped = (uint8_t*) (uintptr_t) mmap_arg.addr_ptr;
+  /* bah-humbug, we need to see the latest contents and
+   * if the bo is not cache coherent we likely need to
+   * invalidate the cache lines to get it.
+   */
+  if (!bo->cache_coherent && !bo->bufmgr->has_llc) {
+ gen_invalidate_range(mapped, bo->padding_size);
+  }
+
+  expected_value = bo->gem_handle & 0xFF;
+  for (uint32_t i = 0; i < bo->padding_size; ++i) {
+ if (expected_value != mapped[i]) {
+drm_munmap(mapped, bo->padding_size);
+return false;
+ }
+ expected_value = next_noise_value(expected_value);
+  }
+  drm_munmap(mapped, bo->padding_size);
+   }
+   return true;
+}
+
 struct brw_bo *
 brw_bo_alloc(struct brw_bufmgr *bufmgr,
  const char *name, uint64_t size, uint64_t alignment)
@@ -1157,6 +1255,7 @@ brw_bo_gem_create_from_prime_internal(struct brw_bufmgr 
*bufmgr, int prime_fd,
bo->name = "prime";
bo->reusable = false;
bo->external = true;
+   bo->padding_size = 0;
 
if (tiling_mode < 0) {
   struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle };
d

[Mesa-dev] [PATCH v4 0/3] GEM BO padding to find OOB buffer writes

2018-01-30 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

This patch series adds a new debug option to pad each GEM BO allocated
by the brw_bufmgr with (weak) pseudo-random noise values which are then
checked after each batchbuffer dispatch to the kernel. This can be quite
valuable to find diffucult to track down heisenberg style bugs.

A possible follow-up series would be to write to stderr (or another
logging mechanism) if the OOB write is to a GEM BO that backs a GL
buffer object; that features would be quite useful for application
developers.

v4:
 Change debug macro name value to DEBUG_CHECK_OOB.
 (suggested/requested by Jason Ekstrand)

 Use map as well when filling the noise values to make code style more
 consistent.
 (suggested/requested by Jason Ekstrand)

v3:
 Change from using pread to mapping buffer padding on checking noise
 padding.
 (spawned from Chris Wilson feedback)

 Use gen_invalidate_range() to make sure values read are correct.
 (suggested/requested by Chris Wilson)

 Add comment to declaration of brw_bo_padding_is_good() indicating
 that one needs to wait for the GPU to finish the rendering of the
 contents of the batchbuffer that uses the GEM BO before calling
 brw_bo_padding_is_good().
 (spawned from Chris Wilson feedback)

 Call brw_bo_wait_rendering() when DEBUG_OUT_OF_BOUND_CHK bit
 is up before using brw_bo_padding_is_good().
 (spawned from Chris Wilson feedback)

v2:
 Change from using rand() to using internal generating function
 (requested/suggested by Jason Ekstrand)

 Avoid having extra pointers in brw_bo struct via using the internal
 function and allocating buffer for pread at brw_bo_padding_is_good()
 (requested/suggested by Jason Ekstrand)

 Comments indicating that pread ioctl will do the required waiting
 for GPU commands to finish

Kevin Rogovin (3):
  intel/common:add debug flag for adding and checking padding on BO's
  i965: add noise padding to buffer object and function to check if
noise is correct
  i965: if DEBUG_CHECK_OOB is up, check that noise padding for each bo
used in batchbuffer is correct

 src/intel/common/gen_debug.c  |   1 +
 src/intel/common/gen_debug.h  |   1 +
 src/mesa/drivers/dri/i965/brw_bufmgr.c| 101 +-
 src/mesa/drivers/dri/i965/brw_bufmgr.h|  13 
 src/mesa/drivers/dri/i965/intel_batchbuffer.c |  22 +-
 5 files changed, 136 insertions(+), 2 deletions(-)

-- 
2.16.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 3/3] i965: if DEBUG_OUT_OF_BOUND_CHK is up, check that noise padding for each bo used in batchbuffer is correct

2018-01-26 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 22 +-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 02bfd3f333..fc6998a7ca 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -1019,11 +1019,31 @@ _intel_batchbuffer_flush_fence(struct brw_context *brw,
 
ret = submit_batch(brw, in_fence_fd, out_fence_fd);
 
-   if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
+   if (unlikely(INTEL_DEBUG & (DEBUG_SYNC | DEBUG_OUT_OF_BOUND_CHK))) {
   fprintf(stderr, "waiting for idle\n");
   brw_bo_wait_rendering(brw->batch.batch.bo);
}
 
+   if (unlikely(INTEL_DEBUG & DEBUG_OUT_OF_BOUND_CHK)) {
+  bool detected_out_of_bounds_write = false;
+
+  for (int i = 0; i < brw->batch.exec_count; i++) {
+ struct brw_bo *bo = brw->batch.exec_bos[i];
+
+ if (!brw_bo_padding_is_good(bo)) {
+detected_out_of_bounds_write = true;
+fprintf(stderr,
+"Detected buffer out-of-bounds write from brw_bo %p "
+"(GEM %u, label = \"%s\")\n",
+bo, bo->gem_handle, bo->name);
+ }
+  }
+
+  if (unlikely(detected_out_of_bounds_write)) {
+ abort();
+  }
+   }
+
/* Start a new batch buffer. */
brw_new_batch(brw);
 
-- 
2.15.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 1/3] intel/common:add debug flag for adding and checking padding on BO's

2018-01-26 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/common/gen_debug.c | 1 +
 src/intel/common/gen_debug.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/intel/common/gen_debug.c b/src/intel/common/gen_debug.c
index a978f2f581..2154b23756 100644
--- a/src/intel/common/gen_debug.c
+++ b/src/intel/common/gen_debug.c
@@ -85,6 +85,7 @@ static const struct debug_control debug_control[] = {
{ "nohiz",   DEBUG_NO_HIZ },
{ "color",   DEBUG_COLOR },
{ "reemit",  DEBUG_REEMIT },
+   { "check_oob",   DEBUG_OUT_OF_BOUND_CHK },
{ NULL,0 }
 };
 
diff --git a/src/intel/common/gen_debug.h b/src/intel/common/gen_debug.h
index da5b5a569d..92fc68b494 100644
--- a/src/intel/common/gen_debug.h
+++ b/src/intel/common/gen_debug.h
@@ -83,6 +83,7 @@ extern uint64_t INTEL_DEBUG;
 #define DEBUG_NO_HIZ  (1ull << 39)
 #define DEBUG_COLOR   (1ull << 40)
 #define DEBUG_REEMIT  (1ull << 41)
+#define DEBUG_OUT_OF_BOUND_CHK(1ull << 42)
 
 #ifdef HAVE_ANDROID_PLATFORM
 #define LOG_TAG "INTEL-MESA"
-- 
2.15.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 0/3] GEM BO padding to find OOB buffer writes

2018-01-26 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

This patch series adds a new debug option to pad each GEM BO
allocated by the brw_bufmgr with (weak) pseudo-random noise values
which are then checked after each batchbuffer dispatch to the kernel.
This can be quite valuable to find diffucult to track down heisenberg
style bugs.

A possible follow-up series would be to write to stderr (or
another logging mechanism) if the OOB write is to a GEM BO that
backs a GL buffer object; that features would be quite useful for
application developers.

I am resending this series because the Mesa-dev archives had
lost the posting of the series; Patch 2/3 was reviewed by Chris
Wilson with the caveat of stylistic nitpicks that he thought
better handled by Kenneth and/or Ian.

v3:
 Change from using pread to mapping buffer padding on checking
 noise padding.
 (spawned from Chris Wilson feedback)

 Use gen_invalidate_range() to make sure values read are correct.
 (suggested/requested by Chris Wilson)

 Add comment to declaration of brw_bo_padding_is_good() indicating
 that one needs to wait for the GPU to finish the rendering of the
 contents of the batchbuffer that uses the GEM BO before calling
 brw_bo_padding_is_good().
 (spawned from Chris Wilson feedback)

 Call brw_bo_wait_rendering() when DEBUG_OUT_OF_BOUND_CHK bit
 is up before using brw_bo_padding_is_good().
 (spawned from Chris Wilson feedback)

v2:
 Change from using rand() to using internal generating function
 (requested/suggested by Jason Ekstrand)

 Avoid having extra pointers in brw_bo struct via using the internal
 function and allocating buffer for pread at brw_bo_padding_is_good()
 (requested/suggested by Jason Ekstrand)

 Comments indicating that pread ioctl will do the required waiting
 for GPU commands to finish

Kevin Rogovin (3):
  intel/common:add debug flag for adding and checking padding on BO's
  i965: add noise padding to buffer object and function to check if
noise is correct
  i965: if DEBUG_OUT_OF_BOUND_CHK is up, check that noise padding for
each bo used in batchbuffer is correct

Kevin Rogovin (3):
  intel/common:add debug flag for adding and checking padding on BO's
  i965: add noise padding to buffer object and function to check if
noise is correct
  i965: if DEBUG_OUT_OF_BOUND_CHK is up, check that noise padding for
each bo used in batchbuffer is correct

 src/intel/common/gen_debug.c  |   1 +
 src/intel/common/gen_debug.h  |   1 +
 src/mesa/drivers/dri/i965/brw_bufmgr.c| 115 +-
 src/mesa/drivers/dri/i965/brw_bufmgr.h|  13 +++
 src/mesa/drivers/dri/i965/intel_batchbuffer.c |  22 -
 5 files changed, 150 insertions(+), 2 deletions(-)

-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 2/3] i965: add noise padding to buffer object and function to check if noise is correct

2018-01-26 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
Reviewed-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 115 -
 src/mesa/drivers/dri/i965/brw_bufmgr.h |  13 
 2 files changed, 127 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index fb180289a0..7e50ba512e 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -220,6 +220,39 @@ bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)
   >cache_bucket[index] : NULL;
 }
 
+/* Our goal is not to have noise good enough for cryto,
+ * but instead values that are unique-ish enough that
+ * it is incredibly unlikely that a buffer overwrite
+ * will produce the exact same values.
+ */
+static uint8_t
+next_noise_value(uint8_t prev_noise)
+{
+   uint32_t v = prev_noise;
+   return (v * 103u + 227u) & 0xFF;
+}
+
+static void
+fill_noise_buffer(uint8_t *dst, uint8_t start, uint32_t length)
+{
+   for(uint32_t i = 0; i < length; ++i) {
+  dst[i] = start;
+  start = next_noise_value(start);
+   }
+}
+
+static uint8_t*
+generate_noise_buffer(uint8_t start, uint32_t length)
+{
+   uint8_t *return_value;
+   return_value = malloc(length);
+   if (return_value) {
+  fill_noise_buffer(return_value, start, length);
+   }
+
+   return return_value;
+}
+
 int
 brw_bo_busy(struct brw_bo *bo)
 {
@@ -367,7 +400,18 @@ retry:
   bo->size = bo_size;
   bo->idle = true;
 
-  struct drm_i915_gem_create create = { .size = bo_size };
+  bo->padding_size = 0;
+  if (unlikely(INTEL_DEBUG & DEBUG_OUT_OF_BOUND_CHK)) {
+ /* TODO: we want to make sure that the padding forces
+  * the BO to take another page on the (PP)GTT; 4KB
+  * may or may not be the page size for the GEM. Indeed,
+  * depending on generation, kernel version and GEM size,
+  * the page size can be one of 4KB, 64KB or 2M.
+  */
+ bo->padding_size = 4096;
+  }
+
+  struct drm_i915_gem_create create = { .size = bo_size + bo->padding_size 
};
 
   /* All new BOs we get from the kernel are zeroed, so we don't need to
* worry about that here.
@@ -378,6 +422,27 @@ retry:
  goto err;
   }
 
+  if (unlikely(bo->padding_size > 0)) {
+ uint8_t *noise_values;
+ struct drm_i915_gem_pwrite pwrite;
+
+ noise_values = generate_noise_buffer(create.handle, bo->padding_size);
+ if (!noise_values)
+goto err_free;
+
+ pwrite.handle = create.handle;
+ pwrite.pad = 0;
+ pwrite.offset = bo_size;
+ pwrite.size = bo->padding_size;
+ pwrite.data_ptr = (__u64) (uintptr_t) noise_values;
+
+ ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_PWRITE, );
+ free(noise_values);
+
+ if (ret != 0)
+goto err_free;
+  }
+
   bo->gem_handle = create.handle;
 
   bo->bufmgr = bufmgr;
@@ -424,6 +489,53 @@ err:
return NULL;
 }
 
+bool
+brw_bo_padding_is_good(struct brw_bo *bo)
+{
+   if (bo->padding_size > 0) {
+  struct drm_i915_gem_mmap mmap_arg = {
+ .handle = bo->gem_handle,
+ .offset = bo->size,
+ .size = bo->padding_size,
+ .flags = 0,
+  };
+  uint8_t *mapped;
+  int ret;
+  uint8_t expected_value;
+
+  /* We cannot use brw_bo_map() because it maps precisely the range
+   * [0, bo->size) and we wish to map the range of the padding which
+   * is [bo->size, bo->size + bo->pading_size)
+   */
+  ret = drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, _arg);
+  if (ret != 0) {
+ DBG("Unable to map mapping buffer %d (%s) buffer for pad checking.\n",
+ bo->gem_handle, bo->name);
+ return false;
+  }
+
+  mapped = (uint8_t*) (uintptr_t) mmap_arg.addr_ptr;
+  /* bah-humbug, we need to see the latest contents and
+   * if the bo is not cache coherent we likely need to
+   * invalidate the cache lines to get it.
+   */
+  if (!bo->cache_coherent && !bo->bufmgr->has_llc) {
+ gen_invalidate_range(mapped, bo->padding_size);
+  }
+
+  expected_value = bo->gem_handle & 0xFF;
+  for (uint32_t i = 0; i < bo->padding_size; ++i) {
+ if (expected_value != mapped[i]) {
+drm_munmap(mapped, bo->padding_size);
+return false;
+ }
+ expected_value = next_noise_value(expected_value);
+  }
+  drm_munmap(mapped, bo->padding_size);
+   }
+   return true;
+}
+
 struct brw_bo *
 brw_bo_alloc(struct brw_bufmgr *bufmgr,
  const char *name, uint64_t size, uint64_t alignment)
@@ -1157,6 +1269,7 @@ brw_b

[Mesa-dev] [PATCH v3 3/3] i965: if DEBUG_OUT_OF_BOUND_CHK is up, check that noise padding for each bo used in batchbuffer is correct

2018-01-09 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 22 +-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index a17e169925..7ae945c71c 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -939,11 +939,31 @@ _intel_batchbuffer_flush_fence(struct brw_context *brw,
 
ret = submit_batch(brw, in_fence_fd, out_fence_fd);
 
-   if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
+   if (unlikely(INTEL_DEBUG & (DEBUG_SYNC | DEBUG_OUT_OF_BOUND_CHK))) {
   fprintf(stderr, "waiting for idle\n");
   brw_bo_wait_rendering(brw->batch.batch.bo);
}
 
+   if (unlikely(INTEL_DEBUG & DEBUG_OUT_OF_BOUND_CHK)) {
+  bool detected_out_of_bounds_write = false;
+
+  for (int i = 0; i < brw->batch.exec_count; i++) {
+ struct brw_bo *bo = brw->batch.exec_bos[i];
+
+ if (!brw_bo_padding_is_good(bo)) {
+detected_out_of_bounds_write = true;
+fprintf(stderr,
+"Detected buffer out-of-bounds write from brw_bo %p "
+"(GEM %u, label = \"%s\")\n",
+bo, bo->gem_handle, bo->name);
+ }
+  }
+
+  if (unlikely(detected_out_of_bounds_write)) {
+ abort();
+  }
+   }
+
/* Start a new batch buffer. */
brw_new_batch(brw);
 
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 2/3] i965: add noise padding to buffer object and function to check if noise is correct

2018-01-09 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 115 -
 src/mesa/drivers/dri/i965/brw_bufmgr.h |  13 
 2 files changed, 127 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 52b5bf97a1..07a9fea5dd 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -220,6 +220,39 @@ bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)
   >cache_bucket[index] : NULL;
 }
 
+/* Our goal is not to have noise good enough for cryto,
+ * but instead values that are unique-ish enough that
+ * it is incredibly unlikely that a buffer overwrite
+ * will produce the exact same values.
+ */
+static uint8_t
+next_noise_value(uint8_t prev_noise)
+{
+   uint32_t v = prev_noise;
+   return (v * 103u + 227u) & 0xFF;
+}
+
+static void
+fill_noise_buffer(uint8_t *dst, uint8_t start, uint32_t length)
+{
+   for(uint32_t i = 0; i < length; ++i) {
+  dst[i] = start;
+  start = next_noise_value(start);
+   }
+}
+
+static uint8_t*
+generate_noise_buffer(uint8_t start, uint32_t length)
+{
+   uint8_t *return_value;
+   return_value = malloc(length);
+   if (return_value) {
+  fill_noise_buffer(return_value, start, length);
+   }
+
+   return return_value;
+}
+
 int
 brw_bo_busy(struct brw_bo *bo)
 {
@@ -367,7 +400,18 @@ retry:
   bo->size = bo_size;
   bo->idle = true;
 
-  struct drm_i915_gem_create create = { .size = bo_size };
+  bo->padding_size = 0;
+  if (unlikely(INTEL_DEBUG & DEBUG_OUT_OF_BOUND_CHK)) {
+ /* TODO: we want to make sure that the padding forces
+  * the BO to take another page on the (PP)GTT; 4KB
+  * may or may not be the page size for the GEM. Indeed,
+  * depending on generation, kernel version and GEM size,
+  * the page size can be one of 4KB, 64KB or 2M.
+  */
+ bo->padding_size = 4096;
+  }
+
+  struct drm_i915_gem_create create = { .size = bo_size + bo->padding_size 
};
 
   /* All new BOs we get from the kernel are zeroed, so we don't need to
* worry about that here.
@@ -378,6 +422,27 @@ retry:
  goto err;
   }
 
+  if (unlikely(bo->padding_size > 0)) {
+ uint8_t *noise_values;
+ struct drm_i915_gem_pwrite pwrite;
+
+ noise_values = generate_noise_buffer(create.handle, bo->padding_size);
+ if (!noise_values)
+goto err_free;
+
+ pwrite.handle = create.handle;
+ pwrite.pad = 0;
+ pwrite.offset = bo_size;
+ pwrite.size = bo->padding_size;
+ pwrite.data_ptr = (__u64) (uintptr_t) noise_values;
+
+ ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_PWRITE, );
+ free(noise_values);
+
+ if (ret != 0)
+goto err_free;
+  }
+
   bo->gem_handle = create.handle;
 
   bo->bufmgr = bufmgr;
@@ -424,6 +489,53 @@ err:
return NULL;
 }
 
+bool
+brw_bo_padding_is_good(struct brw_bo *bo)
+{
+   if (bo->padding_size > 0) {
+  struct drm_i915_gem_mmap mmap_arg = {
+ .handle = bo->gem_handle,
+ .offset = bo->size,
+ .size = bo->padding_size,
+ .flags = 0,
+  };
+  uint8_t *mapped;
+  int ret;
+  uint8_t expected_value;
+
+  /* We cannot use brw_bo_map() because it maps precisely the range
+   * [0, bo->size) and we wish to map the range of the padding which
+   * is [bo->size, bo->size + bo->pading_size)
+   */
+  ret = drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, _arg);
+  if (ret != 0) {
+ DBG("Unable to map mapping buffer %d (%s) buffer for pad checking.\n",
+ bo->gem_handle, bo->name);
+ return false;
+  }
+
+  mapped = (uint8_t*) (uintptr_t) mmap_arg.addr_ptr;
+  /* bah-humbug, we need to see the latest contents and
+   * if the bo is not cache coherent we likely need to
+   * invalidate the cache lines to get it.
+   */
+  if (!bo->cache_coherent && !bo->bufmgr->has_llc) {
+ gen_invalidate_range(mapped, bo->padding_size);
+  }
+
+  expected_value = bo->gem_handle & 0xFF;
+  for (uint32_t i = 0; i < bo->padding_size; ++i) {
+ if (expected_value != mapped[i]) {
+drm_munmap(mapped, bo->padding_size);
+return false;
+ }
+ expected_value = next_noise_value(expected_value);
+  }
+  drm_munmap(mapped, bo->padding_size);
+   }
+   return true;
+}
+
 struct brw_bo *
 brw_bo_alloc(struct brw_bufmgr *bufmgr,
  const char *name, uint64_t size, uint64_t alignment)
@@ -1156,6 +1268,7 @@ brw_bo_gem_create_from_prime(struct brw_bufmgr *bufmgr, 
int prime_fd

[Mesa-dev] [PATCH v3 1/3] intel/common:add debug flag for adding and checking padding on BO's

2018-01-09 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/common/gen_debug.c | 1 +
 src/intel/common/gen_debug.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/intel/common/gen_debug.c b/src/intel/common/gen_debug.c
index a978f2f581..2154b23756 100644
--- a/src/intel/common/gen_debug.c
+++ b/src/intel/common/gen_debug.c
@@ -85,6 +85,7 @@ static const struct debug_control debug_control[] = {
{ "nohiz",   DEBUG_NO_HIZ },
{ "color",   DEBUG_COLOR },
{ "reemit",  DEBUG_REEMIT },
+   { "check_oob",   DEBUG_OUT_OF_BOUND_CHK },
{ NULL,0 }
 };
 
diff --git a/src/intel/common/gen_debug.h b/src/intel/common/gen_debug.h
index da5b5a569d..92fc68b494 100644
--- a/src/intel/common/gen_debug.h
+++ b/src/intel/common/gen_debug.h
@@ -83,6 +83,7 @@ extern uint64_t INTEL_DEBUG;
 #define DEBUG_NO_HIZ  (1ull << 39)
 #define DEBUG_COLOR   (1ull << 40)
 #define DEBUG_REEMIT  (1ull << 41)
+#define DEBUG_OUT_OF_BOUND_CHK(1ull << 42)
 
 #ifdef HAVE_ANDROID_PLATFORM
 #define LOG_TAG "INTEL-MESA"
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 0/3] GEM BO padding to find OOB buffer writes

2018-01-09 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

This patch series adds a new debug option to pad each GEM BO
allocated by the brw_bufmgr with pseudo-(weak) random noise values
which are then checked after each batchbuffer dispatch to the kernel.
This can be quite valuable to find diffucult to track down heisenberg
style bugs.

A possible follow-up series would be to write to stderr (or
another logging mechanism) if the OOB write is to a GEM BO that
backs a GL buffer object; that features would be quite useful for
application developers.

v3:
 Change from using pread to mapping buffer padding on checking
 noise padding.
 (spawned from Chris Wilson feedback)

 Use gen_invalidate_range() to make sure values read are correct.
 (suggested/requested by Chris Wilson)

 Add comment to declaration of brw_bo_padding_is_good() indicating
 that one needs to wait for the GPU to finish the rendering of the
 contents of the batchbuffer that uses the GEM BO before calling
 brw_bo_padding_is_good().
 (spawned from Chris Wilson feedback)

 Call brw_bo_wait_rendering() when DEBUG_OUT_OF_BOUND_CHK bit
 is up before using brw_bo_padding_is_good().
 (spawned from Chris Wilson feedback)

v2:
 Change from using rand() to using internal generating function
 (requested/suggested by Jason Ekstrand)

 Avoid having extra pointers in brw_bo struct via using the internal
 function and allocating buffer for pread at brw_bo_padding_is_good()
 (requested/suggested by Jason Ekstrand)

 Comments indicating that pread ioctl will do the required waiting
 for GPU commands to finish

Kevin Rogovin (3):
  intel/common:add debug flag for adding and checking padding on BO's
  i965: add noise padding to buffer object and function to check if
noise is correct
  i965: if DEBUG_OUT_OF_BOUND_CHK is up, check that noise padding for
each bo used in batchbuffer is correct

Kevin Rogovin (3):
  intel/common:add debug flag for adding and checking padding on BO's
  i965: add noise padding to buffer object and function to check if
noise is correct
  i965: if DEBUG_OUT_OF_BOUND_CHK is up, check that noise padding for
each bo used in batchbuffer is correct

 src/intel/common/gen_debug.c  |   1 +
 src/intel/common/gen_debug.h  |   1 +
 src/mesa/drivers/dri/i965/brw_bufmgr.c| 115 +-
 src/mesa/drivers/dri/i965/brw_bufmgr.h|  13 +++
 src/mesa/drivers/dri/i965/intel_batchbuffer.c |  22 -
 5 files changed, 150 insertions(+), 2 deletions(-)

-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/5] i965: set ASTC5x5 workaround texture type tracking on texture validate

2017-12-14 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/intel_tex_validate.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_tex_validate.c 
b/src/mesa/drivers/dri/i965/intel_tex_validate.c
index 2b7798c..812c0c7 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_validate.c
@@ -188,11 +188,24 @@ brw_validate_textures(struct brw_context *brw)
struct gl_context *ctx = >ctx;
const int max_enabled_unit = ctx->Texture._MaxEnabledTexImageUnit;
 
+   brw->astc5x5_wa.texture_astc5x5_present = false;
+   brw->astc5x5_wa.texture_with_auxilary_present = false;
for (int unit = 0; unit <= max_enabled_unit; unit++) {
   struct gl_texture_unit *tex_unit = >Texture.Unit[unit];
 
   if (tex_unit->_Current) {
+ struct intel_texture_object *tex =
+intel_texture_object(tex_unit->_Current);
+ struct intel_mipmap_tree *mt = tex->mt;
+
  intel_finalize_mipmap_tree(brw, unit);
+ if (mt && mt->aux_usage != ISL_AUX_USAGE_NONE) {
+brw->astc5x5_wa.texture_with_auxilary_present = true;
+ }
+ if (tex->_Format == MESA_FORMAT_RGBA_ASTC_5x5 ||
+ tex->_Format == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5) {
+brw->astc5x5_wa.texture_astc5x5_present = true;
+ }
   }
}
 }
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 1/5] i965: define astx5x5 workaround infrastructure

2017-12-14 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/Makefile.sources|  1 +
 src/mesa/drivers/dri/i965/brw_context.c   |  6 +
 src/mesa/drivers/dri/i965/brw_context.h   | 24 ++
 src/mesa/drivers/dri/i965/gen9_astc5x5_wa.c   | 36 +++
 src/mesa/drivers/dri/i965/intel_batchbuffer.c |  1 +
 src/mesa/drivers/dri/i965/meson.build |  1 +
 6 files changed, 69 insertions(+)
 create mode 100644 src/mesa/drivers/dri/i965/gen9_astc5x5_wa.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index d928f71..4698fcb 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -77,6 +77,7 @@ i965_FILES = \
gen7_urb.c \
gen8_depth_state.c \
gen8_multisample_state.c \
+gen9_astc5x5_wa.c \
hsw_queryobj.c \
hsw_sol.c \
intel_batchbuffer.c \
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 126c187..f3ccbda 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -1073,6 +1073,12 @@ brwCreateContext(gl_api api,
if (ctx->Extensions.INTEL_performance_query)
   brw_init_performance_queries(brw);
 
+   brw->astc5x5_wa.required = (devinfo->gen == 9);
+   brw->astc5x5_wa.mode = BRW_ASTC5x5_WA_MODE_NONE;
+   brw->astc5x5_wa.texture_astc5x5_present = false;
+   brw->astc5x5_wa.texture_with_auxilary_present = false;
+   brw->astc5x5_wa.blorp_sampling_from_astc5x5 = false;
+
vbo_use_buffer_objects(ctx);
vbo_always_unmap_buffers(ctx);
 
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 0f0aad8..60a1d3b 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -166,6 +166,12 @@ enum brw_cache_id {
BRW_MAX_CACHE
 };
 
+enum brw_astc5x5_wa_mode_t {
+   BRW_ASTC5x5_WA_MODE_NONE,
+   BRW_ASTC5x5_WA_MODE_HAS_ASTC5x5,
+   BRW_ASTC5x5_WA_MODE_HAS_AUX,
+};
+
 enum brw_state_id {
/* brw_cache_ids must come first - see brw_program_cache.c */
BRW_STATE_URB_FENCE = BRW_MAX_CACHE,
@@ -1263,6 +1269,19 @@ struct brw_context
 */
bool draw_aux_buffer_disabled[MAX_DRAW_BUFFERS];
 
+   /* Certain GEN's have a hardware bug where the sampler hangs if it attempts
+* to access auxilary buffers and an ASTC5x5 compressed buffer. The 
workaround
+* is to make sure that the texture cache is cleared between such accesses
+* and that such accesses have a command streamer stall between them.
+*/
+   struct {
+  bool required;
+  enum brw_astc5x5_wa_mode_t mode;
+  bool texture_astc5x5_present;
+  bool texture_with_auxilary_present;
+  bool blorp_sampling_from_astc5x5;
+   } astc5x5_wa;
+
__DRIcontext *driContext;
struct intel_screen *screen;
 };
@@ -1695,6 +1714,11 @@ void brw_query_internal_format(struct gl_context *ctx, 
GLenum target,
GLenum internalFormat, GLenum pname,
GLint *params);
 
+/* gen9_astc5x5_wa.c */
+void gen9_set_astc5x5_wa_mode(struct brw_context *brw,
+ enum brw_astc5x5_wa_mode_t mode);
+void gen9_astc5x5_perform_wa(struct brw_context *brw);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/mesa/drivers/dri/i965/gen9_astc5x5_wa.c 
b/src/mesa/drivers/dri/i965/gen9_astc5x5_wa.c
new file mode 100644
index 000..247fd00
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen9_astc5x5_wa.c
@@ -0,0 +1,36 @@
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "intel_mipmap_tree.h"
+
+void
+gen9_set_astc5x5_wa_mode(struct brw_context *brw,
+enum brw_astc5x5_wa_mode_t mode)
+{
+   if (!brw->astc5x5_wa.required ||
+   mode == BRW_ASTC5x5_WA_MODE_NONE ||
+   brw->astc5x5_wa.mode == mode) {
+  return;
+   }
+
+   if (brw->astc5x5_wa.mode != BRW_ASTC5x5_WA_MODE_NONE) {
+  const uint32_t flags = PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+  brw_emit_pipe_control_flush(brw, flags);
+   }
+
+   brw->astc5x5_wa.mode = mode;
+}
+
+void
+gen9_astc5x5_perform_wa(struct brw_context *brw)
+{
+   if (!brw->astc5x5_wa.required) {
+  return;
+   }
+
+   if (brw->astc5x5_wa.texture_astc5x5_present) {
+  gen9_set_astc5x5_wa_mode(brw, BRW_ASTC5x5_WA_MODE_HAS_ASTC5x5);
+   } else if (brw->astc5x5_wa.texture_with_auxilary_present) {
+  gen9_set_astc5x5_wa_mode(brw, BRW_ASTC5x5_WA_MODE_HAS_AUX);
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 91a6506..b7e2450 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -613,

[Mesa-dev] [PATCH v2 4/5] i965: use ASTC5x5 workaround in brw_compute

2017-12-14 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_compute.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_compute.c 
b/src/mesa/drivers/dri/i965/brw_compute.c
index 9be7523..c8d90f5 100644
--- a/src/mesa/drivers/dri/i965/brw_compute.c
+++ b/src/mesa/drivers/dri/i965/brw_compute.c
@@ -179,6 +179,12 @@ brw_dispatch_compute_common(struct gl_context *ctx)
 
brw_predraw_resolve_inputs(brw, false);
 
+   /* if necessary, perform astc5x5 workarounds to make sure sampling
+* from astc5x5 and textures with an auxilary surface have a command
+* streamer stall and texture invalidate between them.
+*/
+   gen9_astc5x5_perform_wa(brw);
+
/* Flush the batch if the batch/state buffers are nearly full.  We can
 * grow them if needed, but this is not free, so we'd like to avoid it.
 */
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 3/5] i965: use ASTC5x5 workaround in brw_draw

2017-12-14 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_draw.c | 16 ++--
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  5 +
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index 7e29dcf..2d3fb75 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -376,6 +376,8 @@ intel_disable_rb_aux_buffer(struct brw_context *brw,
  *
  * Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each
  * enabled depth texture, and flush the render cache for any dirty textures.
+ * In addition, if the ASTC5x5 workaround is needed and if ASTC5x5 textures
+ * are present, resolve textures so that auxilary buffers are not needed.
  */
 void
 brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering)
@@ -413,9 +415,13 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool 
rendering)
  num_layers = INTEL_REMAINING_LAYERS;
   }
 
-  const bool disable_aux = rendering &&
+  const bool astc_disables_aux = (brw->astc5x5_wa.required &&
+ brw->astc5x5_wa.texture_astc5x5_present &&
+ tex_obj->mt->aux_usage != ISL_AUX_USAGE_NONE);
+
+  const bool disable_aux = (rendering &&
  intel_disable_rb_aux_buffer(brw, tex_obj->mt, min_level, num_levels,
- "for sampling");
+ "for sampling")) || astc_disables_aux;
 
   intel_miptree_prepare_texture(brw, tex_obj->mt, view_format,
 min_level, num_levels,
@@ -684,6 +690,12 @@ brw_prepare_drawing(struct gl_context *ctx,
brw_predraw_resolve_inputs(brw, true);
brw_predraw_resolve_framebuffer(brw);
 
+   /* if necessary, perform astc5x5 workarounds to make sure sampling
+* from astc5x5 and textures with an auxilary surface have a command
+* streamer stall and texture invalidate between them.
+*/
+   gen9_astc5x5_perform_wa(brw);
+
/* Bind all inputs, derive varying and size information:
 */
brw_merge_inputs(brw, arrays);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index adf60a8..ccdb537 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -447,6 +447,11 @@ brw_aux_surface_disabled(const struct brw_context *brw,
 {
const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
 
+   if (brw->astc5x5_wa.required &&
+   brw->astc5x5_wa.texture_astc5x5_present) {
+  return true;
+   }
+
for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
   const struct intel_renderbuffer *irb =
  intel_renderbuffer(fb->_ColorDrawBuffers[i]);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 5/5] i965: ASTC5x5 workaround logic for blorp

2017-12-14 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/genX_blorp_exec.c |  5 +
 src/mesa/drivers/dri/i965/intel_tex_image.c | 16 
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c 
b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
index e8bc52e..97791b7 100644
--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
+++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
@@ -230,6 +230,11 @@ genX(blorp_exec)(struct blorp_batch *batch,
struct gl_context *ctx = >ctx;
bool check_aperture_failed_once = false;
 
+   if (brw->astc5x5_wa.blorp_sampling_from_astc5x5) {
+  gen9_set_astc5x5_wa_mode(brw, BRW_ASTC5x5_WA_MODE_HAS_ASTC5x5);
+   } else {
+  gen9_set_astc5x5_wa_mode(brw, BRW_ASTC5x5_WA_MODE_HAS_AUX);
+   }
/* Flush the sampler and render caches.  We definitely need to flush the
 * sampler cache so that we get updated contents from the render cache for
 * the glBlitFramebuffer() source.  Also, we are sometimes warned in the
diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c 
b/src/mesa/drivers/dri/i965/intel_tex_image.c
index 37c8e24..60028bb 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -759,10 +759,18 @@ intel_get_tex_sub_image(struct gl_context *ctx,
DBG("%s\n", __func__);
 
if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
-  if (intel_gettexsubimage_blorp(brw, texImage,
- xoffset, yoffset, zoffset,
- width, height, depth, format, type,
- pixels, >Pack))
+  bool blorp_success;
+
+  brw->astc5x5_wa.blorp_sampling_from_astc5x5 =
+ (texImage->TexFormat == MESA_FORMAT_RGBA_ASTC_5x5 ||
+  texImage->TexFormat == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5);
+  blorp_success = intel_gettexsubimage_blorp(brw, texImage,
+ xoffset, yoffset, zoffset,
+ width, height, depth,
+ format, type, pixels,
+ >Pack);
+  brw->astc5x5_wa.blorp_sampling_from_astc5x5 = false;
+  if (blorp_success)
  return;
 
   perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 0/5] i965: ASTC5x5 workaround

2017-12-14 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

This patch series implements a needed workaround for Gen9 for ASTC5x5
sampler reads. The crux of the work around is to make sure that the
sampler does not read an ASTC5x5 texture and a surface with an auxilary
buffer without having a texture cache invalidate and command streamer
stall between such accesses.

With this patch series applied to the (current) master branch of mesa,
carchase works on my SKL GT4.

v2:
  Rename workaround functions from brw_ to gen9_
  (suggested/requested by Topi Pohjolainen).

  Place texture resolve to avoid using auxilary surface
  when ASTC5x5 is detected in brw_predraw_resolve_inputs()
  instead of another detected function; doing so allows
  one to avoid walking the textures again.
  (suggested/requested by Topi Pohjolainen).

  Emit command streamer stall in addition to texture
  invalidate.
  (original short-coming caught by Jason Ekstrand)

  Place workaround function in (new) dedicated file.

  Minor path re-ordering to accomodate changes.

Kevin Rogovin (5):
  i965: define astx5x5 workaround infrastructure
  i965: set ASTC5x5 workaround texture type tracking on texture validate
  i965: use ASTC5x5 workaround in brw_draw
  i965: use ASTC5x5 workaround in brw_compute
  i965: ASTC5x5 workaround logic for blorp

 src/mesa/drivers/dri/i965/Makefile.sources   |  1 +
 src/mesa/drivers/dri/i965/brw_compute.c  |  6 
 src/mesa/drivers/dri/i965/brw_context.c  |  6 
 src/mesa/drivers/dri/i965/brw_context.h  | 24 
 src/mesa/drivers/dri/i965/brw_draw.c | 16 +--
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  5 
 src/mesa/drivers/dri/i965/gen9_astc5x5_wa.c  | 36 
 src/mesa/drivers/dri/i965/genX_blorp_exec.c  |  5 
 src/mesa/drivers/dri/i965/intel_batchbuffer.c|  1 +
 src/mesa/drivers/dri/i965/intel_tex_image.c  | 16 ---
 src/mesa/drivers/dri/i965/intel_tex_validate.c   | 13 +
 src/mesa/drivers/dri/i965/meson.build|  1 +
 12 files changed, 124 insertions(+), 6 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/gen9_astc5x5_wa.c

-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 3/3] i965: if DEBUG_OUT_OF_BOUND_CHK is up, check that noise padding for each bo used in batchbuffer is correct

2017-12-13 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

v2:
 Comments indicating that brw_bo_padding_is_good() will do the required
 waiting for GPU commands to finish

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 91a6506a89..689ae2362a 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -758,6 +758,7 @@ execbuffer(int fd,
};
 
unsigned long cmd = DRM_IOCTL_I915_GEM_EXECBUFFER2;
+   bool detected_out_of_bounds_write = false;
 
if (in_fence != -1) {
   execbuf.rsvd2 = in_fence;
@@ -787,6 +788,24 @@ execbuffer(int fd,
  batch->validation_list[i].offset);
  bo->gtt_offset = batch->validation_list[i].offset;
   }
+
+  if (unlikely(INTEL_DEBUG & DEBUG_OUT_OF_BOUND_CHK)) {
+ /* brw_bo_padding_is_good() performs the necessary
+  * syncing itself to make sure that the padding read
+  * is correct.
+  */
+ if (!brw_bo_padding_is_good(bo)) {
+detected_out_of_bounds_write = true;
+fprintf(stderr,
+"Detected buffer out-of-bounds write from brw_bo %p "
+"(GEM %u, label = \"%s\")\n",
+bo, bo->gem_handle, bo->name);
+ }
+  }
+   }
+
+   if (unlikely(detected_out_of_bounds_write)) {
+  abort();
}
 
if (ret == 0 && out_fence != NULL)
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 1/3] intel/common:add debug flag for adding and checking padding on BO's

2017-12-13 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/common/gen_debug.c | 1 +
 src/intel/common/gen_debug.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/intel/common/gen_debug.c b/src/intel/common/gen_debug.c
index a978f2f581..2154b23756 100644
--- a/src/intel/common/gen_debug.c
+++ b/src/intel/common/gen_debug.c
@@ -85,6 +85,7 @@ static const struct debug_control debug_control[] = {
{ "nohiz",   DEBUG_NO_HIZ },
{ "color",   DEBUG_COLOR },
{ "reemit",  DEBUG_REEMIT },
+   { "check_oob",   DEBUG_OUT_OF_BOUND_CHK },
{ NULL,0 }
 };
 
diff --git a/src/intel/common/gen_debug.h b/src/intel/common/gen_debug.h
index da5b5a569d..92fc68b494 100644
--- a/src/intel/common/gen_debug.h
+++ b/src/intel/common/gen_debug.h
@@ -83,6 +83,7 @@ extern uint64_t INTEL_DEBUG;
 #define DEBUG_NO_HIZ  (1ull << 39)
 #define DEBUG_COLOR   (1ull << 40)
 #define DEBUG_REEMIT  (1ull << 41)
+#define DEBUG_OUT_OF_BOUND_CHK(1ull << 42)
 
 #ifdef HAVE_ANDROID_PLATFORM
 #define LOG_TAG "INTEL-MESA"
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/3] i965: add noise padding to buffer object and function to check if noise is correct

2017-12-13 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

v2:
 Change from using rand() to using internal generating function
 (requested/suggested by Jason Ekstrand)

 Avoid having extra pointers in brw_bo struct via using the internal
 function and allocating buffer for pread at brw_bo_padding_is_good()
 (requested/suggested by Jason Ekstrand)

 Comments indicating that pread ioctl will do the required waiting
 for GPU commands to finish

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 107 -
 src/mesa/drivers/dri/i965/brw_bufmgr.h |   8 +++
 2 files changed, 114 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 52b5bf97a1..274147d2ce 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -220,6 +220,39 @@ bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)
   >cache_bucket[index] : NULL;
 }
 
+/* Our goal is not to have noise good enough for cryto,
+ * but instead values that are unique-ish enough that
+ * it is incredibly unlikely that a buffer overwrite
+ * will produce the exact same values.
+ */
+static uint8_t
+next_noise_value(uint8_t prev_noise)
+{
+   uint32_t v = prev_noise;
+   return (v * 103u + 227u) & 0xFF;
+}
+
+static void
+fill_noise_buffer(uint8_t *dst, uint8_t start, uint32_t length)
+{
+   for(uint32_t i = 0; i < length; ++i) {
+  dst[i] = start;
+  start = next_noise_value(start);
+   }
+}
+
+static uint8_t*
+generate_noise_buffer(uint8_t start, uint32_t length)
+{
+   uint8_t *return_value;
+   return_value = malloc(length);
+   if (return_value) {
+  fill_noise_buffer(return_value, start, length);
+   }
+
+   return return_value;
+}
+
 int
 brw_bo_busy(struct brw_bo *bo)
 {
@@ -367,7 +400,18 @@ retry:
   bo->size = bo_size;
   bo->idle = true;
 
-  struct drm_i915_gem_create create = { .size = bo_size };
+  bo->padding_size = 0;
+  if (unlikely(INTEL_DEBUG & DEBUG_OUT_OF_BOUND_CHK)) {
+ /* TODO: we want to make sure that the padding forces
+  * the BO to take another page on the (PP)GTT; 4KB
+  * may or may not be the page size for the GEM. Indeed,
+  * depending on generation, kernel version and GEM size,
+  * the page size can be one of 4KB, 64KB or 2M.
+  */
+ bo->padding_size = 4096;
+  }
+
+  struct drm_i915_gem_create create = { .size = bo_size + bo->padding_size 
};
 
   /* All new BOs we get from the kernel are zeroed, so we don't need to
* worry about that here.
@@ -378,6 +422,27 @@ retry:
  goto err;
   }
 
+  if (unlikely(bo->padding_size > 0)) {
+ uint8_t *noise_values;
+ struct drm_i915_gem_pwrite pwrite;
+
+ noise_values = generate_noise_buffer(create.handle, bo->padding_size);
+ if (!noise_values)
+goto err_free;
+
+ pwrite.handle = create.handle;
+ pwrite.pad = 0;
+ pwrite.offset = bo_size;
+ pwrite.size = bo->padding_size;
+ pwrite.data_ptr = (__u64) (uintptr_t) noise_values;
+
+ ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_PWRITE, );
+ free(noise_values);
+
+ if (ret != 0)
+goto err_free;
+  }
+
   bo->gem_handle = create.handle;
 
   bo->bufmgr = bufmgr;
@@ -424,6 +489,44 @@ err:
return NULL;
 }
 
+bool
+brw_bo_padding_is_good(struct brw_bo *bo)
+{
+   if (bo->padding_size > 0) {
+  uint8_t *tmp;
+  struct drm_i915_gem_pread pread;
+  int ret;
+  uint8_t start;
+
+  tmp = malloc(bo->padding_size);
+  if (!tmp)
+ return false;
+
+  pread.handle = bo->gem_handle;
+  pread.pad = 0;
+  pread.offset = bo->size;
+  pread.size = bo->padding_size;
+  pread.data_ptr = (__u64) (uintptr_t) tmp;
+
+  /* PREAD waits for any processing by the GPU that uses the buffer
+   * to finish before reading values from that buffer.
+   */
+  ret = drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_PREAD, );
+  assert(ret == 0);
+
+  start = bo->gem_handle & 0xFF;
+  for (uint32_t i = 0; i < bo->padding_size; ++i) {
+ if (start != tmp[i]) {
+free(tmp);
+return false;
+ }
+ start = next_noise_value(start);
+  }
+  free(tmp);
+   }
+   return true;
+}
+
 struct brw_bo *
 brw_bo_alloc(struct brw_bufmgr *bufmgr,
  const char *name, uint64_t size, uint64_t alignment)
@@ -598,6 +701,7 @@ bo_free(struct brw_bo *bo)
   DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
   bo->gem_handle, bo->name, strerror(errno));
}
+
free(bo);
 }
 
@@ -1156,6 +1260,7 @@ brw_bo_gem_create_from_prime(struct brw_bufmgr *bufmgr, 
int prime_fd)
bo->name = "prime&quo

[Mesa-dev] [PATCH v2 0/3] GEM BO padding to find OOB buffer writes

2017-12-13 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

This patch series adds a new debug option to pad each GEM BO
allocated by the brw_bufmgr with pseudo-(weak) random noise values
which are then checked after each batchbuffer dispatch to the kernel.
This can be quite valuable to find diffucult to track down heisenberg
style bugs.

A possible follow-up series would be to write to stderr (or
another logging mechanism) if the OOB write is to a GEM BO that
backs a GL buffer object; that features would be quite useful for
application developers.

(resending because I sent out v2 earlier today without tagging it as v2).

v2:
 Change from using rand() to using internal generating function
 (requested/suggested by Jason Ekstrand)

 Avoid having extra pointers in brw_bo struct via using the internal
 function and allocating buffer for pread at brw_bo_padding_is_good()
 (requested/suggested by Jason Ekstrand)

 Comments indicating that pread ioctl will do the required waiting
 for GPU commands to finish

Kevin Rogovin (3):
  intel/common:add debug flag for adding and checking padding on BO's
  i965: add noise padding to buffer object and function to check if
noise is correct
  i965: if DEBUG_OUT_OF_BOUND_CHK is up, check that noise padding for
each bo used in batchbuffer is correct

 src/intel/common/gen_debug.c  |   1 +
 src/intel/common/gen_debug.h  |   1 +
 src/mesa/drivers/dri/i965/brw_bufmgr.c| 107 +-
 src/mesa/drivers/dri/i965/brw_bufmgr.h|   8 ++
 src/mesa/drivers/dri/i965/intel_batchbuffer.c |  19 +
 5 files changed, 135 insertions(+), 1 deletion(-)

-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] i965: add noise padding to buffer object and function to check if noise is correct

2017-12-13 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 105 -
 src/mesa/drivers/dri/i965/brw_bufmgr.h |   8 +++
 2 files changed, 112 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 52b5bf97a1..c47a6d0a29 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -220,6 +220,39 @@ bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)
   >cache_bucket[index] : NULL;
 }
 
+/* Our goal is not to have noise good enough for cryto,
+ * but instead values that are unique-ish enough that
+ * it is incredibly unlikely that a buffer overwrite
+ * will produce the exact same values.
+ */
+static uint8_t
+next_noise_value(uint8_t prev_noise)
+{
+   uint32_t v = prev_noise;
+   return (v * 103u + 227u) & 0xFF;
+}
+
+static void
+fill_noise_buffer(uint8_t *dst, uint8_t start, uint32_t length)
+{
+   for(uint32_t i = 0; i < length; ++i) {
+  dst[i] = start;
+  start = next_noise_value(start);
+   }
+}
+
+static uint8_t*
+generate_noise_buffer(uint8_t start, uint32_t length)
+{
+   uint8_t *return_value;
+   return_value = malloc(length);
+   if (return_value) {
+  fill_noise_buffer(return_value, start, length);
+   }
+
+   return return_value;
+}
+
 int
 brw_bo_busy(struct brw_bo *bo)
 {
@@ -367,7 +400,16 @@ retry:
   bo->size = bo_size;
   bo->idle = true;
 
-  struct drm_i915_gem_create create = { .size = bo_size };
+  bo->padding_size = 0;
+  if (unlikely(INTEL_DEBUG & DEBUG_OUT_OF_BOUND_CHK)) {
+ /* TODO: we want to make sure that the padding forces
+  * the BO to take another page on the (PP)GTT; getpagesize()
+  * may or may not be the page size in the (PP)GTT.
+  */
+ bo->padding_size = getpagesize();
+  }
+
+  struct drm_i915_gem_create create = { .size = bo_size + bo->padding_size 
};
 
   /* All new BOs we get from the kernel are zeroed, so we don't need to
* worry about that here.
@@ -378,6 +420,27 @@ retry:
  goto err;
   }
 
+  if (unlikely(bo->padding_size > 0)) {
+ uint8_t *noise_values;
+ struct drm_i915_gem_pwrite pwrite;
+
+ noise_values = generate_noise_buffer(create.handle, bo->padding_size);
+ if (!noise_values)
+goto err_free;
+
+ pwrite.handle = create.handle;
+ pwrite.pad = 0;
+ pwrite.offset = bo_size;
+ pwrite.size = bo->padding_size;
+ pwrite.data_ptr = (__u64) (uintptr_t) noise_values;
+
+ ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_PWRITE, );
+ free(noise_values);
+
+ if (ret != 0)
+goto err_free;
+  }
+
   bo->gem_handle = create.handle;
 
   bo->bufmgr = bufmgr;
@@ -424,6 +487,44 @@ err:
return NULL;
 }
 
+bool
+brw_bo_padding_is_good(struct brw_bo *bo)
+{
+   if (bo->padding_size > 0) {
+  uint8_t *tmp;
+  struct drm_i915_gem_pread pread;
+  int ret;
+  uint8_t start;
+
+  tmp = malloc(bo->padding_size);
+  if (!tmp)
+ return false;
+
+  pread.handle = bo->gem_handle;
+  pread.pad = 0;
+  pread.offset = bo->size;
+  pread.size = bo->padding_size;
+  pread.data_ptr = (__u64) (uintptr_t) tmp;
+
+  /* PREAD waits for any processing by the GPU that uses the buffer
+   * to finish before reading values from that buffer.
+   */
+  ret = drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_PREAD, );
+  assert(ret == 0);
+
+  start = bo->gem_handle & 0xFF;
+  for (uint32_t i = 0; i < bo->padding_size; ++i) {
+ if (start != tmp[i]) {
+free(tmp);
+return false;
+ }
+ start = next_noise_value(start);
+  }
+  free(tmp);
+   }
+   return true;
+}
+
 struct brw_bo *
 brw_bo_alloc(struct brw_bufmgr *bufmgr,
  const char *name, uint64_t size, uint64_t alignment)
@@ -598,6 +699,7 @@ bo_free(struct brw_bo *bo)
   DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
   bo->gem_handle, bo->name, strerror(errno));
}
+
free(bo);
 }
 
@@ -1156,6 +1258,7 @@ brw_bo_gem_create_from_prime(struct brw_bufmgr *bufmgr, 
int prime_fd)
bo->name = "prime";
bo->reusable = false;
bo->external = true;
+   bo->padding_size = 0;
 
struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle };
if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, _tiling))
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h 
b/src/mesa/drivers/dri/i965/brw_bufmgr.h
index 0ae541cda0..e402e61c53 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
@@ -165,6 +165,12 @@ struct brw_bo

[Mesa-dev] [PATCH 0/3] GEM BO padding to find OOB buffer writes

2017-12-13 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

This patch series adds a new debug option to pad each GEM BO
allocated by the brw_bufmgr with pseudo-(weak) random noise values
which are then checked after each batchbuffer dispatch to the kernel.
This can be quite valuable to find diffucult to track down heisenberg
style bugs.

A possible follow-up series would be to write to stderr (or
another logging mechanism) if the OOB write is to a GEM BO that
backs a GL buffer object; that features would be quite useful for
application developers.

v2:
 Change from using rand() to using internal generating function
 (requested/suggested by Jason Ekstrand)

 Avoid having extra pointers in brw_bo struct via using the internal
 function and allocating buffer for pread at brw_bo_padding_is_good()
 (requested/suggested by Jason Ekstrand)

 Comments indicating that pread ioctl will do the required waiting
 for GPU commands to finish

Kevin Rogovin (3):
  intel/common:add debug flag for adding and checking padding on BO's
  i965: add noise padding to buffer object and function to check if
noise is correct
  i965: if DEBUG_OUT_OF_BOUND_CHK is up, check that noise padding for
each bo used in batchbuffer is correct

 src/intel/common/gen_debug.c  |   1 +
 src/intel/common/gen_debug.h  |   1 +
 src/mesa/drivers/dri/i965/brw_bufmgr.c| 105 +-
 src/mesa/drivers/dri/i965/brw_bufmgr.h|   8 ++
 src/mesa/drivers/dri/i965/intel_batchbuffer.c |  19 +
 5 files changed, 133 insertions(+), 1 deletion(-)

-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] i965: if DEBUG_OUT_OF_BOUND_CHK is up, check that noise padding for each bo used in batchbuffer is correct

2017-12-13 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 91a6506a89..689ae2362a 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -758,6 +758,7 @@ execbuffer(int fd,
};
 
unsigned long cmd = DRM_IOCTL_I915_GEM_EXECBUFFER2;
+   bool detected_out_of_bounds_write = false;
 
if (in_fence != -1) {
   execbuf.rsvd2 = in_fence;
@@ -787,6 +788,24 @@ execbuffer(int fd,
  batch->validation_list[i].offset);
  bo->gtt_offset = batch->validation_list[i].offset;
   }
+
+  if (unlikely(INTEL_DEBUG & DEBUG_OUT_OF_BOUND_CHK)) {
+ /* brw_bo_padding_is_good() performs the necessary
+  * syncing itself to make sure that the padding read
+  * is correct.
+  */
+ if (!brw_bo_padding_is_good(bo)) {
+detected_out_of_bounds_write = true;
+fprintf(stderr,
+"Detected buffer out-of-bounds write from brw_bo %p "
+"(GEM %u, label = \"%s\")\n",
+bo, bo->gem_handle, bo->name);
+ }
+  }
+   }
+
+   if (unlikely(detected_out_of_bounds_write)) {
+  abort();
}
 
if (ret == 0 && out_fence != NULL)
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] intel/common:add debug flag for adding and checking padding on BO's

2017-12-13 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/common/gen_debug.c | 1 +
 src/intel/common/gen_debug.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/intel/common/gen_debug.c b/src/intel/common/gen_debug.c
index a978f2f581..2154b23756 100644
--- a/src/intel/common/gen_debug.c
+++ b/src/intel/common/gen_debug.c
@@ -85,6 +85,7 @@ static const struct debug_control debug_control[] = {
{ "nohiz",   DEBUG_NO_HIZ },
{ "color",   DEBUG_COLOR },
{ "reemit",  DEBUG_REEMIT },
+   { "check_oob",   DEBUG_OUT_OF_BOUND_CHK },
{ NULL,0 }
 };
 
diff --git a/src/intel/common/gen_debug.h b/src/intel/common/gen_debug.h
index da5b5a569d..92fc68b494 100644
--- a/src/intel/common/gen_debug.h
+++ b/src/intel/common/gen_debug.h
@@ -83,6 +83,7 @@ extern uint64_t INTEL_DEBUG;
 #define DEBUG_NO_HIZ  (1ull << 39)
 #define DEBUG_COLOR   (1ull << 40)
 #define DEBUG_REEMIT  (1ull << 41)
+#define DEBUG_OUT_OF_BOUND_CHK(1ull << 42)
 
 #ifdef HAVE_ANDROID_PLATFORM
 #define LOG_TAG "INTEL-MESA"
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] i965: compute scratch space size correctly for Gen9

2017-12-12 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_program.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
b/src/mesa/drivers/dri/i965/brw_program.c
index 6aa4100..1ae0aa0 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -368,9 +368,13 @@ brw_alloc_stage_scratch(struct brw_context *brw,
*
* According to the other driver team, this applies to compute shaders
* as well.  This is not currently documented at all.
+   *
+   * brw->screen->subslice_total is the TOTAL number of subslices
+   * and we wish to view that there are 4 subslices per slice
+   * instead of the actual number of subslices per slice.
*/
   if (devinfo->gen >= 9)
- subslices = 4;
+ subslices = 4 * brw->screen->devinfo.num_slices;
 
   /* WaCSScratchSize:hsw
*
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] i965: Program MEDIA_VFE_STATE in a more readable fashion.

2017-12-12 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

This patch is purely for readability improvements when programming
the MEDIA_VFE_STATE.

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/genX_state_upload.c | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 04a4925..50ac5bc 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -4183,28 +4183,35 @@ genX(upload_cs_state)(struct brw_context *brw)
 
brw_batch_emit(brw, GENX(MEDIA_VFE_STATE), vfe) {
   if (prog_data->total_scratch) {
- uint32_t bo_offset;
+ uint32_t per_thread_scratch_value;
 
  if (GEN_GEN >= 8) {
 /* Broadwell's Per Thread Scratch Space is in the range [0, 11]
  * where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
  */
-bo_offset = ffs(stage_state->per_thread_scratch) - 11;
+per_thread_scratch_value = ffs(stage_state->per_thread_scratch) - 
11;
  } else if (GEN_IS_HASWELL) {
 /* Haswell's Per Thread Scratch Space is in the range [0, 10]
  * where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M.
  */
-bo_offset = ffs(stage_state->per_thread_scratch) - 12;
+per_thread_scratch_value = ffs(stage_state->per_thread_scratch) - 
12;
  } else {
 /* Earlier platforms use the range [0, 11] to mean [1kB, 12kB]
  * where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB.
  */
-bo_offset = stage_state->per_thread_scratch / 1024 - 1;
+per_thread_scratch_value = stage_state->per_thread_scratch / 1024 
- 1;
  }
- vfe.ScratchSpaceBasePointer =
-rw_bo(stage_state->scratch_bo, bo_offset);
+ vfe.ScratchSpaceBasePointer = rw_bo(stage_state->scratch_bo, 0);
+ vfe.PerThreadScratchSpace = per_thread_scratch_value;
   }
 
+  /* If brw->screen->subslice_total is greater than one, then
+   * devinfo->max_cs_threads stores number of threads per sub-slice;
+   * thus we need to multiply by that number by subslices to get
+   * the actual maximum number of threads; the -1 is because the HW
+   * has a bias of 1 (would not make sense to say the maximum number
+   * of threads is 0).
+   */
   const uint32_t subslices = MAX2(brw->screen->subslice_total, 1);
   vfe.MaximumNumberofThreads = devinfo->max_cs_threads * subslices - 1;
   vfe.NumberofURBEntries = GEN_GEN >= 8 ? 2 : 0;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/2] i965: scratch space fixes (v2)

2017-12-12 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

This patch series offers a readability improvement for programming
MEDIA_VFE_STATE and fixes a scratch space sizing bug for Gen9.
Together with the ASTC5x5 fixes posted before, carchase on GLES
works on my SKL GT4.

v2:
 correctly state that first patch is just readability patch
 corrently compute subslices as 4 times number slices

Kevin Rogovin (2):
  i965: Program MEDIA_VFE_STATE in a more readable fashion.
  i965: compute scratch space size correctly for Gen9

 src/mesa/drivers/dri/i965/brw_program.c   |  6 +-
 src/mesa/drivers/dri/i965/genX_state_upload.c | 19 +--
 2 files changed, 18 insertions(+), 7 deletions(-)

-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] i965: correctly program MEDIA_VFE_STATE for compute shading

2017-12-12 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/genX_state_upload.c | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 04a492539a..50ac5bc59f 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -4183,28 +4183,35 @@ genX(upload_cs_state)(struct brw_context *brw)
 
brw_batch_emit(brw, GENX(MEDIA_VFE_STATE), vfe) {
   if (prog_data->total_scratch) {
- uint32_t bo_offset;
+ uint32_t per_thread_scratch_value;
 
  if (GEN_GEN >= 8) {
 /* Broadwell's Per Thread Scratch Space is in the range [0, 11]
  * where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
  */
-bo_offset = ffs(stage_state->per_thread_scratch) - 11;
+per_thread_scratch_value = ffs(stage_state->per_thread_scratch) - 
11;
  } else if (GEN_IS_HASWELL) {
 /* Haswell's Per Thread Scratch Space is in the range [0, 10]
  * where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M.
  */
-bo_offset = ffs(stage_state->per_thread_scratch) - 12;
+per_thread_scratch_value = ffs(stage_state->per_thread_scratch) - 
12;
  } else {
 /* Earlier platforms use the range [0, 11] to mean [1kB, 12kB]
  * where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB.
  */
-bo_offset = stage_state->per_thread_scratch / 1024 - 1;
+per_thread_scratch_value = stage_state->per_thread_scratch / 1024 
- 1;
  }
- vfe.ScratchSpaceBasePointer =
-rw_bo(stage_state->scratch_bo, bo_offset);
+ vfe.ScratchSpaceBasePointer = rw_bo(stage_state->scratch_bo, 0);
+ vfe.PerThreadScratchSpace = per_thread_scratch_value;
   }
 
+  /* If brw->screen->subslice_total is greater than one, then
+   * devinfo->max_cs_threads stores number of threads per sub-slice;
+   * thus we need to multiply by that number by subslices to get
+   * the actual maximum number of threads; the -1 is because the HW
+   * has a bias of 1 (would not make sense to say the maximum number
+   * of threads is 0).
+   */
   const uint32_t subslices = MAX2(brw->screen->subslice_total, 1);
   vfe.MaximumNumberofThreads = devinfo->max_cs_threads * subslices - 1;
   vfe.NumberofURBEntries = GEN_GEN >= 8 ? 2 : 0;
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/2] i965: scratch space fixes

2017-12-12 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

This patch series fixes 2 issues for scratch space
on compute shaders for GEN. Together with the ASTC5x5
fixes posted before, carchase on GLES works on my SKL
GT4.

Kevin Rogovin (2):
  i965: correctly program MEDIA_VFE_STATE for compute shading
  i965: compute scratch space size correctly for Gen9

 src/mesa/drivers/dri/i965/brw_program.c   | 12 ++--
 src/mesa/drivers/dri/i965/genX_state_upload.c | 19 +--
 2 files changed, 23 insertions(+), 8 deletions(-)

-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] i965: compute scratch space size correctly for Gen9

2017-12-12 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_program.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
b/src/mesa/drivers/dri/i965/brw_program.c
index 6aa41009e7..7bfcad9a65 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -368,9 +368,17 @@ brw_alloc_stage_scratch(struct brw_context *brw,
*
* According to the other driver team, this applies to compute shaders
* as well.  This is not currently documented at all.
+   *
+   * brw->screen->subslice_total is the TOTAL number of subslices
+   * and we wish to view that there are 4 subslices per slice
+   * instead of the actaul number of subslices per slice.
*/
-  if (devinfo->gen >= 9)
- subslices = 4;
+  if (devinfo->gen >= 9) {
+ subslices = 0;
+ for (int i = 0; i < brw->screen->devinfo.num_slices; ++i) {
+subslices += 4 * brw->screen->devinfo.num_subslices[i];
+ }
+  }
 
   /* WaCSScratchSize:hsw
*
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 16/16] src/intel/tools: add BatchbufferLogger to meson build system

2017-12-11 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 meson_options.txt   |  6 
 src/intel/tools/meson.build | 71 +
 2 files changed, 77 insertions(+)

diff --git a/meson_options.txt b/meson_options.txt
index 39b137cbea..f6690c5e8e 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -261,3 +261,9 @@ option(
   choices : ['8', '16', '32'],
   description : 'Number of channel bits for OSMesa.'
 )
+option(
+  'build-intel-tools',
+  type : 'boolean',
+  value : false,
+  description : 'if true, then build intel tools.'
+)
diff --git a/src/intel/tools/meson.build b/src/intel/tools/meson.build
index 1996d5208f..e82cbedf58 100644
--- a/src/intel/tools/meson.build
+++ b/src/intel/tools/meson.build
@@ -18,6 +18,8 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+build_intel_tools = get_option('build-intel-tools')
+
 aubinator = executable(
   'aubinator',
   files('aubinator.c', 'disasm.c', 'gen_disasm.h', 'intel_aub.h'),
@@ -37,3 +39,72 @@ aubinator_error_decode = executable(
   c_args : [c_vis_args, no_override_init_args],
   build_by_default : false,
 )
+
+i965_batchbuffer_logger = shared_library(
+  'i965_batchbuffer_logger',
+  files('i965_batchbuffer_logger.cpp', 'disasm.c', 'gen_disasm.h'),
+  dependencies : [dep_zlib, dep_thread],
+  include_directories : [inc_common, inc_intel],
+  link_with : [libintel_common, libintel_compiler, libmesa_util, libnir, 
libisl],
+  cpp_args : ['-std=c++11'],
+  build_by_default : build_intel_tools,
+  install : build_intel_tools,
+)
+
+i965_batchbuffer_dump_show = executable(
+  'i965_batchbuffer_dump_show',
+  files('i965_batchbuffer_dump_show.c'),
+  include_directories : [inc_common, inc_intel],
+  c_args : [],
+  build_by_default : build_intel_tools,
+  install : build_intel_tools,
+)
+
+i965_batchbuffer_dump_show_xml = executable(
+  'i965_batchbuffer_dump_show_xml',
+  files('i965_batchbuffer_dump_show_xml.cpp'),
+  include_directories : [inc_common, inc_intel],
+  cpp_args : [],
+  build_by_default : build_intel_tools,
+  install : build_intel_tools,
+)
+
+i965_batchbuffer_dump_show_json = executable(
+  'i965_batchbuffer_dump_show_json',
+  files('i965_batchbuffer_dump_show_json.cpp'),
+  include_directories : [inc_common, inc_intel],
+  cpp_args : [],
+  build_by_default : build_intel_tools,
+  install : build_intel_tools,
+)
+
+gen_shader_disassembler = executable(
+  'gen_shader_disassembler',
+  files('gen_shader_disassembler.c', 'disasm.c'),
+  dependencies : [dep_thread],
+  include_directories : [inc_common, inc_intel],
+  link_with : [libintel_common, libintel_compiler, libmesa_util],
+  cpp_args : [],
+  build_by_default : build_intel_tools,
+  install : build_intel_tools,
+)
+
+libdir_value = join_paths(get_option('prefix'), get_option('libdir'))
+sed_replace_string = 's,[@]libdir[@],' + libdir_value + ',g'
+i965_batchbuffer_logger_sh = custom_target(
+  'i965_batchbuffer_logger_sh',
+  input : 'i965_batchbuffer_logger_sh.in',
+  output : 'i965_batchbuffer_logger_sh',
+  command : ['sed', '-e', sed_replace_string, '@INPUT@'],
+  capture : true,
+  build_by_default : build_intel_tools,
+  install : build_intel_tools,
+  install_dir : get_option('bindir'),
+)
+
+if build_intel_tools
+  i965_batchbuffer_logger_headers = install_headers(
+'i965_batchbuffer_logger_app.h',
+'i965_batchbuffer_logger_output.h'
+  )
+endif
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 15/16] intel/tools: add command line GEN shader disassembler tool

2017-12-11 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/Makefile.tools.am   |  21 ++-
 src/intel/tools/.gitignore|   1 +
 src/intel/tools/gen_shader_disassembler.c | 213 ++
 3 files changed, 234 insertions(+), 1 deletion(-)
 create mode 100644 src/intel/tools/gen_shader_disassembler.c

diff --git a/src/intel/Makefile.tools.am b/src/intel/Makefile.tools.am
index a86e76653b..1ccf5a35cc 100644
--- a/src/intel/Makefile.tools.am
+++ b/src/intel/Makefile.tools.am
@@ -32,7 +32,8 @@ intellib_LTLIBRARIES = \
 
 intelbin_PROGRAMS = tools/i965_batchbuffer_dump_show \
tools/i965_batchbuffer_dump_show_xml \
-   tools/i965_batchbuffer_dump_show_json
+   tools/i965_batchbuffer_dump_show_json \
+   tools/gen_shader_disassembler
 
 intelbin_SCRIPTS = tools/i965_batchbuffer_logger_sh
 CLEANFILES += $(intelbin_SCRIPTS)
@@ -111,3 +112,21 @@ tools_i965_batchbuffer_dump_show_xml_SOURCES = \
 
 tools_i965_batchbuffer_dump_show_json_SOURCES = \
tools/i965_batchbuffer_dump_show_json.cpp
+
+tools_gen_shader_disassembler_SOURCES = \
+   tools/gen_shader_disassembler.c \
+   tools/disasm.c \
+   tools/gen_disasm.h
+
+tools_gen_shader_disassembler_LDADD = \
+   common/libintel_common.la \
+   compiler/libintel_compiler.la \
+   $(top_builddir)/src/util/libmesautil.la \
+   $(PTHREAD_LIBS) \
+   $(EXPAT_LIBS) \
+   $(ZLIB_LIBS)
+
+tools_gen_shader_disassembler_CFLAGS = \
+   $(AM_CFLAGS) \
+   $(EXPAT_CFLAGS) \
+   $(ZLIB_CFLAGS)
diff --git a/src/intel/tools/.gitignore b/src/intel/tools/.gitignore
index ea4dc23c20..e9b22c89aa 100644
--- a/src/intel/tools/.gitignore
+++ b/src/intel/tools/.gitignore
@@ -4,3 +4,4 @@
 /i965_batchbuffer_dump_show
 /i965_batchbuffer_dump_show_xml
 /i965_batchbuffer_dump_show_json
+/gen_shader_disassembler
diff --git a/src/intel/tools/gen_shader_disassembler.c 
b/src/intel/tools/gen_shader_disassembler.c
new file mode 100644
index 00..070c588d54
--- /dev/null
+++ b/src/intel/tools/gen_shader_disassembler.c
@@ -0,0 +1,213 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "compiler/brw_inst.h"
+#include "compiler/brw_eu.h"
+
+static void
+print_opcodes(const void *data, int data_sz,
+  struct gen_device_info *devinfo,
+  bool print_offsets)
+{
+   for (int offset = 0; offset < data_sz;) {
+  const brw_inst *insn = data + offset;
+  bool compacted;
+  brw_inst uncompacted;
+  enum opcode opcode;
+  const struct opcode_desc *desc;
+
+  if (print_offsets) {
+ printf("0x%08x: ", offset);
+  }
+
+  compacted = brw_inst_cmpt_control(devinfo, insn);
+  if (compacted) {
+ brw_compact_inst *compacted_insn = (brw_compact_inst *)insn;
+ brw_uncompact_instruction(devinfo, , compacted_insn);
+ insn = 
+ offset += 8;
+  } else {
+ offset += 16;
+  }
+
+  opcode = brw_inst_opcode(devinfo, insn);
+  desc = brw_opcode_desc(devinfo, opcode);
+  if (desc) {
+ printf("(0x%08x) %s", opcode, desc->name);
+  } else {
+ printf("(0x%08x) UnknownOpcode", opcode);
+  }
+
+  if (compacted) {
+ printf(" {compacted}");
+  }
+
+  printf("\n");
+   }
+}
+
+static void
+print_disassembly(const void *data, int data_sz,
+  struct gen_device_info *devinfo,
+  bool print_offsets)
+{
+   struct disasm_info *disasm_info = disasm_initialize(devinfo, NULL);
+   disasm_new_inst_group(disasm_info, 0);
+   disasm_new_inst_group(disasm_info, data_sz);
+
+   brw_va

[Mesa-dev] [PATCH 14/16] intel/tools/BatchbufferLogger (output-json): add json outputter

2017-12-11 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/Makefile.tools.am|   6 +-
 src/intel/tools/.gitignore |   1 +
 .../tools/i965_batchbuffer_dump_show_json.cpp  | 253 +
 3 files changed, 259 insertions(+), 1 deletion(-)
 create mode 100644 src/intel/tools/i965_batchbuffer_dump_show_json.cpp

diff --git a/src/intel/Makefile.tools.am b/src/intel/Makefile.tools.am
index 1c44d92d60..a86e76653b 100644
--- a/src/intel/Makefile.tools.am
+++ b/src/intel/Makefile.tools.am
@@ -31,7 +31,8 @@ intellib_LTLIBRARIES = \
tools/libi965_batchbuffer_logger.la
 
 intelbin_PROGRAMS = tools/i965_batchbuffer_dump_show \
-   tools/i965_batchbuffer_dump_show_xml
+   tools/i965_batchbuffer_dump_show_xml \
+   tools/i965_batchbuffer_dump_show_json
 
 intelbin_SCRIPTS = tools/i965_batchbuffer_logger_sh
 CLEANFILES += $(intelbin_SCRIPTS)
@@ -107,3 +108,6 @@ tools_i965_batchbuffer_dump_show_SOURCES = \
 
 tools_i965_batchbuffer_dump_show_xml_SOURCES = \
tools/i965_batchbuffer_dump_show_xml.cpp
+
+tools_i965_batchbuffer_dump_show_json_SOURCES = \
+   tools/i965_batchbuffer_dump_show_json.cpp
diff --git a/src/intel/tools/.gitignore b/src/intel/tools/.gitignore
index 7180e4ba4b..ea4dc23c20 100644
--- a/src/intel/tools/.gitignore
+++ b/src/intel/tools/.gitignore
@@ -3,3 +3,4 @@
 /i965_batchbuffer_logger_sh
 /i965_batchbuffer_dump_show
 /i965_batchbuffer_dump_show_xml
+/i965_batchbuffer_dump_show_json
diff --git a/src/intel/tools/i965_batchbuffer_dump_show_json.cpp 
b/src/intel/tools/i965_batchbuffer_dump_show_json.cpp
new file mode 100644
index 00..81d5ca66e7
--- /dev/null
+++ b/src/intel/tools/i965_batchbuffer_dump_show_json.cpp
@@ -0,0 +1,253 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "tools/i965_batchbuffer_logger_output.h"
+
+static
+std::string
+json_name_from_file(unsigned int len, std::FILE *pfile)
+{
+   std::string return_value;
+   unsigned int i;
+   char c;
+
+   for(i = 0; i < len; ++i) {
+  if (std::fread(, sizeof(char), 1, pfile) != sizeof(char)) {
+ break;
+  }
+  if(c != '\n') {
+ return_value.push_back(c);
+  }
+   }
+
+   return return_value;
+}
+
+static
+std::string
+json_block_name_from_file(const struct i965_batchbuffer_logger_header *hdr,
+  std::FILE *pfile)
+{
+   std::ostringstream return_value;
+
+   return_value << json_name_from_file(hdr->name_length, pfile);
+   if (hdr->value_length > 0) {
+  return_value << ":" << json_name_from_file(hdr->value_length, pfile);
+   }
+
+   return return_value.str();
+}
+
+static
+void
+json_print_value_line(std::vector::const_iterator begin,
+  std::vector::const_iterator end)
+{
+   for(; begin != end; ++begin) {
+  switch(*begin) {
+  case '\t':
+ std::cout << "\\t";
+ break;
+  case '\"':
+ std::cout << "\\\"";
+ break;
+  case '\\':
+ /* This is silly but required; apirtace will place control
+  * character codes (sometimes) within string values (typically
+  * from shader sources). So for example if a shader source
+  * has (within a comment) something like "Famous"
+  * then the detailed function value will have then \"Famour\"
+  * within its string value. The below silly block of code
+  * just checks if there is a non-white character and if so,
+  * just assume that the \ is a control code.
+  */
+ if (begin !=

[Mesa-dev] [PATCH 12/16] intel/tools/BatchbufferLogger (txt-output): example txt dumper

2017-12-11 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/Makefile.tools.am  |   5 +
 src/intel/tools/.gitignore   |   1 +
 src/intel/tools/i965_batchbuffer_dump_show.c | 135 +++
 3 files changed, 141 insertions(+)
 create mode 100644 src/intel/tools/i965_batchbuffer_dump_show.c

diff --git a/src/intel/Makefile.tools.am b/src/intel/Makefile.tools.am
index f5eda31a5c..2c36649f89 100644
--- a/src/intel/Makefile.tools.am
+++ b/src/intel/Makefile.tools.am
@@ -30,6 +30,8 @@ noinst_PROGRAMS += \
 intellib_LTLIBRARIES = \
tools/libi965_batchbuffer_logger.la
 
+intelbin_PROGRAMS = tools/i965_batchbuffer_dump_show
+
 intelbin_SCRIPTS = tools/i965_batchbuffer_logger_sh
 CLEANFILES += $(intelbin_SCRIPTS)
 
@@ -98,3 +100,6 @@ tools_libi965_batchbuffer_logger_la_LDFLAGS = \
 intel_sed_prefix_vars = sed -e 's,[@]libdir[@],$(intellibdir),g'
 tools/i965_batchbuffer_logger_sh: tools/i965_batchbuffer_logger_sh.in
$(intel_sed_prefix_vars) < tools/i965_batchbuffer_logger_sh.in > 
tools/i965_batchbuffer_logger_sh
+
+tools_i965_batchbuffer_dump_show_SOURCES = \
+   tools/i965_batchbuffer_dump_show.c
diff --git a/src/intel/tools/.gitignore b/src/intel/tools/.gitignore
index fa9bf70808..7d95bcdf3a 100644
--- a/src/intel/tools/.gitignore
+++ b/src/intel/tools/.gitignore
@@ -1,3 +1,4 @@
 /aubinator
 /aubinator_error_decode
 /i965_batchbuffer_logger_sh
+/i965_batchbuffer_dump_show
diff --git a/src/intel/tools/i965_batchbuffer_dump_show.c 
b/src/intel/tools/i965_batchbuffer_dump_show.c
new file mode 100644
index 00..e69ab47edc
--- /dev/null
+++ b/src/intel/tools/i965_batchbuffer_dump_show.c
@@ -0,0 +1,135 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include "tools/i965_batchbuffer_logger_output.h"
+
+static
+void
+print_tabs(int l)
+{
+   for(int i = 0; i < l; ++i) {
+  printf("\t");
+   }
+}
+
+static
+void
+print_instrumentation_message(int block_level,
+  const struct i965_batchbuffer_logger_header *hdr,
+  FILE *pfile)
+{
+   bool line_start;
+   uint32_t i;
+   char c = 0;
+   size_t num_read;
+
+   /* assume name has no EOLs */
+   print_tabs(block_level);
+   for (i = 0; !feof(pfile) && i < hdr->name_length; ++i) {
+  num_read = fread(, sizeof(c), 1, pfile);
+  if (num_read != 1)
+ return;
+  printf("%c", c);
+   }
+
+   if (hdr->value_length > 0) {
+  printf(" : ");
+   }
+
+   /* print the value (if there is one). */
+   for (i = 0, line_start = false; !feof(pfile) && i < hdr->value_length; ++i) 
{
+  num_read = fread(, sizeof(c), 1, pfile);
+
+  if (num_read != 1)
+ break;
+
+  if (line_start) {
+ print_tabs(block_level);
+  }
+
+  printf("%c", c);
+  line_start = (c == '\n');
+   }
+
+   if (c != '\n') {
+  printf("\n");
+   }
+}
+
+
+int
+main(int argc, char **argv)
+{
+   FILE *pfile;
+   int block_level = 0;
+
+   if (argc != 2) {
+  return -1;
+   }
+
+   pfile = fopen(argv[1], "r");
+   if (pfile == NULL) {
+  return -1;
+   }
+
+   while (!feof(pfile)) {
+  struct i965_batchbuffer_logger_header hdr;
+  size_t num_read;
+
+  num_read = fread(, sizeof(hdr), 1, pfile);
+  if (num_read != 1)
+ break;
+
+  switch(hdr.type) {
+ case I965_BATCHBUFFER_LOGGER_MESSAGE_BLOCK_BEGIN: {
+print_instrumentation_message(block_level, , pfile);
+++block_level;
+ }
+ break;
+
+ case I965_BATCHBUFFER_LOGGER_MESSAGE_BLOCK_END: {
+--block_level;
+

[Mesa-dev] [PATCH 09/16] intel/compiler: add print_offsets argument to brw_disassemble()

2017-12-11 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/compiler/brw_compile_clip.c | 4 +++-
 src/intel/compiler/brw_compile_sf.c   | 4 +++-
 src/intel/compiler/brw_disasm_info.c  | 4 +++-
 src/intel/compiler/brw_eu.c   | 5 +++--
 src/intel/compiler/brw_eu.h   | 3 ++-
 src/intel/tools/disasm.c  | 2 +-
 src/mesa/drivers/dri/i965/brw_ff_gs.c | 3 ++-
 src/mesa/drivers/dri/i965/brw_program_cache.c | 4 +++-
 8 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/src/intel/compiler/brw_compile_clip.c 
b/src/intel/compiler/brw_compile_clip.c
index c04d1a8277..d37a7d607e 100644
--- a/src/intel/compiler/brw_compile_clip.c
+++ b/src/intel/compiler/brw_compile_clip.c
@@ -88,7 +88,9 @@ brw_compile_clip(const struct brw_compiler *compiler,
if (unlikely(INTEL_DEBUG & DEBUG_CLIP)) {
   fprintf(stderr, "clip:\n");
   brw_disassemble(compiler->devinfo,
-  program, 0, *final_assembly_size, stderr);
+  program, 0, *final_assembly_size,
+  INTEL_DEBUG & DEBUG_PRINT_OFFSETS,
+  stderr);
   fprintf(stderr, "\n");
}
 
diff --git a/src/intel/compiler/brw_compile_sf.c 
b/src/intel/compiler/brw_compile_sf.c
index 91e8a6da6c..8294242f30 100644
--- a/src/intel/compiler/brw_compile_sf.c
+++ b/src/intel/compiler/brw_compile_sf.c
@@ -871,7 +871,9 @@ brw_compile_sf(const struct brw_compiler *compiler,
if (unlikely(INTEL_DEBUG & DEBUG_SF)) {
   fprintf(stderr, "sf:\n");
   brw_disassemble(compiler->devinfo,
-  program, 0, *final_assembly_size, stderr);
+  program, 0, *final_assembly_size,
+  INTEL_DEBUG & DEBUG_PRINT_OFFSETS,
+  stderr);
   fprintf(stderr, "\n");
}
 
diff --git a/src/intel/compiler/brw_disasm_info.c 
b/src/intel/compiler/brw_disasm_info.c
index 3db79ede70..baeb3271a9 100644
--- a/src/intel/compiler/brw_disasm_info.c
+++ b/src/intel/compiler/brw_disasm_info.c
@@ -72,7 +72,9 @@ dump_assembly(void *assembly, struct disasm_info *disasm)
 fprintf(stderr, "   %s\n", last_annotation_string);
   }
 
-  brw_disassemble(devinfo, assembly, start_offset, end_offset, stderr);
+  brw_disassemble(devinfo, assembly, start_offset, end_offset,
+  INTEL_DEBUG & DEBUG_PRINT_OFFSETS,
+  stderr);
 
   if (group->error) {
  fputs(group->error, stderr);
diff --git a/src/intel/compiler/brw_eu.c b/src/intel/compiler/brw_eu.c
index bc297a21b3..48ad82edfc 100644
--- a/src/intel/compiler/brw_eu.c
+++ b/src/intel/compiler/brw_eu.c
@@ -338,7 +338,8 @@ const unsigned *brw_get_program( struct brw_codegen *p,
 
 void
 brw_disassemble(const struct gen_device_info *devinfo,
-const void *assembly, int start, int end, FILE *out)
+const void *assembly, int start, int end,
+bool print_offsets, FILE *out)
 {
bool dump_hex = (INTEL_DEBUG & DEBUG_HEX) != 0;
 
@@ -346,7 +347,7 @@ brw_disassemble(const struct gen_device_info *devinfo,
   const brw_inst *insn = assembly + offset;
   brw_inst uncompacted;
   bool compacted = brw_inst_cmpt_control(devinfo, insn);
-  if (0)
+  if (print_offsets)
  fprintf(out, "0x%08x: ", offset);
 
   if (compacted) {
diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
index 2d0f56f793..ec98c046ee 100644
--- a/src/intel/compiler/brw_eu.h
+++ b/src/intel/compiler/brw_eu.h
@@ -127,7 +127,8 @@ void brw_init_codegen(const struct gen_device_info *, 
struct brw_codegen *p,
 int brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo,
  const struct brw_inst *inst, bool is_compacted);
 void brw_disassemble(const struct gen_device_info *devinfo,
- const void *assembly, int start, int end, FILE *out);
+ const void *assembly, int start, int end,
+ bool print_offsets, FILE *out);
 const unsigned *brw_get_program( struct brw_codegen *p, unsigned *sz );
 
 brw_inst *brw_next_insn(struct brw_codegen *p, unsigned opcode);
diff --git a/src/intel/tools/disasm.c b/src/intel/tools/disasm.c
index 9a136db4db..c4a23594f8 100644
--- a/src/intel/tools/disasm.c
+++ b/src/intel/tools/disasm.c
@@ -97,7 +97,7 @@ gen_disasm_disassemble(struct gen_disasm *disasm, const void 
*assembly,
   int start_offset = group->offset;
   int end_offset = next->offset;
 
-  brw_disassemble(devinfo, assembly, start_offset, end_offset, out);
+  brw_disassemble(devinfo, assembly, start_offset, end_offset, false, out);
 
   if (group->error) {
  fputs(group->error, out);
diff --git a/src/mesa/drivers/dri/i965/

[Mesa-dev] [PATCH 08/16] intel/common: add new debug option print_offsets options for dumping assembly

2017-12-11 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/common/gen_debug.c | 1 +
 src/intel/common/gen_debug.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/intel/common/gen_debug.c b/src/intel/common/gen_debug.c
index a978f2f581..ecb6be0028 100644
--- a/src/intel/common/gen_debug.c
+++ b/src/intel/common/gen_debug.c
@@ -85,6 +85,7 @@ static const struct debug_control debug_control[] = {
{ "nohiz",   DEBUG_NO_HIZ },
{ "color",   DEBUG_COLOR },
{ "reemit",  DEBUG_REEMIT },
+   { "print_offsets", DEBUG_PRINT_OFFSETS },
{ NULL,0 }
 };
 
diff --git a/src/intel/common/gen_debug.h b/src/intel/common/gen_debug.h
index da5b5a569d..e9047676d5 100644
--- a/src/intel/common/gen_debug.h
+++ b/src/intel/common/gen_debug.h
@@ -83,6 +83,7 @@ extern uint64_t INTEL_DEBUG;
 #define DEBUG_NO_HIZ  (1ull << 39)
 #define DEBUG_COLOR   (1ull << 40)
 #define DEBUG_REEMIT  (1ull << 41)
+#define DEBUG_PRINT_OFFSETS   (1ull << 42)
 
 #ifdef HAVE_ANDROID_PLATFORM
 #define LOG_TAG "INTEL-MESA"
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/16] intel/tools/BatchbufferLogger (output-xml): add outputter to XML

2017-12-11 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/Makefile.tools.am|   6 +-
 src/intel/tools/.gitignore |   1 +
 src/intel/tools/i965_batchbuffer_dump_show_xml.cpp | 217 +
 3 files changed, 223 insertions(+), 1 deletion(-)
 create mode 100644 src/intel/tools/i965_batchbuffer_dump_show_xml.cpp

diff --git a/src/intel/Makefile.tools.am b/src/intel/Makefile.tools.am
index 2c36649f89..1c44d92d60 100644
--- a/src/intel/Makefile.tools.am
+++ b/src/intel/Makefile.tools.am
@@ -30,7 +30,8 @@ noinst_PROGRAMS += \
 intellib_LTLIBRARIES = \
tools/libi965_batchbuffer_logger.la
 
-intelbin_PROGRAMS = tools/i965_batchbuffer_dump_show
+intelbin_PROGRAMS = tools/i965_batchbuffer_dump_show \
+   tools/i965_batchbuffer_dump_show_xml
 
 intelbin_SCRIPTS = tools/i965_batchbuffer_logger_sh
 CLEANFILES += $(intelbin_SCRIPTS)
@@ -103,3 +104,6 @@ tools/i965_batchbuffer_logger_sh: 
tools/i965_batchbuffer_logger_sh.in
 
 tools_i965_batchbuffer_dump_show_SOURCES = \
tools/i965_batchbuffer_dump_show.c
+
+tools_i965_batchbuffer_dump_show_xml_SOURCES = \
+   tools/i965_batchbuffer_dump_show_xml.cpp
diff --git a/src/intel/tools/.gitignore b/src/intel/tools/.gitignore
index 7d95bcdf3a..7180e4ba4b 100644
--- a/src/intel/tools/.gitignore
+++ b/src/intel/tools/.gitignore
@@ -2,3 +2,4 @@
 /aubinator_error_decode
 /i965_batchbuffer_logger_sh
 /i965_batchbuffer_dump_show
+/i965_batchbuffer_dump_show_xml
diff --git a/src/intel/tools/i965_batchbuffer_dump_show_xml.cpp 
b/src/intel/tools/i965_batchbuffer_dump_show_xml.cpp
new file mode 100644
index 00..64916897ea
--- /dev/null
+++ b/src/intel/tools/i965_batchbuffer_dump_show_xml.cpp
@@ -0,0 +1,217 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "tools/i965_batchbuffer_logger_output.h"
+
+static
+std::string
+xml_value_from_file(unsigned int len, std::FILE *pfile)
+{
+   std::ostringstream str;
+   unsigned int i;
+   char c;
+
+   str << "";
+   return str.str();
+}
+
+static
+std::string
+xml_string_from_file(unsigned int len, std::FILE *pfile)
+{
+   std::ostringstream str;
+   unsigned int i;
+   char c;
+
+   for(i = 0; i < len; ++i) {
+  if (std::fread(, sizeof(char), 1, pfile) != sizeof(char)) {
+ break;
+  }
+  switch(c) {
+  case '<':
+ str << "";
+ break;
+  case '>':
+ str << "";
+ break;
+  case '&':
+ str << "";
+ break;
+  case '\n':
+ str << "";
+ break;
+  case '\"':
+ str << "";
+ break;
+  default:
+ str << c;
+  }
+   }
+
+   return str.str();
+}
+
+static
+bool
+legal_tag_char(char c)
+{
+   switch(c)
+   {
+   case '-':
+   case '_':
+   case '.':
+  return true;
+   }
+   return !std::isspace(c) && std::isalnum(c);
+}
+
+static
+std::string
+xml_tag_from_file(unsigned int len, std::FILE *pfile)
+{
+   std::string return_value;
+   unsigned int i;
+   char c;
+
+   for(i = 0; i < len; ++i) {
+  if (std::fread(, sizeof(char), 1, pfile) != sizeof(char)) {
+ break;
+  }
+  if (legal_tag_char(c)) {
+ if (i == 0 && (std::isdigit(c) || c == '.' || c == '-')) {
+return_value.push_back('_');
+ }
+ return_value.push_back(c);
+  } else {
+ return_value.push_back('_');
+  }
+   }
+
+   return return_value;
+}
+
+static
+void
+handle_block_begin(const struct i965_batchbuffer_logger_header *hdr,
+   std::vecto

[Mesa-dev] [PATCH 11/16] intel/tools/BatchbufferLogger: install i965_batchbuffer non-driver interface headers

2017-12-11 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/Makefile.tools.am   |  12 +++
 src/intel/tools/.gitignore|   1 +
 src/intel/tools/i965_batchbuffer_logger_sh.in | 108 ++
 3 files changed, 121 insertions(+)
 create mode 100644 src/intel/tools/i965_batchbuffer_logger_sh.in

diff --git a/src/intel/Makefile.tools.am b/src/intel/Makefile.tools.am
index 270463589d..f5eda31a5c 100644
--- a/src/intel/Makefile.tools.am
+++ b/src/intel/Makefile.tools.am
@@ -19,7 +19,9 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
 
+intelbindir = $(bindir)
 intellibdir = $(libdir)
+intelincludedir = $(includedir)
 
 noinst_PROGRAMS += \
tools/aubinator \
@@ -28,6 +30,12 @@ noinst_PROGRAMS += \
 intellib_LTLIBRARIES = \
tools/libi965_batchbuffer_logger.la
 
+intelbin_SCRIPTS = tools/i965_batchbuffer_logger_sh
+CLEANFILES += $(intelbin_SCRIPTS)
+
+intelinclude_HEADERS = tools/i965_batchbuffer_logger_app.h \
+   tools/i965_batchbuffer_logger_output.h
+
 tools_aubinator_SOURCES = \
tools/aubinator.c \
tools/disasm.c \
@@ -86,3 +94,7 @@ tools_libi965_batchbuffer_logger_la_CXXFLAGS = \
 
 tools_libi965_batchbuffer_logger_la_LDFLAGS = \
-no-undefined -avoid-version -shared -shrext .so
+
+intel_sed_prefix_vars = sed -e 's,[@]libdir[@],$(intellibdir),g'
+tools/i965_batchbuffer_logger_sh: tools/i965_batchbuffer_logger_sh.in
+   $(intel_sed_prefix_vars) < tools/i965_batchbuffer_logger_sh.in > 
tools/i965_batchbuffer_logger_sh
diff --git a/src/intel/tools/.gitignore b/src/intel/tools/.gitignore
index 27437f9eef..fa9bf70808 100644
--- a/src/intel/tools/.gitignore
+++ b/src/intel/tools/.gitignore
@@ -1,2 +1,3 @@
 /aubinator
 /aubinator_error_decode
+/i965_batchbuffer_logger_sh
diff --git a/src/intel/tools/i965_batchbuffer_logger_sh.in 
b/src/intel/tools/i965_batchbuffer_logger_sh.in
new file mode 100644
index 00..0746173417
--- /dev/null
+++ b/src/intel/tools/i965_batchbuffer_logger_sh.in
@@ -0,0 +1,108 @@
+#!/bin/bash
+# -*- mode: sh -*-
+
+function show_help() {
+cat <https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/16] intel/tools/disasm: gen_disasm_disassemble to take const void* instead of void*

2017-12-11 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Reviewed-by: Matt Turner 
Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/tools/disasm.c | 6 +++---
 src/intel/tools/gen_disasm.h | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/intel/tools/disasm.c b/src/intel/tools/disasm.c
index 5091579522..ffcd48084f 100644
--- a/src/intel/tools/disasm.c
+++ b/src/intel/tools/disasm.c
@@ -44,14 +44,14 @@ is_send(uint32_t opcode)
 }
 
 static int
-gen_disasm_find_end(struct gen_disasm *disasm, void *assembly, int start)
+gen_disasm_find_end(struct gen_disasm *disasm, const void *assembly, int start)
 {
struct gen_device_info *devinfo = >devinfo;
int offset = start;
 
/* This loop exits when send-with-EOT or when opcode is 0 */
while (true) {
-  brw_inst *insn = assembly + offset;
+  const brw_inst *insn = assembly + offset;
 
   if (brw_inst_cmpt_control(devinfo, insn)) {
  offset += 8;
@@ -70,7 +70,7 @@ gen_disasm_find_end(struct gen_disasm *disasm, void 
*assembly, int start)
 }
 
 void
-gen_disasm_disassemble(struct gen_disasm *disasm, void *assembly,
+gen_disasm_disassemble(struct gen_disasm *disasm, const void *assembly,
int start, FILE *out)
 {
struct gen_device_info *devinfo = >devinfo;
diff --git a/src/intel/tools/gen_disasm.h b/src/intel/tools/gen_disasm.h
index d2764bb90b..8d82a58617 100644
--- a/src/intel/tools/gen_disasm.h
+++ b/src/intel/tools/gen_disasm.h
@@ -32,7 +32,7 @@ struct gen_disasm;
 
 struct gen_disasm *gen_disasm_create(int pciid);
 void gen_disasm_disassemble(struct gen_disasm *disasm,
-void *assembly, int start, FILE *out);
+const void *assembly, int start, FILE *out);
 
 void gen_disasm_destroy(struct gen_disasm *disasm);
 
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/16] intel/tools: BatchBufferLogger define output file format of tool

2017-12-11 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Define the output format of the BatchbufferLogger. The output
is a sequence of blocks where blocks can have member blocks
or values. The top level blocks come from the application
calling into the BatchBufferLogger when an GL/GLES API call
is started and returns.

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/tools/i965_batchbuffer_logger_output.h | 66 
 1 file changed, 66 insertions(+)
 create mode 100644 src/intel/tools/i965_batchbuffer_logger_output.h

diff --git a/src/intel/tools/i965_batchbuffer_logger_output.h 
b/src/intel/tools/i965_batchbuffer_logger_output.h
new file mode 100644
index 00..4e4c988d3c
--- /dev/null
+++ b/src/intel/tools/i965_batchbuffer_logger_output.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef I965_BATCHBUFFER_LOGGER_OUTPUT_H
+#define I965_BATCHBUFFER_LOGGER_OUTPUT_H
+
+#include 
+#include "i965_batchbuffer_logger_app.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * FILE format of the log is a seqence of (HEADER, NAME-DATA, VALUE-DATA)
+ * tuples where HEADER is the struct i965_batchbuffer_logger_header packed
+ * via fwrite, NAME-DATA is a sequence of HEADER.name_length characers and
+ * VALUE-DATA is a seqnece of HEADER.value_length characters. The start
+ * and end of blocks are used to give nested structure to the data (for
+ * example to make better JSON or XML output). Block endings will have that
+ * the name and value lengths are both ALWAYS 0. For other types, name should
+ * never be zero, but value can be (typically for blocks).
+ */
+
+struct i965_batchbuffer_logger_header {
+   enum i965_batchbuffer_logger_message_type_t type;
+
+   /**
+* length of the string for the name must be 0 for type
+* I965_BATCHBUFFER_LOGGER_MESSAGE_BLOCK_END
+*/
+   uint32_t name_length;
+
+   /**
+* length of the string for the value, must be 0 for types
+* I965_BATCHBUFFER_LOGGER_MESSAGE_BLOCK_BEGIN and
+* I965_BATCHBUFFER_LOGGER_MESSAGE_BLOCK_END
+*/
+   uint32_t value_length;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/16] intel/tools/disasm: add gen_disasm_assembly_length function

2017-12-11 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

The length function is needed if one wishes to save GEN binary
shaders to file.

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/tools/disasm.c | 7 +++
 src/intel/tools/gen_disasm.h | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/src/intel/tools/disasm.c b/src/intel/tools/disasm.c
index ffcd48084f..9a136db4db 100644
--- a/src/intel/tools/disasm.c
+++ b/src/intel/tools/disasm.c
@@ -107,6 +107,13 @@ gen_disasm_disassemble(struct gen_disasm *disasm, const 
void *assembly,
ralloc_free(disasm_info);
 }
 
+int
+gen_disasm_assembly_length(struct gen_disasm *disasm,
+   const void *assembly, int start)
+{
+   return gen_disasm_find_end(disasm, assembly, start) - start;
+}
+
 struct gen_disasm *
 gen_disasm_create(int pciid)
 {
diff --git a/src/intel/tools/gen_disasm.h b/src/intel/tools/gen_disasm.h
index 8d82a58617..8b95a710e2 100644
--- a/src/intel/tools/gen_disasm.h
+++ b/src/intel/tools/gen_disasm.h
@@ -33,6 +33,8 @@ struct gen_disasm;
 struct gen_disasm *gen_disasm_create(int pciid);
 void gen_disasm_disassemble(struct gen_disasm *disasm,
 const void *assembly, int start, FILE *out);
+int gen_disasm_assembly_length(struct gen_disasm *disasm,
+   const void *assembly, int start);
 
 void gen_disasm_destroy(struct gen_disasm *disasm);
 
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/16] i965: Enable BatchbufferLogger in i965 driver

2017-12-11 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

The interface for BatchbufferLogger is that it is active
only if it is LD_PRELOAD'ed. Thus, the i965 driver is to
use dlsym to see if it is there, and if so fetch the object
at intel_screen creation.

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c| 22 -
 src/mesa/drivers/dri/i965/brw_bufmgr.h|  8 -
 src/mesa/drivers/dri/i965/brw_context.c   | 37 +
 src/mesa/drivers/dri/i965/brw_context.h   | 12 +++
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 30 +++--
 src/mesa/drivers/dri/i965/intel_screen.c  | 46 +--
 src/mesa/drivers/dri/i965/intel_screen.h  |  3 ++
 7 files changed, 150 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 52b5bf97a1..c5b1bfb83d 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -65,6 +65,8 @@
 #include "string.h"
 
 #include "i915_drm.h"
+#include "intel_screen.h"
+#include "tools/i965_batchbuffer_logger.h"
 
 #ifdef HAVE_VALGRIND
 #include 
@@ -104,6 +106,7 @@ struct bo_cache_bucket {
 };
 
 struct brw_bufmgr {
+   struct intel_screen *screen;
int fd;
 
mtx_t lock;
@@ -631,9 +634,14 @@ bo_unreference_final(struct brw_bo *bo, time_t time)
 {
struct brw_bufmgr *bufmgr = bo->bufmgr;
struct bo_cache_bucket *bucket;
+   struct i965_batchbuffer_logger *bb_logger =
+  bufmgr->screen->batchbuffer_logger;
 
DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name);
 
+   if(bb_logger != NULL) {
+  bb_logger->clear_batchbuffer_log(bb_logger, bufmgr->fd, bo->gem_handle);
+   }
bucket = bucket_for_size(bufmgr, bo->size);
/* Put the buffer into our internal cache for reuse if we can. */
if (bufmgr->bo_reuse && bo->reusable && bucket != NULL &&
@@ -1065,6 +1073,12 @@ brw_bufmgr_destroy(struct brw_bufmgr *bufmgr)
free(bufmgr);
 }
 
+int
+brw_bufmgr_fd(const struct brw_bufmgr *bufmgr)
+{
+   return bufmgr->fd;
+}
+
 static int
 bo_set_tiling_internal(struct brw_bo *bo, uint32_t tiling_mode,
uint32_t stride)
@@ -1369,9 +1383,14 @@ gem_param(int fd, int name)
  * \param fd File descriptor of the opened DRM device.
  */
 struct brw_bufmgr *
-brw_bufmgr_init(struct gen_device_info *devinfo, int fd)
+brw_bufmgr_init(struct intel_screen *screen)
 {
struct brw_bufmgr *bufmgr;
+   struct gen_device_info *devinfo;
+   int fd;
+
+   devinfo = >devinfo;
+   fd = screen->driScrnPriv->fd;
 
bufmgr = calloc(1, sizeof(*bufmgr));
if (bufmgr == NULL)
@@ -1387,6 +1406,7 @@ brw_bufmgr_init(struct gen_device_info *devinfo, int fd)
 * fd so that its namespace does not clash with another.
 */
bufmgr->fd = fd;
+   bufmgr->screen = screen;
 
if (mtx_init(>lock, mtx_plain) != 0) {
   free(bufmgr);
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h 
b/src/mesa/drivers/dri/i965/brw_bufmgr.h
index 0ae541cda0..b5d724be7b 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
@@ -46,6 +46,7 @@ extern "C" {
 
 struct gen_device_info;
 struct brw_context;
+struct intel_screen;
 
 struct brw_bo {
/**
@@ -274,6 +275,11 @@ void brw_bo_wait_rendering(struct brw_bo *bo);
  */
 void brw_bufmgr_destroy(struct brw_bufmgr *bufmgr);
 
+/**
+ * Returns the file descriptor of the buffer manager
+ */
+int brw_bufmgr_fd(const struct brw_bufmgr *bufmgr);
+
 /**
  * Get the current tiling (and resulting swizzling) mode for the bo.
  *
@@ -313,7 +319,7 @@ int brw_bo_busy(struct brw_bo *bo);
 int brw_bo_madvise(struct brw_bo *bo, int madv);
 
 /* drm_bacon_bufmgr_gem.c */
-struct brw_bufmgr *brw_bufmgr_init(struct gen_device_info *devinfo, int fd);
+struct brw_bufmgr *brw_bufmgr_init(struct intel_screen *screen);
 struct brw_bo *brw_bo_gem_create_from_name(struct brw_bufmgr *bufmgr,
const char *name,
unsigned int handle);
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 126c187f62..b5e041bb7b 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -75,6 +75,8 @@
 #include "util/debug.h"
 #include "isl/isl.h"
 
+#include "tools/i965_batchbuffer_logger.h"
+
 /***
  * Mesa's Driver Functions
  ***/
@@ -1075,6 +1077,7 @@ brwCreateContext(gl_api api,
 
vbo_use_buffer_objects(ctx);
vbo_always_unmap_buffers(ctx);
+   brw->have_active_batchbuffer = true;
 
brw_disk_cache_init(brw);
 
@@ -1089,6 +1092,7 @@ intelDe

[Mesa-dev] [PATCH 04/16] i965: assign BindingTableEntryCount of INTERFACE_DESCRIPTOR_DATA

2017-12-11 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/genX_state_upload.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 04a492539a..3643d79231 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -4259,6 +4259,7 @@ genX(upload_cs_state)(struct brw_context *brw)
   .SamplerStatePointer = stage_state->sampler_offset,
   .SamplerCount = DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 
4),
   .BindingTablePointer = stage_state->bind_bo_offset,
+  .BindingTableEntryCount = prog_data->binding_table.size_bytes / 4,
   .ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
   .NumberofThreadsinGPGPUThreadGroup = cs_prog_data->threads,
   .SharedLocalMemorySize = encode_slm_size(GEN_GEN,
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/16] intel/tools: define BatchBufferLogger driver interface

2017-12-11 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Define the driver interface for BatchbufferLogger. The
interface assumes that for any -thread- there is only
one batchbuffer to which commands are to be added. A
driver needs to provide the information on what is the
active batchbuffer on a calling thread and also a function
that given a batchbuffer, returns where in the batchbuffer
the driver is currently "at".

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/tools/i965_batchbuffer_logger.h | 185 ++
 1 file changed, 185 insertions(+)
 create mode 100644 src/intel/tools/i965_batchbuffer_logger.h

diff --git a/src/intel/tools/i965_batchbuffer_logger.h 
b/src/intel/tools/i965_batchbuffer_logger.h
new file mode 100644
index 00..d174af0e05
--- /dev/null
+++ b/src/intel/tools/i965_batchbuffer_logger.h
@@ -0,0 +1,185 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef I965_BATCHBUFFER_LOGGER_DRIVER_H
+#define I965_BATCHBUFFER_LOGGER_DRIVER_H
+
+#include 
+#include 
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * i965_batchbuffer_logger tracks a batchbuffers
+ * by the pair (GEM-BO handle, File descriptor)
+ * pair.
+ */
+struct i965_logged_batchbuffer {
+   /**
+* GEM BO of the batch buffer, this is the BO
+* sent to kernel to execute commands on the
+* GPU
+*/
+   uint32_t gem_bo;
+
+   /**
+* The file descriptor of the GEM BO
+*/
+   int fd;
+
+   /**
+* Opaque pointer used by the driver associated
+* to the batch buffer; an i965_batchbuffer_logger
+* does NOT use this value to identify a batchbuffer.
+* It is for the driver to use to help it compute
+* where it is in a specified batchbuffer.
+*/
+   const void *driver_data;
+};
+
+/**
+ * A counter is to be used by a driver to provide counts of elements
+ * added to a batchbuffer when the counter is active. Actions acting
+ * on a counter are placed on the active batchbuffer of the calling
+ * thread, not executed immediately. The actual counting (in addition
+ * to activation, deactivation, reset, and relase) are performed when
+ * the batchbuffer is intercepted by the logger.
+ */
+struct i965_batchbuffer_counter {
+   void *opaque;
+};
+
+/**
+ * An i965_batchbuffer_logger object represents the hooking
+ * of a GPU driver.
+ */
+struct i965_batchbuffer_logger {
+   /**
+* To be called by the driver to instruct the batchbuffer logger
+* to clear the log associated to a GEM BO from an FD.
+*/
+   void (*clear_batchbuffer_log)(struct i965_batchbuffer_logger *logger,
+ int fd, uint32_t gem_bo);
+
+   /**
+* To be called by the driver if it migrates commands from one
+* batchbuffer to another batchbuffer.
+*/
+   void (*migrate_batchbuffer)(struct i965_batchbuffer_logger *logger,
+   const struct i965_logged_batchbuffer *from,
+   const struct i965_logged_batchbuffer *to);
+
+   /**
+* To be called by the driver to add log-message data to the
+* batchbuffer log. The message will be added to the log of
+* the batchbuffer dst. If counter is non-NULL the values
+* in the counter are also emitted to the log.
+*/
+   void (*add_message)(struct i965_batchbuffer_logger *logger,
+   const struct i965_logged_batchbuffer *dst,
+   const char *txt);
+
+   /**
+* call to release the i965_batchbuffer_logger
+*/
+   void (*release_driver)(struct i965_batchbuffer_logger *logger);
+
+   /**
+* Create a counter object. If filename is non-NULL, then
+* the values of the counter will be emitted to the named
+* file when the counter is deleted.
+*/
+   struct i965_batchbuffer_counter(*cre

[Mesa-dev] [PATCH 02/16] intel/tools: define BatchbufferLogger application interface

2017-12-11 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Define the application interface to BatchbufferLogger. The
BatchbufferLogger needs from the application when a GL/GLES
API call is issues and returns. It will use this information
to correctly correlate batchbuffer additions to GL/GLES API
calls.

In addition, because the BatchbufferLogger will be intercepting
drmIoctl, an application does NOT link against it. Instead, one
needs to LD_PRELOAD the .so.

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/tools/i965_batchbuffer_logger_app.h | 186 ++
 1 file changed, 186 insertions(+)
 create mode 100644 src/intel/tools/i965_batchbuffer_logger_app.h

diff --git a/src/intel/tools/i965_batchbuffer_logger_app.h 
b/src/intel/tools/i965_batchbuffer_logger_app.h
new file mode 100644
index 00..6f131646e1
--- /dev/null
+++ b/src/intel/tools/i965_batchbuffer_logger_app.h
@@ -0,1 +1,186 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef I965_BATCHBUFFER_LOGGER_APP_H
+#define I965_BATCHBUFFER_LOGGER_APP_H
+
+#include 
+#include 
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Enumeration value specifying the kind of data to write;
+ * Data written to a a i965_batchbuffer_logger_session
+ * can mark the start of a block, then end of a block
+ * of item within a block. Eac type has an optional
+ * name and value strings.
+ */
+enum i965_batchbuffer_logger_message_type_t {
+   I965_BATCHBUFFER_LOGGER_MESSAGE_BLOCK_BEGIN, /* start of a block */
+   I965_BATCHBUFFER_LOGGER_MESSAGE_BLOCK_END,   /* end of a block */
+   I965_BATCHBUFFER_LOGGER_MESSAGE_VALUE,   /* value in block */
+};
+
+/**
+ * An i965_batchbuffer_logger_session_params represents how
+ * to emit batchbuffer logger data. The batchbuffer logger
+ * is controlled by the following environmental variables:
+ * - I965_DECODE_BEFORE_IOCTL if non-zero, emit batchbuffer log data
+ *BEFORE calling the kernel ioctl.
+ * - I965_EMIT_TOTAL_STATS gives a filename to which to emit the total
+ * counts and lengths of GPU commands emitted
+ *  - I965_PCI_ID pci_id Give a hexadecimal value of the PCI ID value for
+ *   the GPU the BatchbufferLogger to decode for; this
+ *   value is used if and only if the driver fails to
+ *   tell the BatchbufferLogger a valid PCI ID value to
+ *   use
+ *  - I965_DECODE_LEVEL controls the level of batchbuffer decoding
+ * - no_decode do not decode batchbuffer at all
+ * - instruction_decode decode instruction name only
+ * - instruction_details_decode decode instruction contents
+ *  - I965_PRINT_RELOC_LEVEL controls at what level to print reloc data
+ * - print_reloc_nothing do not print reloc data
+ * - print_reloc_gem_gpu_updates print reloc data GEM by GEM
+ *  - I965_DECODE_SHADERS if set and is 0, shader binaries are written to
+ *file;  otherwise their disassembly is emitted
+ *in each session
+ */
+struct i965_batchbuffer_logger_session_params {
+   /**
+* Client data opaque pointer passed back to
+* function callbacks.
+*/
+   void *client_data;
+
+   /**
+* Function called by i965_batchbuffer_logger_app to write
+* data for the sessions.
+* \param client_data the pointer value in
+*i965_batchbuffer_logger_session_params::client_data
+* \param tp the message type
+* \param name of the data
+* \param name_length length of the name data
+* \param value of the data
+* \param value_length length of the value data
+*/
+   void (*write)(void *client_data,
+ enum i965_batchbuffer_logger_message_type_t tp,
+ 

[Mesa-dev] [PATCH 00/16] Batchbuffer Logger for Intel GPU (v3)

2017-12-11 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

This patch series defines and implements a BatchbufferLogger
for Intel GEN. The main purpose of the BatchbufferLogger is
to strongly correlate API calls to data added to a batchbuffer.
In addition to this function, the BatchbufferLogger also tracks
GPU state (respecting HW context as well). The logger intercepts
drmIoctl recording the information needed to decode a bachbuffer
(such as GEM BO creation/deletion, HW context create/delete,
and most importantly execbuffer2). When the execbuffer2 returns
from the kernel, the BatchbufferLogger will log information
in its log of what was added when and in addition log the
GPU state (at the point in the batchbuffer) of 3DPRIMITIVE and
GPGPU_WALKER commands.

It is an application's requirment to tell the BatchbufferLogger
just before and just after an API call. Because of the need
to intercept drmIoctl, having an application link against
BatchbufferLogger is not robust. Instead, an application is
to use dlsym to fetch the correction a function pointer that
returns the BatchbufferLogger's application interface. The
interface of the BatchbufferLogger is defined in patch 0002.
A script is also provided to use the BatchbufferLogger in
an easier way than needing to set environmental variables.

On the subject of application integration, I have a -very-
small patch series that enabled BatchbufferLogger for
apitrace. I can share these patches if anyone asks, but I
cannot submit them to apitrace until atleast the BatchbufferLogger
is in Mesa with a stable application interface.

The log emitted by the BatchbufferLogger is a sequence of blocks
with possibility of blocks being elements of blocks. There is an
environmental variable to control if the top level blocks are
API call markers created from the calls into the BatchbufferLogger
from the application OR the actual ioctl intercepted with the
next level being the API call markers. An application starts
and ends a logging session. A session provides callbacks to
write the block data; with the interface of a callback a log
does not need to just write to a file, it could also write
over a network to remote application for the purpose of remote
debugging. A specific interface is also provided to write to a
file and additional tools are included to convert the log to JSON,
XML and text. The simple file format should allow others to be able
to take the data and use it however they see fit. The JSON output
alone can be quite illuminating to use when debugging/enhancing the
i965 driver for a single frame (or even single draw call) of a troublesome
application.

It is worth noting that i965 already has a batchbuffer decoder
dumper (also making use src/intel/tools/gen_decoder). However,
BatchbufferLogger has the following features which are not in the
current dumper and are likely quite tricky to implement into it.

 1. The biggest feature is that this BatchbufferLogger contents are
annotated by the GL/GLES API calls and driver text as well.
 2. This BatchbufferLogger provides an interface to application space
to allow for applications to start/end logging sessions as
they see fit. In addition, an application can have multilpe
independent sessions active.
 3. This BatchbufferLogger repeats the state of the GPU and HW
context on 3DPRIMITIVE and GPGPU_WALKER commands. By doing
so one can see the precise state the GPU is in at that command.
Coupled with (1), one can see what precise GPU state one
has when an application issues draw commands. Moreover, coupled
with the (previously) posted patches for apitrace, one
can compare differences at specific trace points within a
trace from apitrace to help diagnose regressions.

In addition, as suggested by Chris Wilson, I have also implemented an
i965 blackbox recorder at https://github.com/krogueintel/i965-blackbox
which will perform the necessary GL/GLES API interception to
produce annotated batchbuffer recordings to assist in GPU hang
debugging. The BatchbufferLogger (again at Chris Wilson's suggestion)
has the ability to walk batchbuffers BEFORE kernel submit and to
also add an EXEC_OBJECT_CAPTURE execobject2 whose contents are
the ID of the execbuffer2 ioctl submitted. The aim is to greatly
assist in GPU hang debugging for troublesome applications.

For those interested, this BatchbufferLogger is available on
github at https://github.com/krogueintel/asem/tree/batchbuffer-logger-v3.

The patch series is organized into the following blocks:

0001-0003: Define the BatchbufferLogger interfaces
0004-0004: Minor fix to i965 driver
0005-0005: Hooking of BatchbufferLogger into i965
0006-0009: Fixes and enhancements to intel/compiler,
   intel/tools and intel/common.
0010-0011: Implementation of BatchBufferLogger
0012-0014: Tools to decode log to JSON, XML and text
0015-0015: Command line tool for disassembling shader
   binaries.
0016-0016: Meson build support

v3:
  As requested b

[Mesa-dev] [PATCH 3/3] i965: if DEBUG_OUT_OF_BOUND_CHK is up, check that noise padding for each bo used in batchbuffer is correct

2017-12-08 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 91a6506..549ea3e 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -758,6 +758,7 @@ execbuffer(int fd,
};
 
unsigned long cmd = DRM_IOCTL_I915_GEM_EXECBUFFER2;
+   bool detected_out_of_bounds_write = false;
 
if (in_fence != -1) {
   execbuf.rsvd2 = in_fence;
@@ -787,6 +788,20 @@ execbuffer(int fd,
  batch->validation_list[i].offset);
  bo->gtt_offset = batch->validation_list[i].offset;
   }
+
+  if (unlikely(INTEL_DEBUG & DEBUG_OUT_OF_BOUND_CHK)) {
+ if (!brw_bo_padding_is_good(bo)) {
+detected_out_of_bounds_write = true;
+fprintf(stderr,
+"Detected buffer out-of-bounds write from brw_bo %p "
+"(GEM %u, label = \"%s\")\n",
+bo, bo->gem_handle, bo->name);
+ }
+  }
+   }
+
+   if (unlikely(detected_out_of_bounds_write)) {
+  abort();
}
 
if (ret == 0 && out_fence != NULL)
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] intel/common:add debug flag for adding and checking padding on BO's

2017-12-08 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/common/gen_debug.c | 1 +
 src/intel/common/gen_debug.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/intel/common/gen_debug.c b/src/intel/common/gen_debug.c
index a978f2f..2154b23 100644
--- a/src/intel/common/gen_debug.c
+++ b/src/intel/common/gen_debug.c
@@ -85,6 +85,7 @@ static const struct debug_control debug_control[] = {
{ "nohiz",   DEBUG_NO_HIZ },
{ "color",   DEBUG_COLOR },
{ "reemit",  DEBUG_REEMIT },
+   { "check_oob",   DEBUG_OUT_OF_BOUND_CHK },
{ NULL,0 }
 };
 
diff --git a/src/intel/common/gen_debug.h b/src/intel/common/gen_debug.h
index da5b5a5..92fc68b 100644
--- a/src/intel/common/gen_debug.h
+++ b/src/intel/common/gen_debug.h
@@ -83,6 +83,7 @@ extern uint64_t INTEL_DEBUG;
 #define DEBUG_NO_HIZ  (1ull << 39)
 #define DEBUG_COLOR   (1ull << 40)
 #define DEBUG_REEMIT  (1ull << 41)
+#define DEBUG_OUT_OF_BOUND_CHK(1ull << 42)
 
 #ifdef HAVE_ANDROID_PLATFORM
 #define LOG_TAG "INTEL-MESA"
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] i965: add noise padding to buffer object and function to check if noise is correct

2017-12-08 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 68 +-
 src/mesa/drivers/dri/i965/brw_bufmgr.h | 12 ++
 2 files changed, 79 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 52b5bf9..7167165 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -367,7 +367,14 @@ retry:
   bo->size = bo_size;
   bo->idle = true;
 
-  struct drm_i915_gem_create create = { .size = bo_size };
+  bo->padding.size = 0;
+  bo->padding.value = NULL;
+  bo->padding.tmp = NULL;
+  if (unlikely(INTEL_DEBUG & DEBUG_OUT_OF_BOUND_CHK)) {
+ bo->padding.size = getpagesize();
+  }
+
+  struct drm_i915_gem_create create = { .size = bo_size + bo->padding.size 
};
 
   /* All new BOs we get from the kernel are zeroed, so we don't need to
* worry about that here.
@@ -378,6 +385,31 @@ retry:
  goto err;
   }
 
+  if (unlikely(bo->padding.size > 0)) {
+ struct drm_i915_gem_pwrite pwrite;
+
+ bo->padding.value = calloc(bo->padding.size, 1);
+ bo->padding.tmp = calloc(bo->padding.size, 1);
+ if (!bo->padding.value || !bo->padding.tmp) {
+goto err_free;
+ }
+
+ for (uint32_t i = 0; i < bo->padding.size; ++i) {
+bo->padding.value[i] = rand() & 0xFF;
+ }
+
+ pwrite.handle = create.handle;
+ pwrite.pad = 0;
+ pwrite.offset = bo_size;
+ pwrite.size = bo->padding.size;
+ pwrite.data_ptr = (__u64) (uintptr_t) bo->padding.value;
+ ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_PWRITE, );
+
+ if (ret != 0) {
+goto err_free;
+ }
+  }
+
   bo->gem_handle = create.handle;
 
   bo->bufmgr = bufmgr;
@@ -424,6 +456,26 @@ err:
return NULL;
 }
 
+bool
+brw_bo_padding_is_good(struct brw_bo *bo)
+{
+   if (bo->padding.size > 0) {
+  struct drm_i915_gem_pread pread;
+  int ret;
+
+  assert(bo->padding.tmp && bo->padding.value);
+  pread.handle = bo->gem_handle;
+  pread.pad = 0;
+  pread.offset = bo->size;
+  pread.size = bo->padding.size;
+  pread.data_ptr = (__u64) (uintptr_t) bo->padding.tmp;
+  ret = drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_PREAD, );
+  assert(ret == 0);
+  return memcmp(bo->padding.tmp, bo->padding.value, bo->padding.size) == 0;
+   }
+   return true;
+}
+
 struct brw_bo *
 brw_bo_alloc(struct brw_bufmgr *bufmgr,
  const char *name, uint64_t size, uint64_t alignment)
@@ -598,6 +650,17 @@ bo_free(struct brw_bo *bo)
   DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
   bo->gem_handle, bo->name, strerror(errno));
}
+
+   if (unlikely(INTEL_DEBUG & DEBUG_OUT_OF_BOUND_CHK)) {
+  if (bo->padding.value) {
+ free(bo->padding.value);
+  }
+
+  if (bo->padding.tmp) {
+ free(bo->padding.tmp);
+  }
+   }
+
free(bo);
 }
 
@@ -1156,6 +1219,9 @@ brw_bo_gem_create_from_prime(struct brw_bufmgr *bufmgr, 
int prime_fd)
bo->name = "prime";
bo->reusable = false;
bo->external = true;
+   bo->padding.size = 0;
+   bo->padding.value = NULL;
+   bo->padding.tmp = NULL;
 
struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle };
if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, _tiling))
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h 
b/src/mesa/drivers/dri/i965/brw_bufmgr.h
index 0ae541c..4fff866 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
@@ -165,6 +165,16 @@ struct brw_bo {
 * Boolean of whether this buffer is cache coherent
 */
bool cache_coherent;
+
+   /**
+* pointer and size to memory holding values for padding for the
+* purpose of checking out-of-bound writes.
+*/
+   struct {
+  uint32_t size;
+  uint8_t *value;
+  uint8_t *tmp;
+   } padding;
 };
 
 #define BO_ALLOC_BUSY   (1<<0)
@@ -342,6 +352,8 @@ uint32_t brw_bo_export_gem_handle(struct brw_bo *bo);
 int brw_reg_read(struct brw_bufmgr *bufmgr, uint32_t offset,
  uint64_t *result);
 
+bool brw_bo_padding_is_good(struct brw_bo *bo);
+
 /** @{ */
 
 #if defined(__cplusplus)
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/3] GEM BO padding to find OOB buffer writes

2017-12-08 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

This patch series adds a new debug option to pad each GEM BO
allocated by the brw_bufmgr with random noise values which
are then checked after each batchbuffer dispatch to the kernel.
This can be quite valuable to find diffucult to track down
heisenberg style bugs.

A possible follow-up series would be to write to stderr (or
another logging mechanism) if the OOB write is to a GEM BO that
backs a GL buffer object; that features would be quite useful for
application developers.

Kevin Rogovin (3):
  intel/common:add debug flag for adding and checking padding on BO's
  i965: add noise padding to buffer object and function to check if
noise is correct
  i965: if DEBUG_OUT_OF_BOUND_CHK is up, check that noise padding for
each bo used in batchbuffer is correct

 src/intel/common/gen_debug.c  |  1 +
 src/intel/common/gen_debug.h  |  1 +
 src/mesa/drivers/dri/i965/brw_bufmgr.c| 68 ++-
 src/mesa/drivers/dri/i965/brw_bufmgr.h| 12 +
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 15 ++
 5 files changed, 96 insertions(+), 1 deletion(-)

-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/5] i965: set ASTC5x5 workaround texture type tracking on texture validate

2017-12-01 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

One of the presteps in each draw (and compute) call is to validate
the textures. This is the perfect place (since all texture units
are looped through) to see if ASTC5x5 and/or textures with an
auxilary surface are accessed by the GPU.

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/intel_tex_validate.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_tex_validate.c 
b/src/mesa/drivers/dri/i965/intel_tex_validate.c
index 2b7798c940..812c0c7793 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_validate.c
@@ -188,11 +188,24 @@ brw_validate_textures(struct brw_context *brw)
struct gl_context *ctx = >ctx;
const int max_enabled_unit = ctx->Texture._MaxEnabledTexImageUnit;
 
+   brw->astc5x5_wa.texture_astc5x5_present = false;
+   brw->astc5x5_wa.texture_with_auxilary_present = false;
for (int unit = 0; unit <= max_enabled_unit; unit++) {
   struct gl_texture_unit *tex_unit = >Texture.Unit[unit];
 
   if (tex_unit->_Current) {
+ struct intel_texture_object *tex =
+intel_texture_object(tex_unit->_Current);
+ struct intel_mipmap_tree *mt = tex->mt;
+
  intel_finalize_mipmap_tree(brw, unit);
+ if (mt && mt->aux_usage != ISL_AUX_USAGE_NONE) {
+brw->astc5x5_wa.texture_with_auxilary_present = true;
+ }
+ if (tex->_Format == MESA_FORMAT_RGBA_ASTC_5x5 ||
+ tex->_Format == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5) {
+brw->astc5x5_wa.texture_astc5x5_present = true;
+ }
   }
}
 }
-- 
2.14.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/5] i965: define astc5x5 workaround infrastructure

2017-12-01 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Some GEN's have a bug in the sample where if the sampler accesses
a texture with an auxialry surface and an ASTC5x5 texture without
having the texture cache invalidated between such accesses, then
the GPU will hang. This patch defines the infrastructure to
implement the needed workaround for such hardware.

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_context.c   | 63 +++
 src/mesa/drivers/dri/i965/brw_context.h   | 23 ++
 src/mesa/drivers/dri/i965/intel_batchbuffer.c |  1 +
 3 files changed, 87 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index dd55b43669..f2e9b9779a 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -1060,6 +1060,12 @@ brwCreateContext(gl_api api,
if (ctx->Extensions.INTEL_performance_query)
   brw_init_performance_queries(brw);
 
+   brw->astc5x5_wa.required = (devinfo->gen == 9);
+   brw->astc5x5_wa.mode = BRW_ASTC5x5_WA_MODE_NONE;
+   brw->astc5x5_wa.texture_astc5x5_present = false;
+   brw->astc5x5_wa.texture_with_auxilary_present = false;
+   brw->astc5x5_wa.blorp_sampling_from_astc5x5 = false;
+
vbo_use_buffer_objects(ctx);
vbo_always_unmap_buffers(ctx);
 
@@ -1134,6 +1140,63 @@ intelDestroyContext(__DRIcontext * driContextPriv)
driContextPriv->driverPrivate = NULL;
 }
 
+void
+brw_set_astc5x5_wa_mode(struct brw_context *brw,
+enum brw_astc5x5_wa_mode_t mode)
+{
+   if (!brw->astc5x5_wa.required ||
+   mode == BRW_ASTC5x5_WA_MODE_NONE ||
+   brw->astc5x5_wa.mode == mode) {
+  return;
+   }
+
+   if (brw->astc5x5_wa.mode != BRW_ASTC5x5_WA_MODE_NONE) {
+  brw_emit_pipe_control_flush(brw, PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
+   }
+
+   brw->astc5x5_wa.mode = mode;
+}
+
+static void
+resolve_to_disable_aux_on_samplers(struct brw_context *brw)
+{
+   struct gl_context *ctx = >ctx;
+   const int max_enabled_unit = ctx->Texture._MaxEnabledTexImageUnit;
+
+   for (int unit = 0; unit <= max_enabled_unit; unit++) {
+  struct gl_texture_unit *tex_unit = >Texture.Unit[unit];
+  struct gl_texture_object *tex_obj = tex_unit->_Current;
+  if (tex_obj) {
+ struct intel_mipmap_tree *mt = intel_texture_object(tex_obj)->mt;
+ if (mt && mt->aux_usage != ISL_AUX_USAGE_NONE) {
+intel_miptree_prepare_access(brw, mt,
+ 0, INTEL_REMAINING_LEVELS,
+ 0, INTEL_REMAINING_LAYERS,
+ ISL_AUX_USAGE_NONE, false);
+ }
+  }
+   }
+}
+
+void
+brw_astc5x5_perform_wa(struct brw_context *brw)
+{
+   if (!brw->astc5x5_wa.required) {
+  return;
+   }
+
+   if (brw->astc5x5_wa.texture_astc5x5_present) {
+  if (brw->astc5x5_wa.texture_with_auxilary_present) {
+ /* resolve so that auxilary buffers are not needed
+  * by any sampler */
+ resolve_to_disable_aux_on_samplers(brw);
+  }
+  brw_set_astc5x5_wa_mode(brw, BRW_ASTC5x5_WA_MODE_HAS_ASTC5x5);
+   } else if (brw->astc5x5_wa.texture_with_auxilary_present) {
+  brw_set_astc5x5_wa_mode(brw, BRW_ASTC5x5_WA_MODE_HAS_AUX);
+   }
+}
+
 GLboolean
 intelUnbindContext(__DRIcontext * driContextPriv)
 {
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index b3d7c6baf8..37dfe45592 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -165,6 +165,12 @@ enum brw_cache_id {
BRW_MAX_CACHE
 };
 
+enum brw_astc5x5_wa_mode_t {
+   BRW_ASTC5x5_WA_MODE_NONE,
+   BRW_ASTC5x5_WA_MODE_HAS_ASTC5x5,
+   BRW_ASTC5x5_WA_MODE_HAS_AUX,
+};
+
 enum brw_state_id {
/* brw_cache_ids must come first - see brw_program_cache.c */
BRW_STATE_URB_FENCE = BRW_MAX_CACHE,
@@ -1230,6 +1236,18 @@ struct brw_context
 */
bool draw_aux_buffer_disabled[MAX_DRAW_BUFFERS];
 
+   /* Certain GEN's have a hardware bug where the sampler hangs if it attempts
+* to access auxilary buffers and an ASTC5x5 compressed buffer. The 
workaround
+* is to invalidate the texture cache between such access.
+*/
+   struct {
+  bool required;
+  enum brw_astc5x5_wa_mode_t mode;
+  bool texture_astc5x5_present;
+  bool texture_with_auxilary_present;
+  bool blorp_sampling_from_astc5x5;
+   } astc5x5_wa;
+
__DRIcontext *driContext;
struct intel_screen *screen;
 };
@@ -1663,6 +1681,11 @@ void brw_query_internal_format(struct gl_context *ctx, 
GLenum target,
GLenum internalFormat, GLenum pname,
GLint *params);
 
+/* brw_context::astc5x5_wa */
+void brw_set_astc5x5_wa_mode(struct brw_context *brw,
+  

[Mesa-dev] [PATCH 4/5] i965: use ASTC5x5 workaround in brw_draw

2017-12-01 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Perform the ASTC5x5 workaround tasks for drawing; note that
the function does not do anything and immediately returns
if the bug is not present on the hardware.

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_draw.c | 6 ++
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 5 +
 2 files changed, 11 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index 7e29dcfd4e..f335c2bd64 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -684,6 +684,12 @@ brw_prepare_drawing(struct gl_context *ctx,
brw_predraw_resolve_inputs(brw, true);
brw_predraw_resolve_framebuffer(brw);
 
+   /* if necessary, perform astc5x5 workarounds to make sure any sampler does
+* not sample sample from a surface using an auxilary buffer within the
+* same batch of sampling from a surface with an ASTC5x5 format
+*/
+   brw_astc5x5_perform_wa(brw);
+
/* Bind all inputs, derive varying and size information:
 */
brw_merge_inputs(brw, arrays);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index adf60a840b..ccdb537227 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -447,6 +447,11 @@ brw_aux_surface_disabled(const struct brw_context *brw,
 {
const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
 
+   if (brw->astc5x5_wa.required &&
+   brw->astc5x5_wa.texture_astc5x5_present) {
+  return true;
+   }
+
for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
   const struct intel_renderbuffer *irb =
  intel_renderbuffer(fb->_ColorDrawBuffers[i]);
-- 
2.14.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/5] i965: use ASTC5x5 workaround in brw_compute

2017-12-01 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Perform the ASTC5x5 workaround tasks for compute; note that
the function does not do anything and immediately returns
if the bug is not present on the hardware.

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_compute.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_compute.c 
b/src/mesa/drivers/dri/i965/brw_compute.c
index 9be7523bab..e338539dc1 100644
--- a/src/mesa/drivers/dri/i965/brw_compute.c
+++ b/src/mesa/drivers/dri/i965/brw_compute.c
@@ -179,6 +179,12 @@ brw_dispatch_compute_common(struct gl_context *ctx)
 
brw_predraw_resolve_inputs(brw, false);
 
+   /* if necessary, perform astc5x5 workarounds to make sure any sampler does
+* not sample sample from a surface using an auxilary buffer within the
+* same batch of sampling from a surface with an ASTC5x5 format
+*/
+   brw_astc5x5_perform_wa(brw);
+
/* Flush the batch if the batch/state buffers are nearly full.  We can
 * grow them if needed, but this is not free, so we'd like to avoid it.
 */
-- 
2.14.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/5] i965: ASTC5x5 workaround

2017-12-01 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

This patch series implements a needed workaround for Gen9 for ASTC5x5
sampler reads. The crux of the work around is to make sure that the
sampler does not read an ASTC5x5 texture and a surface with an auxilary
buffer without having a texture cache invalidate between such accesses.


Kevin Rogovin (5):
  i965: define astc5x5 workaround infrastructure
  i965: ASTC5x5 workaround logic for blorp
  i965: set ASTC5x5 workaround texture type tracking on texture validate
  i965: use ASTC5x5 workaround in brw_draw
  i965: use ASTC5x5 workaround in brw_compute

 src/mesa/drivers/dri/i965/brw_compute.c  |  6 +++
 src/mesa/drivers/dri/i965/brw_context.c  | 63 
 src/mesa/drivers/dri/i965/brw_context.h  | 23 +
 src/mesa/drivers/dri/i965/brw_draw.c |  6 +++
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  5 ++
 src/mesa/drivers/dri/i965/genX_blorp_exec.c  |  5 ++
 src/mesa/drivers/dri/i965/intel_batchbuffer.c|  1 +
 src/mesa/drivers/dri/i965/intel_tex_image.c  | 16 --
 src/mesa/drivers/dri/i965/intel_tex_validate.c   | 13 +
 9 files changed, 134 insertions(+), 4 deletions(-)

-- 
2.14.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/5] i965: ASTC5x5 workaround logic for blorp

2017-12-01 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Blorp will only read from an ASTC5x5 texture if it copies from
such a surface, that can only if an application is fetching
such pixels. Because an ASTC5x3 texture can never be a render
target, we do not need to worry about blorp reading such surfaces
on framebuffer blits, or any other copying from a framebuffer.

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/genX_blorp_exec.c |  5 +
 src/mesa/drivers/dri/i965/intel_tex_image.c | 16 
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c 
b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
index 87e90fde91..73f72d2603 100644
--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
+++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
@@ -230,6 +230,11 @@ genX(blorp_exec)(struct blorp_batch *batch,
struct gl_context *ctx = >ctx;
bool check_aperture_failed_once = false;
 
+   if (brw->astc5x5_wa.blorp_sampling_from_astc5x5) {
+  brw_set_astc5x5_wa_mode(brw, BRW_ASTC5x5_WA_MODE_HAS_ASTC5x5);
+   } else {
+  brw_set_astc5x5_wa_mode(brw, BRW_ASTC5x5_WA_MODE_HAS_AUX);
+   }
/* Flush the sampler and render caches.  We definitely need to flush the
 * sampler cache so that we get updated contents from the render cache for
 * the glBlitFramebuffer() source.  Also, we are sometimes warned in the
diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c 
b/src/mesa/drivers/dri/i965/intel_tex_image.c
index 37c8e24f03..60028bb67a 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -759,10 +759,18 @@ intel_get_tex_sub_image(struct gl_context *ctx,
DBG("%s\n", __func__);
 
if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
-  if (intel_gettexsubimage_blorp(brw, texImage,
- xoffset, yoffset, zoffset,
- width, height, depth, format, type,
- pixels, >Pack))
+  bool blorp_success;
+
+  brw->astc5x5_wa.blorp_sampling_from_astc5x5 =
+ (texImage->TexFormat == MESA_FORMAT_RGBA_ASTC_5x5 ||
+  texImage->TexFormat == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5);
+  blorp_success = intel_gettexsubimage_blorp(brw, texImage,
+ xoffset, yoffset, zoffset,
+ width, height, depth,
+ format, type, pixels,
+ >Pack);
+  brw->astc5x5_wa.blorp_sampling_from_astc5x5 = false;
+  if (blorp_success)
  return;
 
   perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
-- 
2.14.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 16/18] intel/tools/BatchbufferLogger (output-xml): add outputter to XML

2017-11-13 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/Makefile.tools.am|   6 +-
 src/intel/tools/.gitignore |   1 +
 src/intel/tools/i965_batchbuffer_dump_show_xml.cpp | 217 +
 3 files changed, 223 insertions(+), 1 deletion(-)
 create mode 100644 src/intel/tools/i965_batchbuffer_dump_show_xml.cpp

diff --git a/src/intel/Makefile.tools.am b/src/intel/Makefile.tools.am
index d1a87e6991..32b30edc4e 100644
--- a/src/intel/Makefile.tools.am
+++ b/src/intel/Makefile.tools.am
@@ -30,7 +30,8 @@ noinst_PROGRAMS += \
 intellib_LTLIBRARIES = \
tools/libi965_batchbuffer_logger.la
 
-intelbin_PROGRAMS = tools/i965_batchbuffer_dump_show
+intelbin_PROGRAMS = tools/i965_batchbuffer_dump_show \
+   tools/i965_batchbuffer_dump_show_xml
 
 intelbin_SCRIPTS = tools/i965_batchbuffer_logger_sh
 CLEANFILES += $(intelbin_SCRIPTS)
@@ -103,3 +104,6 @@ tools/i965_batchbuffer_logger_sh: 
tools/i965_batchbuffer_logger_sh.in
 
 tools_i965_batchbuffer_dump_show_SOURCES = \
tools/i965_batchbuffer_dump_show.c
+
+tools_i965_batchbuffer_dump_show_xml_SOURCES = \
+   tools/i965_batchbuffer_dump_show_xml.cpp
diff --git a/src/intel/tools/.gitignore b/src/intel/tools/.gitignore
index 7d95bcdf3a..7180e4ba4b 100644
--- a/src/intel/tools/.gitignore
+++ b/src/intel/tools/.gitignore
@@ -2,3 +2,4 @@
 /aubinator_error_decode
 /i965_batchbuffer_logger_sh
 /i965_batchbuffer_dump_show
+/i965_batchbuffer_dump_show_xml
diff --git a/src/intel/tools/i965_batchbuffer_dump_show_xml.cpp 
b/src/intel/tools/i965_batchbuffer_dump_show_xml.cpp
new file mode 100644
index 00..64916897ea
--- /dev/null
+++ b/src/intel/tools/i965_batchbuffer_dump_show_xml.cpp
@@ -0,0 +1,217 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "tools/i965_batchbuffer_logger_output.h"
+
+static
+std::string
+xml_value_from_file(unsigned int len, std::FILE *pfile)
+{
+   std::ostringstream str;
+   unsigned int i;
+   char c;
+
+   str << "";
+   return str.str();
+}
+
+static
+std::string
+xml_string_from_file(unsigned int len, std::FILE *pfile)
+{
+   std::ostringstream str;
+   unsigned int i;
+   char c;
+
+   for(i = 0; i < len; ++i) {
+  if (std::fread(, sizeof(char), 1, pfile) != sizeof(char)) {
+ break;
+  }
+  switch(c) {
+  case '<':
+ str << "";
+ break;
+  case '>':
+ str << "";
+ break;
+  case '&':
+ str << "";
+ break;
+  case '\n':
+ str << "";
+ break;
+  case '\"':
+ str << "";
+ break;
+  default:
+ str << c;
+  }
+   }
+
+   return str.str();
+}
+
+static
+bool
+legal_tag_char(char c)
+{
+   switch(c)
+   {
+   case '-':
+   case '_':
+   case '.':
+  return true;
+   }
+   return !std::isspace(c) && std::isalnum(c);
+}
+
+static
+std::string
+xml_tag_from_file(unsigned int len, std::FILE *pfile)
+{
+   std::string return_value;
+   unsigned int i;
+   char c;
+
+   for(i = 0; i < len; ++i) {
+  if (std::fread(, sizeof(char), 1, pfile) != sizeof(char)) {
+ break;
+  }
+  if (legal_tag_char(c)) {
+ if (i == 0 && (std::isdigit(c) || c == '.' || c == '-')) {
+return_value.push_back('_');
+ }
+ return_value.push_back(c);
+  } else {
+ return_value.push_back('_');
+  }
+   }
+
+   return return_value;
+}
+
+static
+void
+handle_block_begin(const struct i965_batchbuffer_logger_header *hdr,
+   std::vecto

[Mesa-dev] [PATCH 18/18] intel/tools: add command line GEN shader disassembler tool

2017-11-13 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/Makefile.tools.am   |  21 ++-
 src/intel/tools/.gitignore|   1 +
 src/intel/tools/gen_shader_disassembler.c | 221 ++
 3 files changed, 242 insertions(+), 1 deletion(-)
 create mode 100644 src/intel/tools/gen_shader_disassembler.c

diff --git a/src/intel/Makefile.tools.am b/src/intel/Makefile.tools.am
index dd68d8f173..71eb3253c3 100644
--- a/src/intel/Makefile.tools.am
+++ b/src/intel/Makefile.tools.am
@@ -32,7 +32,8 @@ intellib_LTLIBRARIES = \
 
 intelbin_PROGRAMS = tools/i965_batchbuffer_dump_show \
tools/i965_batchbuffer_dump_show_xml \
-   tools/i965_batchbuffer_dump_show_json
+   tools/i965_batchbuffer_dump_show_json \
+   tools/gen_shader_disassembler
 
 intelbin_SCRIPTS = tools/i965_batchbuffer_logger_sh
 CLEANFILES += $(intelbin_SCRIPTS)
@@ -111,3 +112,21 @@ tools_i965_batchbuffer_dump_show_xml_SOURCES = \
 
 tools_i965_batchbuffer_dump_show_json_SOURCES = \
tools/i965_batchbuffer_dump_show_json.cpp
+
+tools_gen_shader_disassembler_SOURCES = \
+   tools/gen_shader_disassembler.c \
+   tools/disasm.c \
+   tools/gen_disasm.h
+
+tools_gen_shader_disassembler_LDADD = \
+   common/libintel_common.la \
+   compiler/libintel_compiler.la \
+   $(top_builddir)/src/util/libmesautil.la \
+   $(PTHREAD_LIBS) \
+   $(EXPAT_LIBS) \
+   $(ZLIB_LIBS)
+
+tools_gen_shader_disassembler_CFLAGS = \
+   $(AM_CFLAGS) \
+   $(EXPAT_CFLAGS) \
+   $(ZLIB_CFLAGS)
diff --git a/src/intel/tools/.gitignore b/src/intel/tools/.gitignore
index ea4dc23c20..e9b22c89aa 100644
--- a/src/intel/tools/.gitignore
+++ b/src/intel/tools/.gitignore
@@ -4,3 +4,4 @@
 /i965_batchbuffer_dump_show
 /i965_batchbuffer_dump_show_xml
 /i965_batchbuffer_dump_show_json
+/gen_shader_disassembler
diff --git a/src/intel/tools/gen_shader_disassembler.c 
b/src/intel/tools/gen_shader_disassembler.c
new file mode 100644
index 00..bd6c400fcc
--- /dev/null
+++ b/src/intel/tools/gen_shader_disassembler.c
@@ -0,0 +1,221 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "compiler/brw_inst.h"
+#include "compiler/brw_eu.h"
+
+static
+void
+print_opcodes(const void *data, int data_sz,
+  struct gen_device_info *devinfo,
+  bool print_offsets)
+{
+   for (int offset = 0; offset < data_sz;) {
+  const brw_inst *insn = data + offset;
+  bool compacted;
+  brw_inst uncompacted;
+  enum opcode opcode;
+  const struct opcode_desc *desc;
+
+  if (print_offsets) {
+ printf("0x%08x: ", offset);
+  }
+
+  compacted = brw_inst_cmpt_control(devinfo, insn);
+  if (compacted) {
+ brw_compact_inst *compacted_insn;
+ compacted_insn = (void*)insn;
+ brw_uncompact_instruction(devinfo, , compacted_insn);
+ insn = 
+ offset += 8;
+  } else {
+ offset += 16;
+  }
+
+  opcode = brw_inst_opcode(devinfo, insn);
+  desc = brw_opcode_desc(devinfo, opcode);
+  if (desc) {
+ printf("(0x%08x) %s", opcode, desc->name);
+  } else {
+ printf("(0x%08x) UnknownOpcode", opcode);
+  }
+
+  if (compacted) {
+ printf(" {Compacted}");
+  }
+
+  printf("\n");
+   }
+}
+
+static
+void
+print_disassembly(const void *data, int data_sz,
+  struct gen_device_info *devinfo,
+  bool print_offsets)
+{
+   struct annotation_info annotation_info = {
+  .ann_count = 1,
+  .ann_size = 2,
+   };
+   annotation_info.mem_ctx = ralloc_context(NULL);
+  

[Mesa-dev] [PATCH 17/18] intel/tools/BatchbufferLogger (output-json): add json outputter

2017-11-13 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/Makefile.tools.am|   6 +-
 src/intel/tools/.gitignore |   1 +
 .../tools/i965_batchbuffer_dump_show_json.cpp  | 253 +
 3 files changed, 259 insertions(+), 1 deletion(-)
 create mode 100644 src/intel/tools/i965_batchbuffer_dump_show_json.cpp

diff --git a/src/intel/Makefile.tools.am b/src/intel/Makefile.tools.am
index 32b30edc4e..dd68d8f173 100644
--- a/src/intel/Makefile.tools.am
+++ b/src/intel/Makefile.tools.am
@@ -31,7 +31,8 @@ intellib_LTLIBRARIES = \
tools/libi965_batchbuffer_logger.la
 
 intelbin_PROGRAMS = tools/i965_batchbuffer_dump_show \
-   tools/i965_batchbuffer_dump_show_xml
+   tools/i965_batchbuffer_dump_show_xml \
+   tools/i965_batchbuffer_dump_show_json
 
 intelbin_SCRIPTS = tools/i965_batchbuffer_logger_sh
 CLEANFILES += $(intelbin_SCRIPTS)
@@ -107,3 +108,6 @@ tools_i965_batchbuffer_dump_show_SOURCES = \
 
 tools_i965_batchbuffer_dump_show_xml_SOURCES = \
tools/i965_batchbuffer_dump_show_xml.cpp
+
+tools_i965_batchbuffer_dump_show_json_SOURCES = \
+   tools/i965_batchbuffer_dump_show_json.cpp
diff --git a/src/intel/tools/.gitignore b/src/intel/tools/.gitignore
index 7180e4ba4b..ea4dc23c20 100644
--- a/src/intel/tools/.gitignore
+++ b/src/intel/tools/.gitignore
@@ -3,3 +3,4 @@
 /i965_batchbuffer_logger_sh
 /i965_batchbuffer_dump_show
 /i965_batchbuffer_dump_show_xml
+/i965_batchbuffer_dump_show_json
diff --git a/src/intel/tools/i965_batchbuffer_dump_show_json.cpp 
b/src/intel/tools/i965_batchbuffer_dump_show_json.cpp
new file mode 100644
index 00..bf48396424
--- /dev/null
+++ b/src/intel/tools/i965_batchbuffer_dump_show_json.cpp
@@ -0,0 +1,253 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "tools/i965_batchbuffer_logger_output.h"
+
+static
+std::string
+json_name_from_file(unsigned int len, std::FILE *pfile)
+{
+   std::string return_value;
+   unsigned int i;
+   char c;
+
+   for(i = 0; i < len; ++i) {
+  if (std::fread(, sizeof(char), 1, pfile) != sizeof(char)) {
+ break;
+  }
+  if(c != '\n') {
+ return_value.push_back(c);
+  }
+   }
+
+   return return_value;
+}
+
+static
+std::string
+json_block_name_from_file(const struct i965_batchbuffer_logger_header *hdr,
+  std::FILE *pfile)
+{
+   std::ostringstream return_value;
+
+   return_value << json_name_from_file(hdr->name_length, pfile);
+   if (hdr->value_length > 0) {
+  return_value << ":" << json_name_from_file(hdr->value_length, pfile);
+   }
+
+   return return_value.str();
+}
+
+static
+void
+json_print_value_line(std::vector::const_iterator begin,
+  std::vector::const_iterator end)
+{
+   for(; begin != end; ++begin) {
+  switch(*begin) {
+  case '\t':
+ std::cout << "\\t";
+ break;
+  case '\"':
+ std::cout << "\\\"";
+ break;
+  case '\\':
+ /* This is silly but required; apirtace will place control
+  * character codes (sometimes) within string values (typically
+  * from shader sources). So for example if a shader source
+  * has (within a comment) something like "Famous"
+  * then the detailed function value will have then \"Famour\"
+  * within its string value. The below silly block of code
+  * just checks if there is a non-white character and if so,
+  * just assume that the \ is a control code.
+  */
+ if (begin !=

[Mesa-dev] [PATCH 13/18] intel/tools/BatchbufferLogger: install i965_batchbuffer non-driver interface headers

2017-11-13 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/Makefile.tools.am | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/intel/Makefile.tools.am b/src/intel/Makefile.tools.am
index 9919b5f241..c308b816f9 100644
--- a/src/intel/Makefile.tools.am
+++ b/src/intel/Makefile.tools.am
@@ -20,6 +20,7 @@
 # IN THE SOFTWARE.
 
 intellibdir = $(libdir)
+intelincludedir = $(includedir)
 
 noinst_PROGRAMS += \
tools/aubinator \
@@ -28,6 +29,9 @@ noinst_PROGRAMS += \
 intellib_LTLIBRARIES = \
tools/libi965_batchbuffer_logger.la
 
+intelinclude_HEADERS = tools/i965_batchbuffer_logger_app.h \
+   tools/i965_batchbuffer_logger_output.h
+
 tools_aubinator_SOURCES = \
tools/aubinator.c \
tools/disasm.c \
-- 
2.14.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 15/18] intel/tools/BatchbufferLogger (txt-output): example txt dumper

2017-11-13 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/Makefile.tools.am  |   5 ++
 src/intel/tools/.gitignore   |   1 +
 src/intel/tools/i965_batchbuffer_dump_show.c | 129 +++
 3 files changed, 135 insertions(+)
 create mode 100644 src/intel/tools/i965_batchbuffer_dump_show.c

diff --git a/src/intel/Makefile.tools.am b/src/intel/Makefile.tools.am
index 062c225c89..d1a87e6991 100644
--- a/src/intel/Makefile.tools.am
+++ b/src/intel/Makefile.tools.am
@@ -30,6 +30,8 @@ noinst_PROGRAMS += \
 intellib_LTLIBRARIES = \
tools/libi965_batchbuffer_logger.la
 
+intelbin_PROGRAMS = tools/i965_batchbuffer_dump_show
+
 intelbin_SCRIPTS = tools/i965_batchbuffer_logger_sh
 CLEANFILES += $(intelbin_SCRIPTS)
 
@@ -98,3 +100,6 @@ tools_libi965_batchbuffer_logger_la_LDFLAGS = \
 intel_sed_prefix_vars = sed -e 's,[@]libdir[@],$(intellibdir),g'
 tools/i965_batchbuffer_logger_sh: tools/i965_batchbuffer_logger_sh.in
$(intel_sed_prefix_vars) < tools/i965_batchbuffer_logger_sh.in > 
tools/i965_batchbuffer_logger_sh
+
+tools_i965_batchbuffer_dump_show_SOURCES = \
+   tools/i965_batchbuffer_dump_show.c
diff --git a/src/intel/tools/.gitignore b/src/intel/tools/.gitignore
index fa9bf70808..7d95bcdf3a 100644
--- a/src/intel/tools/.gitignore
+++ b/src/intel/tools/.gitignore
@@ -1,3 +1,4 @@
 /aubinator
 /aubinator_error_decode
 /i965_batchbuffer_logger_sh
+/i965_batchbuffer_dump_show
diff --git a/src/intel/tools/i965_batchbuffer_dump_show.c 
b/src/intel/tools/i965_batchbuffer_dump_show.c
new file mode 100644
index 00..0e8c372e63
--- /dev/null
+++ b/src/intel/tools/i965_batchbuffer_dump_show.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include "tools/i965_batchbuffer_logger_output.h"
+
+static
+void
+print_tabs(int l)
+{
+   for(int i = 0; i < l; ++i) {
+  printf("\t");
+   }
+}
+
+static
+void
+print_instrumentation_message(int block_level,
+  const struct i965_batchbuffer_logger_header *hdr,
+  FILE *pfile)
+{
+   bool line_start;
+   uint32_t i;
+   char c = 0;
+   size_t num_read;
+
+   /* assume name has no EOLs */
+   print_tabs(block_level);
+   for (i = 0; !feof(pfile) && i < hdr->name_length; ++i) {
+  num_read = fread(, sizeof(c), 1, pfile);
+  if (num_read != 1)
+ return;
+  printf("%c", c);
+   }
+
+   if (hdr->value_length > 0) {
+  printf(" : ");
+   }
+
+   /* print the value (if there is one). */
+   for (i = 0, line_start = false; !feof(pfile) && i < hdr->value_length; ++i) 
{
+  num_read = fread(, sizeof(c), 1, pfile);
+
+  if (num_read != 1)
+ break;
+
+  if (line_start) {
+ print_tabs(block_level);
+  }
+
+  printf("%c", c);
+  line_start = (c == '\n');
+   }
+
+   if (c != '\n') {
+  printf("\n");
+   }
+}
+
+
+int
+main(int argc, char **argv)
+{
+   FILE *pfile;
+   int block_level = 0;
+
+   if (argc != 2) {
+  return -1;
+   }
+
+   pfile = fopen(argv[1], "r");
+   if (pfile == NULL) {
+  return -1;
+   }
+
+   while (!feof(pfile)) {
+  struct i965_batchbuffer_logger_header hdr;
+  size_t num_read;
+
+  num_read = fread(, sizeof(hdr), 1, pfile);
+  if (num_read != 1)
+ break;
+
+  switch(hdr.type) {
+ case I965_BATCHBUFFER_LOGGER_MESSAGE_BLOCK_BEGIN: {
+print_instrumentation_message(block_level, , pfile);
+++block_level;
+ }
+ break;
+
+ case I965_BATCHBUFFER_LOGGER_MESSAGE_BLOCK_END: {
+--block_level;
+

[Mesa-dev] [PATCH 08/18] intel/compiler:add function to give option to print offsets into assembly

2017-11-13 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/compiler/brw_eu.c | 11 ++-
 src/intel/compiler/brw_eu.h |  3 +++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_eu.c b/src/intel/compiler/brw_eu.c
index bc297a21b3..8969ae5bda 100644
--- a/src/intel/compiler/brw_eu.c
+++ b/src/intel/compiler/brw_eu.c
@@ -339,6 +339,15 @@ const unsigned *brw_get_program( struct brw_codegen *p,
 void
 brw_disassemble(const struct gen_device_info *devinfo,
 const void *assembly, int start, int end, FILE *out)
+{
+   brw_disassemble_print_offset_option(devinfo, assembly, start, end, out,
+   false);
+}
+
+void
+brw_disassemble_print_offset_option(const struct gen_device_info *devinfo,
+const void *assembly, int start, int end,
+FILE *out, bool print_offsets)
 {
bool dump_hex = (INTEL_DEBUG & DEBUG_HEX) != 0;
 
@@ -346,7 +355,7 @@ brw_disassemble(const struct gen_device_info *devinfo,
   const brw_inst *insn = assembly + offset;
   brw_inst uncompacted;
   bool compacted = brw_inst_cmpt_control(devinfo, insn);
-  if (0)
+  if (print_offsets)
  fprintf(out, "0x%08x: ", offset);
 
   if (compacted) {
diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
index 95503d5513..497cf9e575 100644
--- a/src/intel/compiler/brw_eu.h
+++ b/src/intel/compiler/brw_eu.h
@@ -128,6 +128,9 @@ int brw_disassemble_inst(FILE *file, const struct 
gen_device_info *devinfo,
  const struct brw_inst *inst, bool is_compacted);
 void brw_disassemble(const struct gen_device_info *devinfo,
  const void *assembly, int start, int end, FILE *out);
+void brw_disassemble_print_offset_option(const struct gen_device_info *devinfo,
+ const void *assembly, int start, int 
end,
+ FILE *out, bool print_offsets);
 const unsigned *brw_get_program( struct brw_codegen *p, unsigned *sz );
 
 brw_inst *brw_next_insn(struct brw_codegen *p, unsigned opcode);
-- 
2.14.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/18] intel/compiler: fix for memmove argument on annotating error

2017-11-13 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Without this fix, disassembling of GEN shaders with GPU commands
that the disassembler does not know would result in errors being
added to the annotator which would crash when more than one error
was added.

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/compiler/intel_asm_annotation.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/intel/compiler/intel_asm_annotation.c 
b/src/intel/compiler/intel_asm_annotation.c
index b07a545a12..7aa222f04e 100644
--- a/src/intel/compiler/intel_asm_annotation.c
+++ b/src/intel/compiler/intel_asm_annotation.c
@@ -181,8 +181,9 @@ annotation_insert_error(struct annotation_info *annotation, 
unsigned offset,
  continue;
 
   if (offset + sizeof(brw_inst) != next->offset) {
- memmove(next, cur,
- (annotation->ann_count - i + 2) * sizeof(struct annotation));
+ int count;
+ count = annotation->ann_count - i - 1;
+ memmove(next, cur, count * sizeof(struct annotation));
  cur->error = NULL;
  cur->error_length = 0;
  cur->block_end = NULL;
-- 
2.14.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/18] intel/tools/disasm: gen_disasm_disassemble to take const void* instead of void*

2017-11-13 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/tools/disasm.c | 6 +++---
 src/intel/tools/gen_disasm.h | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/intel/tools/disasm.c b/src/intel/tools/disasm.c
index 251acd313d..27f99e21f9 100644
--- a/src/intel/tools/disasm.c
+++ b/src/intel/tools/disasm.c
@@ -44,14 +44,14 @@ is_send(uint32_t opcode)
 }
 
 static int
-gen_disasm_find_end(struct gen_disasm *disasm, void *assembly, int start)
+gen_disasm_find_end(struct gen_disasm *disasm, const void *assembly, int start)
 {
struct gen_device_info *devinfo = >devinfo;
int offset = start;
 
/* This loop exits when send-with-EOT or when opcode is 0 */
while (true) {
-  brw_inst *insn = assembly + offset;
+  const brw_inst *insn = assembly + offset;
 
   if (brw_inst_cmpt_control(devinfo, insn)) {
  offset += 8;
@@ -70,7 +70,7 @@ gen_disasm_find_end(struct gen_disasm *disasm, void 
*assembly, int start)
 }
 
 void
-gen_disasm_disassemble(struct gen_disasm *disasm, void *assembly,
+gen_disasm_disassemble(struct gen_disasm *disasm, const void *assembly,
int start, FILE *out)
 {
struct gen_device_info *devinfo = >devinfo;
diff --git a/src/intel/tools/gen_disasm.h b/src/intel/tools/gen_disasm.h
index d2764bb90b..8d82a58617 100644
--- a/src/intel/tools/gen_disasm.h
+++ b/src/intel/tools/gen_disasm.h
@@ -32,7 +32,7 @@ struct gen_disasm;
 
 struct gen_disasm *gen_disasm_create(int pciid);
 void gen_disasm_disassemble(struct gen_disasm *disasm,
-void *assembly, int start, FILE *out);
+const void *assembly, int start, FILE *out);
 
 void gen_disasm_destroy(struct gen_disasm *disasm);
 
-- 
2.14.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/18] intel/tools/disasm: make sure that entire range is disassembled

2017-11-13 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Without this patch, if a shader has errors, the disassembly of the
shader often stops after the first opcode that has errors.

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/tools/disasm.c | 13 +
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/intel/tools/disasm.c b/src/intel/tools/disasm.c
index 6109727d56..aaad8a4dfa 100644
--- a/src/intel/tools/disasm.c
+++ b/src/intel/tools/disasm.c
@@ -74,7 +74,7 @@ gen_disasm_disassemble(struct gen_disasm *disasm, const void 
*assembly,
int start, FILE *out)
 {
struct gen_device_info *devinfo = >devinfo;
-   int end = gen_disasm_find_end(disasm, assembly, start);
+   int i, offset, end = gen_disasm_find_end(disasm, assembly, start);
 
/* Make a dummy annotation structure that brw_validate_instructions
 * can work from.
@@ -92,15 +92,20 @@ gen_disasm_disassemble(struct gen_disasm *disasm, const 
void *assembly,
brw_validate_instructions(devinfo, assembly, start, end, _info);
struct annotation *annotation = annotation_info.ann;
 
-   for (int i = 0; i < annotation_info.ann_count; i++) {
-  int start_offset = annotation[i].offset;
+   for (i = 0, offset = start; i < annotation_info.ann_count; i++) {
   int end_offset = annotation[i + 1].offset;
 
-  brw_disassemble(devinfo, assembly, start_offset, end_offset, out);
+  brw_disassemble(devinfo, assembly, offset, end_offset, out);
 
   if (annotation[i].error) {
  fputs(annotation[i].error, out);
   }
+
+  offset = end_offset;
+   }
+
+   if (offset < end) {
+  brw_disassemble(devinfo, assembly, offset, end, out);
}
 
ralloc_free(annotation_info.mem_ctx);
-- 
2.14.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/18] intel/tools/BatchbufferLogger : add shell script for batchbuffer logger

2017-11-13 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/Makefile.tools.am   |   8 ++
 src/intel/tools/.gitignore|   1 +
 src/intel/tools/i965_batchbuffer_logger_sh.in | 108 ++
 3 files changed, 117 insertions(+)
 create mode 100644 src/intel/tools/i965_batchbuffer_logger_sh.in

diff --git a/src/intel/Makefile.tools.am b/src/intel/Makefile.tools.am
index c308b816f9..062c225c89 100644
--- a/src/intel/Makefile.tools.am
+++ b/src/intel/Makefile.tools.am
@@ -19,6 +19,7 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
 
+intelbindir = $(bindir)
 intellibdir = $(libdir)
 intelincludedir = $(includedir)
 
@@ -29,6 +30,9 @@ noinst_PROGRAMS += \
 intellib_LTLIBRARIES = \
tools/libi965_batchbuffer_logger.la
 
+intelbin_SCRIPTS = tools/i965_batchbuffer_logger_sh
+CLEANFILES += $(intelbin_SCRIPTS)
+
 intelinclude_HEADERS = tools/i965_batchbuffer_logger_app.h \
tools/i965_batchbuffer_logger_output.h
 
@@ -90,3 +94,7 @@ tools_libi965_batchbuffer_logger_la_CXXFLAGS = \
 
 tools_libi965_batchbuffer_logger_la_LDFLAGS = \
-no-undefined -module -avoid-version -shared -shrext .so
+
+intel_sed_prefix_vars = sed -e 's,[@]libdir[@],$(intellibdir),g'
+tools/i965_batchbuffer_logger_sh: tools/i965_batchbuffer_logger_sh.in
+   $(intel_sed_prefix_vars) < tools/i965_batchbuffer_logger_sh.in > 
tools/i965_batchbuffer_logger_sh
diff --git a/src/intel/tools/.gitignore b/src/intel/tools/.gitignore
index 27437f9eef..fa9bf70808 100644
--- a/src/intel/tools/.gitignore
+++ b/src/intel/tools/.gitignore
@@ -1,2 +1,3 @@
 /aubinator
 /aubinator_error_decode
+/i965_batchbuffer_logger_sh
diff --git a/src/intel/tools/i965_batchbuffer_logger_sh.in 
b/src/intel/tools/i965_batchbuffer_logger_sh.in
new file mode 100644
index 00..0746173417
--- /dev/null
+++ b/src/intel/tools/i965_batchbuffer_logger_sh.in
@@ -0,0 +1,108 @@
+#!/bin/bash
+# -*- mode: sh -*-
+
+function show_help() {
+cat <https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/18] intel/tools/disasm: add gen_disasm_assembly_length function

2017-11-13 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

The length function is needed if one wishes to save GEN binary
shaders to file.

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/tools/disasm.c | 7 +++
 src/intel/tools/gen_disasm.h | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/src/intel/tools/disasm.c b/src/intel/tools/disasm.c
index 27f99e21f9..6109727d56 100644
--- a/src/intel/tools/disasm.c
+++ b/src/intel/tools/disasm.c
@@ -106,6 +106,13 @@ gen_disasm_disassemble(struct gen_disasm *disasm, const 
void *assembly,
ralloc_free(annotation_info.mem_ctx);
 }
 
+int
+gen_disasm_assembly_length(struct gen_disasm *disasm,
+   const void *assembly, int start)
+{
+   return gen_disasm_find_end(disasm, assembly, start) - start;
+}
+
 struct gen_disasm *
 gen_disasm_create(int pciid)
 {
diff --git a/src/intel/tools/gen_disasm.h b/src/intel/tools/gen_disasm.h
index 8d82a58617..8b95a710e2 100644
--- a/src/intel/tools/gen_disasm.h
+++ b/src/intel/tools/gen_disasm.h
@@ -33,6 +33,8 @@ struct gen_disasm;
 struct gen_disasm *gen_disasm_create(int pciid);
 void gen_disasm_disassemble(struct gen_disasm *disasm,
 const void *assembly, int start, FILE *out);
+int gen_disasm_assembly_length(struct gen_disasm *disasm,
+   const void *assembly, int start);
 
 void gen_disasm_destroy(struct gen_disasm *disasm);
 
-- 
2.14.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 00/18] Batchbuffer Logger for Intel GPU (v2)

2017-11-13 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

This patch series defines and implements a BatchbufferLogger
for Intel GEN. The main purpose of the BatchbufferLogger is
to strongly correlate API calls to data added to a batchbuffer.
In addition to this function, the BatchbufferLogger also tracks
GPU state (respecting HW context as well). The logger intercepts
drmIoctl recording the information needed to decode a bachbuffer
(such as GEM BO creation/deletion, HW context create/delete,
and most importantly execbuffer2). When the execbuffer2 returns
from the kernel, the BatchbufferLogger will log information
in its log of what was added when and in addition log the
GPU state (at the point in the batchbuffer) of 3DPRIMITIVE and
GPGPU_WALKER commands.

It is an application's requirment to tell the BatchbufferLogger
just before and just after an API call. Because of the need
to intercept drmIoctl, having an application link against
BatchbufferLogger is not robust. Instead, an application is
to use dlsym to fetch the correction a function pointer that
returns the BatchbufferLogger's application interface. The
interface of the BatchbufferLogger is defined in patch 0002.
A script is also provided to use the BatchbufferLogger in
an easier way than needing to set environmental variables.

On the subject of application integration, I have a -very-
small patch series that enabled BatchbufferLogger for
apitrace. I can share these patches if anyone asks, but I
cannot submit them to apitrace until atleast the BatchbufferLogger
is in Mesa with a stable application interface.

The log emitted by the BatchbufferLogger is a sequence of blocks
with possibility of blocks being elements of blocks. The top level
blocks are the API call markers created from the calls into the
BatchbufferLogger from the application. An application starts
and ends a logging session. A session provides callbacks to
write the block data; with the interface of a callback a log
does not need to just write to a file, it could also write
over a network to remote application for the purpose of remote
debugging. A specific interface is also provided to write to a
file and additional tools are included to convert the log to JSON,
XML and text. The simple file format should allow others to be able
to take the data and use it however they see fit. The JSON output
alone can be quite illuminating to use when debugging/enhancing the
i965 driver for a single frame (or even single draw call) of a troublesome
application.

It is worth noting that i965 already has a batchbuffer decoder
dumper (also making use src/intel/tools/gen_decoder). However,
BatchbufferLogger has the following features which are not in the
current dumper and are likely quite tricky to implement into it.

 1. The biggest feature is that this BatchbufferLogger contents are
annotated by the GL/GLES API calls and driver text as well.
 2. This BatchbufferLogger provides an interface to application space
to allow for applications to start/end logging sessions as
they see fit. In addition, an application can have multilpe
independent sessions active.
 3. This BatchbufferLogger repeats the state of the GPU and HW
context on 3DPRIMITIVE and GPGPU_WALKER commands. By doing
so one can see the precise state the GPU is in at that command.
Coupled with (1), one can see what precise GPU state one
has when an application issues draw commands. Moreover, coupled
with the (previously) posted patches for apitrace, one
can compare differences at specific trace points within a
trace from apitrace to help diagnose regressions.

In addition, as suggested by Chris Wilson, I have also implemented an
i965 blackbox recorded at https://github.com/krogueintel/i965-blackbox
which will perform the necessary GL/GLES API interception to
produce annotated batchbuffer recordings to assist in GPU hang
debugging. The BatchbufferLogger (again at Chris Wilson's suggestion)
has the ability to walk batchbuffers BEFORE kernel submit and to
also add an EXEC_OBJECT_CAPTURE execobject2 whose contents are
the ID of the execbuffer2 ioctl submitted. The aim is to greatly
assist in GPU hang debugging for troublesome applications.

For those interested, this BatchbufferLogger is available on
github at https://github.com/krogueintel/asem/tree/batchbuffer-logger.

The patch series is organized into the following blocks:

0001-0003: Define the BatchbufferLogger interfaces
0004-0004: Minor fix to i965 driver
0005-0005: Hooking of BatchbufferLogger into i965
0006-0011: Fixes and enhancements to intel/compiler,
   intel/tools and intel/common.
0012-0014: Implementation of BatchBufferLogger
0015-0017: Tools to decode log to JSON, XML and text
0018-0018: Command line tool for disassembling shader
   binaries.

v2:
  bug fix (spotted by Chris Wilson) for when i965 moves batchbuffer content
  add application session interface for more customizable logging
  add driver interface to add anno

[Mesa-dev] [PATCH 06/18] intel/common/gen_decoder: make useable from C++ source

2017-11-13 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/common/gen_decoder.h | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/src/intel/common/gen_decoder.h b/src/intel/common/gen_decoder.h
index 8b00b6edc2..e3b2457dfd 100644
--- a/src/intel/common/gen_decoder.h
+++ b/src/intel/common/gen_decoder.h
@@ -34,6 +34,10 @@
 extern "C" {
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct gen_spec;
 struct gen_group;
 struct gen_field;
@@ -185,6 +189,9 @@ void gen_print_group(FILE *out,
  struct gen_group *group,
  uint64_t offset, const uint32_t *p,
  bool color);
+#ifdef __cplusplus
+}
+#endif
 
 #ifdef __cplusplus
 }
-- 
2.14.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/18] intel/tools: define BatchBufferLogger driver interface

2017-11-13 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Define the driver interface for BatchbufferLogger. The
interface assumes that for any -thread- there is only
one batchbuffer to which commands are to be added. A
driver needs to provide the information on what is the
active batchbuffer on a calling thread and also a function
that given a batchbuffer, returns where in the batchbuffer
the driver is currently "at".

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/tools/i965_batchbuffer_logger.h | 186 ++
 1 file changed, 186 insertions(+)
 create mode 100644 src/intel/tools/i965_batchbuffer_logger.h

diff --git a/src/intel/tools/i965_batchbuffer_logger.h 
b/src/intel/tools/i965_batchbuffer_logger.h
new file mode 100644
index 00..bc219dfea4
--- /dev/null
+++ b/src/intel/tools/i965_batchbuffer_logger.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef I965_BATCHBUFFER_LOGGER_DRIVER_H
+#define I965_BATCHBUFFER_LOGGER_DRIVER_H
+
+#include 
+#include 
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * i965_batchbuffer_logger tracks a batchbuffers
+ * by the pair (GEM-BO handle, File descriptor)
+ * pair.
+ */
+struct i965_logged_batchbuffer {
+   /**
+* GEM BO of the batch buffer, this is the BO
+* sent to kernel to execute commands on the
+* GPU
+*/
+   uint32_t gem_bo;
+
+   /**
+* The file descriptor of the GEM BO
+*/
+   int fd;
+
+   /**
+* Opaque pointer used by the driver associated
+* to the batch buffer; an i965_batchbuffer_logger
+* does NOT use this value to identify a batchbuffer.
+* It is for the driver to use to help it compute
+* where it is in a specified batchbuffer.
+*/
+   const void *driver_data;
+};
+
+/**
+ * A counter is to be used by a driver to provide counts of elements
+ * added to a batchbuffer when the counter is active. Actions acting
+ * on a counter are placed on the active batchbuffer of the calling
+ * thread, not executed immediately. The actual counting (in addition
+ * to activation, deactivation, reset, and relase) are performed when
+ * the batchbuffer is intercepted by the logger.
+ */
+struct i965_batchbuffer_counter {
+   void *opaque;
+};
+
+/**
+ * An i965_batchbuffer_logger object represents the hooking
+ * of a GPU driver.
+ */
+struct i965_batchbuffer_logger {
+   /**
+* To be called by the driver to inform the batchbuffer logger
+* that a batch buffer was created, but NOT sent to the kernel
+* to be executed by the driver.
+*/
+   void (*aborted_batchbuffer)(struct i965_batchbuffer_logger *logger,
+   int fd, uint32_t gem_bo);
+
+   /**
+* To be called by the driver if it migrates commands from one
+* batchbuffer to another batchbuffer.
+*/
+   void (*migrate_batchbuffer)(struct i965_batchbuffer_logger *logger,
+   const struct i965_logged_batchbuffer *from,
+   const struct i965_logged_batchbuffer *to);
+
+   /**
+* To be called by the driver to add log-message data to the
+* batchbuffer log. The message will be added to the log of
+* the batchbuffer dst. If counter is non-NULL the values
+* in the counter are also emitted to the log.
+*/
+   void (*add_message)(struct i965_batchbuffer_logger *logger,
+   const struct i965_logged_batchbuffer *dst,
+   const char *fmt, va_list ap);
+
+   /**
+* call to release the i965_batchbuffer_logger
+*/
+   void (*release_driver)(struct i965_batchbuffer_logger *logger);
+
+   /**
+* Create a counter object. If filename is non-NULL, then
+* the values of the counter will be emitted to the named
+* file when the counter is deleted.
+*/
+   str

[Mesa-dev] [PATCH 05/18] i965: Enable BatchbufferLogger in i965 driver

2017-11-13 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

The interface for BatchbufferLogger is that it is active
only if it is LD_PRELOAD'ed. Thus, the i965 driver is to
use dlsym to see if it is there, and if so fetch the object
at intel_screen creation.

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c| 22 -
 src/mesa/drivers/dri/i965/brw_bufmgr.h|  8 -
 src/mesa/drivers/dri/i965/brw_context.c   | 34 
 src/mesa/drivers/dri/i965/brw_context.h   | 12 +++
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 29 +++--
 src/mesa/drivers/dri/i965/intel_screen.c  | 46 +--
 src/mesa/drivers/dri/i965/intel_screen.h  |  3 ++
 7 files changed, 146 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 17036b53bc..6a70fc50d8 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -65,6 +65,8 @@
 #include "string.h"
 
 #include "i915_drm.h"
+#include "intel_screen.h"
+#include "tools/i965_batchbuffer_logger.h"
 
 #ifdef HAVE_VALGRIND
 #include 
@@ -104,6 +106,7 @@ struct bo_cache_bucket {
 };
 
 struct brw_bufmgr {
+   struct intel_screen *screen;
int fd;
 
mtx_t lock;
@@ -621,9 +624,14 @@ bo_unreference_final(struct brw_bo *bo, time_t time)
 {
struct brw_bufmgr *bufmgr = bo->bufmgr;
struct bo_cache_bucket *bucket;
+   struct i965_batchbuffer_logger *bb_logger =
+  bufmgr->screen->batchbuffer_logger;
 
DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name);
 
+   if(bb_logger != NULL) {
+  bb_logger->aborted_batchbuffer(bb_logger, bufmgr->fd, bo->gem_handle);
+   }
bucket = bucket_for_size(bufmgr, bo->size);
/* Put the buffer into our internal cache for reuse if we can. */
if (bufmgr->bo_reuse && bo->reusable && bucket != NULL &&
@@ -1064,6 +1072,12 @@ brw_bufmgr_destroy(struct brw_bufmgr *bufmgr)
free(bufmgr);
 }
 
+int
+brw_bufmgr_fd(const struct brw_bufmgr *bufmgr)
+{
+   return bufmgr->fd;
+}
+
 static int
 bo_set_tiling_internal(struct brw_bo *bo, uint32_t tiling_mode,
uint32_t stride)
@@ -1367,9 +1381,14 @@ gem_param(int fd, int name)
  * \param fd File descriptor of the opened DRM device.
  */
 struct brw_bufmgr *
-brw_bufmgr_init(struct gen_device_info *devinfo, int fd)
+brw_bufmgr_init(struct intel_screen *screen)
 {
struct brw_bufmgr *bufmgr;
+   struct gen_device_info *devinfo;
+   int fd;
+
+   devinfo = >devinfo;
+   fd = screen->driScrnPriv->fd;
 
bufmgr = calloc(1, sizeof(*bufmgr));
if (bufmgr == NULL)
@@ -1385,6 +1404,7 @@ brw_bufmgr_init(struct gen_device_info *devinfo, int fd)
 * fd so that its namespace does not clash with another.
 */
bufmgr->fd = fd;
+   bufmgr->screen = screen;
 
if (mtx_init(>lock, mtx_plain) != 0) {
   free(bufmgr);
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h 
b/src/mesa/drivers/dri/i965/brw_bufmgr.h
index ee91324043..f4bffef4e4 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
@@ -46,6 +46,7 @@ extern "C" {
 
 struct gen_device_info;
 struct brw_context;
+struct intel_screen;
 
 struct brw_bo {
/**
@@ -274,6 +275,11 @@ void brw_bo_wait_rendering(struct brw_bo *bo);
  */
 void brw_bufmgr_destroy(struct brw_bufmgr *bufmgr);
 
+/**
+ * Returns the file descriptor of the buffer manager
+ */
+int brw_bufmgr_fd(const struct brw_bufmgr *bufmgr);
+
 /**
  * Get the current tiling (and resulting swizzling) mode for the bo.
  *
@@ -313,7 +319,7 @@ int brw_bo_busy(struct brw_bo *bo);
 int brw_bo_madvise(struct brw_bo *bo, int madv);
 
 /* drm_bacon_bufmgr_gem.c */
-struct brw_bufmgr *brw_bufmgr_init(struct gen_device_info *devinfo, int fd);
+struct brw_bufmgr *brw_bufmgr_init(struct intel_screen *screen);
 struct brw_bo *brw_bo_gem_create_from_name(struct brw_bufmgr *bufmgr,
const char *name,
unsigned int handle);
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 19d5a2e350..456316deb7 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -75,6 +75,8 @@
 #include "util/debug.h"
 #include "isl/isl.h"
 
+#include "tools/i965_batchbuffer_logger.h"
+
 /***
  * Mesa's Driver Functions
  ***/
@@ -1062,6 +1064,7 @@ brwCreateContext(gl_api api,
 
vbo_use_buffer_objects(ctx);
vbo_always_unmap_buffers(ctx);
+   brw->have_active_batchbuffer = true;
 
brw_disk_cache_init(brw);
 
@@ -1076,6 +1079,7 @@ intelDe

[Mesa-dev] [PATCH 04/18] i965: assign BindingTableEntryCount of INTERFACE_DESCRIPTOR_DATA

2017-11-13 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/genX_state_upload.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 453b8e4add..1474d9a150 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -4249,6 +4249,7 @@ genX(upload_cs_state)(struct brw_context *brw)
   .SamplerStatePointer = stage_state->sampler_offset,
   .SamplerCount = DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 
4),
   .BindingTablePointer = stage_state->bind_bo_offset,
+  .BindingTableEntryCount = prog_data->binding_table.size_bytes / 4,
   .ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
   .NumberofThreadsinGPGPUThreadGroup = cs_prog_data->threads,
   .SharedLocalMemorySize = encode_slm_size(GEN_GEN,
-- 
2.14.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/18] intel/tools: define BatchbufferLogger application interface

2017-11-13 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Define the application interface to BatchbufferLogger. The
BatchbufferLogger needs from the application when a GL/GLES
API call is issues and returns. It will use this information
to correctly correlate batchbuffer additions to GL/GLES API
calls.

In addition, because the BatchbufferLogger will be intercepting
drmIoctl, an application does NOT link against it. Instead, one
needs to LD_PRELOAD the .so.

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/tools/i965_batchbuffer_logger_app.h | 157 ++
 1 file changed, 157 insertions(+)
 create mode 100644 src/intel/tools/i965_batchbuffer_logger_app.h

diff --git a/src/intel/tools/i965_batchbuffer_logger_app.h 
b/src/intel/tools/i965_batchbuffer_logger_app.h
new file mode 100644
index 00..b8405cd475
--- /dev/null
+++ b/src/intel/tools/i965_batchbuffer_logger_app.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef I965_BATCHBUFFER_LOGGER_APP_H
+#define I965_BATCHBUFFER_LOGGER_APP_H
+
+#include 
+#include 
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Enumeration value specifying the kind of data to write;
+ * Data written to a a i965_batchbuffer_logger_session
+ * can mark the start of a block, then end of a block
+ * of item within a block. Eac type has an optional
+ * name and value strings.
+ */
+enum i965_batchbuffer_logger_message_type_t {
+   I965_BATCHBUFFER_LOGGER_MESSAGE_BLOCK_BEGIN, /* start of a block */
+   I965_BATCHBUFFER_LOGGER_MESSAGE_BLOCK_END,   /* end of a block */
+   I965_BATCHBUFFER_LOGGER_MESSAGE_VALUE,   /* value in block */
+};
+
+/**
+ * An i965_batchbuffer_logger_session_params represents how
+ * to emit batchbuffer logger data.
+ */
+struct i965_batchbuffer_logger_session_params {
+   /**
+* Client data opaque pointer passed back to
+* function callbacks.
+*/
+   void *client_data;
+
+   /**
+* Function called by i965_batchbuffer_logger_app to write
+* data for the sessions.
+* \param client_data the pointer value in
+*i965_batchbuffer_logger_session_params::client_data
+* \param tp the message type
+* \param name of the data
+* \param name_length length of the name data
+* \param value of the data
+* \param value_length length of the value data
+*/
+   void (*write)(void *client_data,
+ enum i965_batchbuffer_logger_message_type_t tp,
+ const void *name, uint32_t name_length,
+ const void *value, uint32_t value_length);
+
+   /**
+* Function called by i965_batchbuffer_logger_app just
+* before an execbuffer2 call is sent to the kernel.
+* \param client_data the pointer value in
+*i965_batchbuffer_logger_session_params::client_data
+* \param id The unique ID of the execbuffer2 ioctl, this ID
+*   is written in the block information of each execbuffer2
+*   ioctl via write().
+*/
+   void (*pre_execbuffer2_ioctl)(void *client_data, unsigned int id);
+
+   /**
+* Function called by i965_batchbuffer_logger_app to close,
+* i.e. delete the session.
+*/
+   void (*close)(void *client_data);
+};
+
+/**
+ * An i965_batchbuffer_logger_session represents a logging
+ * session created; the batchbuffer logger can have multiple
+ * sessions active simutaneously.
+ */
+struct i965_batchbuffer_logger_session {
+   void *opaque;
+};
+
+/**
+ * An i965_batchbuffer_logger_app represents the hooking
+ * of an application into an i965_batchbuffer_logger
+ */
+struct i965_batchbuffer_logger_app {
+  /**
+   * To be called by the app before a GL/GLES or GLX/EGL call
+   */
+  void (*pre_call)(struct i965_batchbuffer_logger_app*,
+   unsig

[Mesa-dev] [PATCH 03/18] intel/tools: BatchBufferLogger define output file format of tool

2017-11-13 Thread kevin . rogovin
From: Kevin Rogovin <kevin.rogo...@intel.com>

Define the output format of the BatchbufferLogger. The output
is a sequence of blocks where blocks can have member blocks
or values. The top level blocks come from the application
calling into the BatchBufferLogger when an GL/GLES API call
is started and returns.

Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com>
---
 src/intel/tools/i965_batchbuffer_logger_output.h | 66 
 1 file changed, 66 insertions(+)
 create mode 100644 src/intel/tools/i965_batchbuffer_logger_output.h

diff --git a/src/intel/tools/i965_batchbuffer_logger_output.h 
b/src/intel/tools/i965_batchbuffer_logger_output.h
new file mode 100644
index 00..4e4c988d3c
--- /dev/null
+++ b/src/intel/tools/i965_batchbuffer_logger_output.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef I965_BATCHBUFFER_LOGGER_OUTPUT_H
+#define I965_BATCHBUFFER_LOGGER_OUTPUT_H
+
+#include 
+#include "i965_batchbuffer_logger_app.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * FILE format of the log is a seqence of (HEADER, NAME-DATA, VALUE-DATA)
+ * tuples where HEADER is the struct i965_batchbuffer_logger_header packed
+ * via fwrite, NAME-DATA is a sequence of HEADER.name_length characers and
+ * VALUE-DATA is a seqnece of HEADER.value_length characters. The start
+ * and end of blocks are used to give nested structure to the data (for
+ * example to make better JSON or XML output). Block endings will have that
+ * the name and value lengths are both ALWAYS 0. For other types, name should
+ * never be zero, but value can be (typically for blocks).
+ */
+
+struct i965_batchbuffer_logger_header {
+   enum i965_batchbuffer_logger_message_type_t type;
+
+   /**
+* length of the string for the name must be 0 for type
+* I965_BATCHBUFFER_LOGGER_MESSAGE_BLOCK_END
+*/
+   uint32_t name_length;
+
+   /**
+* length of the string for the value, must be 0 for types
+* I965_BATCHBUFFER_LOGGER_MESSAGE_BLOCK_BEGIN and
+* I965_BATCHBUFFER_LOGGER_MESSAGE_BLOCK_END
+*/
+   uint32_t value_length;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
-- 
2.14.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >