Re: [Mesa-dev] [PATCH 2/2] mesa: add fastpath version of the format conversion loop

2017-06-14 Thread Timothy Arceri

On 15/06/17 15:34, Jason Ekstrand wrote:
On Wed, Jun 14, 2017 at 10:26 PM, Timothy Arceri wrote:


If all the swizzles are inside the src channels range then we can just
grab the srcs we need rather than converting everything.

perf report shows convert_float() going from ~10% -> ~7% when
running the following glean test:

glean -o -v -v -v -t +pointAtten

Cc: Jason Ekstrand
---

  Hi Jason,

  I've only perf tested the above glean test. What did you use to
benchmark
  this when you wrote it?


The teximage-colors test has a benchmark flag which I added at the 
time.  I trust that a lot more than some random glean test. :-)


Cool thanks :) I'm seeing up to a 5x improvement in some tests; otherwise
largely unchanged :)




--Jason

  Thanks,
  Tim

  src/mesa/main/format_utils.c | 84
+---
  1 file changed, 63 insertions(+), 21 deletions(-)

diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c
index 65e65d4..1649ac0 100644
--- a/src/mesa/main/format_utils.c
+++ b/src/mesa/main/format_utils.c
@@ -799,41 +799,83 @@ swizzle_convert_try_memcpy(void *dst,
   *
   * \param   DST_TYPEthe C datatype of the destination
   * \param   DST_CHANS   the number of destination channels
   * \param   SRC_TYPEthe C datatype of the source
   * \param   SRC_CHANS   the number of source channels
   * \param   CONVan expression for converting from the
source data,
   *  storred in the variable "src", to the
destination
   *  format
   */
  #define SWIZZLE_CONVERT_LOOP(DST_TYPE, DST_CHANS, SRC_TYPE,
SRC_CHANS, CONV) \
-   do {   \
-  int s, j;   \
-  for (s = 0; s < count; ++s) {   \
- for (j = 0; j < SRC_CHANS; ++j) {\
-SRC_TYPE src = typed_src[j];  \
-tmp[j] = CONV;\
- }\
-  \
- typed_dst[0] = tmp[swizzle_x];   \
- if (DST_CHANS > 1) { \
-typed_dst[1] = tmp[swizzle_y];\
-if (DST_CHANS > 2) {  \
-   typed_dst[2] = tmp[swizzle_z]; \
-   if (DST_CHANS > 3) {   \
-  typed_dst[3] = tmp[swizzle_w];  \
-   }  \
-} \
- }\
- typed_src += SRC_CHANS;  \
- typed_dst += DST_CHANS;  \
-  }   \
+   do {  \
+  bool fast_path = false;\
+  if (DST_CHANS == 1 && swizzle_x < SRC_CHANS)   \
+ fast_path = true;   \
+  if (DST_CHANS == 2 && swizzle_x < SRC_CHANS && \
+  swizzle_y < SRC_CHANS) \
+ fast_path = true;   \
+  if (DST_CHANS == 3 && swizzle_x < SRC_CHANS && \
+  swizzle_y < SRC_CHANS && swizzle_z < SRC_CHANS)\
+ fast_path = true;   \
+  if (DST_CHANS == 4 && swizzle_x < SRC_CHANS && \
+  swizzle_y < SRC_CHANS &&   \
+  swizzle_z < SRC_CHANS &&   \
+  swizzle_w < SRC_CHANS) \
+ fast_path = true;   \
+ \
+  /* The fast path avoids copying/converting srcs we \
+   * will never use. \
+   */\
+  if (fast_path) {   \
+ for (int s = 0; s < count; ++s) {   \
+SRC_TYPE src = typed_src[swizzle_x]; \
+tmp[swizzle_x] = CONV;   \
+typed_dst[0] = tmp[swizzle_x];   \
+if (DST_CHANS > 1) { \
+   SRC_TYPE src = typed_src[swizzle_y];  \
+   tmp[swizzle_y] = CONV;\
+   typed_dst[1] = tmp[swizzle_y];\
+   if (DST_CHANS > 2) {  \
+  SRC_TYPE src = typed_src[swizzle_z];   \
 

Re: [Mesa-dev] [PATCH 2/2] mesa: add fastpath version of the format conversion loop

2017-06-14 Thread Jason Ekstrand
On Wed, Jun 14, 2017 at 10:26 PM, Timothy Arceri 
wrote:

> If all the swizzles are inside the src channels range then we can just
> grab the srcs we need rather than converting everything.
>
> perf report shows convert_float() going from ~10% -> ~7% when
> running the following glean test:
>
> glean -o -v -v -v -t +pointAtten
>
> Cc: Jason Ekstrand 
> ---
>
>  Hi Jason,
>
>  I've only perf tested the above glean test. What did you use to benchmark
>  this when you wrote it?
>

The teximage-colors test has a benchmark flag which I added at the time.  I
trust that a lot more than some random glean test. :-)

--Jason


>  Thanks,
>  Tim
>
>  src/mesa/main/format_utils.c | 84 ++
> +++---
>  1 file changed, 63 insertions(+), 21 deletions(-)
>
> diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c
> index 65e65d4..1649ac0 100644
> --- a/src/mesa/main/format_utils.c
> +++ b/src/mesa/main/format_utils.c
> @@ -799,41 +799,83 @@ swizzle_convert_try_memcpy(void *dst,
>   *
>   * \param   DST_TYPEthe C datatype of the destination
>   * \param   DST_CHANS   the number of destination channels
>   * \param   SRC_TYPEthe C datatype of the source
>   * \param   SRC_CHANS   the number of source channels
>   * \param   CONVan expression for converting from the source data,
>   *  storred in the variable "src", to the destination
>   *  format
>   */
>  #define SWIZZLE_CONVERT_LOOP(DST_TYPE, DST_CHANS, SRC_TYPE, SRC_CHANS,
> CONV) \
> -   do {   \
> -  int s, j;   \
> -  for (s = 0; s < count; ++s) {   \
> - for (j = 0; j < SRC_CHANS; ++j) {\
> -SRC_TYPE src = typed_src[j];  \
> -tmp[j] = CONV;\
> - }\
> -  \
> - typed_dst[0] = tmp[swizzle_x];   \
> - if (DST_CHANS > 1) { \
> -typed_dst[1] = tmp[swizzle_y];\
> -if (DST_CHANS > 2) {  \
> -   typed_dst[2] = tmp[swizzle_z]; \
> -   if (DST_CHANS > 3) {   \
> -  typed_dst[3] = tmp[swizzle_w];  \
> -   }  \
> -} \
> - }\
> - typed_src += SRC_CHANS;  \
> - typed_dst += DST_CHANS;  \
> -  }   \
> +   do {  \
> +  bool fast_path = false;\
> +  if (DST_CHANS == 1 && swizzle_x < SRC_CHANS)   \
> + fast_path = true;   \
> +  if (DST_CHANS == 2 && swizzle_x < SRC_CHANS && \
> +  swizzle_y < SRC_CHANS) \
> + fast_path = true;   \
> +  if (DST_CHANS == 3 && swizzle_x < SRC_CHANS && \
> +  swizzle_y < SRC_CHANS && swizzle_z < SRC_CHANS)\
> + fast_path = true;   \
> +  if (DST_CHANS == 4 && swizzle_x < SRC_CHANS && \
> +  swizzle_y < SRC_CHANS &&   \
> +  swizzle_z < SRC_CHANS &&   \
> +  swizzle_w < SRC_CHANS) \
> + fast_path = true;   \
> + \
> +  /* The fast path avoids copying/converting srcs we \
> +   * will never use. \
> +   */\
> +  if (fast_path) {   \
> + for (int s = 0; s < count; ++s) {   \
> +SRC_TYPE src = typed_src[swizzle_x]; \
> +tmp[swizzle_x] = CONV;   \
> +typed_dst[0] = tmp[swizzle_x];   \
> +if (DST_CHANS > 1) { \
> +   SRC_TYPE src = typed_src[swizzle_y];  \
> +   tmp[swizzle_y] = CONV;\
> +   typed_dst[1] = tmp[swizzle_y];\
> +   if (DST_CHANS > 2) {  \
> +  SRC_TYPE src = typed_src[swizzle_z];   \
> +  tmp[swizzle_z] = CONV; \
> +  typed_dst[2] = tmp[swizzle_z]; \
> +  if (DST_CHANS > 3) {   \
> + SRC_TYPE src = typed_src[swizzle_w];\
> + tmp[swizzle_w] = CONV;  \
> + typed_dst[3] = tmp[swizzle_w];  \
> +

[Mesa-dev] [PATCH 1/2] mesa: make _mesa_swizzle_and_convert() static

2017-06-14 Thread Timothy Arceri
---
 src/mesa/main/format_utils.c | 91 
 src/mesa/main/format_utils.h |  9 -
 2 files changed, 49 insertions(+), 51 deletions(-)

diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c
index d16d69c..65e65d4 100644
--- a/src/mesa/main/format_utils.c
+++ b/src/mesa/main/format_utils.c
@@ -33,20 +33,27 @@ const mesa_array_format RGBA32_FLOAT =
 const mesa_array_format RGBA8_UBYTE =
MESA_ARRAY_FORMAT(1, 0, 0, 1, 4, 0, 1, 2, 3);
 
 const mesa_array_format RGBA32_UINT =
MESA_ARRAY_FORMAT(4, 0, 0, 0, 4, 0, 1, 2, 3);
 
 const mesa_array_format RGBA32_INT =
MESA_ARRAY_FORMAT(4, 1, 0, 0, 4, 0, 1, 2, 3);
 
 static void
+swizzle_and_convert(void *void_dst, enum mesa_array_format_datatype dst_type,
+int num_dst_channels, const void *void_src,
+enum mesa_array_format_datatype src_type,
+int num_src_channels, const uint8_t swizzle[4],
+bool normalized, int count);
+
+static void
 invert_swizzle(uint8_t dst[4], const uint8_t src[4])
 {
int i, j;
 
dst[0] = MESA_FORMAT_SWIZZLE_NONE;
dst[1] = MESA_FORMAT_SWIZZLE_NONE;
dst[2] = MESA_FORMAT_SWIZZLE_NONE;
dst[3] = MESA_FORMAT_SWIZZLE_NONE;
 
for (i = 0; i < 4; ++i)
@@ -408,23 +415,23 @@ _mesa_format_convert(void *void_dst, uint32_t dst_format, 
size_t dst_stride,
}
 
if (src_array_format && dst_array_format) {
   assert(_mesa_array_format_is_normalized(src_array_format) ==
  _mesa_array_format_is_normalized(dst_array_format));
 
   compute_src2dst_component_mapping(src2rgba, rgba2dst, rebase_swizzle,
 src2dst);
 
   for (row = 0; row < height; ++row) {
- _mesa_swizzle_and_convert(dst, dst_type, dst_num_channels,
-   src, src_type, src_num_channels,
-   src2dst, normalized, width);
+ swizzle_and_convert(dst, dst_type, dst_num_channels,
+ src, src_type, src_num_channels,
+ src2dst, normalized, width);
  src += src_stride;
  dst += dst_stride;
   }
   return;
}
 
/* At this point, we're fresh out of fast-paths and we need to convert
 * to float, uint32, or, if we're lucky, uint8.
 */
dst_integer = false;
@@ -497,134 +504,134 @@ _mesa_format_convert(void *void_dst, uint32_t 
dst_format, size_t dst_stride,
* of the packed formats are unsigned, so we can just always use
* _mesa_swizzle_and_convert for signed formats, which is aware of the
* truncation problem.
*/
   common_type = is_signed ? MESA_ARRAY_FORMAT_TYPE_INT :
 MESA_ARRAY_FORMAT_TYPE_UINT;
   if (src_array_format) {
  compute_rebased_rgba_component_mapping(src2rgba, rebase_swizzle,
 rebased_src2rgba);
  for (row = 0; row < height; ++row) {
-_mesa_swizzle_and_convert(tmp_uint + row * width, common_type, 4,
-  src, src_type, src_num_channels,
-  rebased_src2rgba, normalized, width);
+swizzle_and_convert(tmp_uint + row * width, common_type, 4,
+src, src_type, src_num_channels,
+rebased_src2rgba, normalized, width);
 src += src_stride;
  }
   } else {
  for (row = 0; row < height; ++row) {
 _mesa_unpack_uint_rgba_row(src_format, width,
src, tmp_uint + row * width);
 if (rebase_swizzle)
-   _mesa_swizzle_and_convert(tmp_uint + row * width, common_type, 
4,
- tmp_uint + row * width, common_type, 
4,
- rebase_swizzle, false, width);
+   swizzle_and_convert(tmp_uint + row * width, common_type, 4,
+   tmp_uint + row * width, common_type, 4,
+   rebase_swizzle, false, width);
 src += src_stride;
  }
   }
 
   /* At this point, we have already done the truncation if the source is
* signed but the destination is unsigned, so no need to force the
* _mesa_swizzle_and_convert path.
*/
   if (dst_format_is_mesa_array_format) {
  for (row = 0; row < height; ++row) {
-_mesa_swizzle_and_convert(dst, dst_type, dst_num_channels,
-  tmp_uint + row * width, common_type, 4,
-  rgba2dst, normalized, width);
+swizzle_and_convert(dst, dst_type, dst_num_channels,
+tmp_uint + row * width, common_type, 4,
+rgba2dst, normalized, width);
 dst += 

[Mesa-dev] [PATCH 2/2] mesa: add fastpath version of the format conversion loop

2017-06-14 Thread Timothy Arceri
If all the swizzles are inside the src channels range then we can just
grab the srcs we need rather than converting everything.

perf report shows convert_float() going from ~10% -> ~7% when
running the following glean test:

glean -o -v -v -v -t +pointAtten

Cc: Jason Ekstrand 
---

 Hi Jason,

 I've only perf tested the above glean test. What did you use to benchmark
 this when you wrote it?

 Thanks,
 Tim

 src/mesa/main/format_utils.c | 84 +---
 1 file changed, 63 insertions(+), 21 deletions(-)

diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c
index 65e65d4..1649ac0 100644
--- a/src/mesa/main/format_utils.c
+++ b/src/mesa/main/format_utils.c
@@ -799,41 +799,83 @@ swizzle_convert_try_memcpy(void *dst,
  *
  * \param   DST_TYPEthe C datatype of the destination
  * \param   DST_CHANS   the number of destination channels
  * \param   SRC_TYPEthe C datatype of the source
  * \param   SRC_CHANS   the number of source channels
  * \param   CONVan expression for converting from the source data,
  *  storred in the variable "src", to the destination
  *  format
  */
 #define SWIZZLE_CONVERT_LOOP(DST_TYPE, DST_CHANS, SRC_TYPE, SRC_CHANS, CONV) \
-   do {   \
-  int s, j;   \
-  for (s = 0; s < count; ++s) {   \
- for (j = 0; j < SRC_CHANS; ++j) {\
-SRC_TYPE src = typed_src[j];  \
-tmp[j] = CONV;\
- }\
-  \
- typed_dst[0] = tmp[swizzle_x];   \
- if (DST_CHANS > 1) { \
-typed_dst[1] = tmp[swizzle_y];\
-if (DST_CHANS > 2) {  \
-   typed_dst[2] = tmp[swizzle_z]; \
-   if (DST_CHANS > 3) {   \
-  typed_dst[3] = tmp[swizzle_w];  \
-   }  \
-} \
- }\
- typed_src += SRC_CHANS;  \
- typed_dst += DST_CHANS;  \
-  }   \
+   do {  \
+  bool fast_path = false;\
+  if (DST_CHANS == 1 && swizzle_x < SRC_CHANS)   \
+ fast_path = true;   \
+  if (DST_CHANS == 2 && swizzle_x < SRC_CHANS && \
+  swizzle_y < SRC_CHANS) \
+ fast_path = true;   \
+  if (DST_CHANS == 3 && swizzle_x < SRC_CHANS && \
+  swizzle_y < SRC_CHANS && swizzle_z < SRC_CHANS)\
+ fast_path = true;   \
+  if (DST_CHANS == 4 && swizzle_x < SRC_CHANS && \
+  swizzle_y < SRC_CHANS &&   \
+  swizzle_z < SRC_CHANS &&   \
+  swizzle_w < SRC_CHANS) \
+ fast_path = true;   \
+ \
+  /* The fast path avoids copying/converting srcs we \
+   * will never use. \
+   */\
+  if (fast_path) {   \
+ for (int s = 0; s < count; ++s) {   \
+SRC_TYPE src = typed_src[swizzle_x]; \
+tmp[swizzle_x] = CONV;   \
+typed_dst[0] = tmp[swizzle_x];   \
+if (DST_CHANS > 1) { \
+   SRC_TYPE src = typed_src[swizzle_y];  \
+   tmp[swizzle_y] = CONV;\
+   typed_dst[1] = tmp[swizzle_y];\
+   if (DST_CHANS > 2) {  \
+  SRC_TYPE src = typed_src[swizzle_z];   \
+  tmp[swizzle_z] = CONV; \
+  typed_dst[2] = tmp[swizzle_z]; \
+  if (DST_CHANS > 3) {   \
+ SRC_TYPE src = typed_src[swizzle_w];\
+ tmp[swizzle_w] = CONV;  \
+ typed_dst[3] = tmp[swizzle_w];  \
+  }  \
+   } \
+}\
+typed_src += SRC_CHANS;  \
+typed_dst += DST_CHANS;  \
+ }   \
+  } else {   \
+   

[Mesa-dev] [PATCH] automake: increase the MESA_GIT_SHA1 hash id length from 7 to 10 digits

2017-06-14 Thread Brian Paul
The SCons build has been using 10 digits of the git hash id for the
MESA_GIT_SHA1 string in git_sha1.h for about a year now.  I bumped it
up after running into a case where a 7-digit hash ID was ambiguous.

This patch makes the same change for the autotools build.

The command "git log | grep "^commit" | cut -b 8-14 | sort | uniq -d"
shows there are currently 17 cases where 7 digits of hash id are
ambiguous on master (probably quite a few more if we'd consider other
branches).

Instead of using "git log -n 1 --oneline" use
"git rev-parse --short=10 HEAD" to get the HEAD hash id.
---
 src/Makefile.am | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Makefile.am b/src/Makefile.am
index aa5f8aa..401e632 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -28,9 +28,9 @@ git_sha1.h.tmp:
@touch git_sha1.h.tmp
@if test -e $(top_srcdir)/.git; then \
if which git > /dev/null; then \
-   git --git-dir=$(top_srcdir)/.git log -n 1 --oneline | \
-   sed 's/^\([^ ]*\) .*/#define MESA_GIT_SHA1 "git-\1"/' \
-   > git_sha1.h.tmp ; \
+   git --git-dir=$(top_srcdir)/.git rev-parse --short=10 HEAD 
| \
+  sed 's/^\(.*\)/#define MESA_GIT_SHA1 "git-\1"/' \
+  > git_sha1.h.tmp ; \
fi \
fi
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600: remove unnecessary NULL check in r600_shader_select

2017-06-14 Thread Vlad Golovkin
r600_shader_select is always called through the macro SELECT_SHADER_OR_FAIL,
which never passes NULL pointers as parameter 'dirty'.
---
 src/gallium/drivers/r600/r600_state_common.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c
index 8ace7793f0..51c4c6dc30 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -803,8 +803,7 @@ static int r600_shader_select(struct pipe_context *ctx,
sel->num_shaders++;
}
 
-   if (dirty)
-   *dirty = true;
+   *dirty = true;
 
shader->next_variant = sel->current;
sel->current = shader;
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH V2] mesa: stop assigning unused storage for non-bindless opaque types

2017-06-14 Thread Timothy Arceri
The storage was once used by get_sampler_uniform_value() but that
was fixed long ago to use the uniform storage assigned by the
linker.

By not assigning storage for images/samplers the constant buffer
for gallium drivers will be reduced which could result in small
perf improvements.

V2: rebase on ARB_bindless_texture
---
 src/mesa/program/ir_to_mesa.cpp | 42 ++---
 1 file changed, 6 insertions(+), 36 deletions(-)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 775211c..54b848a 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2440,62 +2440,32 @@ private:
 
 } /* anonymous namespace */
 
 void
 add_uniform_to_shader::visit_field(const glsl_type *type, const char *name,
bool /* row_major */,
const glsl_type * /* record_type */,
const enum glsl_interface_packing,
bool /* last_field */)
 {
-   /* atomics don't get real storage */
-   if (type->contains_atomic())
+   /* opaque types don't use storage in the param list unless they are
+* bindless textures or images.
+*/
+   if (type->contains_opaque() && !var->data.bindless)
   return;
 
-   gl_register_file file;
-   if (type->without_array()->is_sampler() && !var->data.bindless) {
-  file = PROGRAM_SAMPLER;
-   } else {
-  file = PROGRAM_UNIFORM;
-   }
-
int index = _mesa_lookup_parameter_index(params, name);
if (index < 0) {
   unsigned size = type_size(type) * 4;
 
-  index = _mesa_add_parameter(params, file, name, size, type->gl_type,
- NULL, NULL);
-
-  /* Sampler uniform values are stored in prog->SamplerUnits,
-   * and the entry in that array is selected by this index we
-   * store in ParameterValues[].
-   */
-  if (file == PROGRAM_SAMPLER) {
-unsigned location;
-const bool found =
-   this->shader_program->UniformHash->get(location,
-  
params->Parameters[index].Name);
-assert(found);
-
-if (!found)
-   return;
-
-struct gl_uniform_storage *storage =
->shader_program->data->UniformStorage[location];
-
- assert(storage->type->is_sampler() &&
-storage->opaque[shader_type].active);
-
-for (unsigned int j = 0; j < size / 4; j++)
-params->ParameterValues[index + j][0].f =
-   storage->opaque[shader_type].index + j;
-  }
+  index = _mesa_add_parameter(params, PROGRAM_UNIFORM, name, size,
+  type->gl_type, NULL, NULL);
}
 
/* The first part of the uniform that's processed determines the base
 * location of the whole uniform (for structures).
 */
if (this->idx < 0)
   this->idx = index;
 }
 
 /**
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/5] i965: Only do depth resolves prior to clearing when needed

2017-06-14 Thread Jason Ekstrand
---
 src/mesa/drivers/dri/i965/brw_clear.c | 35 +--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_clear.c 
b/src/mesa/drivers/dri/i965/brw_clear.c
index f5a990d..99ddc4e 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -164,8 +164,39 @@ brw_fast_clear_depth(struct gl_context *ctx)
 * flags out of the HiZ buffer into the real depth buffer.
 */
if (mt->fast_clear_color.f32[0] != ctx->Depth.Clear) {
-  intel_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS,
-   0, INTEL_REMAINING_LAYERS, true, false);
+  for (uint32_t level = mt->first_level; level <= mt->last_level; level++) 
{
+ if (!intel_miptree_level_has_hiz(mt, level))
+continue;
+
+ for (uint32_t layer = 0; layer < mt->level[level].depth; layer++) {
+if (level == depth_irb->mt_level &&
+layer >= depth_irb->mt_layer &&
+layer < depth_irb->mt_layer + num_layers) {
+   /* We're going to clear this layer anyway.  Leave it alone. */
+   continue;
+}
+
+enum isl_aux_state aux_state =
+   intel_miptree_get_aux_state(mt, level, layer);
+
+if (aux_state != ISL_AUX_STATE_CLEAR &&
+aux_state != ISL_AUX_STATE_COMPRESSED_CLEAR) {
+   /* This slice doesn't have any fast-cleared bits. */
+   continue;
+}
+
+/* If we got here, then the level may have fast-clear bits that
+ * use the old clear value.  We need to do a depth resolve to get
+ * rid of their use of the clear value before we can change it.
+ * Fortunately, few applications ever change their depth clear
+ * value so this shouldn't happen often.
+ */
+intel_hiz_exec(brw, mt, level, layer, 1,
+   BLORP_HIZ_OP_DEPTH_RESOLVE);
+intel_miptree_set_aux_state(brw, mt, level, layer, 1,
+ISL_AUX_STATE_RESOLVED);
+ }
+  }
   mt->fast_clear_color.f32[0] = ctx->Depth.Clear;
}
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/5] i965: Move surface resolves back to draw/dispatch time

2017-06-14 Thread Jason Ekstrand
This is effectively a revert of 388f02729bbf88ba104f4f8ee1fdf005a240969c
though much code has been added since.  Kristian initially moved it to
try and avoid locking problems with meta-based resolves.  Now that meta
is gone from the resolve path (for good this time, we hope), we can move
it back.  The problem with having it in intel_update_state was that the
UpdateState hook gets called by core mesa directly and all sorts of
things will cause an UpdateState to get called, which may trigger resolves
at inopportune times.  In particular, it gets called by _mesa_Clear and,
if we have a HiZ buffer in the INVALID_AUX state, causes a HiZ resolve
right before the clear which is pointless.  By moving it back to
try_draw_prims time, we know it will only get called right before a draw
which is where we want it.
---
 src/mesa/drivers/dri/i965/brw_compute.c |   2 +
 src/mesa/drivers/dri/i965/brw_context.c | 123 
 src/mesa/drivers/dri/i965/brw_context.h |   2 +
 src/mesa/drivers/dri/i965/brw_draw.c| 139 
 4 files changed, 143 insertions(+), 123 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_compute.c 
b/src/mesa/drivers/dri/i965/brw_compute.c
index 8046153..2867a14 100644
--- a/src/mesa/drivers/dri/i965/brw_compute.c
+++ b/src/mesa/drivers/dri/i965/brw_compute.c
@@ -188,6 +188,8 @@ brw_dispatch_compute_common(struct gl_context *ctx)
 
brw_validate_textures(brw);
 
+   brw_predraw_resolve_inputs(brw);
+
const int sampler_state_size = 16; /* 16 bytes */
estimated_buffer_space_needed = 512; /* batchbuffer commands */
estimated_buffer_space_needed += (BRW_MAX_TEX_UNIT *
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 5433f90..b8db9d0 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -168,140 +168,17 @@ intel_update_framebuffer(struct gl_context *ctx,
  fb->DefaultGeometry.NumSamples);
 }
 
-static bool
-intel_disable_rb_aux_buffer(struct brw_context *brw, const struct brw_bo *bo)
-{
-   const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
-   bool found = false;
-
-   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
-  const struct intel_renderbuffer *irb =
- intel_renderbuffer(fb->_ColorDrawBuffers[i]);
-
-  if (irb && irb->mt->bo == bo) {
- found = brw->draw_aux_buffer_disabled[i] = true;
-  }
-   }
-
-   return found;
-}
-
 static void
 intel_update_state(struct gl_context * ctx)
 {
GLuint new_state = ctx->NewState;
struct brw_context *brw = brw_context(ctx);
-   struct intel_texture_object *tex_obj;
-   struct intel_renderbuffer *depth_irb;
 
if (ctx->swrast_context)
   _swrast_InvalidateState(ctx, new_state);
 
brw->NewGLState |= new_state;
 
-   _mesa_unlock_context_textures(ctx);
-
-   intel_prepare_render(brw);
-
-   /* Resolve the depth buffer's HiZ buffer. */
-   depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
-   if (depth_irb && depth_irb->mt) {
-  intel_miptree_prepare_depth(brw, depth_irb->mt,
-  depth_irb->mt_level,
-  depth_irb->mt_layer,
-  depth_irb->layer_count);
-   }
-
-   memset(brw->draw_aux_buffer_disabled, 0,
-  sizeof(brw->draw_aux_buffer_disabled));
-
-   /* Resolve depth buffer and render cache of each enabled texture. */
-   int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
-   for (int i = 0; i <= maxEnabledUnit; i++) {
-  if (!ctx->Texture.Unit[i]._Current)
-continue;
-  tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
-  if (!tex_obj || !tex_obj->mt)
-continue;
-
-  /* We need inte_texture_object::_Format to be valid */
-  intel_finalize_mipmap_tree(brw, i);
-
-  bool aux_supported;
-  intel_miptree_prepare_texture(brw, tex_obj->mt, tex_obj->_Format,
-_supported);
-
-  if (!aux_supported && brw->gen >= 9 &&
-  intel_disable_rb_aux_buffer(brw, tex_obj->mt->bo)) {
- perf_debug("Sampling renderbuffer with non-compressible format - "
-"turning off compression");
-  }
-
-  brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
-
-  if (tex_obj->base.StencilSampling ||
-  tex_obj->mt->format == MESA_FORMAT_S_UINT8) {
- intel_update_r8stencil(brw, tex_obj->mt);
-  }
-   }
-
-   /* Resolve color for each active shader image. */
-   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
-  const struct gl_program *prog = ctx->_Shader->CurrentProgram[i];
-
-  if (unlikely(prog && prog->info.num_images)) {
- for (unsigned j = 0; j < prog->info.num_images; j++) {
-struct gl_image_unit *u =
-   >ImageUnits[prog->sh.ImageUnits[j]];
-tex_obj = intel_texture_object(u->TexObj);
-
-  

[Mesa-dev] [PATCH 3/5] i965: Simplify HiZ clears a bit

2017-06-14 Thread Jason Ekstrand
No need for all that switching when we can just assign a nice little
variable with the number of layers.
---
 src/mesa/drivers/dri/i965/brw_clear.c | 25 -
 1 file changed, 8 insertions(+), 17 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_clear.c 
b/src/mesa/drivers/dri/i965/brw_clear.c
index 138997d..f5a990d 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -158,6 +158,8 @@ brw_fast_clear_depth(struct gl_context *ctx)
   break;
}
 
+   const uint32_t num_layers = depth_att->Layered ? depth_irb->layer_count : 1;
+
/* If we're clearing to a new clear value, then we need to resolve any clear
 * flags out of the HiZ buffer into the real depth buffer.
 */
@@ -167,27 +169,16 @@ brw_fast_clear_depth(struct gl_context *ctx)
   mt->fast_clear_color.f32[0] = ctx->Depth.Clear;
}
 
-   if (depth_att->Layered) {
-  intel_hiz_exec(brw, mt, depth_irb->mt_level,
- depth_irb->mt_layer, depth_irb->layer_count,
- BLORP_HIZ_OP_DEPTH_CLEAR);
-   } else {
-  intel_hiz_exec(brw, mt, depth_irb->mt_level, depth_irb->mt_layer, 1,
- BLORP_HIZ_OP_DEPTH_CLEAR);
-   }
+   intel_hiz_exec(brw, mt, depth_irb->mt_level,
+  depth_irb->mt_layer, num_layers,
+  BLORP_HIZ_OP_DEPTH_CLEAR);
 
/* Now, the HiZ buffer contains data that needs to be resolved to the depth
 * buffer.
 */
-   if (depth_att->Layered) {
-  intel_miptree_set_aux_state(brw, mt, depth_irb->mt_level,
-  depth_irb->mt_layer, depth_irb->layer_count,
-  ISL_AUX_STATE_CLEAR);
-   } else {
-  intel_miptree_set_aux_state(brw, mt, depth_irb->mt_level,
-  depth_irb->mt_layer, 1,
-  ISL_AUX_STATE_CLEAR);
-   }
+   intel_miptree_set_aux_state(brw, mt, depth_irb->mt_level,
+   depth_irb->mt_layer, num_layers,
+   ISL_AUX_STATE_CLEAR);
 
return true;
 }
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/5] i965/clear: Don't perform redundant depth clears

2017-06-14 Thread Jason Ekstrand
We already have this little optimization for color clears.  Now that
we're actually tracking whether or not a slice has any fast-clear
blocks, it's easy enough to add for depth clears too.
---
 src/mesa/drivers/dri/i965/brw_clear.c | 34 ---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 14 ++-
 2 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_clear.c 
b/src/mesa/drivers/dri/i965/brw_clear.c
index 99ddc4e..5fa4ae7 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -200,9 +200,37 @@ brw_fast_clear_depth(struct gl_context *ctx)
   mt->fast_clear_color.f32[0] = ctx->Depth.Clear;
}
 
-   intel_hiz_exec(brw, mt, depth_irb->mt_level,
-  depth_irb->mt_layer, num_layers,
-  BLORP_HIZ_OP_DEPTH_CLEAR);
+   bool need_clear = false;
+   for (unsigned a = 0; a < num_layers; a++) {
+  enum isl_aux_state aux_state =
+ intel_miptree_get_aux_state(mt, depth_irb->mt_level,
+ depth_irb->mt_layer + a);
+
+  if (aux_state != ISL_AUX_STATE_CLEAR) {
+ need_clear = true;
+ break;
+  }
+   }
+
+   if (!need_clear) {
+  /* If all of the layers we intend to clear are already in the clear
+   * state then simply updating the miptree fast clear value is sufficient
+   * to change their clear value.
+   */
+  return true;
+   }
+
+   for (unsigned a = 0; a < num_layers; a++) {
+  enum isl_aux_state aux_state =
+ intel_miptree_get_aux_state(mt, depth_irb->mt_level,
+ depth_irb->mt_layer + a);
+
+  if (aux_state != ISL_AUX_STATE_CLEAR) {
+ intel_hiz_exec(brw, mt, depth_irb->mt_level,
+depth_irb->mt_layer + a, 1,
+BLORP_HIZ_OP_DEPTH_CLEAR);
+  }
+   }
 
/* Now, the HiZ buffer contains data that needs to be resolved to the depth
 * buffer.
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index c19d2d5..8b893dd 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -2217,8 +2217,20 @@ intel_miptree_prepare_hiz_access(struct brw_context *brw,
  uint32_t level, uint32_t layer,
  bool hiz_supported, bool fast_clear_supported)
 {
+   enum isl_aux_state aux_state = intel_miptree_get_aux_state(mt, level, 
layer);
+
+   /* On Sandy Bridge, any usage of depth with HiZ enabled is liable to flush
+* out clear color blocks.  If the slice is in the clear state, it should
+* now be considered to be in the compressed with clear state.
+*/
+   if (brw->gen == 6 && aux_state == ISL_AUX_STATE_CLEAR && hiz_supported) {
+  assert(fast_clear_supported);
+  intel_miptree_set_aux_state(brw, mt, level, layer, 1,
+  ISL_AUX_STATE_COMPRESSED_CLEAR);
+   }
+
enum blorp_hiz_op hiz_op = BLORP_HIZ_OP_NONE;
-   switch (intel_miptree_get_aux_state(mt, level, layer)) {
+   switch (aux_state) {
case ISL_AUX_STATE_CLEAR:
case ISL_AUX_STATE_COMPRESSED_CLEAR:
   if (!hiz_supported || !fast_clear_supported)
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/5] i965: Enable non-CCS_E fast-clears on gen9+

2017-06-14 Thread Jason Ekstrand
Sky Lake and above can fast-clear exactly the same set of formats as
older hardware.  The only restriction is that you can't *texture* from
it unless the format supports CCS_E but you can fast-clear and render to
it just fine.  All of the code exists and now that we have sane resolves,
we can trivially turn it on.

Reviewed-by: Topi Pohjolainen 
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 18 --
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 02e74ca..c19d2d5 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -207,13 +207,7 @@ intel_miptree_supports_non_msrt_fast_clear(struct 
brw_context *brw,
if (!brw->format_supported_as_render_target[mt->format])
   return false;
 
-   if (brw->gen >= 9) {
-  mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format);
-  const enum isl_format isl_format =
- brw_isl_format_for_mesa_format(linear_format);
-  return isl_format_supports_ccs_e(>screen->devinfo, isl_format);
-   } else
-  return true;
+   return true;
 }
 
 /* On Gen9 support for color buffer compression was extended to single
@@ -257,16 +251,12 @@ intel_miptree_supports_lossless_compressed(struct 
brw_context *brw,
if (_mesa_get_format_datatype(mt->format) == GL_FLOAT)
   return false;
 
-   /* Fast clear mechanism and lossless compression go hand in hand. */
+   /* Fast clear support is a pre-requisite for lossless compression */
if (!intel_miptree_supports_non_msrt_fast_clear(brw, mt))
   return false;
 
-   /* Fast clear can be also used to clear srgb surfaces by using equivalent
-* linear format. This trick, however, can't be extended to be used with
-* lossless compression and therefore a check is needed to see if the format
-* really is linear.
-*/
-   return _mesa_get_srgb_format_linear(mt->format) == mt->format;
+   enum isl_format isl_format = brw_isl_format_for_mesa_format(mt->format);
+   return isl_format_supports_ccs_e(&brw->screen->devinfo, isl_format);
 }
 
 /**
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 101432] make check DispatchSanity_test.GL31_CORE regression

2017-06-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=101432

Michel Dänzer  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #1 from Michel Dänzer  ---
Thanks for the report, fixed in Git:

Module: Mesa
Branch: master
Commit: 1c00af4264d795bf1fb3d13b7a966722a5984c4a
URL:   
http://cgit.freedesktop.org/mesa/mesa/commit/?id=1c00af4264d795bf1fb3d13b7a966722a5984c4a

Author: Samuel Pitoiset 
Date:   Wed Jun 14 18:08:09 2017 +0200

mesa: fix 'make check' by moving bindless functions at the right place

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: fix 'make check' by moving bindless functions at the right place

2017-06-14 Thread Michel Dänzer
On 15/06/17 01:08 AM, Samuel Pitoiset wrote:
> Fixes: 5f249b9f05e ("mapi: add GL_ARB_bindless_texture entry points")
> Reported-by: Mark Janes 
> Signed-off-by: Samuel Pitoiset 

Pushed, thanks!


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 101432] make check DispatchSanity_test.GL31_CORE regression

2017-06-14 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=101432

Bug ID: 101432
   Summary: make check DispatchSanity_test.GL31_CORE regression
   Product: Mesa
   Version: git
  Hardware: x86-64 (AMD64)
OS: All
Status: NEW
  Keywords: regression
  Severity: normal
  Priority: medium
 Component: Mesa core
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: v...@freedesktop.org
QA Contact: mesa-dev@lists.freedesktop.org

mesa: b6d56c747cbce7b9ca297be1c6f2c2b7ca91842d (master 17.2.0-devel)

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] anv/i965: drop libdrm_intel dependency completely

2017-06-14 Thread Jason Ekstrand
On Wed, Jun 14, 2017 at 5:22 PM, Lionel Landwerlin <
lionel.g.landwer...@intel.com> wrote:

> With Ken's work to drop the library dependency on libdrm_intel, we now
> only depend on libdrm for the kernel uapi headers it provides. It
> seems like we're better off just embedding those headers ourselves,
> making the lives of people developing new features tightly
> integrated with the kernel a tiny bit easier.
>
> This change also makes it a bit more obvious what cflags/libs are
> required by the i915 drivers vs i965, by renaming INTEL_CFLAGS/LIBS
> into I915_CFLAGS/LIBS.
>
> Headers were generated from drm-tip on the following commit :
>
>commit cafd1e4df1e6e039268c4e4b1a55c88915d21f2e
>Author: Rodrigo Vivi 
>Date:   Wed Jun 14 12:56:57 2017 -0700
>
>drm-tip: 2017y-06m-14d-19h-56m-24s UTC integration manifest
>
> v2: Use installed files from the kernel (Daniel Vetter)
>

I'm not even going to pretend to review the build system patches.  However,
I do think that this is something we should do.  Both are

Acked-by: Jason Ekstrand 


>
> Signed-off-by: Lionel Landwerlin 
> ---
>  configure.ac|6 +-
>  src/gallium/drivers/i915/Automake.inc   |2 +-
>  src/gallium/targets/pipe-loader/Makefile.am |2 +-
>  src/gallium/winsys/i915/drm/Makefile.am |2 +-
>  src/intel/Makefile.drm.am   |   22 +
>  src/intel/Makefile.sources  |6 +
>  src/intel/Makefile.tools.am |3 +-
>  src/intel/Makefile.vulkan.am|4 +-
>  src/intel/drm/drm.h |  925 +
>  src/intel/drm/drm_fourcc.h  |  358 +++
>  src/intel/drm/drm_mode.h|  739 ++
>  src/intel/drm/i915_drm.h| 1459
> +++
>  src/mesa/drivers/dri/i915/Makefile.am   |4 +-
>  src/mesa/drivers/dri/i965/Makefile.am   |2 +-
>  14 files changed, 3520 insertions(+), 14 deletions(-)
>  create mode 100644 src/intel/Makefile.drm.am
>  create mode 100644 src/intel/drm/drm.h
>  create mode 100644 src/intel/drm/drm_fourcc.h
>  create mode 100644 src/intel/drm/drm_mode.h
>  create mode 100644 src/intel/drm/i915_drm.h
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10.5/24] st/mesa: don't set sampler states for TBOs

2017-06-14 Thread Marek Olšák
From: Marek Olšák 

---

This prevents a crash later in the series, and it's generally a good
thing to do.

 src/mesa/state_tracker/st_atom_sampler.c | 11 +--
 src/mesa/state_tracker/st_texture.c  |  5 +++--
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_sampler.c 
b/src/mesa/state_tracker/st_atom_sampler.c
index 21af5ef..7a85a55 100644
--- a/src/mesa/state_tracker/st_atom_sampler.c
+++ b/src/mesa/state_tracker/st_atom_sampler.c
@@ -227,20 +227,21 @@ void
 st_convert_sampler_from_unit(const struct st_context *st,
  struct pipe_sampler_state *sampler,
  GLuint texUnit)
 {
const struct gl_texture_object *texobj;
struct gl_context *ctx = st->ctx;
const struct gl_sampler_object *msamp;
 
texobj = ctx->Texture.Unit[texUnit]._Current;
assert(texobj);
+   assert(texobj->Target != GL_TEXTURE_BUFFER);
 
msamp = _mesa_get_samplerobj(ctx, texUnit);
 
st_convert_sampler(st, texobj, msamp, sampler);
 
sampler->lod_bias += ctx->Texture.Unit[texUnit].LodBias;
sampler->seamless_cube_map |= ctx->Texture.CubeMapSeamless;
 }
 
 
@@ -248,37 +249,43 @@ st_convert_sampler_from_unit(const struct st_context *st,
  * Update the gallium driver's sampler state for fragment, vertex or
  * geometry shader stage.
  */
 static void
 update_shader_samplers(struct st_context *st,
enum pipe_shader_type shader_stage,
const struct gl_program *prog,
struct pipe_sampler_state *samplers,
unsigned *out_num_samplers)
 {
+   struct gl_context *ctx = st->ctx;
GLbitfield samplers_used = prog->SamplersUsed;
GLbitfield free_slots = ~prog->SamplersUsed;
GLbitfield external_samplers_used = prog->ExternalSamplersUsed;
unsigned unit, num_samplers;
const struct pipe_sampler_state *states[PIPE_MAX_SAMPLERS];
 
if (samplers_used == 0x0)
   return;
 
num_samplers = util_last_bit(samplers_used);
 
/* loop over sampler units (aka tex image units) */
for (unit = 0; samplers_used; unit++, samplers_used >>= 1) {
   struct pipe_sampler_state *sampler = samplers + unit;
+  unsigned tex_unit = prog->SamplerUnits[unit];
 
-  if (samplers_used & 1) {
- st_convert_sampler_from_unit(st, sampler, prog->SamplerUnits[unit]);
+  /* Don't update the sampler for TBOs. cso_context will not bind sampler
+   * states that are NULL.
+   */
+  if (samplers_used & 1 &&
+  ctx->Texture.Unit[tex_unit]._Current->Target != GL_TEXTURE_BUFFER) {
+ st_convert_sampler_from_unit(st, sampler, tex_unit);
  states[unit] = sampler;
   } else {
  states[unit] = NULL;
   }
}
 
/* For any external samplers with multiplaner YUV, stuff the additional
 * sampler states we need at the end.
 *
 * Just re-use the existing sampler-state from the primary slot.
diff --git a/src/mesa/state_tracker/st_texture.c 
b/src/mesa/state_tracker/st_texture.c
index 9de3b9a..07c3844 100644
--- a/src/mesa/state_tracker/st_texture.c
+++ b/src/mesa/state_tracker/st_texture.c
@@ -505,26 +505,27 @@ st_destroy_bound_image_handles(struct st_context *st)
  * Create a texture handle from a texture unit.
  */
 static GLuint64
 st_create_texture_handle_from_unit(struct st_context *st,
struct gl_program *prog, GLuint texUnit)
 {
struct gl_context *ctx = st->ctx;
struct gl_texture_object *texObj;
struct pipe_context *pipe = st->pipe;
struct pipe_sampler_view *view;
-   struct pipe_sampler_state sampler;
+   struct pipe_sampler_state sampler = {0};
 
if (!st_update_single_texture(st, &view, texUnit, prog->sh.data->Version))
   return 0;
 
-   st_convert_sampler_from_unit(st, &sampler, texUnit);
+   if (view->target != PIPE_BUFFER)
+  st_convert_sampler_from_unit(st, &sampler, texUnit);
 
texObj = ctx->Texture.Unit[texUnit]._Current;
assert(texObj);
 
return pipe->create_texture_handle(pipe, view, &sampler);
 }
 
 
 /**
  * Create an image handle from an image unit.
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/10] i965: perf: keep on reading reports until delimiting timestamp

2017-06-14 Thread Lionel Landwerlin
Due to an underlying hardware race condition, we have no guarantee
that all the reports coming from the OA buffer related to the workload
we're trying to measure have landed to memory by the time all the work
submitted has completed. That means we need to keep on reading the OA
stream until we read a report with a timestamp older than the
timestamp recorded by the MI_REPORT_PERF_COUNT at the end of the
performance query.

v2: fix uninitialized offset variable to 0 (Lionel)

v3: rework the reading to avoid blocking the user of the API unless
requested (Rob)

v4: fix a bug that makes the i965 driver read the perf stream when
not necessary, leading to very long counter accumulation times
(Lionel)

Signed-off-by: Lionel Landwerlin 
Cc: Robert Bragg 
---
 src/mesa/drivers/dri/i965/brw_performance_query.c | 133 ++
 1 file changed, 113 insertions(+), 20 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index d10141bf07a..d11784c0352 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -219,6 +219,7 @@ struct brw_oa_sample_buf {
int refcount;
int len;
uint8_t buf[I915_PERF_OA_SAMPLE_SIZE * 10];
+   uint32_t last_timestamp;
 };
 
 /**
@@ -244,6 +245,11 @@ struct brw_perf_query_object
  struct brw_bo *bo;
 
  /**
+  * Address of mapped of @bo
+  */
+ void *map;
+
+ /**
   * The MI_REPORT_PERF_COUNT command lets us specify a unique
   * ID that will be reflected in the resulting OA report
   * that's written by the GPU. This is the ID we're expecting
@@ -712,11 +718,26 @@ discard_all_queries(struct brw_context *brw)
}
 }
 
-static bool
-read_oa_samples(struct brw_context *brw)
+enum OaReadStatus {
+   OA_READ_STATUS_ERROR,
+   OA_READ_STATUS_UNFINISHED,
+   OA_READ_STATUS_FINISHED,
+};
+
+static enum OaReadStatus
+read_oa_samples_until(struct brw_context *brw,
+  uint32_t start_timestamp,
+  uint32_t end_timestamp)
 {
+   struct exec_node *tail_node =
+  exec_list_get_tail(&brw->perfquery.sample_buffers);
+   struct brw_oa_sample_buf *tail_buf =
+  exec_node_data(struct brw_oa_sample_buf, tail_node, link);
+   uint32_t last_timestamp = tail_buf->last_timestamp;
+
while (1) {
   struct brw_oa_sample_buf *buf = get_free_sample_buf(brw);
+  uint32_t offset;
   int len;
 
   while ((len = read(brw->perfquery.oa_stream_fd, buf->buf,
@@ -728,28 +749,94 @@ read_oa_samples(struct brw_context *brw)
 
  if (len < 0) {
 if (errno == EAGAIN)
-   return true;
+   return ((last_timestamp - start_timestamp) >=
+   (end_timestamp - start_timestamp)) ?
+  OA_READ_STATUS_FINISHED :
+  OA_READ_STATUS_UNFINISHED;
 else {
DBG("Error reading i915 perf samples: %m\n");
-   return false;
 }
- } else {
+ } else
 DBG("Spurious EOF reading i915 perf samples\n");
-return false;
- }
+
+ return OA_READ_STATUS_ERROR;
   }
 
   buf->len = len;
   exec_list_push_tail(&brw->perfquery.sample_buffers, &buf->link);
+
+  /* Go through the reports and update the last timestamp. */
+  offset = 0;
+  while (offset < buf->len) {
+ const struct drm_i915_perf_record_header *header =
+(const struct drm_i915_perf_record_header *) &buf->buf[offset];
+ uint32_t *report = (uint32_t *) (header + 1);
+
+ if (header->type == DRM_I915_PERF_RECORD_SAMPLE)
+last_timestamp = report[1];
+
+ offset += header->size;
+  }
+
+  buf->last_timestamp = last_timestamp;
}
 
unreachable("not reached");
+   return OA_READ_STATUS_ERROR;
+}
+
+/**
+ * Try to read all the reports until either the delimiting timestamp
+ * or an error arises.
+ */
+static bool
+read_oa_samples_for_query(struct brw_context *brw,
+  struct brw_perf_query_object *obj)
+{
+   uint32_t *start;
+   uint32_t *last;
+   uint32_t *end;
+
+   /* We need the MI_REPORT_PERF_COUNT to land before we can start
+* accumulate. */
+   assert(!brw_batch_references(&brw->batch, obj->oa.bo) &&
+  !brw_bo_busy(obj->oa.bo));
+
+   /* Map the BO once here and let accumulate_oa_reports() unmap
+* it. */
+   if (obj->oa.map == NULL)
+  obj->oa.map = brw_bo_map(brw, obj->oa.bo, MAP_READ);
+
+   start = last = obj->oa.map;
+   end = obj->oa.map + MI_RPC_BO_END_OFFSET_BYTES;
+
+   if (start[0] != obj->oa.begin_report_id) {
+  DBG("Spurious start report id=%"PRIu32"\n", start[0]);
+  return true;
+   }
+   if (end[0] != (obj->oa.begin_report_id + 1)) {
+  DBG("Spurious end report id=%"PRIu32"\n", end[0]);
+  

[Mesa-dev] [PATCH 07/10] i965: ensure isolated timer reports while idle don't confuse filtering

2017-06-14 Thread Lionel Landwerlin
From: Robert Bragg 

From experimentation in IGT, we found that the OA unit might label
some report as "idle" (using an invalid context ID), right after a
report for a given context. Deltas generated by those reports actually
belong to the previous context, even though they're not labelled as
such.

This change ensures that while reading OA reports, we only
consider the GPU actually idle after 2 reports with an invalid context
ID.

Signed-off-by: Lionel Landwerlin 
---
 src/mesa/drivers/dri/i965/brw_performance_query.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index d11784c0352..c6574df302a 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -863,6 +863,7 @@ accumulate_oa_reports(struct brw_context *brw,
struct exec_node *first_samples_node;
bool in_ctx = true;
uint32_t ctx_id;
+   int out_duration = 0;
 
assert(o->Ready);
assert(obj->oa.map != NULL);
@@ -937,10 +938,16 @@ accumulate_oa_reports(struct brw_context *brw,
if (in_ctx && report[2] != ctx_id) {
   DBG("i915 perf: Switch AWAY (observed by ID change)\n");
   in_ctx = false;
+  out_duration = 0;
} else if (in_ctx == false && report[2] == ctx_id) {
   DBG("i915 perf: Switch TO\n");
   in_ctx = true;
-  add = false;
+
+  /* We didn't *really* Switch AWAY in the case that we
+   * e.g. saw a single periodic report while idle...
+   */
+  if (out_duration >= 1)
+ add = false;
} else if (in_ctx) {
   assert(report[2] == ctx_id);
   DBG("i915 perf: Continuation IN\n");
@@ -948,6 +955,7 @@ accumulate_oa_reports(struct brw_context *brw,
   assert(report[2] != ctx_id);
   DBG("i915 perf: Continuation OUT\n");
   add = false;
+  out_duration++;
}
 }
 
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/10] i965: use gen_device_info rather than brw_context

2017-06-14 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
---
 src/mesa/drivers/dri/i965/brw_performance_query.c | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index c6574df302a..45be9b1a988 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -503,9 +503,11 @@ emit_mi_report_perf_count(struct brw_context *brw,
   uint32_t offset_in_bytes,
   uint32_t report_id)
 {
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+
assert(offset_in_bytes % 64 == 0);
 
-   if (brw->gen < 8) {
+   if (devinfo->gen < 8) {
   BEGIN_BATCH(3);
   OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT);
   OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
@@ -856,6 +858,7 @@ static void
 accumulate_oa_reports(struct brw_context *brw,
   struct brw_perf_query_object *obj)
 {
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
struct gl_perf_query_object *o = &obj->base;
uint32_t *start;
uint32_t *last;
@@ -934,7 +937,7 @@ accumulate_oa_reports(struct brw_context *brw,
  * For Haswell we can rely on the HW to stop the progress
  * of OA counters while any other context is acctive.
  */
-if (brw->gen >= 8) {
+if (devinfo->gen >= 8) {
if (in_ctx && report[2] != ctx_id) {
   DBG("i915 perf: Switch AWAY (observed by ID change)\n");
   in_ctx = false;
@@ -1603,6 +1606,7 @@ add_basic_stat_reg(struct brw_perf_query_info *query,
 static void
 init_pipeline_statistic_query_registers(struct brw_context *brw)
 {
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
struct brw_perf_query_info *query = append_query_info(brw);
 
query->kind = PIPELINE_STATS;
@@ -1618,7 +1622,7 @@ init_pipeline_statistic_query_registers(struct 
brw_context *brw)
add_basic_stat_reg(query, VS_INVOCATION_COUNT,
   "N vertex shader invocations");
 
-   if (brw->gen == 6) {
+   if (devinfo->gen == 6) {
   add_stat_reg(query, GEN6_SO_PRIM_STORAGE_NEEDED, 1, 1,
"SO_PRIM_STORAGE_NEEDED",
"N geometry shader stream-out primitives (total)");
@@ -1667,7 +1671,7 @@ init_pipeline_statistic_query_registers(struct 
brw_context *brw)
add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
   "N primitives leaving clipping");
 
-   if (brw->is_haswell || brw->gen == 8)
+   if (devinfo->is_haswell || devinfo->gen == 8)
   add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
"N fragment shader invocations",
"N fragment shader invocations");
@@ -1677,7 +1681,7 @@ init_pipeline_statistic_query_registers(struct 
brw_context *brw)
 
add_basic_stat_reg(query, PS_DEPTH_COUNT, "N z-pass fragments");
 
-   if (brw->gen >= 7)
+   if (devinfo->gen >= 7)
   add_basic_stat_reg(query, CS_INVOCATION_COUNT,
  "N compute shader invocations");
 
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/10] i965: Add Gen8+ INTEL_performance_query support

2017-06-14 Thread Lionel Landwerlin
From: Robert Bragg 

Enables access to OA unit metrics on Gen8+ via INTEL_performance_query.

Signed-off-by: Robert Bragg 
---
 src/mesa/drivers/dri/i965/Makefile.am |   8 +-
 src/mesa/drivers/dri/i965/brw_defines.h   |   6 +
 src/mesa/drivers/dri/i965/brw_performance_query.c | 276 --
 3 files changed, 266 insertions(+), 24 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/Makefile.am 
b/src/mesa/drivers/dri/i965/Makefile.am
index 31ba460b1f5..3a749cb6d74 100644
--- a/src/mesa/drivers/dri/i965/Makefile.am
+++ b/src/mesa/drivers/dri/i965/Makefile.am
@@ -116,7 +116,7 @@ EXTRA_DIST = \
 # .c and .h files in one go so we don't hit problems with parallel
 # make and multiple invocations of the same script trying to write
 # to the same files.
-brw_oa_hsw.h: brw_oa.py brw_oa_hsw.xml
-   $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/brw_oa.py 
--header=$(builddir)/brw_oa_hsw.h --chipset=hsw $(srcdir)/brw_oa_hsw.xml
-brw_oa_hsw.c: brw_oa.py brw_oa_hsw.xml
-   $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/brw_oa.py 
--code=$(builddir)/brw_oa_hsw.c --chipset=hsw $(srcdir)/brw_oa_hsw.xml
+brw_oa_%.h: brw_oa.py brw_oa_%.xml Makefile.am
+   $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/brw_oa.py 
--header=$(builddir)/brw_oa_$(*).h --chipset=$(*) $(srcdir)/brw_oa_$(*).xml
+brw_oa_%.c: brw_oa.py brw_oa_%.xml Makefile.am
+   $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/brw_oa.py 
--code=$(builddir)/brw_oa_$(*).c --chipset=$(*) $(srcdir)/brw_oa_$(*).xml
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 312dddafd77..c98f4a699ce 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1350,6 +1350,12 @@ enum brw_pixel_shader_coverage_mask_mode {
 
 #define GEN6_MI_REPORT_PERF_COUNT ((0x28 << 23) | (3 - 2))
 
+#define GEN8_MI_REPORT_PERF_COUNT ((0x28 << 23) | (4 - 2))
+
+/* Bitfields for the URB_WRITE message, DW2 of message header: */
+#define URB_WRITE_PRIM_END 0x1
+#define URB_WRITE_PRIM_START   0x2
+#define URB_WRITE_PRIM_TYPE_SHIFT  2
 
 /* Maximum number of entries that can be addressed using a binding table
  * pointer of type SURFTYPE_BUFFER
diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index 1c9ddf52ea3..d10141bf07a 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -72,16 +72,33 @@
 #include "brw_defines.h"
 #include "brw_performance_query.h"
 #include "brw_oa_hsw.h"
+#include "brw_oa_bdw.h"
+#include "brw_oa_chv.h"
+#include "brw_oa_sklgt2.h"
+#include "brw_oa_sklgt3.h"
+#include "brw_oa_sklgt4.h"
+#include "brw_oa_bxt.h"
 #include "intel_batchbuffer.h"
 
 #define FILE_DEBUG_FLAG DEBUG_PERFMON
 
 /*
- * The largest OA format we can use on Haswell includes:
- * 1 timestamp, 45 A counters, 8 B counters and 8 C counters.
+ * The largest OA formats we can use include:
+ * For Haswell:
+ *   1 timestamp, 45 A counters, 8 B counters and 8 C counters.
+ * For Gen8+
+ *   1 timestamp, 1 clock, 36 A counters, 8 B counters and 8 C counters
  */
 #define MAX_OA_REPORT_COUNTERS 62
 
+#define OAREPORT_REASON_MASK   0x3f
+#define OAREPORT_REASON_SHIFT  19
+#define OAREPORT_REASON_TIMER  (1<<0)
+#define OAREPORT_REASON_TRIGGER1   (1<<1)
+#define OAREPORT_REASON_TRIGGER2   (1<<2)
+#define OAREPORT_REASON_CTX_SWITCH (1<<3)
+#define OAREPORT_REASON_GO_TRANSITION  (1<<4)
+
 #define I915_PERF_OA_SAMPLE_SIZE (8 +   /* drm_i915_perf_record_header */ \
   256)  /* OA counter report */
 
@@ -482,12 +499,21 @@ emit_mi_report_perf_count(struct brw_context *brw,
 {
assert(offset_in_bytes % 64 == 0);
 
-   BEGIN_BATCH(3);
-   OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT);
-   OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
- offset_in_bytes);
-   OUT_BATCH(report_id);
-   ADVANCE_BATCH();
+   if (brw->gen < 8) {
+  BEGIN_BATCH(3);
+  OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT);
+  OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+offset_in_bytes);
+  OUT_BATCH(report_id);
+  ADVANCE_BATCH();
+   } else {
+  BEGIN_BATCH(4);
+  OUT_BATCH(GEN8_MI_REPORT_PERF_COUNT);
+  OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+  offset_in_bytes);
+  OUT_BATCH(report_id);
+  ADVANCE_BATCH();
+   }
 }
 
 /**
@@ -571,6 +597,28 @@ accumulate_uint32(const uint32_t *report0,
*accumulator += (uint32_t)(*report1 - *report0);
 }
 
+static void
+accumulate_uint40(int a_index,
+  const uint32_t *report0,
+  const uint32_t *report1,
+  uint64_t *accumulator)
+{
+   const uint8_t *high_bytes0 = (uint8_t *)(report0 + 40);
+   const uint8_t *high_bytes1 = (uint8_t *)(report1 + 40);
+  

[Mesa-dev] [PATCH 03/10] i965: perf: fix codegen with single operand equation

2017-06-14 Thread Lionel Landwerlin
We did support single value operand equations, but not single variable
operand ones. In particular we were failing on "$Sampler0Bottleneck".

Signed-off-by: Lionel Landwerlin 
---
 src/mesa/drivers/dri/i965/brw_oa.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_oa.py 
b/src/mesa/drivers/dri/i965/brw_oa.py
index bf950b140da..254c512a7da 100644
--- a/src/mesa/drivers/dri/i965/brw_oa.py
+++ b/src/mesa/drivers/dri/i965/brw_oa.py
@@ -214,7 +214,9 @@ def output_rpn_equation_code(set, counter, equation, 
counter_vars):
 value = stack[-1]
 
 if value in hw_vars:
-value = hw_vars[value];
+value = hw_vars[value]
+if value in counter_vars:
+value = read_funcs[value[1:]] + "(brw, query, accumulator)"
 
 c("\nreturn " + value + ";")
 
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/10] i965: Add Gen8+ sys_vars for generated OA code

2017-06-14 Thread Lionel Landwerlin
From: Robert Bragg 

In preparation for adding XML OA metric set descriptions for Gen 8 and 9
which will result in auto generated code that depends on a number of new
system variables ($EuSubslicesTotalCount, $EuThreadsCount and
$SliceMask) this adds corresponding members to brw->perf.sys_vars.

Signed-off-by: Robert Bragg 
---
 src/mesa/drivers/dri/i965/brw_context.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index c15c0193584..6cce2e536ef 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1088,6 +1088,9 @@ struct brw_context
  uint64_t timestamp_frequency; /** $GpuTimestampFrequency */
  uint64_t n_eus;   /** $EuCoresTotalCount */
  uint64_t n_eu_slices; /** $EuSlicesTotalCount */
+ uint64_t n_eu_sub_slices; /** $EuSubslicesTotalCount */
+ uint64_t eu_threads_count;/** $EuThreadsCount */
+ uint64_t slice_mask;  /** $SliceMask */
  uint64_t subslice_mask;   /** $SubsliceMask */
  uint64_t gt_min_freq; /** $GpuMinFrequency */
  uint64_t gt_max_freq; /** $GpuMaxFrequency */
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 00/10] i965: Add performance query OA support on Gen 8 & 9

2017-06-14 Thread Lionel Landwerlin
Hi,

This series has been used by a few people for some time already. Now
that the kernel patches are making their way to upstream, here are the
userspace side ones.

This series depends on [1] which adds kernel header files to the
tree, so we don't have to depend on a specific version of libdrm to
have this compile.

Cheers,

[1] : https://patchwork.freedesktop.org/series/25801/

Lionel Landwerlin (6):
  intel: common: add flag to identify platforms by name
  i965: perf: fix codegen with single operand equation
  i965: perf: keep on reading reports until delimiting timestamp
  i965: use gen_device_info rather than brw_context
  i965: perf: add support for Kabylake
  i965: perf: add support for Geminilake

Robert Bragg (4):
  i965: Add Gen8+ sys_vars for generated OA code
  i965: Add XML OA metric sets for Gen8+
  i965: Add Gen8+ INTEL_performance_query support
  i965: ensure isolated timer reports while idle don't confuse filtering

 src/intel/common/gen_device_info.c|23 +-
 src/intel/common/gen_device_info.h| 3 +
 src/mesa/drivers/dri/i965/Makefile.am |17 +-
 src/mesa/drivers/dri/i965/Makefile.sources|20 +-
 src/mesa/drivers/dri/i965/brw_context.h   | 3 +
 src/mesa/drivers/dri/i965/brw_defines.h   | 6 +
 src/mesa/drivers/dri/i965/brw_oa.py   | 4 +-
 src/mesa/drivers/dri/i965/brw_oa_bdw.xml  | 15051 
 src/mesa/drivers/dri/i965/brw_oa_bxt.xml  |  9211 
 src/mesa/drivers/dri/i965/brw_oa_chv.xml  |  9569 +
 src/mesa/drivers/dri/i965/brw_oa_glk.xml  |  9124 
 src/mesa/drivers/dri/i965/brw_oa_hsw.xml  |26 +-
 src/mesa/drivers/dri/i965/brw_oa_kblgt2.xml   | 10455 ++
 src/mesa/drivers/dri/i965/brw_oa_kblgt3.xml   | 10500 ++
 src/mesa/drivers/dri/i965/brw_oa_sklgt2.xml   | 10925 ++
 src/mesa/drivers/dri/i965/brw_oa_sklgt3.xml   | 10499 ++
 src/mesa/drivers/dri/i965/brw_oa_sklgt4.xml   | 10522 ++
 src/mesa/drivers/dri/i965/brw_performance_query.c |   436 +-
 18 files changed, 96326 insertions(+), 68 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/brw_oa_bdw.xml
 create mode 100644 src/mesa/drivers/dri/i965/brw_oa_bxt.xml
 create mode 100644 src/mesa/drivers/dri/i965/brw_oa_chv.xml
 create mode 100644 src/mesa/drivers/dri/i965/brw_oa_glk.xml
 create mode 100644 src/mesa/drivers/dri/i965/brw_oa_kblgt2.xml
 create mode 100644 src/mesa/drivers/dri/i965/brw_oa_kblgt3.xml
 create mode 100644 src/mesa/drivers/dri/i965/brw_oa_sklgt2.xml
 create mode 100644 src/mesa/drivers/dri/i965/brw_oa_sklgt3.xml
 create mode 100644 src/mesa/drivers/dri/i965/brw_oa_sklgt4.xml

--
2.11.0
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/10] intel: common: add flag to identify platforms by name

2017-06-14 Thread Lionel Landwerlin
The perf infrastructure needs to identify specific platforms, not just
generations.

Signed-off-by: Lionel Landwerlin 
---
 src/intel/common/gen_device_info.c | 23 +--
 src/intel/common/gen_device_info.h |  3 +++
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/src/intel/common/gen_device_info.c 
b/src/intel/common/gen_device_info.c
index 75284a66419..fee112ce25b 100644
--- a/src/intel/common/gen_device_info.c
+++ b/src/intel/common/gen_device_info.c
@@ -412,7 +412,6 @@ static const struct gen_device_info gen_device_info_chv = {
 
 #define GEN9_LP_FEATURES   \
GEN9_FEATURES,  \
-   .is_broxton = 1,\
.gt = 1,\
.has_llc = false,   \
.num_slices = 1,\
@@ -463,6 +462,7 @@ static const struct gen_device_info gen_device_info_chv = {
 
 static const struct gen_device_info gen_device_info_skl_gt1 = {
GEN9_FEATURES, .gt = 1,
+   .is_skylake = true,
.num_slices = 1,
.l3_banks = 2,
.urb.size = 192,
@@ -470,18 +470,21 @@ static const struct gen_device_info 
gen_device_info_skl_gt1 = {
 
 static const struct gen_device_info gen_device_info_skl_gt2 = {
GEN9_FEATURES, .gt = 2,
+   .is_skylake = true,
.num_slices = 1,
.l3_banks = 4,
 };
 
 static const struct gen_device_info gen_device_info_skl_gt3 = {
GEN9_FEATURES, .gt = 3,
+   .is_skylake = true,
.num_slices = 2,
.l3_banks = 8,
 };
 
 static const struct gen_device_info gen_device_info_skl_gt4 = {
GEN9_FEATURES, .gt = 4,
+   .is_skylake = true,
.num_slices = 3,
.l3_banks = 12,
/* From the "L3 Allocation and Programming" documentation:
@@ -497,11 +500,13 @@ static const struct gen_device_info 
gen_device_info_skl_gt4 = {
 
 static const struct gen_device_info gen_device_info_bxt = {
GEN9_LP_FEATURES,
+   .is_broxton = true,
.l3_banks = 2,
 };
 
 static const struct gen_device_info gen_device_info_bxt_2x6 = {
GEN9_LP_FEATURES_2X6,
+   .is_broxton = true,
.l3_banks = 1,
 };
 /*
@@ -570,12 +575,14 @@ static const struct gen_device_info 
gen_device_info_kbl_gt4 = {
 
 static const struct gen_device_info gen_device_info_glk = {
GEN9_LP_FEATURES,
+   .is_geminilake = true,
.l3_banks = 2,
 };
 
 /*TODO: Initialize l3_banks when we know the number. */
 static const struct gen_device_info gen_device_info_glk_2x6 = {
-   GEN9_LP_FEATURES_2X6
+   GEN9_LP_FEATURES_2X6,
+   .is_geminilake = true,
 };
 
 #define GEN10_HW_INFO   \
@@ -606,22 +613,26 @@ static const struct gen_device_info 
gen_device_info_glk_2x6 = {
 
 static const struct gen_device_info gen_device_info_cnl_2x8 = {
/* GT0.5 */
-   GEN10_FEATURES(1, 1, 2)
+   GEN10_FEATURES(1, 1, 2),
+   .is_cannonlake = true,
 };
 
 static const struct gen_device_info gen_device_info_cnl_3x8 = {
/* GT1 */
-   GEN10_FEATURES(1, 1, 3)
+   GEN10_FEATURES(1, 1, 3),
+   .is_cannonlake = true,
 };
 
 static const struct gen_device_info gen_device_info_cnl_4x8 = {
/* GT 1.5 */
-   GEN10_FEATURES(1, 2, 6)
+   GEN10_FEATURES(1, 2, 6),
+   .is_cannonlake = true,
 };
 
 static const struct gen_device_info gen_device_info_cnl_5x8 = {
/* GT2 */
-   GEN10_FEATURES(2, 2, 6)
+   GEN10_FEATURES(2, 2, 6),
+   .is_cannonlake = true,
 };
 
 bool
diff --git a/src/intel/common/gen_device_info.h 
b/src/intel/common/gen_device_info.h
index 62076305194..2dd3d1b7688 100644
--- a/src/intel/common/gen_device_info.h
+++ b/src/intel/common/gen_device_info.h
@@ -40,8 +40,11 @@ struct gen_device_info
bool is_baytrail;
bool is_haswell;
bool is_cherryview;
+   bool is_skylake;
bool is_broxton;
bool is_kabylake;
+   bool is_geminilake;
+   bool is_cannonlake;
 
bool has_hiz_and_separate_stencil;
bool must_use_separate_stencil;
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] aubinator: import intel_aub.h from libdrm

2017-06-14 Thread Lionel Landwerlin
This enables us to compile aubinator without the libdrm dependency.

Signed-off-by: Lionel Landwerlin 
---
 src/intel/tools/intel_aub.h | 153 
 1 file changed, 153 insertions(+)
 create mode 100644 src/intel/tools/intel_aub.h

diff --git a/src/intel/tools/intel_aub.h b/src/intel/tools/intel_aub.h
new file mode 100644
index 00000000000..5f0aba8e68e
--- /dev/null
+++ b/src/intel/tools/intel_aub.h
@@ -0,0 +1,153 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *Eric Anholt 
+ *
+ */
+
+/** @file intel_aub.h
+ *
+ * The AUB file is a file format used by Intel's internal simulation
+ * and other validation tools.  It can be used at various levels by a
+ * driver to input state to the simulated hardware or a replaying
+ * debugger.
+ *
+ * We choose to dump AUB files using the trace block format for ease
+ * of implementation -- dump out the blocks of memory as plain blobs
+ * and insert ring commands to execute the batchbuffer blob.
+ */
+
+#ifndef _INTEL_AUB_H
+#define _INTEL_AUB_H
+
+#define AUB_MI_NOOP                    (0)
+#define AUB_MI_BATCH_BUFFER_START      (0x31 << 23)
+#define AUB_PIPE_CONTROL               (0x7a000002)
+
+/* DW0: instruction type. */
+
+#define CMD_AUB                        (7 << 29)
+
+#define CMD_AUB_HEADER                 (CMD_AUB | (1 << 23) | (0x05 << 16))
+/* DW1 */
+# define AUB_HEADER_MAJOR_SHIFT        24
+# define AUB_HEADER_MINOR_SHIFT        16
+
+#define CMD_AUB_TRACE_HEADER_BLOCK (CMD_AUB | (1 << 23) | (0x41 << 16))
+#define CMD_AUB_DUMP_BMP   (CMD_AUB | (1 << 23) | (0x9e << 16))
+
+/* DW1 */
+#define AUB_TRACE_OPERATION_MASK       0x000000ff
+#define AUB_TRACE_OP_COMMENT           0x00000000
+#define AUB_TRACE_OP_DATA_WRITE        0x00000001
+#define AUB_TRACE_OP_COMMAND_WRITE     0x00000002
+#define AUB_TRACE_OP_MMIO_WRITE        0x00000003
+// operation = TRACE_DATA_WRITE, Type
+#define AUB_TRACE_TYPE_MASK            0x0000ff00
+#define AUB_TRACE_TYPE_NOTYPE          (0 << 8)
+#define AUB_TRACE_TYPE_BATCH           (1 << 8)
+#define AUB_TRACE_TYPE_VERTEX_BUFFER   (5 << 8)
+#define AUB_TRACE_TYPE_2D_MAP          (6 << 8)
+#define AUB_TRACE_TYPE_CUBE_MAP        (7 << 8)
+#define AUB_TRACE_TYPE_VOLUME_MAP      (9 << 8)
+#define AUB_TRACE_TYPE_1D_MAP          (10 << 8)
+#define AUB_TRACE_TYPE_CONSTANT_BUFFER (11 << 8)
+#define AUB_TRACE_TYPE_CONSTANT_URB    (12 << 8)
+#define AUB_TRACE_TYPE_INDEX_BUFFER    (13 << 8)
+#define AUB_TRACE_TYPE_GENERAL         (14 << 8)
+#define AUB_TRACE_TYPE_SURFACE         (15 << 8)
+
+
+// operation = TRACE_COMMAND_WRITE, Type =
+#define AUB_TRACE_TYPE_RING_HWB        (1 << 8)
+#define AUB_TRACE_TYPE_RING_PRB0   (2 << 8)
+#define AUB_TRACE_TYPE_RING_PRB1   (3 << 8)
+#define AUB_TRACE_TYPE_RING_PRB2   (4 << 8)
+
+// Address space
+#define AUB_TRACE_ADDRESS_SPACE_MASK   0x00ff0000
+#define AUB_TRACE_MEMTYPE_GTT          (0 << 16)
+#define AUB_TRACE_MEMTYPE_LOCAL        (1 << 16)
+#define AUB_TRACE_MEMTYPE_NONLOCAL     (2 << 16)
+#define AUB_TRACE_MEMTYPE_PCI          (3 << 16)
+#define AUB_TRACE_MEMTYPE_GTT_ENTRY    (4 << 16)
+
+/* DW2 */
+
+/**
+ * aub_state_struct_type enum values are encoded with the top 16 bits
+ * representing the type to be delivered to the .aub file, and the bottom 16
+ * bits representing the subtype.  This macro performs the encoding.
+ */
+#define ENCODE_SS_TYPE(type, subtype) (((type) << 16) | (subtype))
+
+enum aub_state_struct_type {
+   AUB_TRACE_VS_STATE =
ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 1),
+   AUB_TRACE_GS_STATE =
ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 2),
+   AUB_TRACE_CLIP_STATE =  ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 
3),
+   AUB_TRACE_SF_STATE =   

[Mesa-dev] [PATCH 0/2] Anv/i965: Drop dependency on libdrm's kernel headers

2017-06-14 Thread Lionel Landwerlin
Hi,

While working with changes that span from kernel to user space, I've
been wondering whether we need to depend on libdrm's header files at
all for the anv & i965 drivers. Indeed with Ken's recent changes, we
depend on libdrm for 1 or 2 functions wrapping an ioctl (with
drmGetDevices2 being the only function actually containing some logic)
and for its kernel header files. The latter which we could just embed
ourselves given how the userspace & kernelspace drivers closely
interact.

I've only included the minimal set of header files we need from the
kernel for anv & i965. Maybe other drivers would be interested and
maybe we should put all the kernel drm uapi headers into include?

Cheers,

Lionel Landwerlin (2):
  aubinator: import intel_aub.h from libdrm
  anv/i965: drop libdrm_intel dependency completely

 configure.ac|6 +-
 src/gallium/drivers/i915/Automake.inc   |2 +-
 src/gallium/targets/pipe-loader/Makefile.am |2 +-
 src/gallium/winsys/i915/drm/Makefile.am |2 +-
 src/intel/Makefile.drm.am   |   22 +
 src/intel/Makefile.sources  |6 +
 src/intel/Makefile.tools.am |3 +-
 src/intel/Makefile.vulkan.am|4 +-
 src/intel/drm/drm.h |  925 +
 src/intel/drm/drm_fourcc.h  |  358 +++
 src/intel/drm/drm_mode.h|  739 ++
 src/intel/drm/i915_drm.h| 1459 +++
 src/intel/tools/intel_aub.h |  153 +++
 src/mesa/drivers/dri/i915/Makefile.am   |4 +-
 src/mesa/drivers/dri/i965/Makefile.am   |2 +-
 15 files changed, 3673 insertions(+), 14 deletions(-)
 create mode 100644 src/intel/Makefile.drm.am
 create mode 100644 src/intel/drm/drm.h
 create mode 100644 src/intel/drm/drm_fourcc.h
 create mode 100644 src/intel/drm/drm_mode.h
 create mode 100644 src/intel/drm/i915_drm.h
 create mode 100644 src/intel/tools/intel_aub.h

--
2.11.0
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/9] i965: Use snoop bo for accessing query results on !llc

2017-06-14 Thread Ian Romanick
On 06/09/2017 06:01 AM, Chris Wilson wrote:
> Ony non-llc architectures where we are primarily reading back the

  On

> results of the GPU queries, then we can improve performance by using a
> cacheable mapping of the results. Unfortunately, enabling snooping makes
> the writes from the GPU slower, which may adversely affect pipelined
> query operations (where the results are used directly by the GPU and not
> CPU).
> 
> Signed-off-by: Chris Wilson 
> Cc: Kenneth Graunke 
> Cc: Matt Turner 
> ---
>  src/mesa/drivers/dri/i965/brw_bufmgr.c| 21 +
>  src/mesa/drivers/dri/i965/brw_bufmgr.h|  2 ++
>  src/mesa/drivers/dri/i965/gen6_queryobj.c |  2 ++
>  3 files changed, 25 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
> b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> index 9028b538c6..824bc55fb2 100644
> --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> @@ -626,6 +626,27 @@ brw_bo_unreference(struct brw_bo *bo)
> }
>  }
>  
> +static bool __brw_bo_set_caching(struct brw_bo *bo, int caching)
> +{
> +   struct drm_i915_gem_caching arg = {
> +  .handle = bo->gem_handle,
> +  .caching = caching
> +   };
> +   return drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) == 
> 0;
> +}
> +
> +void brw_bo_set_cache_coherent(struct brw_bo *bo)
> +{
> +   if (bo->cache_coherent)
> +  return;
> +
> +   if (!__brw_bo_set_caching(bo, I915_CACHING_CACHED))
> +  return;
> +
> +   bo->reusable = false;
> +   bo->cache_coherent = true;
> +}
> +
>  static void
>  set_domain(struct brw_context *brw, const char *action,
> struct brw_bo *bo, uint32_t read_domains, uint32_t write_domain)
> diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h 
> b/src/mesa/drivers/dri/i965/brw_bufmgr.h
> index 214b75bf1a..188d6c5ee0 100644
> --- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
> +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
> @@ -188,6 +188,8 @@ void brw_bo_unreference(struct brw_bo *bo);
>  #define MAP_INTERNAL_MASK   (0xff << 24)
>  #define MAP_RAW (0x01 << 24)
>  
> +void brw_bo_set_cache_coherent(struct brw_bo *bo);
> +
>  /**
>   * Maps the buffer into userspace.
>   *
> diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c 
> b/src/mesa/drivers/dri/i965/gen6_queryobj.c
> index 18af608166..5c95a4bae9 100644
> --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
> +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
> @@ -316,6 +316,8 @@ static int gen6_alloc_query(struct brw_context *brw,
>brw_bo_unreference(query->bo);
>  
> query->bo = brw_bo_alloc(brw->bufmgr, "query results", 4096, 4096);
> +   brw_bo_set_cache_coherent(query->bo);
> +
> query->results = brw_bo_map(brw, query->bo,
> MAP_READ | MAP_COHERENT | MAP_ASYNC);
>  
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/9] i965: Replace open-coded gen6 queryobj offsets with simple helpers

2017-06-14 Thread Ian Romanick
On 06/09/2017 06:01 AM, Chris Wilson wrote:
> Lots of places open-coded the assumed layout of the predicate/results
> within the query object, replace those with simple helpers.
> 
> Signed-off-by: Chris Wilson 
> Cc: Kenneth Graunke 
> Cc: Matt Turner 
> ---
>  src/mesa/drivers/dri/i965/brw_conditional_render.c |  4 ++--
>  src/mesa/drivers/dri/i965/brw_context.h| 14 ++
>  src/mesa/drivers/dri/i965/gen6_queryobj.c  |  6 +++---
>  src/mesa/drivers/dri/i965/hsw_queryobj.c   | 18 +-
>  4 files changed, 28 insertions(+), 14 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_conditional_render.c 
> b/src/mesa/drivers/dri/i965/brw_conditional_render.c
> index 046a42b5f5..197c35efe2 100644
> --- a/src/mesa/drivers/dri/i965/brw_conditional_render.c
> +++ b/src/mesa/drivers/dri/i965/brw_conditional_render.c
> @@ -66,13 +66,13 @@ set_predicate_for_occlusion_query(struct brw_context *brw,
> query->bo,
> I915_GEM_DOMAIN_INSTRUCTION,
> 0, /* write domain */
> -   0 /* offset */);
> +   gen6_query_results_offset(query, 0));
> brw_load_register_mem64(brw,
> MI_PREDICATE_SRC1,
> query->bo,
> I915_GEM_DOMAIN_INSTRUCTION,
> 0, /* write domain */
> -   8 /* offset */);
> +   gen6_query_results_offset(query, 1));
>  }
>  
>  static void
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index d1503312d4..c5acb83ad0 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -427,6 +427,20 @@ struct brw_query_object {
> bool flushed;
>  };
>  
> +#define GEN6_QUERY_PREDICATE (2)
> +#define GEN6_QUERY_RESULTS (0)
> +
> +static inline unsigned gen6_query_predicate_offset(const struct 
> brw_query_object *query)

static inline unsigned
gen6_query_predicate_offset(const struct brw_query_object *query)

> +{
> +   return GEN6_QUERY_PREDICATE * sizeof(uint64_t);
> +}
> +
> +static inline unsigned gen6_query_results_offset(const struct 
> brw_query_object *query,
> +unsigned idx)

ditto.

> +{
> +   return (GEN6_QUERY_RESULTS + idx) * sizeof(uint64_t);
> +}
> +
>  enum brw_gpu_ring {
> UNKNOWN_RING,
> RENDER_RING,
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv: FORMAT_FEATURE_TRANSFER_SRC/DST_BIT_KHR not used with VkFormatProperties.bufferFeatures

2017-06-14 Thread Lionel Landwerlin

Out of curiosity, does this fix a test (maybe upcoming one)?

Otherwise sounds fair :

Reviewed-by: Lionel Landwerlin 

Thanks!

On 14/06/17 17:55, Andres Gomez wrote:

VK_FORMAT_FEATURE_TRANSFER_[SRC|DST]_BIT_KHR is a flag value of the
VkFormatFeatureFlagBits enum that can only be held and checked against
the linearTilingFeatures or optimalTilingFeatures members of the
VkFormatProperties struct but not the bufferFeatures member.

From the Vulkan® 1.0.51, with the VK_KHR_maintenance1 extension,
section 32.3.2 docs for VkFormatProperties:

"* linearTilingFeatures is a bitmask of VkFormatFeatureFlagBits
   specifying features supported by images created with a tiling
   parameter of VK_IMAGE_TILING_LINEAR.

 * optimalTilingFeatures is a bitmask of VkFormatFeatureFlagBits
   specifying features supported by images created with a tiling
   parameter of VK_IMAGE_TILING_OPTIMAL.

 * bufferFeatures is a bitmask of VkFormatFeatureFlagBits
   specifying features supported by buffers."

 ...

 Bits which can be set in the VkFormatProperties features
 linearTilingFeatures, optimalTilingFeatures, and bufferFeatures
 are:

 typedef enum VkFormatFeatureFlagBits {

 ...

   VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR = 0x4000,
   VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR = 0x8000,

 ...

 } VkFormatFeatureFlagBits;

 ...

 The following bits may be set in linearTilingFeatures and
 optimalTilingFeatures, specifying that the features are supported
 by images or image views created with the queried
 vkGetPhysicalDeviceFormatProperties::format:

 ...

 * VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR specifies that an image
   can be used as a source image for copy commands.

 * VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR specifies that an image
   can be used as a destination image for copy commands and clear
   commands."

Cc: Jason Ekstrand 
Cc: Iago Toral Quiroga 
Cc: Lionel Landwerlin 
Signed-off-by: Andres Gomez 
---
  src/intel/vulkan/anv_formats.c | 5 -
  1 file changed, 5 deletions(-)

diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c
index 104d4f7a5fe..0bc81d12dab 100644
--- a/src/intel/vulkan/anv_formats.c
+++ b/src/intel/vulkan/anv_formats.c
@@ -378,11 +378,6 @@ get_buffer_format_properties(const struct gen_device_info 
*devinfo,
 if (format == ISL_FORMAT_R32_SINT || format == ISL_FORMAT_R32_UINT)
flags |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT;
  
-   if (flags) {

-  flags |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR |
-   VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR;
-   }
-
 return flags;
  }
  



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 9/9] i965: Pack simple pipelined query objects into the same buffer

2017-06-14 Thread Kenneth Graunke
On Friday, June 9, 2017 6:01:40 AM PDT Chris Wilson wrote:
> Reuse the same query object buffer for multiple queries within the same
> batch.
> 
> A task for the future is propagating the GL_NO_MEMORY errors.
> 
> Signed-off-by: Chris Wilson 
> Cc: Kenneth Graunke 
> Cc: Matt Turner 
> ---
>  src/mesa/drivers/dri/i965/brw_context.c   |  3 +++
>  src/mesa/drivers/dri/i965/brw_context.h   | 10 ---
>  src/mesa/drivers/dri/i965/brw_queryobj.c  | 16 +--
>  src/mesa/drivers/dri/i965/gen6_queryobj.c | 44 
> ++-
>  4 files changed, 55 insertions(+), 18 deletions(-)

The benefit is saving memory, right?

The downside seems to be increased WaitQuery() latencies:

- Start Query A
- End Query A
- Start Query B
- Batch Flush
- End Query B
- WaitQuery for A

The query BO also contains B, and both batches refer to it, so it seems
like WaitQuery() would wait for two batches to complete instead of one.

--Ken

signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/9] i965: Use snoop bo for accessing query results on !llc

2017-06-14 Thread Kenneth Graunke
On Friday, June 9, 2017 6:01:38 AM PDT Chris Wilson wrote:
> Ony non-llc architectures where we are primarily reading back the
> results of the GPU queries, then we can improve performance by using a
> cacheable mapping of the results. Unfortunately, enabling snooping makes
> the writes from the GPU slower, which may adversely affect pipelined
> query operations (where the results are used directly by the GPU and not
> CPU).

We're essentially writing two DWords, and reading two DWords - so we aren't
primarily reading.  However, with your next patch, where we want to be able
to asynchronously poll the status via CheckQuery()...we'll be reading a
bunch more.  It might make sense to mention this polling in the commit
message.

> 
> Signed-off-by: Chris Wilson 
> Cc: Kenneth Graunke 
> Cc: Matt Turner 
> ---
>  src/mesa/drivers/dri/i965/brw_bufmgr.c| 21 +
>  src/mesa/drivers/dri/i965/brw_bufmgr.h|  2 ++
>  src/mesa/drivers/dri/i965/gen6_queryobj.c |  2 ++
>  3 files changed, 25 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
> b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> index 9028b538c6..824bc55fb2 100644
> --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> @@ -626,6 +626,27 @@ brw_bo_unreference(struct brw_bo *bo)
> }
>  }
>  
> +static bool __brw_bo_set_caching(struct brw_bo *bo, int caching)

static bool
__brw_bo_set_caching(struct brw_bo *bo, int caching)

> +{
> +   struct drm_i915_gem_caching arg = {
> +  .handle = bo->gem_handle,
> +  .caching = caching
> +   };
> +   return drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) == 
> 0;
> +}
> +
> +void brw_bo_set_cache_coherent(struct brw_bo *bo)

void
brw_bo_set_cache_coherent(struct brw_bo *bo)

With those three changes,
Reviewed-by: Kenneth Graunke 

> +{
> +   if (bo->cache_coherent)
> +  return;
> +
> +   if (!__brw_bo_set_caching(bo, I915_CACHING_CACHED))
> +  return;
> +
> +   bo->reusable = false;
> +   bo->cache_coherent = true;
> +}
> +
>  static void
>  set_domain(struct brw_context *brw, const char *action,
> struct brw_bo *bo, uint32_t read_domains, uint32_t write_domain)
> diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h 
> b/src/mesa/drivers/dri/i965/brw_bufmgr.h
> index 214b75bf1a..188d6c5ee0 100644
> --- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
> +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
> @@ -188,6 +188,8 @@ void brw_bo_unreference(struct brw_bo *bo);
>  #define MAP_INTERNAL_MASK   (0xff << 24)
>  #define MAP_RAW (0x01 << 24)
>  
> +void brw_bo_set_cache_coherent(struct brw_bo *bo);
> +
>  /**
>   * Maps the buffer into userspace.
>   *
> diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c 
> b/src/mesa/drivers/dri/i965/gen6_queryobj.c
> index 18af608166..5c95a4bae9 100644
> --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
> +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
> @@ -316,6 +316,8 @@ static int gen6_alloc_query(struct brw_context *brw,
>brw_bo_unreference(query->bo);
>  
> query->bo = brw_bo_alloc(brw->bufmgr, "query results", 4096, 4096);
> +   brw_bo_set_cache_coherent(query->bo);
> +
> query->results = brw_bo_map(brw, query->bo,
> MAP_READ | MAP_COHERENT | MAP_ASYNC);
>  
> 



signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 8/9] i965: Use 'available' fence for polling query results

2017-06-14 Thread Kenneth Graunke
On Friday, June 9, 2017 6:01:39 AM PDT Chris Wilson wrote:
> If we always write the 'available' flag after writing the final result
> of the query, we can probe that predicate to quickly query whether the
> result is ready from userspace. The primary advantage of checking the
> predicate is that it allows for more fine-grained queries, we do not
> have to wait for the batch to finish before the query is marked as
> ready.
> 
> We still do check the status of the batch after probing the query so
> that if the worst happens and the batch did hang without completing the
> query, we do not spin forever (although it is not as nice as completely
> eliminating the ioctl, the busy-ioctl is lightweight!).
> 
> Signed-off-by: Chris Wilson 
> Cc: Kenneth Graunke 
> Cc: Matt Turner 
> ---
>  src/mesa/drivers/dri/i965/brw_context.h   |  4 +--
>  src/mesa/drivers/dri/i965/gen6_queryobj.c | 54 
> +--
>  2 files changed, 25 insertions(+), 33 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index 117b1ecdca..44e0d31c6d 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -428,8 +428,8 @@ struct brw_query_object {
> bool flushed;
>  };
>  
> -#define GEN6_QUERY_PREDICATE (2)
> -#define GEN6_QUERY_RESULTS (0)
> +#define GEN6_QUERY_PREDICATE (0)
> +#define GEN6_QUERY_RESULTS (1)
>  
>  static inline unsigned gen6_query_predicate_offset(const struct 
> brw_query_object *query)
>  {
> diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c 
> b/src/mesa/drivers/dri/i965/gen6_queryobj.c
> index 5c95a4bae9..ae7fd06c1c 100644
> --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
> +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
> @@ -40,8 +40,7 @@
>  #include "intel_buffer_objects.h"
>  
>  static inline void
> -set_query_availability(struct brw_context *brw, struct brw_query_object 
> *query,
> -   bool available)
> +set_query_available(struct brw_context *brw, struct brw_query_object *query)
>  {
> /* For platforms that support ARB_query_buffer_object, we write the
>  * query availability for "pipelined" queries.
> @@ -58,22 +57,12 @@ set_query_availability(struct brw_context *brw, struct 
> brw_query_object *query,
>  * PIPE_CONTROL with an immediate write will synchronize with
>  * those earlier writes, so we write 1 when the value has landed.
>  */
> -   if (brw->ctx.Extensions.ARB_query_buffer_object &&
> -   brw_is_query_pipelined(query)) {
> -  unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
>  
> -  if (available) {
> - /* Order available *after* the query results. */
> - flags |= PIPE_CONTROL_FLUSH_ENABLE;
> -  } else {
> - /* Make it unavailable *before* any pipelined reads. */
> - flags |= PIPE_CONTROL_CS_STALL;
> -  }
> -
> -  brw_emit_pipe_control_write(brw, flags,
> -  query->bo, 
> gen6_query_predicate_offset(query),
> -  available, 0);
> -   }
> +   brw_emit_pipe_control_write(brw,
> +   PIPE_CONTROL_WRITE_IMMEDIATE |
> +   PIPE_CONTROL_FLUSH_ENABLE,
> +   query->bo, gen6_query_predicate_offset(query),
> +   true, 0);
>  }
>  
>  static void
> @@ -139,12 +128,12 @@ write_xfb_overflow_streams(struct gl_context *ctx,
>  }
>  
>  static bool
> -check_xfb_overflow_streams(uint64_t *results, int count)
> +check_xfb_overflow_streams(const uint64_t *results, int count)
>  {
> bool overflow = false;
>  
> for (int i = 0; i < count; i++) {
> -  uint64_t *result_i = &results[4 * i];
> +  const uint64_t *result_i = &results[4 * i];
>  
>if ((result_i[3] - result_i[2]) != (result_i[1] - result_i[0])) {
>   overflow = true;
> @@ -214,16 +203,14 @@ emit_pipeline_stat(struct brw_context *brw, struct 
> brw_bo *bo,
>   */
>  static void
>  gen6_queryobj_get_results(struct gl_context *ctx,
> -  struct brw_query_object *query)
> +  struct brw_query_object *query,
> +  const uint64_t *results)
>  {
> struct brw_context *brw = brw_context(ctx);
>  
> if (query->bo == NULL)
>return;
>  
> -   brw_bo_map_sync(brw, query->bo, MAP_READ | MAP_COHERENT);
> -   uint64_t *results = query->results;
> -
> switch (query->Base.Target) {
> case GL_TIME_ELAPSED:
>/* The query BO contains the starting and ending timestamps.
> @@ -319,10 +306,10 @@ static int gen6_alloc_query(struct brw_context *brw,
> brw_bo_set_cache_coherent(query->bo);
>  
> query->results = brw_bo_map(brw, query->bo,
> -   MAP_READ | MAP_COHERENT | MAP_ASYNC);
> +   MAP_READ | MAP_WRITE | MAP_COHERENT | 
> 

Re: [Mesa-dev] [PATCH 04/15] i965: Prepare up/downsampling for isl based miptrees

2017-06-14 Thread Nanley Chery
On Tue, Jun 13, 2017 at 05:50:02PM +0300, Topi Pohjolainen wrote:
> Signed-off-by: Topi Pohjolainen 
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 46 
> ++-
>  1 file changed, 38 insertions(+), 8 deletions(-)
> 
Patches 2-4 are
Reviewed-by: Nanley Chery 

> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 78a223a7f3..061860cdf6 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -2800,27 +2800,57 @@ intel_miptree_updownsample(struct brw_context *brw,
> struct intel_mipmap_tree *src,
> struct intel_mipmap_tree *dst)
>  {
> +   unsigned src_w, src_h, dst_w, dst_h;
> +
> +   if (src->surf.size > 0) {
> +  src_w = src->surf.logical_level0_px.width;
> +  src_h = src->surf.logical_level0_px.height;
> +   } else {
> +  src_w = src->logical_width0;
> +  src_h = src->logical_height0;
> +   }
> +
> +   if (dst->surf.size > 0) {
> +  dst_w = dst->surf.logical_level0_px.width;
> +  dst_h = dst->surf.logical_level0_px.height;
> +   } else {
> +  dst_w = dst->logical_width0;
> +  dst_h = dst->logical_height0;
> +   }
> +
> brw_blorp_blit_miptrees(brw,
> src, 0 /* level */, 0 /* layer */,
> src->format, SWIZZLE_XYZW,
> dst, 0 /* level */, 0 /* layer */, dst->format,
> -   0, 0,
> -   src->logical_width0, src->logical_height0,
> -   0, 0,
> -   dst->logical_width0, dst->logical_height0,
> +   0, 0, src_w, src_h,
> +   0, 0, dst_w, dst_h,
> GL_NEAREST, false, false /*mirror x, y*/,
> false, false);
>  
> if (src->stencil_mt) {
> +  if (src->stencil_mt->surf.size > 0) {
> + src_w = src->stencil_mt->surf.logical_level0_px.width;
> + src_h = src->stencil_mt->surf.logical_level0_px.height;
> +  } else {
> + src_w = src->stencil_mt->logical_width0;
> + src_h = src->stencil_mt->logical_height0;
> +  }
> +
> +  if (dst->stencil_mt->surf.size > 0) {
> + dst_w = dst->stencil_mt->surf.logical_level0_px.width;
> + dst_h = dst->stencil_mt->surf.logical_level0_px.height;
> +  } else {
> + dst_w = dst->stencil_mt->logical_width0;
> + dst_h = dst->stencil_mt->logical_height0;
> +  }
> +
>brw_blorp_blit_miptrees(brw,
>src->stencil_mt, 0 /* level */, 0 /* layer */,
>src->stencil_mt->format, SWIZZLE_XYZW,
>dst->stencil_mt, 0 /* level */, 0 /* layer */,
>dst->stencil_mt->format,
> -  0, 0,
> -  src->logical_width0, src->logical_height0,
> -  0, 0,
> -  dst->logical_width0, dst->logical_height0,
> +  0, 0, src_w, src_h,
> +  0, 0, dst_w, dst_h,
>GL_NEAREST, false, false /*mirror x, y*/,
>false, false /* decode/encode srgb */);
> }
> -- 
> 2.11.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/9] i965: Map the query results for the life of the bo

2017-06-14 Thread Kenneth Graunke
On Wednesday, June 14, 2017 3:50:12 PM PDT Kenneth Graunke wrote:
> On Friday, June 9, 2017 6:01:37 AM PDT Chris Wilson wrote:
> > If we map the bo upon creation, we can avoid the latency of mmapping it
> > when querying, and later use the asynchronous, persistent map of the
> > predicate to do a quick query.
> > 
> > Signed-off-by: Chris Wilson 
> > Cc: Kenneth Graunke 
> > Cc: Matt Turner 
> > ---
> >  src/mesa/drivers/dri/i965/brw_bufmgr.c| 15 +
> >  src/mesa/drivers/dri/i965/brw_bufmgr.h|  2 ++
> >  src/mesa/drivers/dri/i965/brw_context.h   |  1 +
> >  src/mesa/drivers/dri/i965/gen6_queryobj.c | 37 
> > ++-
> >  4 files changed, 44 insertions(+), 11 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
> > b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> > index 01590a0b0a..9028b538c6 100644
> > --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> > +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> > @@ -775,6 +775,21 @@ brw_bo_map(struct brw_context *brw, struct brw_bo *bo, 
> > unsigned flags)
> >return brw_bo_map_gtt(brw, bo, flags);
> >  }
> >  
> > +void
> > +brw_bo_map_sync(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
> > +{
> > +   unsigned domain;
> > +
> > +   if (bo->tiling_mode != I915_TILING_NONE && !(flags & MAP_RAW))
> > +  domain = I915_GEM_DOMAIN_GTT;
> > +   else if (can_map_cpu(bo, flags))
> > +  domain = I915_GEM_DOMAIN_CPU;
> > +   else
> > +  domain = I915_GEM_DOMAIN_GTT;
> > +
> > +   set_domain(brw, __func__, bo, domain, flags & MAP_WRITE ? domain : 0);
> > +}
> > +
> >  int
> >  brw_bo_unmap(struct brw_bo *bo)
> >  {
> > diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h 
> > b/src/mesa/drivers/dri/i965/brw_bufmgr.h
> > index 3a397be695..214b75bf1a 100644
> > --- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
> > +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
> > @@ -196,6 +196,8 @@ void brw_bo_unreference(struct brw_bo *bo);
> >   */
> >  MUST_CHECK void *brw_bo_map(struct brw_context *brw, struct brw_bo *bo, 
> > unsigned flags);
> >  
> > +void brw_bo_map_sync(struct brw_context *brw, struct brw_bo *bo, unsigned 
> > flags);
> > +
> >  /**
> >   * Reduces the refcount on the userspace mapping of the buffer
> >   * object.
> > diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> > b/src/mesa/drivers/dri/i965/brw_context.h
> > index c5acb83ad0..117b1ecdca 100644
> > --- a/src/mesa/drivers/dri/i965/brw_context.h
> > +++ b/src/mesa/drivers/dri/i965/brw_context.h
> > @@ -419,6 +419,7 @@ struct brw_query_object {
> >  
> > /** Last query BO associated with this query. */
> > struct brw_bo *bo;
> > +   uint64_t *results;
> >  
> > /** Last index in bo with query data for this object. */
> > int last_index;
> > diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c 
> > b/src/mesa/drivers/dri/i965/gen6_queryobj.c
> > index f913f986ae..18af608166 100644
> > --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
> > +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
> > @@ -221,7 +221,9 @@ gen6_queryobj_get_results(struct gl_context *ctx,
> > if (query->bo == NULL)
> >return;
> >  
> > -   uint64_t *results = brw_bo_map(brw, query->bo, MAP_READ);
> > +   brw_bo_map_sync(brw, query->bo, MAP_READ | MAP_COHERENT);
> > +   uint64_t *results = query->results;
> > +
> > switch (query->Base.Target) {
> > case GL_TIME_ELAPSED:
> >/* The query BO contains the starting and ending timestamps.
> > @@ -296,7 +298,6 @@ gen6_queryobj_get_results(struct gl_context *ctx,
> > default:
> >unreachable("Unrecognized query target in 
> > brw_queryobj_get_results()");
> > }
> > -   brw_bo_unmap(query->bo);
> >  
> > /* Now that we've processed the data stored in the query's buffer 
> > object,
> >  * we can release it.
> > @@ -307,6 +308,23 @@ gen6_queryobj_get_results(struct gl_context *ctx,
> > query->Base.Ready = true;
> >  }
> >  
> > +static int gen6_alloc_query(struct brw_context *brw,
> > +struct brw_query_object *query)
> > +{
> > +   /* Since we're starting a new query, we need to throw away old results. 
> > */
> > +   if (query->bo)
> > +  brw_bo_unreference(query->bo);
> > +
> > +   query->bo = brw_bo_alloc(brw->bufmgr, "query results", 4096, 4096);
> > +   query->results = brw_bo_map(brw, query->bo,
> > +   MAP_READ | MAP_COHERENT | MAP_ASYNC);
> 
> I don't understand why you're using MAP_ASYNC here.  We're allocating a new
> BO here, and not using the BO_ALLOC_FOR_RENDER flag, so it will be idle.
> (brw_bufmgr.c:297 should ensure we never get a busy BO - if the cached BOs
> are busy, it will just allocate us a new one.)
> 
> So, MAP_ASYNC shouldn't avoid a stall.  It does, however, skip the
> SET_DOMAIN call, which means that it may not have the right domain
> for our new coherent mapping.  Hence, you need to whack it 

Re: [Mesa-dev] [PATCH 03/11] intel/genxml: Combine DataDWord{0, 1} fields in to ImmediateData field

2017-06-14 Thread Rafael Antognolli
Reviewed-by: Rafael Antognolli 

On Tue, Jun 13, 2017 at 11:28:22AM -0700, Anuj Phogat wrote:
> Signed-off-by: Anuj Phogat 
> ---
>  src/intel/genxml/gen10.xml | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/src/intel/genxml/gen10.xml b/src/intel/genxml/gen10.xml
> index 04d89cb..64041c1 100644
> --- a/src/intel/genxml/gen10.xml
> +++ b/src/intel/genxml/gen10.xml
> @@ -3386,8 +3386,7 @@
>  
>  
>  
> -
> -
> +
>
>  
>
> -- 
> 2.9.3
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/11] intel/genxml: Rename StartInstanceLocation to StartingInstanceLocation

2017-06-14 Thread Rafael Antognolli
Hi Anuj,

On Tue, Jun 13, 2017 at 11:28:24AM -0700, Anuj Phogat wrote:
> This is required because we already have a macro defined with
> the name StartInstanceLocation.
> 
> Signed-off-by: Anuj Phogat 
> ---
>  src/intel/genxml/gen10.xml | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/intel/genxml/gen10.xml b/src/intel/genxml/gen10.xml
> index 06260cf..d2bb130 100644
> --- a/src/intel/genxml/gen10.xml
> +++ b/src/intel/genxml/gen10.xml
> @@ -2570,7 +2570,7 @@
>
>  
>  
> -  
> +  

This looks weird since it is the only value in this instruction whose
name doesn't look like a macro (with all caps). But it's not the
first such case in the XML files, so it's probably fine:

Reviewed-by: Rafael Antognolli 

>
>  
>  
> -- 
> 2.9.3
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: fix 'make check' by moving bindless functions at the right place

2017-06-14 Thread Ian Romanick
On 06/14/2017 09:30 AM, Aaron Watry wrote:
> Looks like Mark beat me to reporting this one (was still bisecting
> when this patch was sent).
> 
> Tested-by: Aaron Watry 

If dispatch_sanity breaks, it's almost always the most recent commit in
src/mapi/glapi/gen.  That may save you some time bisecting when this
happens again. :)

> On Wed, Jun 14, 2017 at 11:08 AM, Samuel Pitoiset
>  wrote:
>> Fixes: 5f249b9f05e ("mapi: add GL_ARB_bindless_texture entry points")
>> Reported-by: Mark Janes 
>> Signed-off-by: Samuel Pitoiset 
>> ---
>>  src/mesa/main/tests/dispatch_sanity.cpp | 36 
>> -
>>  1 file changed, 18 insertions(+), 18 deletions(-)
>>
>> diff --git a/src/mesa/main/tests/dispatch_sanity.cpp 
>> b/src/mesa/main/tests/dispatch_sanity.cpp
>> index 47d0aa63bf4..724c22ee9b3 100644
>> --- a/src/mesa/main/tests/dispatch_sanity.cpp
>> +++ b/src/mesa/main/tests/dispatch_sanity.cpp
>> @@ -965,6 +965,24 @@ const struct function 
>> common_desktop_functions_possible[] = {
>> { "glBufferPageCommitmentARB", 43, -1 },
>> { "glNamedBufferPageCommitmentARB", 43, -1 },
>>
>> +   /* GL_ARB_bindless_texture */
>> +   { "glGetTextureHandleARB", 40, -1 },
>> +   { "glGetTextureSamplerHandleARB", 40, -1 },
>> +   { "glMakeTextureHandleResidentARB", 40, -1 },
>> +   { "glMakeTextureHandleNonResidentARB", 40, -1 },
>> +   { "glIsTextureHandleResidentARB", 40, -1 },
>> +   { "glGetImageHandleARB", 40, -1 },
>> +   { "glMakeImageHandleResidentARB", 40, -1 },
>> +   { "glMakeImageHandleNonResidentARB", 40, -1 },
>> +   { "glIsImageHandleResidentARB", 40, -1 },
>> +   { "glUniformHandleui64ARB", 40, -1 },
>> +   { "glUniformHandleui64vARB", 40, -1 },
>> +   { "glProgramUniformHandleui64ARB", 40, -1 },
>> +   { "glProgramUniformHandleui64vARB", 40, -1 },
>> +   { "glVertexAttribL1ui64ARB", 40, -1 },
>> +   { "glVertexAttribL1ui64vARB", 40, -1 },
>> +   { "glGetVertexAttribLui64vARB", 40, -1 },
>> +
>> { NULL, 0, -1 }
>>  };
>>
>> @@ -2374,24 +2392,6 @@ const struct function gles2_functions_possible[] = {
>> /* GL_KHR_blend_equation_advanced */
>> { "glBlendBarrierKHR", 20, -1 },
>>
>> -   /* GL_ARB_bindless_texture */
>> -   { "glGetTextureHandleARB", 40, -1 },
>> -   { "glGetTextureSamplerHandleARB", 40, -1 },
>> -   { "glMakeTextureHandleResidentARB", 40, -1 },
>> -   { "glMakeTextureHandleNonResidentARB", 40, -1 },
>> -   { "glIsTextureHandleResidentARB", 40, -1 },
>> -   { "glGetImageHandleARB", 40, -1 },
>> -   { "glMakeImageHandleResidentARB", 40, -1 },
>> -   { "glMakeImageHandleNonResidentARB", 40, -1 },
>> -   { "glIsImageHandleResidentARB", 40, -1 },
>> -   { "glUniformHandleui64ARB", 40, -1 },
>> -   { "glUniformHandleui64vARB", 40, -1 },
>> -   { "glProgramUniformHandleui64ARB", 40, -1 },
>> -   { "glProgramUniformHandleui64vARB", 40, -1 },
>> -   { "glVertexAttribL1ui64ARB", 40, -1 },
>> -   { "glVertexAttribL1ui64vARB", 40, -1 },
>> -   { "glGetVertexAttribLui64vARB", 40, -1 },
>> -
>> { NULL, 0, -1 }
>>  };
>>
>> --
>> 2.13.1
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: fix 'make check' by moving bindless functions at the right place

2017-06-14 Thread Ian Romanick
Reviewed-by: Ian Romanick 

On 06/14/2017 09:08 AM, Samuel Pitoiset wrote:
> Fixes: 5f249b9f05e ("mapi: add GL_ARB_bindless_texture entry points")
> Reported-by: Mark Janes 
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/mesa/main/tests/dispatch_sanity.cpp | 36 
> -
>  1 file changed, 18 insertions(+), 18 deletions(-)
> 
> diff --git a/src/mesa/main/tests/dispatch_sanity.cpp 
> b/src/mesa/main/tests/dispatch_sanity.cpp
> index 47d0aa63bf4..724c22ee9b3 100644
> --- a/src/mesa/main/tests/dispatch_sanity.cpp
> +++ b/src/mesa/main/tests/dispatch_sanity.cpp
> @@ -965,6 +965,24 @@ const struct function 
> common_desktop_functions_possible[] = {
> { "glBufferPageCommitmentARB", 43, -1 },
> { "glNamedBufferPageCommitmentARB", 43, -1 },
>  
> +   /* GL_ARB_bindless_texture */
> +   { "glGetTextureHandleARB", 40, -1 },
> +   { "glGetTextureSamplerHandleARB", 40, -1 },
> +   { "glMakeTextureHandleResidentARB", 40, -1 },
> +   { "glMakeTextureHandleNonResidentARB", 40, -1 },
> +   { "glIsTextureHandleResidentARB", 40, -1 },
> +   { "glGetImageHandleARB", 40, -1 },
> +   { "glMakeImageHandleResidentARB", 40, -1 },
> +   { "glMakeImageHandleNonResidentARB", 40, -1 },
> +   { "glIsImageHandleResidentARB", 40, -1 },
> +   { "glUniformHandleui64ARB", 40, -1 },
> +   { "glUniformHandleui64vARB", 40, -1 },
> +   { "glProgramUniformHandleui64ARB", 40, -1 },
> +   { "glProgramUniformHandleui64vARB", 40, -1 },
> +   { "glVertexAttribL1ui64ARB", 40, -1 },
> +   { "glVertexAttribL1ui64vARB", 40, -1 },
> +   { "glGetVertexAttribLui64vARB", 40, -1 },
> +
> { NULL, 0, -1 }
>  };
>  
> @@ -2374,24 +2392,6 @@ const struct function gles2_functions_possible[] = {
> /* GL_KHR_blend_equation_advanced */
> { "glBlendBarrierKHR", 20, -1 },
>  
> -   /* GL_ARB_bindless_texture */
> -   { "glGetTextureHandleARB", 40, -1 },
> -   { "glGetTextureSamplerHandleARB", 40, -1 },
> -   { "glMakeTextureHandleResidentARB", 40, -1 },
> -   { "glMakeTextureHandleNonResidentARB", 40, -1 },
> -   { "glIsTextureHandleResidentARB", 40, -1 },
> -   { "glGetImageHandleARB", 40, -1 },
> -   { "glMakeImageHandleResidentARB", 40, -1 },
> -   { "glMakeImageHandleNonResidentARB", 40, -1 },
> -   { "glIsImageHandleResidentARB", 40, -1 },
> -   { "glUniformHandleui64ARB", 40, -1 },
> -   { "glUniformHandleui64vARB", 40, -1 },
> -   { "glProgramUniformHandleui64ARB", 40, -1 },
> -   { "glProgramUniformHandleui64vARB", 40, -1 },
> -   { "glVertexAttribL1ui64ARB", 40, -1 },
> -   { "glVertexAttribL1ui64vARB", 40, -1 },
> -   { "glGetVertexAttribLui64vARB", 40, -1 },
> -
> { NULL, 0, -1 }
>  };
>  
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/9] i965: Map the query results for the life of the bo

2017-06-14 Thread Kenneth Graunke
On Friday, June 9, 2017 6:01:37 AM PDT Chris Wilson wrote:
> If we map the bo upon creation, we can avoid the latency of mmapping it
> when querying, and later use the asynchronous, persistent map of the
> predicate to do a quick query.
> 
> Signed-off-by: Chris Wilson 
> Cc: Kenneth Graunke 
> Cc: Matt Turner 
> ---
>  src/mesa/drivers/dri/i965/brw_bufmgr.c| 15 +
>  src/mesa/drivers/dri/i965/brw_bufmgr.h|  2 ++
>  src/mesa/drivers/dri/i965/brw_context.h   |  1 +
>  src/mesa/drivers/dri/i965/gen6_queryobj.c | 37 
> ++-
>  4 files changed, 44 insertions(+), 11 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
> b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> index 01590a0b0a..9028b538c6 100644
> --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> @@ -775,6 +775,21 @@ brw_bo_map(struct brw_context *brw, struct brw_bo *bo, 
> unsigned flags)
>return brw_bo_map_gtt(brw, bo, flags);
>  }
>  
> +void
> +brw_bo_map_sync(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
> +{
> +   unsigned domain;
> +
> +   if (bo->tiling_mode != I915_TILING_NONE && !(flags & MAP_RAW))
> +  domain = I915_GEM_DOMAIN_GTT;
> +   else if (can_map_cpu(bo, flags))
> +  domain = I915_GEM_DOMAIN_CPU;
> +   else
> +  domain = I915_GEM_DOMAIN_GTT;
> +
> +   set_domain(brw, __func__, bo, domain, flags & MAP_WRITE ? domain : 0);
> +}
> +
>  int
>  brw_bo_unmap(struct brw_bo *bo)
>  {
> diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h 
> b/src/mesa/drivers/dri/i965/brw_bufmgr.h
> index 3a397be695..214b75bf1a 100644
> --- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
> +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
> @@ -196,6 +196,8 @@ void brw_bo_unreference(struct brw_bo *bo);
>   */
>  MUST_CHECK void *brw_bo_map(struct brw_context *brw, struct brw_bo *bo, 
> unsigned flags);
>  
> +void brw_bo_map_sync(struct brw_context *brw, struct brw_bo *bo, unsigned 
> flags);
> +
>  /**
>   * Reduces the refcount on the userspace mapping of the buffer
>   * object.
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index c5acb83ad0..117b1ecdca 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -419,6 +419,7 @@ struct brw_query_object {
>  
> /** Last query BO associated with this query. */
> struct brw_bo *bo;
> +   uint64_t *results;
>  
> /** Last index in bo with query data for this object. */
> int last_index;
> diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c 
> b/src/mesa/drivers/dri/i965/gen6_queryobj.c
> index f913f986ae..18af608166 100644
> --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
> +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
> @@ -221,7 +221,9 @@ gen6_queryobj_get_results(struct gl_context *ctx,
> if (query->bo == NULL)
>return;
>  
> -   uint64_t *results = brw_bo_map(brw, query->bo, MAP_READ);
> +   brw_bo_map_sync(brw, query->bo, MAP_READ | MAP_COHERENT);
> +   uint64_t *results = query->results;
> +
> switch (query->Base.Target) {
> case GL_TIME_ELAPSED:
>/* The query BO contains the starting and ending timestamps.
> @@ -296,7 +298,6 @@ gen6_queryobj_get_results(struct gl_context *ctx,
> default:
>unreachable("Unrecognized query target in brw_queryobj_get_results()");
> }
> -   brw_bo_unmap(query->bo);
>  
> /* Now that we've processed the data stored in the query's buffer object,
>  * we can release it.
> @@ -307,6 +308,23 @@ gen6_queryobj_get_results(struct gl_context *ctx,
> query->Base.Ready = true;
>  }
>  
> +static int gen6_alloc_query(struct brw_context *brw,
> +struct brw_query_object *query)
> +{
> +   /* Since we're starting a new query, we need to throw away old results. */
> +   if (query->bo)
> +  brw_bo_unreference(query->bo);
> +
> +   query->bo = brw_bo_alloc(brw->bufmgr, "query results", 4096, 4096);
> +   query->results = brw_bo_map(brw, query->bo,
> +   MAP_READ | MAP_COHERENT | MAP_ASYNC);

I don't understand why you're using MAP_ASYNC here.  We're allocating a new
BO here, and not using the BO_ALLOC_FOR_RENDER flag, so it will be idle.
(brw_bufmgr.c:297 should ensure we never get a busy BO - if the cached BOs
are busy, it will just allocate us a new one.)

So, MAP_ASYNC shouldn't avoid a stall.  It does, however, skip the
SET_DOMAIN call, which means that it may not have the right domain
for our new coherent mapping.  Hence, you need to whack it later with
your new brw_bo_map_sync() helper.

I think you can drop MAP_ASYNC, and drop brw_bo_map_sync() entirely,
with no ill-effects.  Or am I wrong?

> +
> +   /* For ARB_query_buffer_object: The result is not available */
> +   set_query_availability(brw, query, false);
> +
> +   return 0;
> +}
> +
>  /**
>   * Driver 

Re: [Mesa-dev] [PATCH 06/15] i965: Prepare slice validator for isl based miptrees

2017-06-14 Thread Nanley Chery
On Tue, Jun 13, 2017 at 05:50:04PM +0300, Topi Pohjolainen wrote:
> Signed-off-by: Topi Pohjolainen 
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 8 +++-
>  1 file changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> index 8479b285cb..0b85bc12ef 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> @@ -748,7 +748,13 @@ intel_miptree_check_level_layer(const struct 
> intel_mipmap_tree *mt,
>  
> assert(level >= mt->first_level);
> assert(level <= mt->last_level);
> -   assert(layer < mt->level[level].depth);
> +
> +   if (mt->surf.size > 0)
> +  assert(layer < (mt->surf.dim == ISL_SURF_DIM_3D ?
> + mt->surf.phys_level0_sa.depth :

Shouldn't we be minifying the depth here?

> + mt->surf.phys_level0_sa.array_len));
> +   else
> +  assert(layer < mt->level[level].depth);
>  }
>  
>  void intel_miptree_reference(struct intel_mipmap_tree **dst,
> -- 
> 2.11.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/15] i965/miptree: Refactor mapping table alloc

2017-06-14 Thread Nanley Chery
On Wed, Jun 14, 2017 at 09:45:46PM +0300, Pohjolainen, Topi wrote:
> On Tue, Jun 13, 2017 at 04:31:26PM -0700, Nanley Chery wrote:
> > On Tue, Jun 13, 2017 at 05:49:59PM +0300, Topi Pohjolainen wrote:
> > > Signed-off-by: Topi Pohjolainen 
> > > ---
> > >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 29 
> > > +--
> > >  1 file changed, 27 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> > > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > > index 253d833b13..78a223a7f3 100644
> > > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > > @@ -285,6 +285,26 @@ 
> > > intel_depth_format_for_depthstencil_format(mesa_format format) {
> > > }
> > >  }
> > >  
> > > +static bool
> > > +create_mapping_table(GLenum target, unsigned first_level, unsigned 
> > > last_level,
> > > + unsigned depth0, struct intel_mipmap_level *table)
> > > +{
> > > +   for (unsigned level = first_level; level <= last_level; level++) {
> > > +  const unsigned d = target == GL_TEXTURE_3D ? depth0 >> level : 
> > > depth0;
> > 
> > There's a bug here. If the target is GL_TEXTURE_3D we should
> > minify(depth0, level) to avoid setting a depth of 0.
> 
> Oops, definitely.
> 
> > 
> > This seems to be more than a refactor. Prior to this patch,
> > brw_miptree_layout_gen6_hiz_stencil wouldn't shrink the number of slices
> > per mipmap level as the level increases, but does so now.
> 
> > Right. I actually missed that. How do you want to handle that? I could write a
> patch against brw_miptree_layout_gen6_hiz_stencil() doing the same thing there
> (modifying the argument given to intel_miptree_set_level_info() but keeping
> actual allocation size as it was in order to have space for level 0 qpitch).
> 

That should be sufficient.

> > 
> > -Nanley
> > 
> > > +
> > > +  table[level].slice = calloc(d, sizeof(*table[0].slice));
> > > +  if (!table[level].slice)
> > > + goto unwind;
> > > +   }
> > > +
> > > +   return true;
> > > +
> > > +unwind:
> > > +   for (unsigned level = first_level; level <= last_level; level++)
> > > +  free(table[level].slice);
> > > +
> > > +   return false;
> > > +}
> > >  
> > >  /**
> > >   * @param for_bo Indicates that the caller is
> > > @@ -424,6 +444,12 @@ intel_miptree_create_layout(struct brw_context *brw,
> > >}
> > > }
> > >  
> > > +   if (!create_mapping_table(target, first_level, last_level, depth0,
> > > + mt->level)) {
> > > +  free(mt);
> > > +  return NULL;
> > > +   }
> > > +
> > > /* Set array_layout to ALL_SLICES_AT_EACH_LOD when array_spacing_lod0 
> > > can
> > >  * be used. array_spacing_lod0 is only used for non-IMS MSAA surfaces 
> > > on
> > >  * Gen 7 and 8. On Gen 8 and 9 this layout is not available but it is 
> > > still
> > > @@ -1103,9 +1129,8 @@ intel_miptree_set_level_info(struct 
> > > intel_mipmap_tree *mt,
> > > DBG("%s level %d, depth %d, offset %d,%d\n", __func__,
> > > level, d, x, y);
> > >  
> > > -   assert(mt->level[level].slice == NULL);
> > > +   assert(mt->level[level].slice);
> > >  
> > > -   mt->level[level].slice = calloc(d, sizeof(*mt->level[0].slice));
> > > mt->level[level].slice[0].x_offset = mt->level[level].level_x;
> > > mt->level[level].slice[0].y_offset = mt->level[level].level_y;
> > >  }
> > > -- 
> > > 2.11.0
> > > 
> > > ___
> > > mesa-dev mailing list
> > > mesa-dev@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] ac: Use mov_dpp for derivatives.

2017-06-14 Thread Connor Abbott
I was looking into WQM stuff today, and I realized that LLVM will no
longer mark this instruction as needing WQM, which seems like a
problem. Seems like we need a patch to LLVM. Other uses of DPP (e.g.
for the subgroup reduction stuff) won't want WQM, so I'm not sure
what's the best approach there. If we add an attribute, will LLVM
guarantee that we won't remove it?

On Sat, Jun 10, 2017 at 1:05 PM, Bas Nieuwenhuizen
 wrote:
> Slightly faster than bpermute, and seems supported since at least
> LLVM 3.9.
>
> v2: Since this supersedes bpermute, remove the bpermute code.
> Signed-off-by: Bas Nieuwenhuizen 
> ---
>  src/amd/common/ac_llvm_build.c   | 47 
> 
>  src/amd/common/ac_llvm_build.h   |  2 +-
>  src/amd/common/ac_nir_to_llvm.c  |  8 +++---
>  src/gallium/drivers/radeonsi/si_pipe.c   |  2 +-
>  src/gallium/drivers/radeonsi/si_pipe.h   |  2 +-
>  src/gallium/drivers/radeonsi/si_shader.c |  4 +--
>  6 files changed, 38 insertions(+), 27 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 237e9291d41..99d41bf52d6 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -783,41 +783,52 @@ ac_get_thread_id(struct ac_llvm_context *ctx)
>   */
>  LLVMValueRef
>  ac_build_ddxy(struct ac_llvm_context *ctx,
> - bool has_ds_bpermute,
> + bool has_mov_dpp,
>   uint32_t mask,
>   int idx,
>   LLVMValueRef lds,
>   LLVMValueRef val)
>  {
> -   LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2];
> +   LLVMValueRef thread_id, tl, trbl, args[5];
> LLVMValueRef result;
>
> -   thread_id = ac_get_thread_id(ctx);
> +   if (has_mov_dpp) {
> +   uint32_t tl_ctrl = 0, trbl_ctrl = 0;
>
> -   tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
> - LLVMConstInt(ctx->i32, mask, false), "");
> -
> -   trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
> -   LLVMConstInt(ctx->i32, idx, false), "");
> +   for (unsigned i = 0; i < 4; ++i) {
> +   tl_ctrl |= (i & mask) << (2 * i);
> +   trbl_ctrl |= ((i & mask) + idx) << (2 * i);
> +   }
>
> -   if (has_ds_bpermute) {
> -   args[0] = LLVMBuildMul(ctx->builder, tl_tid,
> -  LLVMConstInt(ctx->i32, 4, false), "");
> -   args[1] = val;
> +   args[0] = val;
> +   args[1] = LLVMConstInt(ctx->i32, tl_ctrl, false);
> +   args[2] = LLVMConstInt(ctx->i32, 0xf, false);
> +   args[3] = LLVMConstInt(ctx->i32, 0xf, false);
> +   args[4] = LLVMConstInt(ctx->i1, 1, false);
> tl = ac_build_intrinsic(ctx,
> -   "llvm.amdgcn.ds.bpermute", ctx->i32,
> -   args, 2,
> +   "llvm.amdgcn.mov.dpp.i32", ctx->i32,
> +   args, 5,
> AC_FUNC_ATTR_READNONE |
> AC_FUNC_ATTR_CONVERGENT);
>
> -   args[0] = LLVMBuildMul(ctx->builder, trbl_tid,
> -  LLVMConstInt(ctx->i32, 4, false), "");
> +   args[1] = LLVMConstInt(ctx->i32, trbl_ctrl, false);
> trbl = ac_build_intrinsic(ctx,
> - "llvm.amdgcn.ds.bpermute", ctx->i32,
> - args, 2,
> + "llvm.amdgcn.mov.dpp.i32", ctx->i32,
> + args, 5,
>   AC_FUNC_ATTR_READNONE |
>   AC_FUNC_ATTR_CONVERGENT);
> } else {
> +   LLVMValueRef tl_tid, trbl_tid;
> +
> +   thread_id = ac_get_thread_id(ctx);
> +
> +   tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
> +   LLVMConstInt(ctx->i32, mask, false), "");
> +
> +   trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
> +   LLVMConstInt(ctx->i32, idx, false), 
> "");
> +
> +
> LLVMValueRef store_ptr, load_ptr0, load_ptr1;
>
> store_ptr = ac_build_gep0(ctx, lds, thread_id);
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index ebb78fbd79b..14260b05018 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -161,7 +161,7 @@ ac_get_thread_id(struct ac_llvm_context *ctx);
>
>  LLVMValueRef
>  ac_build_ddxy(struct ac_llvm_context *ctx,
> - bool has_ds_bpermute,
> + bool has_mov_dpp,
>   uint32_t mask,
>   int idx,
> 

Re: [Mesa-dev] [PATCH 2/5] st/mesa: remove redundant sample_mask checking

2017-06-14 Thread Samuel Pitoiset



On 06/14/2017 11:41 PM, Marek Olšák wrote:

From: Marek Olšák 

cso does that too
---
  src/mesa/state_tracker/st_atom_msaa.c | 7 +--
  src/mesa/state_tracker/st_context.h   | 1 -
  2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_msaa.c 
b/src/mesa/state_tracker/st_atom_msaa.c
index 0bdb9b2..814077f 100644
--- a/src/mesa/state_tracker/st_atom_msaa.c
+++ b/src/mesa/state_tracker/st_atom_msaa.c
@@ -55,26 +55,21 @@ void st_update_sample_mask( struct st_context *st )
  Also, there's an interface restriction here in theory it is
  encouraged this mask not be the same at each pixel. */
   sample_mask = (1 << nr_bits) - 1;
   if (st->ctx->Multisample.SampleCoverageInvert)
  sample_mask = ~sample_mask;
}
if (st->ctx->Multisample.SampleMask)
   sample_mask &= st->ctx->Multisample.SampleMaskValue;
 }
  
-   /* mask off unused bits or don't care? */

-
-   if (sample_mask != st->state.sample_mask) {
-  st->state.sample_mask = sample_mask;
-  cso_set_sample_mask(st->cso_context, sample_mask);
-   }
+   cso_set_sample_mask(st->cso_context, sample_mask);


Nice one! Sooo, we have three similar checks, st/mesa, cso and radeonsi, 
fun times. :)



  }
  
  void st_update_sample_shading( struct st_context *st )

  {
 if (!st->fp)
return;
  
 if (!st->ctx->Extensions.ARB_sample_shading)

return;
  
diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h

index 6497587..2fe9d92 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -149,21 +149,20 @@ struct st_context
unsigned fb_height;
unsigned fb_num_samples;
unsigned fb_num_layers;
struct pipe_scissor_state scissor[PIPE_MAX_VIEWPORTS];
struct pipe_viewport_state viewport[PIPE_MAX_VIEWPORTS];
struct {
   unsigned num;
   boolean include;
   struct pipe_scissor_state rects[PIPE_MAX_WINDOW_RECTANGLES];
} window_rects;
-  unsigned sample_mask;
  
GLuint poly_stipple[32];  /**< In OpenGL's bottom-to-top order */
  
GLuint fb_orientation;

 } state;
  
 uint64_t dirty; /**< dirty states */
  
 /** This masks out unused shader resources. Only valid in draw calls. */

 uint64_t active_states;


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/11] i965: Use blorp for depth/stencil clears on gen6+

2017-06-14 Thread Jason Ekstrand
On Wed, Jun 14, 2017 at 12:00 PM, Pohjolainen, Topi <
topi.pohjolai...@gmail.com> wrote:

> On Tue, Jun 06, 2017 at 10:00:06PM -0700, Jason Ekstrand wrote:
> > ---
> >  src/mesa/drivers/dri/i965/brw_blorp.c | 106
> ++
> >  src/mesa/drivers/dri/i965/brw_blorp.h |   4 ++
> >  src/mesa/drivers/dri/i965/brw_clear.c |   6 ++
> >  3 files changed, 116 insertions(+)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c
> b/src/mesa/drivers/dri/i965/brw_blorp.c
> > index 38925d9..a46b624 100644
> > --- a/src/mesa/drivers/dri/i965/brw_blorp.c
> > +++ b/src/mesa/drivers/dri/i965/brw_blorp.c
> > @@ -930,6 +930,112 @@ brw_blorp_clear_color(struct brw_context *brw,
> struct gl_framebuffer *fb,
> >  }
> >
> >  void
> > +brw_blorp_clear_depth_stencil(struct brw_context *brw,
> > +  struct gl_framebuffer *fb,
> > +  GLbitfield mask, bool partial_clear)
> > +{
> > +   const struct gl_context *ctx = &brw->ctx;
> > +   struct gl_renderbuffer *depth_rb =
> > +  fb->Attachment[BUFFER_DEPTH].Renderbuffer;
> > +   struct gl_renderbuffer *stencil_rb =
> > +  fb->Attachment[BUFFER_STENCIL].Renderbuffer;
> > +
> > +   if (!depth_rb || ctx->Depth.Mask == GL_FALSE)
> > +  mask &= ~BUFFER_BIT_DEPTH;
> > +
> > +   if (!stencil_rb || (ctx->Stencil.WriteMask[0] & 0xff) == 0)
> > +  mask &= ~BUFFER_BIT_STENCIL;
> > +
> > +   if (!(mask & (BUFFER_BITS_DEPTH_STENCIL)))
> > +  return;
> > +
> > +   uint32_t x0, x1, y0, y1, rb_name, rb_height;
> > +   if (depth_rb) {
> > +  rb_name = depth_rb->Name;
> > +  rb_height = depth_rb->Height;
> > +  if (stencil_rb) {
> > + assert(depth_rb->Width == stencil_rb->Width);
> > + assert(depth_rb->Height == stencil_rb->Height);
> > +  }
> > +   } else {
> > +  assert(stencil_rb);
> > +  rb_name = stencil_rb->Name;
> > +  rb_height = stencil_rb->Height;
> > +   }
> > +
> > +   x0 = fb->_Xmin;
> > +   x1 = fb->_Xmax;
> > +   if (rb_name != 0) {
> > +  y0 = fb->_Ymin;
> > +  y1 = fb->_Ymax;
> > +   } else {
> > +  y0 = rb_height - fb->_Ymax;
> > +  y1 = rb_height - fb->_Ymin;
> > +   }
> > +
> > +   /* If the clear region is empty, just return. */
> > +   if (x0 == x1 || y0 == y1)
> > +  return;
> > +
> > +   unsigned level, layer, num_layers;
> > +   struct isl_surf isl_tmp[4];
> > +   struct blorp_surf depth_surf, stencil_surf;
> > +
> > +   if (mask & BUFFER_BIT_DEPTH) {
> > +  struct intel_renderbuffer *irb = intel_renderbuffer(depth_rb);
> > +  struct intel_mipmap_tree *depth_mt =
> > + find_miptree(GL_DEPTH_BUFFER_BIT, irb);
> > +
> > +  level = irb->mt_level;
> > +  layer = irb_logical_mt_layer(irb);
> > +  num_layers = fb->MaxNumLayers ? irb->layer_count : 1;
> > +
> > +  intel_miptree_set_all_slices_need_depth_resolve(depth_mt, level);
> > +
> > +  unsigned depth_level = level;
> > +  blorp_surf_for_miptree(brw, &depth_surf, depth_mt, true,
> > + (1 << ISL_AUX_USAGE_HIZ),
> > + &depth_level, layer, num_layers,
> &isl_tmp[0]);
> > +  assert(depth_level == level);
> > +   }
> > +
> > +   uint8_t stencil_mask = 0;
> > +   if (mask & BUFFER_BIT_STENCIL) {
> > +  struct intel_renderbuffer *irb = intel_renderbuffer(stencil_rb);
> > +  struct intel_mipmap_tree *stencil_mt =
> > + find_miptree(GL_STENCIL_BUFFER_BIT, irb);
> > +
> > +  if (mask & BUFFER_BIT_DEPTH) {
> > + assert(level == irb->mt_level);
> > + assert(layer == irb_logical_mt_layer(irb));
> > + assert(num_layers == fb->MaxNumLayers ? irb->layer_count : 1);
> > +  } else {
> > + level = irb->mt_level;
> > + layer = irb_logical_mt_layer(irb);
> > + num_layers = fb->MaxNumLayers ? irb->layer_count : 1;
> > +  }
> > +
> > +  stencil_mask = ctx->Stencil.WriteMask[0] & 0xff;
> > +
> > +  unsigned stencil_level = level;
> > +  blorp_surf_for_miptree(brw, &stencil_surf, stencil_mt, true,
> > + (1 << ISL_AUX_USAGE_HIZ),
>
> Why do we set hiz for stencil?
>
> I noticed that anv_blorp.c::anv_CmdClearDepthStencilImage() sets it to
> NONE
> for depth and stencil while get_blorp_surf_for_anv_image() has code to take
> the HIZ usage away for stencil (if given).
>

No reason.  I'm happy to make it 0 for no aux support on stencil.


> Otherwise looks good to me:
>
> Reviewed-by: Topi Pohjolainen 
>

Thanks!


> > + &stencil_level, layer, num_layers,
> &isl_tmp[2]);
> > +   }
> > +
> > +   assert((mask & BUFFER_BIT_DEPTH) || stencil_mask);
> > +
> > +   struct blorp_batch batch;
> > +   blorp_batch_init(&brw->blorp, &batch, brw, 0);
> > +   blorp_clear_depth_stencil(&batch, &depth_surf, &stencil_surf,
> > + level, layer, num_layers,
> > + x0, y0, x1, y1,
> > + (mask & BUFFER_BIT_DEPTH),
> ctx->Depth.Clear,
> > 

[Mesa-dev] [PATCH] radeonsi: remove useless check in si_set_min_samples()

2017-06-14 Thread Samuel Pitoiset
CSO already takes care of this.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/radeonsi/si_state.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 1cd1f9190e0..facbc87e310 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2978,9 +2978,6 @@ static void si_set_min_samples(struct pipe_context *ctx, 
unsigned min_samples)
 {
struct si_context *sctx = (struct si_context *)ctx;
 
-   if (sctx->ps_iter_samples == min_samples)
-   return;
-
sctx->ps_iter_samples = min_samples;
sctx->do_update_shaders = true;
 
-- 
2.13.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/9] i965: Replace open-coded gen6 queryobj offsets with simple helpers

2017-06-14 Thread Kenneth Graunke
On Friday, June 9, 2017 6:01:36 AM PDT Chris Wilson wrote:
> Lots of places open-coded the assumed layout of the predicate/results
> within the query object, replace those with simple helpers.
> 
> Signed-off-by: Chris Wilson 
> Cc: Kenneth Graunke 
> Cc: Matt Turner 
> ---
>  src/mesa/drivers/dri/i965/brw_conditional_render.c |  4 ++--
>  src/mesa/drivers/dri/i965/brw_context.h| 14 ++
>  src/mesa/drivers/dri/i965/gen6_queryobj.c  |  6 +++---
>  src/mesa/drivers/dri/i965/hsw_queryobj.c   | 18 +-
>  4 files changed, 28 insertions(+), 14 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_conditional_render.c 
> b/src/mesa/drivers/dri/i965/brw_conditional_render.c
> index 046a42b5f5..197c35efe2 100644
> --- a/src/mesa/drivers/dri/i965/brw_conditional_render.c
> +++ b/src/mesa/drivers/dri/i965/brw_conditional_render.c
> @@ -66,13 +66,13 @@ set_predicate_for_occlusion_query(struct brw_context *brw,
> query->bo,
> I915_GEM_DOMAIN_INSTRUCTION,
> 0, /* write domain */
> -   0 /* offset */);
> +   gen6_query_results_offset(query, 0));
> brw_load_register_mem64(brw,
> MI_PREDICATE_SRC1,
> query->bo,
> I915_GEM_DOMAIN_INSTRUCTION,
> 0, /* write domain */
> -   8 /* offset */);
> +   gen6_query_results_offset(query, 1));
>  }
>  
>  static void
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index d1503312d4..c5acb83ad0 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -427,6 +427,20 @@ struct brw_query_object {
> bool flushed;
>  };
>  
> +#define GEN6_QUERY_PREDICATE (2)
> +#define GEN6_QUERY_RESULTS (0)
> +
> +static inline unsigned gen6_query_predicate_offset(const struct 
> brw_query_object *query)
> +{
> +   return GEN6_QUERY_PREDICATE * sizeof(uint64_t);
> +}
> +
> +static inline unsigned gen6_query_results_offset(const struct 
> brw_query_object *query,
> +unsigned idx)
> +{
> +   return (GEN6_QUERY_RESULTS + idx) * sizeof(uint64_t);
> +}
> +
>  enum brw_gpu_ring {
> UNKNOWN_RING,
> RENDER_RING,
> diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c 
> b/src/mesa/drivers/dri/i965/gen6_queryobj.c
> index cc0f6f0b77..f913f986ae 100644
> --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
> +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
> @@ -71,7 +71,7 @@ set_query_availability(struct brw_context *brw, struct 
> brw_query_object *query,
>}
>  
>brw_emit_pipe_control_write(brw, flags,
> -  query->bo, 2 * sizeof(uint64_t),
> +  query->bo, 
> gen6_query_predicate_offset(query),
>available, 0);
> }
>  }
> @@ -318,7 +318,7 @@ gen6_begin_query(struct gl_context *ctx, struct 
> gl_query_object *q)
>  {
> struct brw_context *brw = brw_context(ctx);
> struct brw_query_object *query = (struct brw_query_object *)q;
> -   const int idx = 0;
> +   const int idx = GEN6_QUERY_RESULTS;
>  
> /* Since we're starting a new query, we need to throw away old results. */
> brw_bo_unreference(query->bo);
> @@ -407,7 +407,7 @@ gen6_end_query(struct gl_context *ctx, struct 
> gl_query_object *q)
>  {
> struct brw_context *brw = brw_context(ctx);
> struct brw_query_object *query = (struct brw_query_object *)q;
> -   const int idx = 1;
> +   const int idx = GEN6_QUERY_RESULTS + 1;
>  
> switch (query->Base.Target) {
> case GL_TIME_ELAPSED:
> diff --git a/src/mesa/drivers/dri/i965/hsw_queryobj.c 
> b/src/mesa/drivers/dri/i965/hsw_queryobj.c
> index b81ab3b6f8..cb1a2df52d 100644
> --- a/src/mesa/drivers/dri/i965/hsw_queryobj.c
> +++ b/src/mesa/drivers/dri/i965/hsw_queryobj.c
> @@ -191,7 +191,7 @@ load_overflow_data_to_cs_gprs(struct brw_context *brw,
>struct brw_query_object *query,
>int idx)
>  {
> -   int offset = idx * sizeof(uint64_t) * 4;
> +   int offset = gen6_query_results_offset(query, 0) + idx * sizeof(uint64_t) 
> * 4;

FWIW, I'm pretty sure 4 here is BRW_MAX_XFB_STREAMS.

I personally don't think that the code is more readable after patches
4-5, but I suppose that's a matter of taste.  I'd be inclined to leave the
code with hardcoded offsets, but add a comment to the top of the file
describing the layout (I thought we had one already, but it looks like we
don't).

That said, the patches look correct to me, so if someone else wants to
chime in and say that they prefer this style, I'm okay with them.

>  
> 

Re: [Mesa-dev] [PATCH 07/13] anv/blorp: Remove 3D subresource transition workaround

2017-06-14 Thread Nanley Chery
On Wed, Jun 14, 2017 at 09:32:22AM +0200, Iago Toral wrote:
> On Tue, 2017-06-13 at 11:41 -0700, Nanley Chery wrote:
> > For 3D image subresources undergoing a layout transition via
> > PipelineBarrier, we increase the number of fast-cleared layers to
> > match
> > the intended behaviour of KHR_maintenance1. When such subresources
> > undergo layout transitions between subpasses, we don't do this to
> > avoid
> > failing incorrect CTS tests. Instead, unify the behaviour in both
> > scenarios, and wait for the CTS tests to catch up. See CL  for
> > the
> > test fix.
> > 
> > On SKL+, this causes 3 test failures under:
> > dEQP-VK.pipeline.render_to_image.3d.*
> > 
> > Signed-off-by: Nanley Chery 
> > ---
> >  src/intel/vulkan/anv_blorp.c | 8 
> >  1 file changed, 4 insertions(+), 4 deletions(-)
> > 
> > diff --git a/src/intel/vulkan/anv_blorp.c
> > b/src/intel/vulkan/anv_blorp.c
> > index 421f860428..ff3d7b126f 100644
> > --- a/src/intel/vulkan/anv_blorp.c
> > +++ b/src/intel/vulkan/anv_blorp.c
> > @@ -1478,12 +1478,12 @@ anv_image_ccs_clear(struct anv_cmd_buffer
> > *cmd_buffer,
> >  
> >    /* Blorp likes to treat 2D_ARRAY and 3D the same. */
> >    uint32_t blorp_base_layer, blorp_layer_count;
> > -  if (view) {
> > - blorp_base_layer = view->base_array_layer;
> > - blorp_layer_count = view->array_len;
> > -  } else if (image->type == VK_IMAGE_TYPE_3D) {
> 
> Maybe add a comment referencing the requirement from
> VK_KHR_maintenance1 so it is clear why we ignore the view for 3D images
> here?
> 

Thank you for suggesting I add a comment. I actually meant to
double-check this before sending it out, but forgot. In the process of
writing the comment, I discovered that the desired behaviour for this
part of the extension is still being determined (Vulkan issue #849).

> > +  if (image->type == VK_IMAGE_TYPE_3D) {
> >   blorp_base_layer = 0;
> >   blorp_layer_count = extent.depth;
> > +  } else if (view) {
> > + blorp_base_layer = view->base_array_layer;
> > + blorp_layer_count = view->array_len;
> >    } else {
> >   blorp_base_layer = subresourceRange->baseArrayLayer;
> >   blorp_layer_count = anv_get_layerCount(image,
> > subresourceRange);
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/7] gallium: add pipe_blend_state::srgb_enable and the CAP

2017-06-14 Thread Jose Fonseca

On 14/06/17 22:16, Brian Paul wrote:

On 06/14/2017 02:38 PM, Jose Fonseca wrote:

On 14/06/17 21:21, Marek Olšák wrote:

On Wed, Jun 14, 2017 at 10:13 PM, Jose Fonseca 
wrote:

On 14/06/17 21:07, Marek Olšák wrote:


On Wed, Jun 14, 2017 at 9:45 PM, Jose Fonseca 
wrote:


On 14/06/17 17:12, Marek Olšák wrote:



On Tue, Jun 13, 2017 at 3:43 PM, Marek Olšák 
wrote:



On Tue, Jun 13, 2017 at 1:40 PM, Jose Fonseca 
wrote:



On 12/06/17 22:56, Marek Olšák wrote:




On Mon, Jun 12, 2017 at 10:43 PM, Jose Fonseca

wrote:




On 12/06/17 21:25, Marek Olšák wrote:





On Mon, Jun 12, 2017 at 9:51 PM, Jose Fonseca

wrote:





How does this help exactly?

Are applications actually rendering to the same FBO w/ and w/o
SRGB
decoding?

Or is the problem here GL_SRGB_WRITE state getting spuriously
dirtied
by
the
application?

And even if they do, why is toggling surface views in
framebuffer
state
so
expensive?

I don't object per se, but it looks like an unusual thing to
optimize
for.



set_framebuffer_state is basically a memory barrier. We have
different
caches between FB and textures and we have to flush them when a
texture is unbound from the framebuffer and set as a sampler
view.
To
keep things simple, set_framebuffer_state is the barrier.
When we

change the blend state, the barrier is avoided. Note that the
barrier
makes set_framebuffer_state a function that is always 
GPU-bound.






I see.

And you're sure that the incoming set_framebuffer_state are not
spurious?

I know cso_context always eliminates redundant
pipe_context::set_framebuffer_state calls, but it is perhaps
possible
that
Mesa state tracker is resetting the framebuffer state with
different
surface
views, but that in practice are exactly the same as the previous
one?

Like I said, it seems odd apps are doing this: it doesn't make
much
sense
to
me to change colorspace of the fragments between draws.
(Unless some
of
the
assets are already in SRGB and the app is trying to be too
smart for
its
own
good to avoid the sRGB->RGB->sRGB.)  It seems much more likely
that
these
framebuffer state changes are self-inflicted some where in our
stack,
than
something truly demanded by the app.

And if that's the case and we can fix it, then it would be a
better
solution
all around.





Yeah the funny part and the reason is that we have a
microbenchmark
in
piglit (drawoverhead) changing this state between draw calls. :)

Marek



I couldn't find that piglit microbenchmark.  mesademos has
src/perf/drawoverhead.c but it doesn't set GL_SRGB_WRITE.  So if
fbo
is
changing internally, then it's a perf bug in Mesa state tracker.

Unless it's mimicking something that real apps do, then it's
probably
better
to fix the microbenchmark to use a more realistic test.




If you build piglit, it's in bin/drawoverhead.

You're right that this subtest (switching GL_FRAMEBUFFER_SRGB) is
rather artificial and fairly unlikely to occur with real apps.




FYI, I'm dropping this series and I don't have it in my repo 
anymore.

piglit/drawoverhead will be updated not to test this state change.

Marek




Great.

BTW, I'm not sure what's a good state to change in such
microbenchmark.

There is of course, a myriad of states to pick, but they are not
all the
same: performance can vary wildly depending on the choice.   I'm
not sure
what's a good representative state change in such circumstances.
Perhaps
toggling between two texture objects? Or some sampler state?



If you've ever run the microbenchmark, you know there are plenty of
state changes tested. I think there are like 15 state changes tested
in about 60 subtests at the moment. I'm adding more tests into it.
Currently I have 100 subtests in there locally. At the moment the
missing subtests are mostly just shader resources: immutable textures
(mutable textures i.e. not TexStorage-based are already tested), TBOs,
images, image buffers, SSBOs (maybe), atomic counters (maybe). The
methodology is 1 state change followed by 1 draw call in a loop,
measuring the number of draw calls per second for that case, and
comparing with the baseline draw rate (which is without the state
change).

Marek



I just ran it.  Pretty neat!  I didn't know we were adding 
benchmarks to

piglit.


That's because piglit has a very convenient window system integration
framework that I refuse to re-invent elsewhere.


Ah, makes sense.


Which reminds me: do people think we should transition mesademos off
glut to glfw or waffle? Or do you think we should just strive to migrate
the stuff there to piglit?


I'm not sure I see a need.  Does anyone use the Mesa demos for 
benchmarking anymore?


I wasn't thinking of benchmarking per se, but just being able to run any 
of the Mesa demos directly on Wayland (ie, EGL as opposed to GLUT+X11).


And in general, many/most of the Mesa demos have some interactive aspect 
to them (key presses or mouse 

Re: [Mesa-dev] [PATCH 2/9] i965: Check last known busy status on bo before asking the kernel

2017-06-14 Thread Kenneth Graunke
On Friday, June 9, 2017 6:01:33 AM PDT Chris Wilson wrote:
> If we know the bo is idle (that is we have not submitted a command buffer
> referencing this bo since the last query) we can skip asking the kernel.
> Note this may report a false negative if the target is being shared
> between processes (exported via dmabuf or flink). To allow the caller
> control over using the last known flag, the query is split into two.

I'm not crazy about exposing __brw_bo_busy and brw_bo_busy, with slightly
different semantics.  Why not just make brw_bo_busy do:

   if (bo->idle && bo->reusable)
  return false;

   /* otherwise query the kernel */

These days, it appears that bo->reusable is false for any buffers that
have been imported/exported via dmabuf or flink, and true otherwise.
(We might want to rename it to bo->foreign or such.)

With that change, brw_bo_busy should bypass the ioctl for most BOs, but
would still work for foreign BOs, without the caller having to worry
about it.

> Signed-off-by: Chris Wilson 
> Cc: Kenneth Graunke 
> Cc: Matt Turner 
> ---
>  src/mesa/drivers/dri/i965/brw_bufmgr.c | 17 +
>  src/mesa/drivers/dri/i965/brw_bufmgr.h | 33 ++---
>  2 files changed, 35 insertions(+), 15 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
> b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> index 67c15878d0..01590a0b0a 100644
> --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> @@ -194,21 +194,14 @@ brw_bo_reference(struct brw_bo *bo)
>  }
>  
>  int
> -brw_bo_busy(struct brw_bo *bo)
> +__brw_bo_busy(struct brw_bo *bo)
>  {
> -   struct brw_bufmgr *bufmgr = bo->bufmgr;
> -   struct drm_i915_gem_busy busy;
> -   int ret;
> +   struct drm_i915_gem_busy busy = { bo->gem_handle };
>  
> -   memclear(busy);
> -   busy.handle = bo->gem_handle;
> +   drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
>  
> -   ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
> -   if (ret == 0) {
> -  bo->idle = !busy.busy;
> -  return busy.busy;
> -   }
> -   return false;
> +   bo->idle = !busy.busy;
> +   return busy.busy;
>  }
>  
>  int
> diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h 
> b/src/mesa/drivers/dri/i965/brw_bufmgr.h
> index 70cc2bbc6c..3a397be695 100644
> --- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
> +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
> @@ -240,11 +240,38 @@ int brw_bo_get_tiling(struct brw_bo *bo, uint32_t 
> *tiling_mode,
>   */
>  int brw_bo_flink(struct brw_bo *bo, uint32_t *name);
>  
> +int __brw_bo_busy(struct brw_bo *bo);
> +
>  /**
> - * Returns 1 if mapping the buffer for write could cause the process
> - * to block, due to the object being active in the GPU.
> + * Returns 0 if mapping the buffer is not in active use by the gpu.
> + * If non-zero, any mapping for write could cause the process
> + * to block, due to the object being active in the GPU. If the lower
> + * 16 bits are zero, then we can map for read without stalling.
> + *
> + * The last-known busy status of the brw_bo is checked first. This may be
> + * stale if the brw_bo has been exported to a foreign process. If used on an
> + * exported bo, call __brw_bo_busy() directly to bypass the local check.
>   */
> -int brw_bo_busy(struct brw_bo *bo);
> +static inline int brw_bo_busy(struct brw_bo *bo)
> +{
> +   if (bo->idle) /* Note this may be stale if the bo is exported */
> +  return 0;
> +
> +   return __brw_bo_busy(bo);
> +}

I'd rather keep this as a boolean result, rather than an integer with
certain bits having particular meanings.  Bonus points for changing the
return type to "bool".

> +
> +/**
> + * Returns true if mapping the buffer for read will cause the process to
> + * block (i.e. the buffer is still being written). Note that when it
> + * returns false, the buffer may still be concurrently read by the GPU.
> + */
> +static inline int brw_bo_write_busy(struct brw_bo *bo)
> +{
> +   if (bo->idle) /* Note this may be stale if the bo is exported */
> +  return 0;
> +
> +   return __brw_bo_busy(bo) & 0x;
> +}
>  
>  /**
>   * Specify the volatility of the buffer.

This seems like a nice helper.

--Ken

signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/5] st/mesa: simplify st_update_viewport

2017-06-14 Thread Marek Olšák
Also adding this:

diff --git a/src/mesa/state_tracker/st_atom_viewport.c
b/src/mesa/state_tracker/st_atom_viewport.c
index b434f0d..d01836f 100644
--- a/src/mesa/state_tracker/st_atom_viewport.c
+++ b/src/mesa/state_tracker/st_atom_viewport.c
@@ -54,9 +54,10 @@ st_update_viewport( struct st_context *st )
   _mesa_get_viewport_xform(ctx, i, scale, translate);

   /* _NEW_BUFFERS */
+  /* Drawing to a window where the coordinate system is upside down. */
   if (st->state.fb_orientation == Y_0_TOP) {
  scale[1] *= -1;
- translate[1] = translate[1] * -1 + st->state.fb_height;
+ translate[1] = st->state.fb_height - translate[1];
   }
}


Marek

On Wed, Jun 14, 2017 at 11:41 PM, Marek Olšák  wrote:
> From: Marek Olšák 
>
> ---
>  src/mesa/state_tracker/st_atom_viewport.c | 34 
> ---
>  1 file changed, 9 insertions(+), 25 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_atom_viewport.c 
> b/src/mesa/state_tracker/st_atom_viewport.c
> index 1fc8908..9a9d570 100644
> --- a/src/mesa/state_tracker/st_atom_viewport.c
> +++ b/src/mesa/state_tracker/st_atom_viewport.c
> @@ -36,47 +36,31 @@
>  /**
>   * Update the viewport transformation matrix.  Depends on:
>   *  - viewport pos/size
>   *  - depthrange
>   *  - window pos/size or FBO size
>   */
>  void
>  st_update_viewport( struct st_context *st )
>  {
> struct gl_context *ctx = st->ctx;
> -   GLfloat yScale, yBias;
> unsigned i;
> -   /* _NEW_BUFFERS
> -*/
> -   if (st->state.fb_orientation == Y_0_TOP) {
> -  /* Drawing to a window.  The corresponding gallium surface uses
> -   * Y=0=TOP but OpenGL is Y=0=BOTTOM.  So we need to invert the 
> viewport.
> -   */
> -  yScale = -1;
> -  yBias = (GLfloat)ctx->DrawBuffer->Height;
> -   }
> -   else {
> -  /* Drawing to an FBO where Y=0=BOTTOM, like OpenGL - don't invert */
> -  yScale = 1.0;
> -  yBias = 0.0;
> -   }
>
> /* _NEW_VIEWPORT
>  */
> -   for (i = 0; i < ctx->Const.MaxViewports; i++)
> -   {
> -  float scale[3], translate[3];
> -  _mesa_get_viewport_xform(ctx, i, scale, translate);
> +   for (i = 0; i < ctx->Const.MaxViewports; i++) {
> +  float *scale = st->state.viewport[i].scale;
> +  float *translate = st->state.viewport[i].translate;
>
> -  st->state.viewport[i].scale[0] = scale[0];
> -  st->state.viewport[i].scale[1] = scale[1] * yScale;
> -  st->state.viewport[i].scale[2] = scale[2];
> +  _mesa_get_viewport_xform(ctx, i, scale, translate);
>
> -  st->state.viewport[i].translate[0] = translate[0];
> -  st->state.viewport[i].translate[1] = translate[1] * yScale + yBias;
> -  st->state.viewport[i].translate[2] = translate[2];
> +  /* _NEW_BUFFERS */
> +  if (st->state.fb_orientation == Y_0_TOP) {
> + scale[1] *= -1;
> + translate[1] = translate[1] * -1 + st->state.fb_height;
> +  }
> }
>
> cso_set_viewport(st->cso_context, &st->state.viewport[0]);
> if (ctx->Const.MaxViewports > 1)
>st->pipe->set_viewport_states(st->pipe, 1, ctx->Const.MaxViewports - 
> 1, &st->state.viewport[1]);
>  }
> --
> 2.7.4
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: don't call _mesa_update_clip_plane in the GL core profile

2017-06-14 Thread Ilia Mirkin
Reviewed-by: Ilia Mirkin 

On Wed, Jun 14, 2017 at 5:37 PM, Marek Olšák  wrote:
> From: Marek Olšák 
>
> It uses the projection matrix to transform the clip plane.
> ---
>  src/mesa/main/enable.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c
> index 0324170..0f7cdcd 100644
> --- a/src/mesa/main/enable.c
> +++ b/src/mesa/main/enable.c
> @@ -352,21 +352,22 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, 
> GLboolean state)
>  if (ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGLES ||
>  !ctx->DriverFlags.NewClipPlaneEnable) {
> FLUSH_VERTICES(ctx, _NEW_TRANSFORM);
>  } else {
> FLUSH_VERTICES(ctx, 0);
>  }
>  ctx->NewDriverState |= ctx->DriverFlags.NewClipPlaneEnable;
>
>  if (state) {
> ctx->Transform.ClipPlanesEnabled |= (1 << p);
> -   _mesa_update_clip_plane(ctx, p);
> +   if (ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGLES)
> +  _mesa_update_clip_plane(ctx, p);
>  }
>  else {
> ctx->Transform.ClipPlanesEnabled &= ~(1 << p);
>  }
>   }
>   break;
>case GL_COLOR_MATERIAL:
>   if (ctx->API != API_OPENGL_COMPAT && ctx->API != API_OPENGLES)
>  goto invalid_enum_error;
>   if (ctx->Light.ColorMaterialEnabled == state)
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/5] st/mesa: use precomputed st_fb_orientation

2017-06-14 Thread Marek Olšák
From: Marek Olšák 

---
 src/mesa/state_tracker/st_atom_list.h   | 6 +++---
 src/mesa/state_tracker/st_atom_rasterizer.c | 6 +++---
 src/mesa/state_tracker/st_atom_scissor.c| 2 +-
 src/mesa/state_tracker/st_atom_viewport.c   | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_list.h 
b/src/mesa/state_tracker/st_atom_list.h
index 81a9308..b76854e 100644
--- a/src/mesa/state_tracker/st_atom_list.h
+++ b/src/mesa/state_tracker/st_atom_list.h
@@ -1,24 +1,21 @@
 /* Render (non-compute) states must be first. */
 ST_STATE(ST_NEW_DSA, st_update_depth_stencil_alpha)
 ST_STATE(ST_NEW_CLIP_STATE, st_update_clip)
 
 ST_STATE(ST_NEW_FS_STATE, st_update_fp)
 ST_STATE(ST_NEW_GS_STATE, st_update_gp)
 ST_STATE(ST_NEW_TES_STATE, st_update_tep)
 ST_STATE(ST_NEW_TCS_STATE, st_update_tcp)
 ST_STATE(ST_NEW_VS_STATE, st_update_vp)
 
-ST_STATE(ST_NEW_RASTERIZER, st_update_rasterizer)
 ST_STATE(ST_NEW_POLY_STIPPLE, st_update_polygon_stipple)
-ST_STATE(ST_NEW_VIEWPORT, st_update_viewport)
-ST_STATE(ST_NEW_SCISSOR, st_update_scissor)
 ST_STATE(ST_NEW_WINDOW_RECTANGLES, st_update_window_rectangles)
 ST_STATE(ST_NEW_BLEND, st_update_blend)
 ST_STATE(ST_NEW_BLEND_COLOR, st_update_blend_color)
 
 ST_STATE(ST_NEW_VS_SAMPLER_VIEWS, st_update_vertex_textures)
 ST_STATE(ST_NEW_FS_SAMPLER_VIEWS, st_update_fragment_textures)
 ST_STATE(ST_NEW_GS_SAMPLER_VIEWS, st_update_geometry_textures)
 ST_STATE(ST_NEW_TCS_SAMPLER_VIEWS, st_update_tessctrl_textures)
 ST_STATE(ST_NEW_TES_SAMPLER_VIEWS, st_update_tesseval_textures)
 
@@ -29,22 +26,25 @@ ST_STATE(ST_NEW_TES_SAMPLERS, st_update_tesseval_samplers) 
/* depends on update_
 ST_STATE(ST_NEW_GS_SAMPLERS, st_update_geometry_samplers) /* depends on 
update_*_texture for swizzle */
 ST_STATE(ST_NEW_FS_SAMPLERS, st_update_fragment_samplers) /* depends on 
update_*_texture for swizzle */
 
 ST_STATE(ST_NEW_VS_IMAGES, st_bind_vs_images)
 ST_STATE(ST_NEW_TCS_IMAGES, st_bind_tcs_images)
 ST_STATE(ST_NEW_TES_IMAGES, st_bind_tes_images)
 ST_STATE(ST_NEW_GS_IMAGES, st_bind_gs_images)
 ST_STATE(ST_NEW_FS_IMAGES, st_bind_fs_images)
 
 ST_STATE(ST_NEW_FB_STATE, st_update_framebuffer_state) /* depends on 
update_*_texture and bind_*_images */
+ST_STATE(ST_NEW_RASTERIZER, st_update_rasterizer) /* depends on 
update_framebuffer_state */
 ST_STATE(ST_NEW_SAMPLE_MASK, st_update_sample_mask) /* depends on 
update_framebuffer_state */
 ST_STATE(ST_NEW_SAMPLE_SHADING, st_update_sample_shading)
+ST_STATE(ST_NEW_SCISSOR, st_update_scissor) /* depends on 
update_framebuffer_state */
+ST_STATE(ST_NEW_VIEWPORT, st_update_viewport) /* depends on 
update_framebuffer_state */
 
 ST_STATE(ST_NEW_VS_CONSTANTS, st_update_vs_constants)
 ST_STATE(ST_NEW_TCS_CONSTANTS, st_update_tcs_constants)
 ST_STATE(ST_NEW_TES_CONSTANTS, st_update_tes_constants)
 ST_STATE(ST_NEW_GS_CONSTANTS, st_update_gs_constants)
 ST_STATE(ST_NEW_FS_CONSTANTS, st_update_fs_constants)
 
 ST_STATE(ST_NEW_VS_UBOS, st_bind_vs_ubos)
 ST_STATE(ST_NEW_TCS_UBOS, st_bind_tcs_ubos)
 ST_STATE(ST_NEW_TES_UBOS, st_bind_tes_ubos)
diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c 
b/src/mesa/state_tracker/st_atom_rasterizer.c
index 6e17562..e388960 100644
--- a/src/mesa/state_tracker/st_atom_rasterizer.c
+++ b/src/mesa/state_tracker/st_atom_rasterizer.c
@@ -79,21 +79,21 @@ void st_update_rasterizer( struct st_context *st )
   if (ctx->Transform.ClipOrigin == GL_UPPER_LEFT) {
  raster->front_ccw ^= 1;
   }
 
   /*
* Gallium's surfaces are Y=0=TOP orientation.  OpenGL is the
* opposite.  Window system surfaces are Y=0=TOP.  Mesa's FBOs
* must match OpenGL conventions so FBOs use Y=0=BOTTOM.  In that
* case, we must invert Y and flip the notion of front vs. back.
*/
-  if (st_fb_orientation(ctx->DrawBuffer) == Y_0_BOTTOM) {
+  if (st->state.fb_orientation == Y_0_BOTTOM) {
  /* Drawing to an FBO.  The viewport will be inverted. */
  raster->front_ccw ^= 1;
   }
}
 
/* _NEW_LIGHT
 */
raster->flatshade = ctx->Light.ShadeModel == GL_FLAT;
   
raster->flatshade_first = ctx->Light.ProvokingVertex ==
@@ -167,21 +167,21 @@ void st_update_rasterizer( struct st_context *st )
/* _NEW_POINT
 */
raster->point_size = ctx->Point.Size;
raster->point_smooth = !ctx->Point.PointSprite && ctx->Point.SmoothFlag;
 
/* _NEW_POINT | _NEW_PROGRAM
 */
if (ctx->Point.PointSprite) {
   /* origin */
   if ((ctx->Point.SpriteOrigin == GL_UPPER_LEFT) ^
-  (st_fb_orientation(ctx->DrawBuffer) == Y_0_BOTTOM))
+  (st->state.fb_orientation == Y_0_BOTTOM))
  raster->sprite_coord_mode = PIPE_SPRITE_COORD_UPPER_LEFT;
   else 
  raster->sprite_coord_mode = PIPE_SPRITE_COORD_LOWER_LEFT;
 
   /* Coord replacement flags.  If bit 'k' is set that means
* that we need to replace GENERIC[k] attrib with an automatically
* computed texture 

[Mesa-dev] [PATCH 2/5] st/mesa: remove redundant sample_mask checking

2017-06-14 Thread Marek Olšák
From: Marek Olšák 

cso does that too
---
 src/mesa/state_tracker/st_atom_msaa.c | 7 +--
 src/mesa/state_tracker/st_context.h   | 1 -
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_msaa.c 
b/src/mesa/state_tracker/st_atom_msaa.c
index 0bdb9b2..814077f 100644
--- a/src/mesa/state_tracker/st_atom_msaa.c
+++ b/src/mesa/state_tracker/st_atom_msaa.c
@@ -55,26 +55,21 @@ void st_update_sample_mask( struct st_context *st )
 Also, there's an interface restriction here in theory it is
 encouraged this mask not be the same at each pixel. */
  sample_mask = (1 << nr_bits) - 1;
  if (st->ctx->Multisample.SampleCoverageInvert)
 sample_mask = ~sample_mask;
   }
   if (st->ctx->Multisample.SampleMask)
  sample_mask &= st->ctx->Multisample.SampleMaskValue;
}
 
-   /* mask off unused bits or don't care? */
-
-   if (sample_mask != st->state.sample_mask) {
-  st->state.sample_mask = sample_mask;
-  cso_set_sample_mask(st->cso_context, sample_mask);
-   }
+   cso_set_sample_mask(st->cso_context, sample_mask);
 }
 
 void st_update_sample_shading( struct st_context *st )
 {
if (!st->fp)
   return;
 
if (!st->ctx->Extensions.ARB_sample_shading)
   return;
 
diff --git a/src/mesa/state_tracker/st_context.h 
b/src/mesa/state_tracker/st_context.h
index 6497587..2fe9d92 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -149,21 +149,20 @@ struct st_context
   unsigned fb_height;
   unsigned fb_num_samples;
   unsigned fb_num_layers;
   struct pipe_scissor_state scissor[PIPE_MAX_VIEWPORTS];
   struct pipe_viewport_state viewport[PIPE_MAX_VIEWPORTS];
   struct {
  unsigned num;
  boolean include;
  struct pipe_scissor_state rects[PIPE_MAX_WINDOW_RECTANGLES];
   } window_rects;
-  unsigned sample_mask;
 
   GLuint poly_stipple[32];  /**< In OpenGL's bottom-to-top order */
 
   GLuint fb_orientation;
} state;
 
uint64_t dirty; /**< dirty states */
 
/** This masks out unused shader resources. Only valid in draw calls. */
uint64_t active_states;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/5] st/mesa: fix pipe_rasterizer_state::scissor with multiple viewports

2017-06-14 Thread Marek Olšák
From: Marek Olšák 

Cc: 17.1 
---
 src/mesa/state_tracker/st_atom_rasterizer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c 
b/src/mesa/state_tracker/st_atom_rasterizer.c
index e388960..39be6b1 100644
--- a/src/mesa/state_tracker/st_atom_rasterizer.c
+++ b/src/mesa/state_tracker/st_atom_rasterizer.c
@@ -255,21 +255,21 @@ void st_update_rasterizer( struct st_context *st )
 
/* _NEW_MULTISAMPLE | _NEW_BUFFERS */
raster->force_persample_interp =
  !st->force_persample_in_shader &&
  raster->multisample &&
  ctx->Multisample.SampleShading &&
  ctx->Multisample.MinSampleShadingValue *
  _mesa_geometric_samples(ctx->DrawBuffer) > 1;
 
/* _NEW_SCISSOR */
-   raster->scissor = ctx->Scissor.EnableFlags;
+   raster->scissor = !!ctx->Scissor.EnableFlags;
 
/* _NEW_FRAG_CLAMP */
raster->clamp_fragment_color = !st->clamp_frag_color_in_shader &&
   ctx->Color._ClampFragmentColor;
 
raster->half_pixel_center = 1;
if (st->state.fb_orientation == Y_0_TOP)
   raster->bottom_edge_rule = 1;
/* _NEW_TRANSFORM */
if (ctx->Transform.ClipOrigin == GL_UPPER_LEFT)
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/5] st/mesa: don't set 16 scissors and 16 viewports if they're unused

2017-06-14 Thread Marek Olšák
From: Marek Olšák 

Only do so if there is a shader writing gl_ViewportIndex.
This removes a lot of CPU overhead for the most common case.
---
 src/mesa/state_tracker/st_atom.c  | 18 ++
 src/mesa/state_tracker/st_atom_scissor.c  | 10 +++---
 src/mesa/state_tracker/st_atom_viewport.c | 11 ---
 src/mesa/state_tracker/st_context.h   |  1 +
 4 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c
index bcfbcf8..253b508 100644
--- a/src/mesa/state_tracker/st_atom.c
+++ b/src/mesa/state_tracker/st_atom.c
@@ -69,20 +69,21 @@ static void check_program_state( struct st_context *st )
struct st_common_program *old_tep = st->tep;
struct st_common_program *old_gp = st->gp;
struct st_fragment_program *old_fp = st->fp;
 
struct gl_program *new_vp = ctx->VertexProgram._Current;
struct gl_program *new_tcp = ctx->TessCtrlProgram._Current;
struct gl_program *new_tep = ctx->TessEvalProgram._Current;
struct gl_program *new_gp = ctx->GeometryProgram._Current;
struct gl_program *new_fp = ctx->FragmentProgram._Current;
uint64_t dirty = 0;
+   unsigned num_viewports = 1;
 
/* Flag states used by both new and old shaders to unbind shader resources
 * properly when transitioning to shaders that don't use them.
 */
 if (unlikely(new_vp != &old_vp->Base)) {
   if (old_vp)
  dirty |= old_vp->affected_states;
   if (new_vp)
  dirty |= ST_NEW_VERTEX_PROGRAM(st, st_vertex_program(new_vp));
}
@@ -108,20 +109,37 @@ static void check_program_state( struct st_context *st )
  dirty |= st_common_program(new_gp)->affected_states;
}
 
 if (unlikely(new_fp != &old_fp->Base)) {
   if (old_fp)
  dirty |= old_fp->affected_states;
   if (new_fp)
  dirty |= st_fragment_program(new_fp)->affected_states;
}
 
+   /* Find out the number of viewports. This determines how many scissors
+* and viewport states we need to update.
+*/
+   struct gl_program *last_prim_shader = new_gp ? new_gp :
+ new_tep ? new_tep : new_vp;
+   if (last_prim_shader &&
+   last_prim_shader->info.outputs_written & VARYING_BIT_VIEWPORT)
+  num_viewports = ctx->Const.MaxViewports;
+
+   if (st->state.num_viewports != num_viewports) {
+  st->state.num_viewports = num_viewports;
+  dirty |= ST_NEW_VIEWPORT;
+
+  if (ctx->Scissor.EnableFlags & u_bit_consecutive(0, num_viewports))
+ dirty |= ST_NEW_SCISSOR;
+   }
+
st->dirty |= dirty;
 }
 
 static void check_attrib_edgeflag(struct st_context *st)
 {
const struct gl_vertex_array **arrays = st->ctx->Array._DrawArrays;
GLboolean vertdata_edgeflags, edgeflag_culls_prims, edgeflags_enabled;
struct gl_program *vp = st->ctx->VertexProgram._Current;
 
if (!arrays)
diff --git a/src/mesa/state_tracker/st_atom_scissor.c 
b/src/mesa/state_tracker/st_atom_scissor.c
index ccd6e8e..a87d029 100644
--- a/src/mesa/state_tracker/st_atom_scissor.c
+++ b/src/mesa/state_tracker/st_atom_scissor.c
@@ -46,21 +46,21 @@ st_update_scissor( struct st_context *st )
 {
struct pipe_scissor_state scissor[PIPE_MAX_VIEWPORTS];
const struct gl_context *ctx = st->ctx;
const struct gl_framebuffer *fb = ctx->DrawBuffer;
const unsigned int fb_width = _mesa_geometric_width(fb);
const unsigned int fb_height = _mesa_geometric_height(fb);
GLint miny, maxy;
unsigned i;
bool changed = false;
 
-   for (i = 0 ; i < ctx->Const.MaxViewports; i++) {
+   for (i = 0 ; i < st->state.num_viewports; i++) {
   scissor[i].minx = 0;
   scissor[i].miny = 0;
   scissor[i].maxx = fb_width;
   scissor[i].maxy = fb_height;
 
   if (ctx->Scissor.EnableFlags & (1 << i)) {
  /* need to be careful here with xmax or ymax < 0 */
  GLint xmax = MAX2(0, ctx->Scissor.ScissorArray[i].X + 
ctx->Scissor.ScissorArray[i].Width);
  GLint ymax = MAX2(0, ctx->Scissor.ScissorArray[i].Y + 
ctx->Scissor.ScissorArray[i].Height);
 
@@ -88,22 +88,26 @@ st_update_scissor( struct st_context *st )
  scissor[i].miny = miny;
  scissor[i].maxy = maxy;
   }
 
   if (memcmp(&scissor[i], &st->state.scissor[i], sizeof(scissor[0])) != 0) 
{
  /* state has changed */
  st->state.scissor[i] = scissor[i];  /* struct copy */
  changed = true;
   }
}
-   if (changed)
-  st->pipe->set_scissor_states(st->pipe, 0, ctx->Const.MaxViewports, 
scissor); /* activate */
+
+   if (changed) {
+  struct pipe_context *pipe = st->pipe;
+
+  pipe->set_scissor_states(pipe, 0, st->state.num_viewports, scissor);
+   }
 }
 
 void
 st_update_window_rectangles(struct st_context *st)
 {
struct pipe_scissor_state new_rects[PIPE_MAX_WINDOW_RECTANGLES];
const struct gl_context *ctx = st->ctx;
const struct gl_scissor_attrib *scissor = &ctx->Scissor;
unsigned i;
bool changed = 

[Mesa-dev] [PATCH 3/5] st/mesa: simplify st_update_viewport

2017-06-14 Thread Marek Olšák
From: Marek Olšák 

---
 src/mesa/state_tracker/st_atom_viewport.c | 34 ---
 1 file changed, 9 insertions(+), 25 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_viewport.c 
b/src/mesa/state_tracker/st_atom_viewport.c
index 1fc8908..9a9d570 100644
--- a/src/mesa/state_tracker/st_atom_viewport.c
+++ b/src/mesa/state_tracker/st_atom_viewport.c
@@ -36,47 +36,31 @@
 /**
  * Update the viewport transformation matrix.  Depends on:
  *  - viewport pos/size
  *  - depthrange
  *  - window pos/size or FBO size
  */
 void
 st_update_viewport( struct st_context *st )
 {
struct gl_context *ctx = st->ctx;
-   GLfloat yScale, yBias;
unsigned i;
-   /* _NEW_BUFFERS
-*/
-   if (st->state.fb_orientation == Y_0_TOP) {
-  /* Drawing to a window.  The corresponding gallium surface uses
-   * Y=0=TOP but OpenGL is Y=0=BOTTOM.  So we need to invert the viewport.
-   */
-  yScale = -1;
-  yBias = (GLfloat)ctx->DrawBuffer->Height;
-   }
-   else {
-  /* Drawing to an FBO where Y=0=BOTTOM, like OpenGL - don't invert */
-  yScale = 1.0;
-  yBias = 0.0;
-   }
 
/* _NEW_VIEWPORT 
 */
-   for (i = 0; i < ctx->Const.MaxViewports; i++)
-   {
-  float scale[3], translate[3];
-  _mesa_get_viewport_xform(ctx, i, scale, translate);
+   for (i = 0; i < ctx->Const.MaxViewports; i++) {
+  float *scale = st->state.viewport[i].scale;
+  float *translate = st->state.viewport[i].translate;
 
-  st->state.viewport[i].scale[0] = scale[0];
-  st->state.viewport[i].scale[1] = scale[1] * yScale;
-  st->state.viewport[i].scale[2] = scale[2];
+  _mesa_get_viewport_xform(ctx, i, scale, translate);
 
-  st->state.viewport[i].translate[0] = translate[0];
-  st->state.viewport[i].translate[1] = translate[1] * yScale + yBias;
-  st->state.viewport[i].translate[2] = translate[2];
+  /* _NEW_BUFFERS */
+  if (st->state.fb_orientation == Y_0_TOP) {
+ scale[1] *= -1;
+ translate[1] = translate[1] * -1 + st->state.fb_height;
+  }
}
 
cso_set_viewport(st->cso_context, &st->state.viewport[0]);
if (ctx->Const.MaxViewports > 1)
   st->pipe->set_viewport_states(st->pipe, 1, ctx->Const.MaxViewports - 1, 
&st->state.viewport[1]);
 }
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/9] i965: Mark freshly allocate bo as idle

2017-06-14 Thread Kenneth Graunke
On Friday, June 9, 2017 6:01:32 AM PDT Chris Wilson wrote:
> When created, buffers are idle, so mark them as such to save an early
> ioctl or mistakenly assuming the fresh buffer is busy.
> 
> Signed-off-by: Chris Wilson 
> Cc: Kenneth Graunke 
> Cc: Matt Turner 
> ---
>  src/mesa/drivers/dri/i965/brw_bufmgr.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
> b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> index 2ee57e2a0d..67c15878d0 100644
> --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> @@ -324,6 +324,7 @@ retry:
>   goto err;
>  
>bo->size = bo_size;
> +  bo->idle = true;
>  
>memclear(create);
>create.size = bo_size;
> 

I noticed the other day that we weren't using bo->idle anymore.  Too
much hacking and slashing on my part, I guess...thanks for fixing it.

This patch is:
Reviewed-by: Kenneth Graunke 

signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: don't call _mesa_update_clip_plane in the GL core profile

2017-06-14 Thread Marek Olšák
From: Marek Olšák 

It uses the projection matrix to transform the clip plane.
---
 src/mesa/main/enable.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c
index 0324170..0f7cdcd 100644
--- a/src/mesa/main/enable.c
+++ b/src/mesa/main/enable.c
@@ -352,21 +352,22 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, 
GLboolean state)
 if (ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGLES ||
 !ctx->DriverFlags.NewClipPlaneEnable) {
FLUSH_VERTICES(ctx, _NEW_TRANSFORM);
 } else {
FLUSH_VERTICES(ctx, 0);
 }
 ctx->NewDriverState |= ctx->DriverFlags.NewClipPlaneEnable;
 
 if (state) {
ctx->Transform.ClipPlanesEnabled |= (1 << p);
-   _mesa_update_clip_plane(ctx, p);
+   if (ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGLES)
+  _mesa_update_clip_plane(ctx, p);
 }
 else {
ctx->Transform.ClipPlanesEnabled &= ~(1 << p);
 }   
  }
  break;
   case GL_COLOR_MATERIAL:
  if (ctx->API != API_OPENGL_COMPAT && ctx->API != API_OPENGLES)
 goto invalid_enum_error;
  if (ctx->Light.ColorMaterialEnabled == state)
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 09/24] st/mesa: sink code needed for apply_texture_swizzle_to_border_color

2017-06-14 Thread Marek Olšák
On Wed, Jun 14, 2017 at 11:23 PM, Timothy Arceri  wrote:
>
>
> On 15/06/17 04:10, Marek Olšák wrote:
>>
>> On Wed, Jun 14, 2017 at 7:27 PM, Marek Olšák  wrote:
>>>
>>> On Tue, Jun 13, 2017 at 8:10 AM, Timothy Arceri 
>>> wrote:



 On 13/06/17 04:18, Marek Olšák wrote:
>
>
> From: Marek Olšák 
>
> AMD SI-VI use this. GFX9 doesn't. We can stop doing this for SI-VI
> since
> border color swizzling is broken there anyway. The only other user of
> this
> code is nouveau.



 Maybe move this comment into the code as a TODO? I was a little confused
 at
 first as I thought this commit was meant to make the change. With that:
>>>
>>>
>>> I don't understand. What are you confused about?
>>
>>
>> The commit message talks about radeonsi, but this patch is for
>> st/mesa. st/mesa doesn't care which drivers use the codepath.
>
>
> Well how do you intend to stop using this? Why is the commit message for a
> st change talking about radeonsi? I was assuming you wanted to eventually
> remove this code path from all drivers (or at least skip it for some) in
> which case making this a code comment would make sense, otherwise why do you
> even talk about this in the commit message?

You're right. I'll just remove that commit message.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/24] st/mesa: don't unbind sampler states if none are used

2017-06-14 Thread Timothy Arceri



On 15/06/17 03:17, Marek Olšák wrote:

On Tue, Jun 13, 2017 at 7:46 AM, Timothy Arceri  wrote:

On 13/06/17 15:32, Timothy Arceri wrote:




On 13/06/17 04:23, Ilia Mirkin wrote:


On Mon, Jun 12, 2017 at 2:18 PM, Marek Olšák  wrote:


From: Marek Olšák 

---
   src/mesa/state_tracker/st_atom_sampler.c | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_atom_sampler.c
b/src/mesa/state_tracker/st_atom_sampler.c
index f33e334..11db6e1 100644
--- a/src/mesa/state_tracker/st_atom_sampler.c
+++ b/src/mesa/state_tracker/st_atom_sampler.c
@@ -263,21 +263,21 @@ update_shader_samplers(struct st_context *st,
  struct pipe_sampler_state *samplers,
  unsigned *num_samplers)
   {
  GLbitfield samplers_used = prog->SamplersUsed;
  GLbitfield free_slots = ~prog->SamplersUsed;
  GLbitfield external_samplers_used = prog->ExternalSamplersUsed;
  GLuint unit;
  const GLuint old_max = *num_samplers;
  const struct pipe_sampler_state *states[PIPE_MAX_SAMPLERS];

-   if (*num_samplers == 0 && samplers_used == 0x0)
+   if (samplers_used == 0x0)
 return;

  *num_samplers = 0;



Does this still need to get executed even if samplers_used == 0?



It seems correct to skip this, otherwise old_max won't be set correctly in
the above code the next time we get here.




Although it seems we ignore old_max in the following patches anyway because
cso_set_samplers() will set things to NULL for us.

So maybe it would make sense to set it to 0?


Yes, it should be set to 0, but it's not that important (only
DrawPixels would be affected), though I think we can just drop
tracking num_samplers in st_context and simply rely on
num_sampler_views.

I'll fix that in a follow-up patch, which I'm gonna send shortly
([25/24]). In the meantime, I'd like an Rb on this one if there are no
other comments.


You still have my r-b here.

25 is also:

Reviewed-by: Timothy Arceri 




Thanks,
Marek








Reviewed-by: Timothy Arceri 





  /* loop over sampler units (aka tex image units) */
  for (unit = 0; unit < max_units; unit++, samplers_used >>= 1) {
 struct pipe_sampler_state *sampler = samplers + unit;

 if (samplers_used & 1) {
const GLuint texUnit = prog->SamplerUnits[unit];
--
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 09/24] st/mesa: sink code needed for apply_texture_swizzle_to_border_color

2017-06-14 Thread Timothy Arceri



On 15/06/17 04:10, Marek Olšák wrote:

On Wed, Jun 14, 2017 at 7:27 PM, Marek Olšák  wrote:

On Tue, Jun 13, 2017 at 8:10 AM, Timothy Arceri  wrote:



On 13/06/17 04:18, Marek Olšák wrote:


From: Marek Olšák 

AMD SI-VI use this. GFX9 doesn't. We can stop doing this for SI-VI since
border color swizzling is broken there anyway. The only other user of this
code is nouveau.



Maybe move this comment into the code as a TODO? I was a little confused at
first as I thought this commit was meant to make the change. With that:


I don't understand. What are you confused about?


The commit message talks about radeonsi, but this patch is for
st/mesa. st/mesa doesn't care which drivers use the codepath.


Well how do you intend to stop using this? Why is the commit message for 
a st change talking about radeonsi? I was assuming you wanted to 
eventually remove this code path from all drivers (or at least skip it 
for some) in which case making this a code comment would make sense, 
otherwise why do you even talk about this in the commit message?




Marek


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] mesa/main: Missing NULL pointer check.

2017-06-14 Thread Timothy Arceri
This and the following patch are in no error paths. The result cannot be 
NULL unless the function is being used incorrectly. I would rather this 
be left to segfault than to fail silently.


On 15/06/17 02:33, Plamena Manolova wrote:

In prepare_target it's plausible that the parameters of
_mesa_lookup_texture might be invalid and NULL is returned,
so we need a NULL pointer check.

CID: 1412566
Signed-off-by: Plamena Manolova 
---
  src/mesa/main/copyimage.c | 7 +--
  1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/mesa/main/copyimage.c b/src/mesa/main/copyimage.c
index 2cb617c..dd97f1a 100644
--- a/src/mesa/main/copyimage.c
+++ b/src/mesa/main/copyimage.c
@@ -228,14 +228,17 @@ prepare_target(struct gl_context *ctx, GLuint name, 
GLenum target,
 } else {
struct gl_texture_object *texObj = _mesa_lookup_texture(ctx, name);
  
+  *renderbuffer = NULL;

+
+  if (texObj == NULL)
+return;
+
if (target == GL_TEXTURE_CUBE_MAP) {
   *texImage = texObj->Image[z][level];
}
else {
   *texImage = _mesa_select_tex_image(texObj, target, level);
}
-
-  *renderbuffer = NULL;
 }
  }
  


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/7] gallium: add pipe_blend_state::srgb_enable and the CAP

2017-06-14 Thread Brian Paul

On 06/14/2017 02:38 PM, Jose Fonseca wrote:

On 14/06/17 21:21, Marek Olšák wrote:

On Wed, Jun 14, 2017 at 10:13 PM, Jose Fonseca 
wrote:

On 14/06/17 21:07, Marek Olšák wrote:


On Wed, Jun 14, 2017 at 9:45 PM, Jose Fonseca 
wrote:


On 14/06/17 17:12, Marek Olšák wrote:



On Tue, Jun 13, 2017 at 3:43 PM, Marek Olšák 
wrote:



On Tue, Jun 13, 2017 at 1:40 PM, Jose Fonseca 
wrote:



On 12/06/17 22:56, Marek Olšák wrote:




On Mon, Jun 12, 2017 at 10:43 PM, Jose Fonseca

wrote:




On 12/06/17 21:25, Marek Olšák wrote:





On Mon, Jun 12, 2017 at 9:51 PM, Jose Fonseca

wrote:





How does this help exactly?

Are applications actually rendering to the same FBO w/ and w/o
SRGB
decoding?

Or is the problem here GL_SRGB_WRITE state getting spuriously
dirtied
by
the
application?

And even if they do, why is toggling surface views in
framebuffer
state
so
expensive?

I don't object per se, but it looks like an unusual thing to
optimize
for.



set_framebuffer_state is basically a memory barrier. We have
different
caches between FB and textures and we have to flush them when a
texture is unbound from the framebuffer and set as a sampler
view.
To
keep things simple, set_framebuffer_state is the barrier. When we
change the blend state, the barrier is avoided. Note that the
barrier
makes set_framebuffer_state a function that is always GPU-bound.






I see.

And you're sure that the incoming set_framebuffer_state are not
spurious?

I know cso_context always eliminates redundant
pipe_context::set_framebuffer_state calls, but it is perhaps
possible
that
Mesa state tracker is resetting the framebuffer state with
different
surface
views, but that in practice are exactly the same as the previous
one?

Like I said, it seems odd apps are doing this: it doesn't make
much
sense
to
me to change colorspace of the fragments between draws.
(Unless some
of
the
assets are already in SRGB and the app is trying to be too
smart for
its
own
good to avoid the sRGB->RGB->sRGB.)  It seems much more likely
that
these
framebuffer state changes are self-inflicted some where in our
stack,
than
something truly demanded by the app.

And if that's the case and we can fix it, then it would be a
better
solution
all around.





Yeah the funny part and the reason is that we have a
microbenchmark
in
piglit (drawoverhead) changing this state between draw calls. :)

Marek



I couldn't find that piglit microbenchmark.  mesademos has
src/perf/drawoverhead.c but it doesn't set GL_SRGB_WRITE.  So if
fbo
is
changing internally, then it's a perf bug in Mesa state tracker.

Unless it's mimicking something that real apps do, then it's
probably
better
to fix the microbenchmark to use a more realistic test.




If you build piglit, it's in bin/drawoverhead.

You're right that this subtest (switching GL_FRAMEBUFFER_SRGB) is
rather artificial and fairly unlikely to occur with real apps.




FYI, I'm dropping this series and I don't have it in my repo anymore.
piglit/drawoverhead will be updated not to test this state change.

Marek




Great.

BTW, I'm not sure what's a good state to change in such
microbenchmark.

There is of course, a myriad of states to pick, but they are not
all the
same: performance can vary wildly depending on the choice.   I'm
not sure
what's a good representative state change in such circumstances.
Perhaps
toggling between two texture objects? Or some sampler state?



If you've ever run the microbenchmark, you know there are plenty of
state changes tested. I think there are like 15 state changes tested
in about 60 subtests at the moment. I'm adding more tests into it.
Currently I have 100 subtests in there locally. At the moment the
missing subtests are mostly just shader resources: immutable textures
(mutable textures i.e. not TexStorage-based are already tested), TBOs,
images, image buffers, SSBOs (maybe), atomic counters (maybe). The
methodology is 1 state change followed by 1 draw call in a loop,
measuring the number of draw calls per second for that case, and
comparing with the baseline draw rate (which is without the state
change).

Marek



I just ran it.  Pretty neat!  I didn't know we were adding benchmarks to
piglit.


That's because piglit has a very convenient window system integration
framework that I refuse to re-invent elsewhere.


Ah, makes sense.


Which reminds me: do people think we should transition mesademos off
glut to glfw or waffle? Or do you think we should just strive to migrate
the stuff there to piglit?


I'm not sure I see a need.  Does anyone use the Mesa demos for 
benchmarking anymore?


And in general, many/most of the Mesa demos have some interactive aspect 
to them (key presses or mouse input) that isn't available in waffle or 
piglit (I'm not familiar with glfw).  And few of the Mesa demos do pixel 
probing for correctness.


-Brian


Re: [Mesa-dev] [PATCH 04/11] intel/genxml: Rename IndirectStatePointer to BorderColorPointer

2017-06-14 Thread Rafael Antognolli
Reviewed-by: Rafael Antognolli 

On Tue, Jun 13, 2017 at 11:28:23AM -0700, Anuj Phogat wrote:
> Signed-off-by: Anuj Phogat 
> ---
>  src/intel/genxml/gen10.xml | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/intel/genxml/gen10.xml b/src/intel/genxml/gen10.xml
> index 64041c1..06260cf 100644
> --- a/src/intel/genxml/gen10.xml
> +++ b/src/intel/genxml/gen10.xml
> @@ -900,7 +900,7 @@
>
>
>  
> -
> +
>  
>   type="uint">
>
> -- 
> 2.9.3
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/7] i965: Stop hanging on Haswell

2017-06-14 Thread Kenneth Graunke
On Tuesday, June 13, 2017 2:53:20 PM PDT Jason Ekstrand wrote:
> As I've been working on converting more things in the GL driver over to
> blorp, I've been highly annoyed by all of the hangs on Haswell.  About one
> in 3-5 Jenkins runs would hang somewhere.  After looking at about a
> half-dozen error states, I noticed that all of the hangs seemed to be on
> fast-clear operations (clear or resolve) that happen at the start of a
> batch, right after STATE_BASE_ADDRESS.
> 
> Haswell seems to be a bit more picky than other hardware about having
> fast-clear operations in flight at the same time as regular rendering and
> hangs if the two ever overlap.  (Other hardware can get rendering
> corruption but not usually hangs.)  Also, Haswell doesn't fully stall if
> you just do a RT flush and a CS stall.  The hardware docs refer to
> something they call an "end of pipe sync" which is a CS stall with a write
> to the workaround BO.  On Haswell, you also need to read from that same
> address to create a memory dependency and make sure the system is fully
> stalled.
> 
> When you call brw_blorp_resolve_color it calls brw_emit_pipe_control_flush
> and does the correct flushes and then calls into core blorp to do the
> actual resolve operation.  If the batch doesn't have enough space left in
> it for the fast-clear operation, the batch will get split and the
> fast-clear will happen in the next batch.  I believe what is happening is
> that while we're building the second batch that actually contains the
> fast-clear, some other process completes a batch and inserts it between our
> PIPE_CONTROL to do the stall and the actual fast-clear.  We then end up
> with more stuff in flight than we can handle and the GPU explodes.
> 
> I'm not 100% convinced of this explanation because it seems a bit fishy
> that a context switch wouldn't be enough to fully flush out the GPU.
> However, what I do know is that, without these patches I get a hang in one
> out of three to five Jenkins runs on my wip/i965-blorp-ds branch.  With the
> patches (or an older variant that did the same thing), I have done almost 20
> Jenkins runs and have yet to see a hang.  I'd call that success.
> 
> Jason Ekstrand (6):
>   i965: Flush around state base address
>   i965: Take a uint64_t immediate in emit_pipe_control_write
>   i965: Unify the two emit_pipe_control functions
>   i965: Do an end-of-pipe sync prior to STATE_BASE_ADDRESS
>   i965/blorp: Do an end-of-pipe sync around CCS ops
>   i965: Do an end-of-pipe sync after flushes
> 
> Topi Pohjolainen (1):
>   i965: Add an end-of-pipe sync helper
> 
>  src/mesa/drivers/dri/i965/brw_blorp.c|  16 +-
>  src/mesa/drivers/dri/i965/brw_context.h  |   3 +-
>  src/mesa/drivers/dri/i965/brw_misc_state.c   |  38 +
>  src/mesa/drivers/dri/i965/brw_pipe_control.c | 243 
> ++-
>  src/mesa/drivers/dri/i965/brw_queryobj.c |   5 +-
>  src/mesa/drivers/dri/i965/gen6_queryobj.c|   2 +-
>  src/mesa/drivers/dri/i965/genX_blorp_exec.c  |   2 +-
>  7 files changed, 211 insertions(+), 98 deletions(-)
> 
> 

The series is:
Reviewed-by: Kenneth Graunke 

If Chris is right, and what we're really seeing is that MI_SET_CONTEXT
needs additional flushing, it probably makes sense to fix the kernel.
If it's really fast clear related, then we should do it in Mesa.

I'm not sure we'll ever be able to properly determine that.

Even if we go the kernel route, we should land patches 1-3.

signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/7] i965: Add an end-of-pipe sync helper

2017-06-14 Thread Kenneth Graunke
On Tuesday, June 13, 2017 2:53:24 PM PDT Jason Ekstrand wrote:
> From: Topi Pohjolainen 
> 
> v2 (Jason Ekstrand):
>  - Take a flags parameter to control the flushes
>  - Refactoring
> 
> Signed-off-by: Topi Pohjolainen 
> ---
>  src/mesa/drivers/dri/i965/brw_context.h  |  1 +
>  src/mesa/drivers/dri/i965/brw_pipe_control.c | 96 
> +++-
>  2 files changed, 96 insertions(+), 1 deletion(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index 7b9be8a..b137409 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -1641,6 +1641,7 @@ void brw_emit_pipe_control_flush(struct brw_context 
> *brw, uint32_t flags);
>  void brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
>   struct brw_bo *bo, uint32_t offset,
>   uint64_t imm);
> +void brw_emit_end_of_pipe_sync(struct brw_context *brw, uint32_t flags);
>  void brw_emit_mi_flush(struct brw_context *brw);
>  void brw_emit_post_sync_nonzero_flush(struct brw_context *brw);
>  void brw_emit_depth_stall_flushes(struct brw_context *brw);
> diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c 
> b/src/mesa/drivers/dri/i965/brw_pipe_control.c
> index 39bb9c7..338e4fc 100644
> --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
> +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
> @@ -271,7 +271,6 @@ gen7_emit_cs_stall_flush(struct brw_context *brw)
> brw->workaround_bo, 0, 0);
>  }
>  
> -
>  /**
>   * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
>   * implementing two workarounds on gen6.  From section 1.4.7.1
> @@ -320,6 +319,101 @@ brw_emit_post_sync_nonzero_flush(struct brw_context 
> *brw)
> brw->workaround_bo, 0, 0);
>  }
>  
> +/*
> + * From Sandybridge PRM, volume 2, "1.7.2 End-of-Pipe Synchronization":
> + *
> + *  Write synchronization is a special case of end-of-pipe
> + *  synchronization that requires that the render cache and/or depth
> + *  related caches are flushed to memory, where the data will become
> + *  globally visible. This type of synchronization is required prior to
> + *  SW (CPU) actually reading the result data from memory, or initiating
> + *  an operation that will use as a read surface (such as a texture
> + *  surface) a previous render target and/or depth/stencil buffer
> + *
> + *
> + * From Haswell PRM, volume 2, part 1, "End-of-Pipe Synchronization":
> + *
> + *  Exercising the write cache flush bits (Render Target Cache Flush
> + *  Enable, Depth Cache Flush Enable, DC Flush) in PIPE_CONTROL only
> + *  ensures the write caches are flushed and doesn't guarantee the data
> + *  is globally visible.
> + *
> + *  SW can track the completion of the end-of-pipe-synchronization by
> + *  using "Notify Enable" and "PostSync Operation - Write Immediate
> + *  Data" in the PIPE_CONTROL command. 
> + */
> +void
> +brw_emit_end_of_pipe_sync(struct brw_context *brw, uint32_t flags)
> +{
> +   if (brw->gen >= 6) {
> +  /* From Sandybridge PRM, volume 2, "1.7.3.1 Writing a Value to Memory":
> +   *
> +   *"The most common action to perform upon reaching a 
> synchronization
> +   *point is to write a value out to memory. An immediate value
> +   *(included with the synchronization command) may be written."
> +   *
> +   *
> +   * From Broadwell PRM, volume 7, "End-of-Pipe Synchronization":
> +   *
> +   *"In case the data flushed out by the render engine is to be read
> +   *back in to the render engine in coherent manner, then the render
> +   *engine has to wait for the fence completion before accessing the
> +   *flushed data. This can be achieved by following means on various
> +   *products: PIPE_CONTROL command with CS Stall and the required
> +   *write caches flushed with Post-Sync-Operation as Write Immediate
> +   *Data.
> +   *
> +   *Example:
> +   *   - Workload-1 (3D/GPGPU/MEDIA)
> +   *   - PIPE_CONTROL (CS Stall, Post-Sync-Operation Write Immediate
> +   * Data, Required Write Cache Flush bits set)
> +   *   - Workload-2 (Can use the data produce or output by 
> Workload-1)
> +   */
> +  brw_emit_pipe_control_write(brw,
> +  flags | PIPE_CONTROL_CS_STALL |
> +  PIPE_CONTROL_WRITE_IMMEDIATE,
> +  brw->workaround_bo, 0, 0);
> +
> +  if (brw->is_haswell) {
> + /* Haswell needs addition work-arounds:
> +  *
> +  * From Haswell PRM, volume 2, part 1, "End-of-Pipe 
> Synchronization":
> +  *
> +  *Option 1:
> +  *PIPE_CONTROL command with the CS Stall and the required 

Re: [Mesa-dev] [PATCH 1/7] gallium: add pipe_blend_state::srgb_enable and the CAP

2017-06-14 Thread Jose Fonseca

On 14/06/17 21:21, Marek Olšák wrote:

On Wed, Jun 14, 2017 at 10:13 PM, Jose Fonseca  wrote:

On 14/06/17 21:07, Marek Olšák wrote:


On Wed, Jun 14, 2017 at 9:45 PM, Jose Fonseca  wrote:


On 14/06/17 17:12, Marek Olšák wrote:



On Tue, Jun 13, 2017 at 3:43 PM, Marek Olšák  wrote:



On Tue, Jun 13, 2017 at 1:40 PM, Jose Fonseca 
wrote:



On 12/06/17 22:56, Marek Olšák wrote:




On Mon, Jun 12, 2017 at 10:43 PM, Jose Fonseca 
wrote:




On 12/06/17 21:25, Marek Olšák wrote:





On Mon, Jun 12, 2017 at 9:51 PM, Jose Fonseca 
wrote:





How does this help exactly?

Are applications actually rendering to the same FBO w/ and w/o
SRGB
decoding?

Or is the problem here GL_SRGB_WRITE state getting spuriously
dirtied
by
the
application?

And even if they do, why is toggling surface views in framebuffer
state
so
expensive?

I don't object per se, but it looks like an unusual thing to
optimize
for.



set_framebuffer_state is basically a memory barrier. We have
different
caches between FB and textures and we have to flush them when a
texture is unbound from the framebuffer and set as a sampler view.
To
keep things simple, set_framebuffer_state is the barrier. When we
change the blend state, the barrier is avoided. Note that the
barrier
makes set_framebuffer_state a function that is always GPU-bound.






I see.

And you're sure that the incoming set_framebuffer_state are not
spurious?

I know cso_context always eliminates redundant
pipe_context::set_framebuffer_state calls, but it is perhaps
possible
that
Mesa state tracker is resetting the framebuffer state with different
surface
views, but that in practice are exactly the same as the previous
one?

Like I said, it seems odd apps are doing this: it doesn't make much
sense
to
me to change colorspace of the fragments between draws. (Unless some
of
the
assets are already in SRGB and the app is trying to be too smart for
its
own
good to avoid the sRGB->RGB->sRGB.)  It seems much more likely that
these
framebuffer state changes are self-inflicted some where in our
stack,
than
something truly demanded by the app.

And if that's the case and we can fix it, then it would be a better
solution
all around.





Yeah the funny part and the reason is that we have a microbenchmark
in
piglit (drawoverhead) changing this state between draw calls. :)

Marek



I couldn't find that piglit microbenchmark.  mesademos has
src/perf/drawoverhead.c but it doesn't set GL_SRGB_WRITE.  So if fbo
is
changing internally, then it's a perf bug in Mesa state tracker.

Unless it's mimicking something that real apps do, then it's probably
better
to fix the microbenchmark to use a more realistic test.




If you build piglit, it's in bin/drawoverhead.

You're right that this subtest (switching GL_FRAMEBUFFER_SRGB) is
rather artificial and fairly unlikely to occur with real apps.




FYI, I'm dropping this series and I don't have it in my repo anymore.
piglit/drawoverhead will be updated not to test this state change.

Marek




Great.

BTW, I'm not sure what's a good state to change in such microbenchmark.

There is of course, a myriad of states to pick, but they are not all the
same: performance can vary wildly depending on the choice.   I'm not sure
what's a good representative state change in such circumstances. Perhaps
toggling between two texture objects? Or some sampler state?



If you've ever run the microbenchmark, you know there are plenty of
state changes tested. I think there are like 15 state changes tested
in about 60 subtests at the moment. I'm adding more tests into it.
Currently I have 100 subtests in there locally. At the moment the
missing subtests are mostly just shader resources: immutable textures
(mutable textures i.e. not TexStorage-based are already tested), TBOs,
images, image buffers, SSBOs (maybe), atomic counters (maybe). The
methodology is 1 state change followed by 1 draw call in a loop,
measuring the number of draw calls per second for that case, and
comparing with the baseline draw rate (which is without the state
change).

Marek



I just ran it.  Pretty neat!  I didn't know we were adding benchmarks to
piglit.


That's because piglit has a very convenient window system integration
framework that I refuse to re-invent elsewhere.


Ah, makes sense.


Which reminds me: do people think we should transition mesademos off 
glut to glfw or waffle? Or do you think we should just strive to migrate 
the stuff there to piglit?



Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/7] gallium: add pipe_blend_state::srgb_enable and the CAP

2017-06-14 Thread Marek Olšák
On Wed, Jun 14, 2017 at 10:13 PM, Jose Fonseca  wrote:
> On 14/06/17 21:07, Marek Olšák wrote:
>>
>> On Wed, Jun 14, 2017 at 9:45 PM, Jose Fonseca  wrote:
>>>
>>> On 14/06/17 17:12, Marek Olšák wrote:


 On Tue, Jun 13, 2017 at 3:43 PM, Marek Olšák  wrote:
>
>
> On Tue, Jun 13, 2017 at 1:40 PM, Jose Fonseca 
> wrote:
>>
>>
>> On 12/06/17 22:56, Marek Olšák wrote:
>>>
>>>
>>>
>>> On Mon, Jun 12, 2017 at 10:43 PM, Jose Fonseca 
>>> wrote:



 On 12/06/17 21:25, Marek Olšák wrote:
>
>
>
>
> On Mon, Jun 12, 2017 at 9:51 PM, Jose Fonseca 
> wrote:
>>
>>
>>
>>
>> How does this help exactly?
>>
>> Are applications actually rendering to the same FBO w/ and w/o
>> SRGB
>> decoding?
>>
>> Or is the problem here GL_SRGB_WRITE state getting spuriously
>> dirtied
>> by
>> the
>> application?
>>
>> And even if they do, why is toggling surface views in framebuffer
>> state
>> so
>> expensive?
>>
>> I don't object per se, but it looks like an unusual thing to
>> optimize
>> for.
>>
>
> set_framebuffer_state is basically a memory barrier. We have
> different
> caches between FB and textures and we have to flush them when a
> texture is unbound from the framebuffer and set as a sampler view.
> To
> keep thing simple, set_framebuffer_state is the barrier. When we
> change the blend state, the barrier is avoided. Note that the
> barrier
> makes set_framebuffer_state a function that is always GPU-bound.





 I see.

 And you're sure that the incoming set_framebuffer_state are not
 spurious?

 I know cso_context always eliminates redundant
 pipe_context::set_framebuffer_state calls, but it is perhaps
 possible
 that
 Mesa state tracker is reseting the framebuffer state with different
 surface
 views, but that in practice are exactly the same as the previous
 one?

 Like I said, it seems odd apps are doing this: it doesn't make much
 sense
 to
 me to change colorspace of the fragments between draws. (Unless some
 of
 the
 assets are already in SRGB and the app is trying to be too smart for
 its
 own
 good to avoid the sRGB->RGB->sRGB.)  It seems much more likely that
 these
 framebuffer state changes are self-inflicted some where in our
 stack,
 than
 something truly demanded by the app.

 And if that's the case and we can fix it, then it would be a better
 solution
 all around.
>>>
>>>
>>>
>>>
>>> Yeah the funny part and the reason is that we have a microbenchmark
>>> in
>>> piglit (drawoverhead) changing this state between draw calls. :)
>>>
>>> Marek
>>>
>>
>> I couldn't find that piglit microbenchmark.  mesademos has
>> src/perf/drawoverhead.c but it doesn't set GL_SRGB_WRITE.  So if fbo
>> is
>> changing internally, then it's a perf bug in Mesa state tracker.
>>
>> Unless it's mimicking something that real apps do, then it's probably
>> better
>> to fix the microbenchmark to use a more realistic tests.
>
>
>
> If you build piglit, it's in bin/drawoverhead.
>
> You're right that this subtest (switching GL_FRAMEBUFFER_SRGB) is
> rather artificial and fairly unlikely to occur with real apps.



 FYI, I'm dropping this series and I don't have it in my repo anymore.
 piglit/drawoverhead will be updated not to test this state change.

 Marek
>>>
>>>
>>>
>>> Great.
>>>
>>> BTW, I'm not sure what's a good state to change in such microbenchmark.
>>>
>>> There is of course, a myriad of states to pick, but they are not all the
>>> same: performance can vary wildly depending on the choice.   I'm not sure
>>> what's a good representative state change in such circumstances Perhaps
>>> toggling between two texture objects? Or some sampler state?
>>
>>
>> If you've ever run the microbenchmark, you know there are plenty of
>> state changes tested. I think there are like 15 state changes tested
>> in about 60 subtests at the moment. I'm adding more tests into it.
>> Currently I have 100 subtests in there locally. At the moment the
>> missing subtests are mostly just shader resources: immutable textures
>> (mutable textures i.e. not TexStorage-based are already tested), TBOs,
>> 

Re: [Mesa-dev] [PATCH] gallium/radeon: fix initialization of new resource bindless fields

2017-06-14 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Wed, Jun 14, 2017 at 9:11 PM, Samuel Pitoiset
 wrote:
> r600_resource objects are not calloc'd.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/drivers/radeon/r600_buffer_common.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c 
> b/src/gallium/drivers/radeon/r600_buffer_common.c
> index fb74b45d2fa..5336f55cb57 100644
> --- a/src/gallium/drivers/radeon/r600_buffer_common.c
> +++ b/src/gallium/drivers/radeon/r600_buffer_common.c
> @@ -110,6 +110,8 @@ void r600_init_resource_fields(struct r600_common_screen 
> *rscreen,
> res->bo_size = size;
> res->bo_alignment = alignment;
> res->flags = 0;
> +   res->texture_handle_allocated = false;
> +   res->image_handle_allocated = false;
>
> switch (res->b.b.usage) {
> case PIPE_USAGE_STREAM:
> --
> 2.13.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/7] gallium: add pipe_blend_state::srgb_enable and the CAP

2017-06-14 Thread Jose Fonseca

On 14/06/17 21:07, Marek Olšák wrote:

On Wed, Jun 14, 2017 at 9:45 PM, Jose Fonseca  wrote:

On 14/06/17 17:12, Marek Olšák wrote:


On Tue, Jun 13, 2017 at 3:43 PM, Marek Olšák  wrote:


On Tue, Jun 13, 2017 at 1:40 PM, Jose Fonseca 
wrote:


On 12/06/17 22:56, Marek Olšák wrote:



On Mon, Jun 12, 2017 at 10:43 PM, Jose Fonseca 
wrote:



On 12/06/17 21:25, Marek Olšák wrote:




On Mon, Jun 12, 2017 at 9:51 PM, Jose Fonseca 
wrote:




How does this help exactly?

Are applications actually rendering to the same FBO w/ and w/o SRGB
decoding?

Or is the problem here GL_SRGB_WRITE state getting spuriously
dirtied
by
the
application?

And even if they do, why is toggling surface views in framebuffer
state
so
expensive?

I don't object per se, but it looks like an unusual thing to
optimize
for.



set_framebuffer_state is basically a memory barrier. We have
different
caches between FB and textures and we have to flush them when a
texture is unbound from the framebuffer and set as a sampler view. To
keep thing simple, set_framebuffer_state is the barrier. When we
change the blend state, the barrier is avoided. Note that the barrier
makes set_framebuffer_state a function that is always GPU-bound.





I see.

And you're sure that the incoming set_framebuffer_state are not
spurious?

I know cso_context always eliminates redundant
pipe_context::set_framebuffer_state calls, but it is perhaps possible
that
Mesa state tracker is reseting the framebuffer state with different
surface
views, but that in practice are exactly the same as the previous one?

Like I said, it seems odd apps are doing this: it doesn't make much
sense
to
me to change colorspace of the fragments between draws. (Unless some
of
the
assets are already in SRGB and the app is trying to be too smart for
its
own
good to avoid the sRGB->RGB->sRGB.)  It seems much more likely that
these
framebuffer state changes are self-inflicted some where in our stack,
than
something truly demanded by the app.

And if that's the case and we can fix it, then it would be a better
solution
all around.




Yeah the funny part and the reason is that we have a microbenchmark in
piglit (drawoverhead) changing this state between draw calls. :)

Marek



I couldn't find that piglit microbenchmark.  mesademos has
src/perf/drawoverhead.c but it doesn't set GL_SRGB_WRITE.  So if fbo is
changing internally, then it's a perf bug in Mesa state tracker.

Unless it's mimicking something that real apps do, then it's probably
better
to fix the microbenchmark to use a more realistic tests.



If you build piglit, it's in bin/drawoverhead.

You're right that this subtest (switching GL_FRAMEBUFFER_SRGB) is
rather artificial and fairly unlikely to occur with real apps.



FYI, I'm dropping this series and I don't have it in my repo anymore.
piglit/drawoverhead will be updated not to test this state change.

Marek



Great.

BTW, I'm not sure what's a good state to change in such microbenchmark.

There is of course, a myriad of states to pick, but they are not all the
same: performance can vary wildly depending on the choice.   I'm not sure
what's a good representative state change in such circumstances Perhaps
toggling between two texture objects? Or some sampler state?


If you've ever run the microbenchmark, you know there are plenty of
state changes tested. I think there are like 15 state changes tested
in about 60 subtests at the moment. I'm adding more tests into it.
Currently I have 100 subtests in there locally. At the moment the
missing subtests are mostly just shader resources: immutable textures
(mutable textures i.e. not TexStorage-based are already tested), TBOs,
images, image buffers, SSBOs (maybe), atomic counters (maybe). The
methodology is 1 state change followed by 1 draw call in a loop,
measuring the number of draw calls per second for that case, and
comparing with the baseline draw rate (which is without the state
change).

Marek



I just ran it.  Pretty neat!  I didn't know we were adding benchmarks to 
piglit.


Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] i965/bufmgr: Rename bo_alloc_tiled to bo_alloc_tiled_2d

2017-06-14 Thread Kenneth Graunke
On Tuesday, June 13, 2017 4:19:00 PM PDT Jason Ekstrand wrote:
> ---
>  src/mesa/drivers/dri/i965/brw_bufmgr.c|  6 ++--
>  src/mesa/drivers/dri/i965/brw_bufmgr.h| 12 +++
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 46 
> +--
>  src/mesa/drivers/dri/i965/intel_screen.c  | 24 +++---
>  4 files changed, 44 insertions(+), 44 deletions(-)

Thanks, this is so much nicer.  I was looking into orphaning busy storage
and the new interface will make it a lot easier to allocate a new tiled BO
of the same size and pitch.

Series is:
Reviewed-by: Kenneth Graunke 

signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/7] gallium: add pipe_blend_state::srgb_enable and the CAP

2017-06-14 Thread Samuel Pitoiset



On 06/14/2017 10:07 PM, Marek Olšák wrote:

On Wed, Jun 14, 2017 at 9:45 PM, Jose Fonseca  wrote:

On 14/06/17 17:12, Marek Olšák wrote:


On Tue, Jun 13, 2017 at 3:43 PM, Marek Olšák  wrote:


On Tue, Jun 13, 2017 at 1:40 PM, Jose Fonseca 
wrote:


On 12/06/17 22:56, Marek Olšák wrote:



On Mon, Jun 12, 2017 at 10:43 PM, Jose Fonseca 
wrote:



On 12/06/17 21:25, Marek Olšák wrote:




On Mon, Jun 12, 2017 at 9:51 PM, Jose Fonseca 
wrote:




How does this help exactly?

Are applications actually rendering to the same FBO w/ and w/o SRGB
decoding?

Or is the problem here GL_SRGB_WRITE state getting spuriously
dirtied
by
the
application?

And even if they do, why is toggling surface views in framebuffer
state
so
expensive?

I don't object per se, but it looks like an unusual thing to
optimize
for.



set_framebuffer_state is basically a memory barrier. We have
different
caches between FB and textures and we have to flush them when a
texture is unbound from the framebuffer and set as a sampler view. To
keep thing simple, set_framebuffer_state is the barrier. When we
change the blend state, the barrier is avoided. Note that the barrier
makes set_framebuffer_state a function that is always GPU-bound.





I see.

And you're sure that the incoming set_framebuffer_state are not
spurious?

I know cso_context always eliminates redundant
pipe_context::set_framebuffer_state calls, but it is perhaps possible
that
Mesa state tracker is reseting the framebuffer state with different
surface
views, but that in practice are exactly the same as the previous one?

Like I said, it seems odd apps are doing this: it doesn't make much
sense
to
me to change colorspace of the fragments between draws. (Unless some
of
the
assets are already in SRGB and the app is trying to be too smart for
its
own
good to avoid the sRGB->RGB->sRGB.)  It seems much more likely that
these
framebuffer state changes are self-inflicted some where in our stack,
than
something truly demanded by the app.

And if that's the case and we can fix it, then it would be a better
solution
all around.




Yeah the funny part and the reason is that we have a microbenchmark in
piglit (drawoverhead) changing this state between draw calls. :)

Marek



I couldn't find that piglit microbenchmark.  mesademos has
src/perf/drawoverhead.c but it doesn't set GL_SRGB_WRITE.  So if fbo is
changing internally, then it's a perf bug in Mesa state tracker.

Unless it's mimicking something that real apps do, then it's probably
better
to fix the microbenchmark to use a more realistic tests.



If you build piglit, it's in bin/drawoverhead.

You're right that this subtest (switching GL_FRAMEBUFFER_SRGB) is
rather artificial and fairly unlikely to occur with real apps.



FYI, I'm dropping this series and I don't have it in my repo anymore.
piglit/drawoverhead will be updated not to test this state change.

Marek



Great.

BTW, I'm not sure what's a good state to change in such microbenchmark.

There is of course, a myriad of states to pick, but they are not all the
same: performance can vary wildly depending on the choice.   I'm not sure
what's a good representative state change in such circumstances Perhaps
toggling between two texture objects? Or some sampler state?


If you've ever run the microbenchmark, you know there are plenty of
state changes tested. I think there are like 15 state changes tested
in about 60 subtests at the moment. I'm adding more tests into it.
Currently I have 100 subtests in there locally. At the moment the
missing subtests are mostly just shader resources: immutable textures
(mutable textures i.e. not TexStorage-based are already tested), TBOs,
images, image buffers, SSBOs (maybe), atomic counters (maybe). The
methodology is 1 state change followed by 1 draw call in a loop,
measuring the number of draw calls per second for that case, and
comparing with the baseline draw rate (which is without the state
change).


And I plan to add bindless support to it. :)



Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/7] gallium: add pipe_blend_state::srgb_enable and the CAP

2017-06-14 Thread Marek Olšák
On Wed, Jun 14, 2017 at 9:45 PM, Jose Fonseca  wrote:
> On 14/06/17 17:12, Marek Olšák wrote:
>>
>> On Tue, Jun 13, 2017 at 3:43 PM, Marek Olšák  wrote:
>>>
>>> On Tue, Jun 13, 2017 at 1:40 PM, Jose Fonseca 
>>> wrote:

 On 12/06/17 22:56, Marek Olšák wrote:
>
>
> On Mon, Jun 12, 2017 at 10:43 PM, Jose Fonseca 
> wrote:
>>
>>
>> On 12/06/17 21:25, Marek Olšák wrote:
>>>
>>>
>>>
>>> On Mon, Jun 12, 2017 at 9:51 PM, Jose Fonseca 
>>> wrote:



 How does this help exactly?

 Are applications actually rendering to the same FBO w/ and w/o SRGB
 decoding?

 Or is the problem here GL_SRGB_WRITE state getting spuriously
 dirtied
 by
 the
 application?

 And even if they do, why is toggling surface views in framebuffer
 state
 so
 expensive?

 I don't object per se, but it looks like an unusual thing to
 optimize
 for.

>>>
>>> set_framebuffer_state is basically a memory barrier. We have
>>> different
>>> caches between FB and textures and we have to flush them when a
>>> texture is unbound from the framebuffer and set as a sampler view. To
>>> keep thing simple, set_framebuffer_state is the barrier. When we
>>> change the blend state, the barrier is avoided. Note that the barrier
>>> makes set_framebuffer_state a function that is always GPU-bound.
>>
>>
>>
>>
>> I see.
>>
>> And you're sure that the incoming set_framebuffer_state are not
>> spurious?
>>
>> I know cso_context always eliminates redundant
>> pipe_context::set_framebuffer_state calls, but it is perhaps possible
>> that
>> Mesa state tracker is reseting the framebuffer state with different
>> surface
>> views, but that in practice are exactly the same as the previous one?
>>
>> Like I said, it seems odd apps are doing this: it doesn't make much
>> sense
>> to
>> me to change colorspace of the fragments between draws. (Unless some
>> of
>> the
>> assets are already in SRGB and the app is trying to be too smart for
>> its
>> own
>> good to avoid the sRGB->RGB->sRGB.)  It seems much more likely that
>> these
>> framebuffer state changes are self-inflicted some where in our stack,
>> than
>> something truly demanded by the app.
>>
>> And if that's the case and we can fix it, then it would be a better
>> solution
>> all around.
>
>
>
> Yeah the funny part and the reason is that we have a microbenchmark in
> piglit (drawoverhead) changing this state between draw calls. :)
>
> Marek
>

 I couldn't find that piglit microbenchmark.  mesademos has
 src/perf/drawoverhead.c but it doesn't set GL_SRGB_WRITE.  So if fbo is
 changing internally, then it's a perf bug in Mesa state tracker.

 Unless it's mimicking something that real apps do, then it's probably
 better
 to fix the microbenchmark to use a more realistic tests.
>>>
>>>
>>> If you build piglit, it's in bin/drawoverhead.
>>>
>>> You're right that this subtest (switching GL_FRAMEBUFFER_SRGB) is
>>> rather artificial and fairly unlikely to occur with real apps.
>>
>>
>> FYI, I'm dropping this series and I don't have it in my repo anymore.
>> piglit/drawoverhead will be updated not to test this state change.
>>
>> Marek
>
>
> Great.
>
> BTW, I'm not sure what's a good state to change in such microbenchmark.
>
> There is of course, a myriad of states to pick, but they are not all the
> same: performance can vary wildly depending on the choice.   I'm not sure
> what's a good representative state change in such circumstances Perhaps
> toggling between two texture objects? Or some sampler state?

If you've ever run the microbenchmark, you know there are plenty of
state changes tested. I think there are like 15 state changes tested
in about 60 subtests at the moment. I'm adding more tests into it.
Currently I have 100 subtests in there locally. At the moment the
missing subtests are mostly just shader resources: immutable textures
(mutable textures i.e. not TexStorage-based are already tested), TBOs,
images, image buffers, SSBOs (maybe), atomic counters (maybe). The
methodology is 1 state change followed by 1 draw call in a loop,
measuring the number of draw calls per second for that case, and
comparing with the baseline draw rate (which is without the state
change).

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [v2 1/7] i965/gen4: Set depth offset when there is stencil attachment only

2017-06-14 Thread Pohjolainen, Topi
On Wed, Jun 14, 2017 at 11:18:40AM +0100, Emil Velikov wrote:
> Hi Topi,
> 
> On 22 May 2017 at 20:12, Topi Pohjolainen  wrote:
> > Current version fails to set depthstencil.depth_offset when there
> > is only stencil attachment (it does set the intra tile offsets
> > though). Fixes piglits:
> >
> > g45,g965,ilk:   depthstencil-render-miplevels 1024 s=z24_s8
> > g45,ilk:depthstencil-render-miplevels 273 s=z24_s8
> >
> > CC: mesa-sta...@lists.freedesktop.org
> > Signed-off-by: Topi Pohjolainen 
> > ---
> >  src/mesa/drivers/dri/i965/brw_misc_state.c | 6 ++
> >  1 file changed, 6 insertions(+)
> >
> Doesn't seem like this patch has landed. Did it fall through the
> cracks, or has it been superseded/other?

It is still pending review, I'll let you know when I get to push it.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] i965/gen4: Add support for single layer in alignment workaround

2017-06-14 Thread Pohjolainen, Topi
On Wed, Jun 14, 2017 at 10:48:09AM -0700, Ian Romanick wrote:
> On 06/09/2017 07:04 AM, Topi Pohjolainen wrote:
> > On gen < 6 one doesn't have level or layer specifiers available
> > for render and depth targets. In order to support rendering to
> > specific level/layer, driver needs to manually offset the surface
> > to the desired slice.
> > There are, however, alignment restrictions to respect as well and
> > in some cases the only option is to use temporary single slice
> > surface which driver copies after rendering to the full miptree.
> > 
> > Current alignment workaround introduces new texture images which
> > are added to the parent texture object. Texture validation later
> > on copies the additional levels back to the surface that contains
> > the full mipmap.
> > This only works for non-arrayed surfaces and driver currently
> > creates new arrayed images in vain - individual layers within the
> > newly created are still unaligned the same as before.
> > 
> > This patch drops this mechanism and instead attaches single
> > temporary slice into the render buffer. This gets immediately
> > copied back to the mipmapped and/or arrayed surface just after
> > the render is done.
> > 
> > Sitting on top of earlier series cleaning up the depth buffer
> > state, this patch additionally fixes the following piglit tests:
> > 
> > ext_texture_array.copyteximage 2d_array.g45m64
> > ext_texture_array.copyteximage 1d_array.g45m64
> > arb_framebuffer_object.fbo-blit-stretch.g33m64
> 
> I wish I had noticed this before I had Mark open a bug for it:
> 
> https://bugs.freedesktop.org/show_bug.cgi?id=101414

Sorry about this Ian, it is a wrong call. I blindly took it from the
change list after I saw it there every time (I have been churning my isl work
a lot in jenkins). I don't even touch i915 driver here.

> 
> > ext_framebuffer_object.fbo-cubemap.g965m64
> > arb_framebuffer_object.fbo-generatemipmap-cubemap.g965m64
> > arb_texture_cube_map.copyteximage cube.g965m64
> > ext_texture_array.copyteximage 1d_array.g965m64
> > ext_texture_array.copyteximage 2d_array.g965m64
> > ext_texture_array.fbo-array.g965m64
> > ext_texture_array.gen-mipmap.g965m64
> > ext_texture_array.fbo-generatemipmap-array.g965m64
> > arb_pixel_buffer_object.texsubimage array pbo.g965m64
> > ext_texture_array.copyteximage 2d_array.ilkm64
> > ext_texture_array.copyteximage 1d_array.ilkm64
> > arb_texture_cube_map.copyteximage cube.ilkm64
> > 
> > CC: Kenneth Graunke 
> > CC: Jason Ekstrand 
> > CC: Ian Romanick 
> > Signed-off-by: Topi Pohjolainen 
> > ---
> >  src/mesa/drivers/dri/i965/brw_draw.c | 51 
> > 
> >  src/mesa/drivers/dri/i965/brw_misc_state.c   |  4 +-
> >  src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  3 +-
> >  src/mesa/drivers/dri/i965/intel_fbo.c| 19 +
> >  src/mesa/drivers/dri/i965/intel_fbo.h| 24 +++
> >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c|  2 +-
> >  src/mesa/drivers/dri/i965/intel_mipmap_tree.h|  7 
> >  7 files changed, 99 insertions(+), 11 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
> > b/src/mesa/drivers/dri/i965/brw_draw.c
> > index 611cb86..cb441c3 100644
> > --- a/src/mesa/drivers/dri/i965/brw_draw.c
> > +++ b/src/mesa/drivers/dri/i965/brw_draw.c
> > @@ -396,6 +396,56 @@ brw_postdraw_set_buffers_need_resolve(struct 
> > brw_context *brw)
> >  }
> >  
> >  static void
> > +intel_renderbuffer_move_temp_back(struct brw_context *brw,
> > +  struct intel_renderbuffer *irb)
> > +{
> > +   if (irb->align_wa_mt == NULL)
> > +  return;
> > +
> > +   brw_render_cache_set_check_flush(brw, irb->align_wa_mt->bo);
> > +
> > +   intel_miptree_copy_slice(brw, irb->align_wa_mt, 0, 0,
> > +irb->mt,
> > +irb->Base.Base.TexImage->Level, irb->mt_layer);
> > +
> > +   intel_miptree_reference(&irb->align_wa_mt, NULL);
> > +
> > +   /* Finally restore the x,y to correspond to full miptree. */
> > +   intel_renderbuffer_set_draw_offset(irb);
> > +
> > +   /* Make sure render surface state gets re-emitted with updated miptree. 
> > */
> > +   brw->NewGLState |= _NEW_BUFFERS;
> > +}
> > +
> > +static void
> > +brw_postdraw_reconcile_align_wa_slices(struct brw_context *brw)
> > +{
> > +   struct gl_context *ctx = &brw->ctx;
> > +   struct gl_framebuffer *fb = ctx->DrawBuffer;
> > +
> > +   struct intel_renderbuffer *depth_irb =
> > +  intel_get_renderbuffer(fb, BUFFER_DEPTH);
> > +   struct intel_renderbuffer *stencil_irb =
> > +  intel_get_renderbuffer(fb, BUFFER_STENCIL);
> > +
> > +   if (depth_irb && depth_irb->align_wa_mt)
> > +  intel_renderbuffer_move_temp_back(brw, depth_irb);
> > +
> > +   if (stencil_irb && stencil_irb->align_wa_mt)
> > +  

Re: [Mesa-dev] [PATCH 1/7] gallium: add pipe_blend_state::srgb_enable and the CAP

2017-06-14 Thread Jose Fonseca

On 14/06/17 17:12, Marek Olšák wrote:

On Tue, Jun 13, 2017 at 3:43 PM, Marek Olšák  wrote:

On Tue, Jun 13, 2017 at 1:40 PM, Jose Fonseca  wrote:

On 12/06/17 22:56, Marek Olšák wrote:


On Mon, Jun 12, 2017 at 10:43 PM, Jose Fonseca 
wrote:


On 12/06/17 21:25, Marek Olšák wrote:



On Mon, Jun 12, 2017 at 9:51 PM, Jose Fonseca 
wrote:



How does this help exactly?

Are applications actually rendering to the same FBO w/ and w/o SRGB
decoding?

Or is the problem here GL_SRGB_WRITE state getting spuriously dirtied
by
the
application?

And even if they do, why is toggling surface views in framebuffer state
so
expensive?

I don't object per se, but it looks like an unusual thing to optimize
for.



set_framebuffer_state is basically a memory barrier. We have different
caches between FB and textures and we have to flush them when a
texture is unbound from the framebuffer and set as a sampler view. To
keep things simple, set_framebuffer_state is the barrier. When we
change the blend state, the barrier is avoided. Note that the barrier
makes set_framebuffer_state a function that is always GPU-bound.




I see.

And you're sure that the incoming set_framebuffer_state are not spurious?

I know cso_context always eliminates redundant
pipe_context::set_framebuffer_state calls, but it is perhaps possible
that
Mesa state tracker is resetting the framebuffer state with different
surface
views, but that in practice are exactly the same as the previous one?

Like I said, it seems odd apps are doing this: it doesn't make much sense
to
me to change colorspace of the fragments between draws. (Unless some of
the
assets are already in SRGB and the app is trying to be too smart for its
own
good to avoid the sRGB->RGB->sRGB.)  It seems much more likely that these
framebuffer state changes are self-inflicted somewhere in our stack,
than
something truly demanded by the app.

And if that's the case and we can fix it, then it would be a better
solution
all around.



Yeah the funny part and the reason is that we have a microbenchmark in
piglit (drawoverhead) changing this state between draw calls. :)

Marek



I couldn't find that piglit microbenchmark.  mesademos has
src/perf/drawoverhead.c but it doesn't set GL_SRGB_WRITE.  So if fbo is
changing internally, then it's a perf bug in Mesa state tracker.

Unless it's mimicking something that real apps do, then it's probably better
to fix the microbenchmark to use more realistic tests.


If you build piglit, it's in bin/drawoverhead.

You're right that this subtest (switching GL_FRAMEBUFFER_SRGB) is
rather artificial and fairly unlikely to occur with real apps.


FYI, I'm dropping this series and I don't have it in my repo anymore.
piglit/drawoverhead will be updated not to test this state change.

Marek


Great.

BTW, I'm not sure what's a good state to change in such microbenchmark.

There is, of course, a myriad of states to pick, but they are not all the
same: performance can vary wildly depending on the choice.   I'm not
sure what's a good representative state change in such circumstances.
Perhaps toggling between two texture objects? Or some sampler state?


Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] radeonsi: reduce overhead for resident textures which need color decompression

2017-06-14 Thread Marek Olšák
On Wed, Jun 14, 2017 at 1:55 PM, Samuel Pitoiset
 wrote:
> This is done by introducing a separate list.
>
> si_decompress_textures() is now 5x faster.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/drivers/radeonsi/si_blit.c| 21 +++--
>  src/gallium/drivers/radeonsi/si_descriptors.c | 64 
> ---
>  src/gallium/drivers/radeonsi/si_pipe.c|  4 ++
>  src/gallium/drivers/radeonsi/si_pipe.h|  4 +-
>  4 files changed, 59 insertions(+), 34 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
> b/src/gallium/drivers/radeonsi/si_blit.c
> index 06a99fbc8a2..b240c4d355e 100644
> --- a/src/gallium/drivers/radeonsi/si_blit.c
> +++ b/src/gallium/drivers/radeonsi/si_blit.c
> @@ -693,18 +693,13 @@ static void si_check_render_feedback(struct si_context 
> *sctx)
>
>  static void si_decompress_resident_textures(struct si_context *sctx)
>  {
> -   util_dynarray_foreach(&sctx->resident_tex_handles,
> +   util_dynarray_foreach(&sctx->resident_tex_needs_color_decompress,
>   struct si_texture_handle *, tex_handle) {
> struct pipe_sampler_view *view = (*tex_handle)->view;
> -   struct si_sampler_view *sview = (struct si_sampler_view 
> *)view;
> struct r600_texture *tex = (struct r600_texture 
> *)view->texture;
>
> -   if (view->texture->target == PIPE_BUFFER)
> -   continue;
> -
> -   if ((*tex_handle)->needs_color_decompress)
> -   si_decompress_color_texture(sctx, tex, 
> view->u.tex.first_level,
> -   view->u.tex.last_level);
> +   si_decompress_color_texture(sctx, tex, 
> view->u.tex.first_level,
> +   view->u.tex.last_level);
> }
>
> util_dynarray_foreach(&sctx->resident_tex_needs_depth_decompress,
> @@ -722,17 +717,13 @@ static void si_decompress_resident_textures(struct 
> si_context *sctx)
>
>  static void si_decompress_resident_images(struct si_context *sctx)
>  {
> -   util_dynarray_foreach(&sctx->resident_img_handles,
> +   util_dynarray_foreach(&sctx->resident_img_needs_color_decompress,
>   struct si_image_handle *, img_handle) {
> struct pipe_image_view *view = &(*img_handle)->view;
> struct r600_texture *tex = (struct r600_texture 
> *)view->resource;
>
> -   if (view->resource->target == PIPE_BUFFER)
> -   continue;
> -
> -   if ((*img_handle)->needs_color_decompress)
> -   si_decompress_color_texture(sctx, tex, 
> view->u.tex.level,
> -   view->u.tex.level);
> +   si_decompress_color_texture(sctx, tex, view->u.tex.level,
> +   view->u.tex.level);
> }
>  }
>
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
> b/src/gallium/drivers/radeonsi/si_descriptors.c
> index a8f54e0714a..f9e87530330 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -1617,29 +1617,41 @@ static void si_set_polygon_stipple(struct 
> pipe_context *ctx,
>  static void
>  si_resident_handles_update_needs_color_decompress(struct si_context *sctx)
>  {
> +

needless new line?

For the series:

Reviewed-by: Marek Olšák 

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/8] i965/gen6: Use isl for hiz

2017-06-14 Thread Pohjolainen, Topi
On Wed, Jun 14, 2017 at 10:18:18AM +0300, Pohjolainen, Topi wrote:
> On Tue, Jun 13, 2017 at 04:20:02PM -0700, Jason Ekstrand wrote:
> > On Tue, Jun 13, 2017 at 4:14 PM, Jason Ekstrand 
> > wrote:
> > 
> > > On Tue, Jun 13, 2017 at 7:53 AM, Topi Pohjolainen <
> > > topi.pohjolai...@gmail.com> wrote:
> > >
> > >> Signed-off-by: Topi Pohjolainen 
> > >> ---
> > >>  src/mesa/drivers/dri/i965/brw_blorp.c |  9 +++--
> > >>  src/mesa/drivers/dri/i965/gen6_depth_state.c  | 12 +++
> > >>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 50
> > >> ++-
> > >>  src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  7 ++--
> > >>  4 files changed, 39 insertions(+), 39 deletions(-)
> > >>
> > >> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c
> > >> b/src/mesa/drivers/dri/i965/brw_blorp.c
> > >> index 4bc53b76b5..b722454703 100644
> > >> --- a/src/mesa/drivers/dri/i965/brw_blorp.c
> > >> +++ b/src/mesa/drivers/dri/i965/brw_blorp.c
> > >> @@ -165,8 +165,13 @@ blorp_surf_for_miptree(struct brw_context *brw,
> > >>
> > >> surf->aux_usage = intel_miptree_get_aux_isl_usage(brw, mt);
> > >>
> > >> -   struct isl_surf *aux_surf = _surfs[1];
> > >> -   intel_miptree_get_aux_isl_surf(brw, mt, surf->aux_usage, aux_surf);
> > >> +   struct isl_surf *aux_surf;
> > >> +   if (brw->gen == 6 && mt->hiz_buf) {
> > >> +  aux_surf = &mt->hiz_buf->aux_base.surf;
> > >> +   } else {
> > >> +  aux_surf = _surfs[1];
> > >> +  intel_miptree_get_aux_isl_surf(brw, mt, surf->aux_usage,
> > >> aux_surf);
> > >>
> > >
> > > This is a bit awkward.  Maybe just make intel_miptree_get_aux_isl_surf
> > > return the surf from hiz_buf on gen6?  Not that it matters much since I
> > > have a feeling this is all going away in the future.
> 
> I'd like to keep intel_miptree_get_aux_isl_surf() unchanged, I'm throwing it
> out later and it is clearer when I don't need to move anything back from it.
> 
> > >
> > >
> > >> +   }
> > >>
> > >> if (wants_resolve) {
> > >>bool supports_aux = surf->aux_usage != ISL_AUX_USAGE_NONE &&
> > >> diff --git a/src/mesa/drivers/dri/i965/gen6_depth_state.c
> > >> b/src/mesa/drivers/dri/i965/gen6_depth_state.c
> > >> index 0d8785db65..0f5e4d3201 100644
> > >> --- a/src/mesa/drivers/dri/i965/gen6_depth_state.c
> > >> +++ b/src/mesa/drivers/dri/i965/gen6_depth_state.c
> > >> @@ -165,18 +165,14 @@ gen6_emit_depth_stencil_hiz(struct brw_context
> > >> *brw,
> > >>/* Emit hiz buffer. */
> > >>if (hiz) {
> > >>   assert(depth_mt);
> > >> - struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_buf->mt;
> > >>
> > >> - assert(hiz_mt->array_layout == GEN6_HIZ_STENCIL);
> > >> -
> > >> - const uint32_t offset = intel_miptree_get_aligned_offset(
> > >> -hiz_mt,
> > >> -hiz_mt->level[lod].level_x,
> > >> -hiz_mt->level[lod].level_y);
> > >> + uint32_t offset;
> > >> + isl_surf_get_image_offset_B_tile_sa(&depth_mt->hiz_buf->aux
> > >> _base.surf,
> > >> + lod, 0, 0, &offset, NULL,
> > >> NULL);
> > >>
> > >>  BEGIN_BATCH(3);
> > >>  OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
> > >> -OUT_BATCH(depth_mt->hiz_buf->aux_base.pitch - 1);
> > >> +OUT_BATCH(depth_mt->hiz_buf->aux_base.surf.row_pitch - 1);
> > >>  OUT_RELOC(depth_mt->hiz_buf->aux_base.bo,
> > >>I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
> > >>offset);
> > >> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > >> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > >> index d87dbfaacd..4dbf853eee 100644
> > >> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > >> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > >> @@ -1058,10 +1058,7 @@ intel_miptree_hiz_buffer_free(struct
> > >> intel_miptree_hiz_buffer *hiz_buf)
> > >> if (hiz_buf == NULL)
> > >>return;
> > >>
> > >> -   if (hiz_buf->mt)
> > >> -  intel_miptree_release(_buf->mt);
> > >> -   else
> > >> -  brw_bo_unreference(hiz_buf->aux_base.bo);
> > >> +   brw_bo_unreference(hiz_buf->aux_base.bo);
> > >>
> > >> free(hiz_buf);
> > >>  }
> > >> @@ -2007,34 +2004,39 @@ intel_hiz_miptree_buf_create(struct brw_context
> > >> *brw,
> > >>   struct intel_mipmap_tree *mt)
> > >>  {
> > >> struct intel_miptree_hiz_buffer *buf = calloc(sizeof(*buf), 1);
> > >> -   uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
> > >> +   if (!buf)
> > >> +  return NULL;
> > >>
> > >> -   if (brw->gen == 6)
> > >> -  layout_flags |= MIPTREE_LAYOUT_GEN6_HIZ_STENCIL;
> > >> +   struct isl_surf_init_info init_info = {
> > >> +  .dim = get_isl_surf_dim(mt->target),
> > >> +  .format = ISL_FORMAT_HIZ,
> > >> +  .width = mt->logical_width0,
> > >> +  

Re: [Mesa-dev] [Mesa-stable] [PATCH 08/11] i965/blorp: Do a depth flush/stall prior to HiZ operations

2017-06-14 Thread Emil Velikov
On 14 June 2017 at 18:15, Jason Ekstrand  wrote:
> On Wed, Jun 14, 2017 at 10:12 AM, Jason Ekstrand 
> wrote:
>>
>> On Wed, Jun 14, 2017 at 3:51 AM, Emil Velikov 
>> wrote:
>>>
>>> Hi Jason,
>>>
>>> On 7 June 2017 at 06:00, Jason Ekstrand  wrote:
>>> > Without this stall, the test group ES3-CTS.functional.fbo.msaa.\* hangs
>>> > about 1 out of every 2 or 3 times on my Sky Lake GT3 laptop.  With the
>>> > flush and stall, I can run it 6 times in a row without a hang.
>>> >
>>> > Cc: "17.1" 
>>> > ---
>>> >  src/mesa/drivers/dri/i965/brw_blorp.c | 17 +
>>> >  1 file changed, 17 insertions(+)
>>> >
>>> Doesn't seem like this patch has landed, has it? Or perhaps it's
>>> superseded by another commit?
>>
>>
>> No, I just have too much in flight and need to actually push my patches.
>> :-)  I'll try and get this landed today.
>
>
> Scratch that... It was superseded by
> acbd02450bfd53f61bbe468a6f0e8bf5e4507095 and friends.
>
Great, I already have it (& Co) in the queue.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa/tests: add GL_ARB_bindless_texture entry points

2017-06-14 Thread Samuel Pitoiset

See https://lists.freedesktop.org/archives/mesa-dev/2017-June/159273.html.

Thanks.

On 06/14/2017 09:33 PM, Ian Romanick wrote:

From: Ian Romanick 

Should have been part of commit 5f249b9 "mapi: add
GL_ARB_bindless_texture entry points"

Signed-off-by: Ian Romanick 
Cc: Samuel Pitoiset 
Cc: Nicolai Hähnle 
Cc: Mark Janes 
---
  src/mesa/main/tests/dispatch_sanity.cpp | 18 ++
  1 file changed, 18 insertions(+)

diff --git a/src/mesa/main/tests/dispatch_sanity.cpp 
b/src/mesa/main/tests/dispatch_sanity.cpp
index 47d0aa6..408c813 100644
--- a/src/mesa/main/tests/dispatch_sanity.cpp
+++ b/src/mesa/main/tests/dispatch_sanity.cpp
@@ -965,6 +965,24 @@ const struct function common_desktop_functions_possible[] 
= {
 { "glBufferPageCommitmentARB", 43, -1 },
 { "glNamedBufferPageCommitmentARB", 43, -1 },
  
+   /* GL_ARB_bindless_texture */

+   { "glGetTextureHandleARB", 45, -1 },
+   { "glGetTextureSamplerHandleARB", 45, -1 },
+   { "glMakeTextureHandleResidentARB", 45, -1 },
+   { "glMakeTextureHandleNonResidentARB", 45, -1 },
+   { "glGetImageHandleARB", 45, -1 },
+   { "glMakeImageHandleResidentARB", 45, -1 },
+   { "glMakeImageHandleNonResidentARB", 45, -1 },
+   { "glUniformHandleui64ARB", 45, -1 },
+   { "glUniformHandleui64vARB", 45, -1 },
+   { "glProgramUniformHandleui64ARB", 45, -1 },
+   { "glProgramUniformHandleui64vARB", 45, -1 },
+   { "glIsTextureHandleResidentARB", 45, -1 },
+   { "glIsImageHandleResidentARB", 45, -1 },
+   { "glVertexAttribL1ui64ARB", 45, -1 },
+   { "glVertexAttribL1ui64vARB", 45, -1 },
+   { "glGetVertexAttribLui64vARB", 45, -1 },
+
 { NULL, 0, -1 }
  };
  


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa/tests: add GL_ARB_bindless_texture entry points

2017-06-14 Thread Ian Romanick
From: Ian Romanick 

Should have been part of commit 5f249b9 "mapi: add
GL_ARB_bindless_texture entry points"

Signed-off-by: Ian Romanick 
Cc: Samuel Pitoiset 
Cc: Nicolai Hähnle 
Cc: Mark Janes 
---
 src/mesa/main/tests/dispatch_sanity.cpp | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/src/mesa/main/tests/dispatch_sanity.cpp 
b/src/mesa/main/tests/dispatch_sanity.cpp
index 47d0aa6..408c813 100644
--- a/src/mesa/main/tests/dispatch_sanity.cpp
+++ b/src/mesa/main/tests/dispatch_sanity.cpp
@@ -965,6 +965,24 @@ const struct function common_desktop_functions_possible[] 
= {
{ "glBufferPageCommitmentARB", 43, -1 },
{ "glNamedBufferPageCommitmentARB", 43, -1 },
 
+   /* GL_ARB_bindless_texture */
+   { "glGetTextureHandleARB", 45, -1 },
+   { "glGetTextureSamplerHandleARB", 45, -1 },
+   { "glMakeTextureHandleResidentARB", 45, -1 },
+   { "glMakeTextureHandleNonResidentARB", 45, -1 },
+   { "glGetImageHandleARB", 45, -1 },
+   { "glMakeImageHandleResidentARB", 45, -1 },
+   { "glMakeImageHandleNonResidentARB", 45, -1 },
+   { "glUniformHandleui64ARB", 45, -1 },
+   { "glUniformHandleui64vARB", 45, -1 },
+   { "glProgramUniformHandleui64ARB", 45, -1 },
+   { "glProgramUniformHandleui64vARB", 45, -1 },
+   { "glIsTextureHandleResidentARB", 45, -1 },
+   { "glIsImageHandleResidentARB", 45, -1 },
+   { "glVertexAttribL1ui64ARB", 45, -1 },
+   { "glVertexAttribL1ui64vARB", 45, -1 },
+   { "glGetVertexAttribLui64vARB", 45, -1 },
+
{ NULL, 0, -1 }
 };
 
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] mesa/util: add util_dynarray_clear() helper

2017-06-14 Thread Marek Olšák
On Wed, Jun 14, 2017 at 9:08 PM, Samuel Pitoiset
 wrote:
>
>
> On 06/14/2017 08:24 PM, Marek Olšák wrote:
>>
>> On Wed, Jun 14, 2017 at 1:55 PM, Samuel Pitoiset
>>  wrote:
>>>
>>> Signed-off-by: Samuel Pitoiset 
>>> ---
>>>   src/util/u_dynarray.h | 6 ++
>>>   1 file changed, 6 insertions(+)
>>>
>>> diff --git a/src/util/u_dynarray.h b/src/util/u_dynarray.h
>>> index 57f96ff79c0..cc316323f28 100644
>>> --- a/src/util/u_dynarray.h
>>> +++ b/src/util/u_dynarray.h
>>> @@ -68,6 +68,12 @@ util_dynarray_fini(struct util_dynarray *buf)
>>>  }
>>>   }
>>>
>>> +static inline void
>>> +util_dynarray_clear(struct util_dynarray *buf)
>>
>>
>> util_dynarray_init? "clear" sounds like "free".
>>
>
> There is already util_dynarray_init(), but it resets all fields to 0
> including 'capacity', same for util_dynarray_fini().
>
> The idea behind util_dynarray_clear() is to avoid extra reallocations.
>
> Though, the number of color/depth textures which need decompression is most
> likely small, so using util_dynarray_fini() shouldn't impact too much. What
> do you think?

"clear" sounds good then.

Marek

>
>
>> Marek
>>
>>> +{
>>> +   buf->size = 0;
>>> +}
>>> +
>>>   #define DYN_ARRAY_INITIAL_SIZE 64
>>>
>>>   /* use util_dynarray_trim to reduce the allocated storage */
>>> --
>>> 2.13.1
>>>
>>> ___
>>> mesa-dev mailing list
>>> mesa-dev@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] gallium/radeon: fix initialization of new resource bindless fields

2017-06-14 Thread Samuel Pitoiset
r600_resource objects are not calloc'd.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/radeon/r600_buffer_common.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c 
b/src/gallium/drivers/radeon/r600_buffer_common.c
index fb74b45d2fa..5336f55cb57 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -110,6 +110,8 @@ void r600_init_resource_fields(struct r600_common_screen 
*rscreen,
res->bo_size = size;
res->bo_alignment = alignment;
res->flags = 0;
+   res->texture_handle_allocated = false;
+   res->image_handle_allocated = false;
 
switch (res->b.b.usage) {
case PIPE_USAGE_STREAM:
-- 
2.13.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] mesa/util: add util_dynarray_clear() helper

2017-06-14 Thread Samuel Pitoiset



On 06/14/2017 08:24 PM, Marek Olšák wrote:

On Wed, Jun 14, 2017 at 1:55 PM, Samuel Pitoiset
 wrote:

Signed-off-by: Samuel Pitoiset 
---
  src/util/u_dynarray.h | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/src/util/u_dynarray.h b/src/util/u_dynarray.h
index 57f96ff79c0..cc316323f28 100644
--- a/src/util/u_dynarray.h
+++ b/src/util/u_dynarray.h
@@ -68,6 +68,12 @@ util_dynarray_fini(struct util_dynarray *buf)
 }
  }

+static inline void
+util_dynarray_clear(struct util_dynarray *buf)


util_dynarray_init? "clear" sounds like "free".



There is already util_dynarray_init(), but it resets all fields to 0 
including 'capacity', same for util_dynarray_fini().


The idea behind util_dynarray_clear() is to avoid extra reallocations.

Though, the number of color/depth textures which need decompression is 
most likely small, so using util_dynarray_fini() shouldn't impact too 
much. What do you think?



Marek


+{
+   buf->size = 0;
+}
+
  #define DYN_ARRAY_INITIAL_SIZE 64

  /* use util_dynarray_trim to reduce the allocated storage */
--
2.13.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/11] i965: Use blorp for depth/stencil clears on gen6+

2017-06-14 Thread Pohjolainen, Topi
On Tue, Jun 06, 2017 at 10:00:06PM -0700, Jason Ekstrand wrote:
> ---
>  src/mesa/drivers/dri/i965/brw_blorp.c | 106 
> ++
>  src/mesa/drivers/dri/i965/brw_blorp.h |   4 ++
>  src/mesa/drivers/dri/i965/brw_clear.c |   6 ++
>  3 files changed, 116 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
> b/src/mesa/drivers/dri/i965/brw_blorp.c
> index 38925d9..a46b624 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.c
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.c
> @@ -930,6 +930,112 @@ brw_blorp_clear_color(struct brw_context *brw, struct 
> gl_framebuffer *fb,
>  }
>  
>  void
> +brw_blorp_clear_depth_stencil(struct brw_context *brw,
> +  struct gl_framebuffer *fb,
> +  GLbitfield mask, bool partial_clear)
> +{
> +   const struct gl_context *ctx = &brw->ctx;
> +   struct gl_renderbuffer *depth_rb =
> +  fb->Attachment[BUFFER_DEPTH].Renderbuffer;
> +   struct gl_renderbuffer *stencil_rb =
> +  fb->Attachment[BUFFER_STENCIL].Renderbuffer;
> +
> +   if (!depth_rb || ctx->Depth.Mask == GL_FALSE)
> +  mask &= ~BUFFER_BIT_DEPTH;
> +
> +   if (!stencil_rb || (ctx->Stencil.WriteMask[0] & 0xff) == 0)
> +  mask &= ~BUFFER_BIT_STENCIL;
> +
> +   if (!(mask & (BUFFER_BITS_DEPTH_STENCIL)))
> +  return;
> +
> +   uint32_t x0, x1, y0, y1, rb_name, rb_height;
> +   if (depth_rb) {
> +  rb_name = depth_rb->Name;
> +  rb_height = depth_rb->Height;
> +  if (stencil_rb) {
> + assert(depth_rb->Width == stencil_rb->Width);
> + assert(depth_rb->Height == stencil_rb->Height);
> +  }
> +   } else {
> +  assert(stencil_rb);
> +  rb_name = stencil_rb->Name;
> +  rb_height = stencil_rb->Height;
> +   }
> +
> +   x0 = fb->_Xmin;
> +   x1 = fb->_Xmax;
> +   if (rb_name != 0) {
> +  y0 = fb->_Ymin;
> +  y1 = fb->_Ymax;
> +   } else {
> +  y0 = rb_height - fb->_Ymax;
> +  y1 = rb_height - fb->_Ymin;
> +   }
> +
> +   /* If the clear region is empty, just return. */
> +   if (x0 == x1 || y0 == y1)
> +  return;
> +
> +   unsigned level, layer, num_layers;
> +   struct isl_surf isl_tmp[4];
> +   struct blorp_surf depth_surf, stencil_surf;
> +
> +   if (mask & BUFFER_BIT_DEPTH) {
> +  struct intel_renderbuffer *irb = intel_renderbuffer(depth_rb);
> +  struct intel_mipmap_tree *depth_mt =
> + find_miptree(GL_DEPTH_BUFFER_BIT, irb);
> +
> +  level = irb->mt_level;
> +  layer = irb_logical_mt_layer(irb);
> +  num_layers = fb->MaxNumLayers ? irb->layer_count : 1;
> +
> +  intel_miptree_set_all_slices_need_depth_resolve(depth_mt, level);
> +
> +  unsigned depth_level = level;
> +  blorp_surf_for_miptree(brw, &depth_surf, depth_mt, true,
> + (1 << ISL_AUX_USAGE_HIZ),
> + &depth_level, layer, num_layers, &isl_tmp[0]);
> +  assert(depth_level == level);
> +   }
> +
> +   uint8_t stencil_mask = 0;
> +   if (mask & BUFFER_BIT_STENCIL) {
> +  struct intel_renderbuffer *irb = intel_renderbuffer(stencil_rb);
> +  struct intel_mipmap_tree *stencil_mt =
> + find_miptree(GL_STENCIL_BUFFER_BIT, irb);
> +
> +  if (mask & BUFFER_BIT_DEPTH) {
> + assert(level == irb->mt_level);
> + assert(layer == irb_logical_mt_layer(irb));
> + assert(num_layers == fb->MaxNumLayers ? irb->layer_count : 1);
> +  } else {
> + level = irb->mt_level;
> + layer = irb_logical_mt_layer(irb);
> + num_layers = fb->MaxNumLayers ? irb->layer_count : 1;
> +  }
> +
> +  stencil_mask = ctx->Stencil.WriteMask[0] & 0xff;
> +
> +  unsigned stencil_level = level;
> +  blorp_surf_for_miptree(brw, &stencil_surf, stencil_mt, true,
> + (1 << ISL_AUX_USAGE_HIZ),

Why do we set hiz for stencil?

I noticed that anv_blorp.c::anv_CmdClearDepthStencilImage() sets it to NONE
for depth and stencil while get_blorp_surf_for_anv_image() has code to take
the HIZ usage away for stencil (if given).


Otherwise looks good to me:

Reviewed-by: Topi Pohjolainen 

> + &stencil_level, layer, num_layers, &isl_tmp[2]);
> +   }
> +
> +   assert((mask & BUFFER_BIT_DEPTH) || stencil_mask);
> +
> +   struct blorp_batch batch;
> +   blorp_batch_init(&brw->blorp, &batch, brw, 0);
> +   blorp_clear_depth_stencil(&batch, &depth_surf, &stencil_surf,
> + level, layer, num_layers,
> + x0, y0, x1, y1,
> + (mask & BUFFER_BIT_DEPTH), ctx->Depth.Clear,
> + stencil_mask, ctx->Stencil.Clear);
> +   blorp_batch_finish(&batch);
> +}
> +
> +void
>  brw_blorp_resolve_color(struct brw_context *brw, struct intel_mipmap_tree 
> *mt,
>  unsigned level, unsigned layer)
>  {
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h 
> b/src/mesa/drivers/dri/i965/brw_blorp.h
> index 8743d96..868301f 100644
> --- 

Re: [Mesa-dev] [PATCH 01/15] i965/miptree: Refactor mapping table alloc

2017-06-14 Thread Pohjolainen, Topi
On Tue, Jun 13, 2017 at 04:31:26PM -0700, Nanley Chery wrote:
> On Tue, Jun 13, 2017 at 05:49:59PM +0300, Topi Pohjolainen wrote:
> > Signed-off-by: Topi Pohjolainen 
> > ---
> >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 29 
> > +--
> >  1 file changed, 27 insertions(+), 2 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > index 253d833b13..78a223a7f3 100644
> > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > @@ -285,6 +285,26 @@ intel_depth_format_for_depthstencil_format(mesa_format 
> > format) {
> > }
> >  }
> >  
> > +static bool
> > +create_mapping_table(GLenum target, unsigned first_level, unsigned 
> > last_level,
> > + unsigned depth0, struct intel_mipmap_level *table)
> > +{
> > +   for (unsigned level = first_level; level <= last_level; level++) {
> > +  const unsigned d = target == GL_TEXTURE_3D ? depth0 >> level : 
> > depth0;
> 
> There's a bug here. If the target is GL_TEXTURE_3D we should
> minify(depth0, level) to avoid setting a depth of 0.

Oops, definitely.

> 
> This seems to be more than a refactor. Prior to this patch,
> brw_miptree_layout_gen6_hiz_stencil wouldn't shrink the number of slices
> per mipmap level as the level increases, but does so now.

Right. I actually missed that. How do you want to handle that? I could write a
patch against brw_miptree_layout_gen6_hiz_stencil() doing the same thing there
(modifying the argument given to intel_miptree_set_level_info() but keeping
actual allocation size as it was in order to have space for level 0 qpitch).

> 
> -Nanley
> 
> > +
> > +  table[level].slice = calloc(d, sizeof(*table[0].slice));
> > +  if (!table[level].slice)
> > + goto unwind;
> > +   }
> > +
> > +   return true;
> > +
> > +unwind:
> > +   for (unsigned level = first_level; level <= last_level; level++)
> > +  free(table[level].slice);
> > +
> > +   return false;
> > +}
> >  
> >  /**
> >   * @param for_bo Indicates that the caller is
> > @@ -424,6 +444,12 @@ intel_miptree_create_layout(struct brw_context *brw,
> >}
> > }
> >  
> > +   if (!create_mapping_table(target, first_level, last_level, depth0,
> > + mt->level)) {
> > +  free(mt);
> > +  return NULL;
> > +   }
> > +
> > /* Set array_layout to ALL_SLICES_AT_EACH_LOD when array_spacing_lod0 
> > can
> >  * be used. array_spacing_lod0 is only used for non-IMS MSAA surfaces on
> >  * Gen 7 and 8. On Gen 8 and 9 this layout is not available but it is 
> > still
> > @@ -1103,9 +1129,8 @@ intel_miptree_set_level_info(struct intel_mipmap_tree 
> > *mt,
> > DBG("%s level %d, depth %d, offset %d,%d\n", __func__,
> > level, d, x, y);
> >  
> > -   assert(mt->level[level].slice == NULL);
> > +   assert(mt->level[level].slice);
> >  
> > -   mt->level[level].slice = calloc(d, sizeof(*mt->level[0].slice));
> > mt->level[level].slice[0].x_offset = mt->level[level].level_x;
> > mt->level[level].slice[0].y_offset = mt->level[level].level_y;
> >  }
> > -- 
> > 2.11.0
> > 
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] mesa/util: add util_dynarray_clear() helper

2017-06-14 Thread Thomas Helland
2017-06-14 13:55 GMT+02:00 Samuel Pitoiset :
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/util/u_dynarray.h | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/src/util/u_dynarray.h b/src/util/u_dynarray.h
> index 57f96ff79c0..cc316323f28 100644
> --- a/src/util/u_dynarray.h
> +++ b/src/util/u_dynarray.h
> @@ -68,6 +68,12 @@ util_dynarray_fini(struct util_dynarray *buf)
> }
>  }
>
> +static inline void
> +util_dynarray_clear(struct util_dynarray *buf)
> +{
> +   buf->size = 0;
> +}
> +

The indentation looks a bit off here? Also, I agree with Marek;
clear indicates to me that the memory is either freed or that
it is cleared to NULL.

>  #define DYN_ARRAY_INITIAL_SIZE 64
>
>  /* use util_dynarray_trim to reduce the allocated storage */
> --
> 2.13.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/15] i965: Prepare slice validator for isl based miptrees

2017-06-14 Thread Pohjolainen, Topi
On Wed, Jun 14, 2017 at 11:28:39AM -0700, Nanley Chery wrote:
> On Wed, Jun 14, 2017 at 10:36:16AM +0300, Pohjolainen, Topi wrote:
> > On Tue, Jun 13, 2017 at 05:26:52PM -0700, Nanley Chery wrote:
> > > On Tue, Jun 13, 2017 at 05:50:04PM +0300, Topi Pohjolainen wrote:
> > > > Signed-off-by: Topi Pohjolainen 
> > > > ---
> > > >  src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 8 +++-
> > > >  1 file changed, 7 insertions(+), 1 deletion(-)
> > > > 
> > > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 
> > > > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> > > > index 8479b285cb..0b85bc12ef 100644
> > > > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> > > > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> > > > @@ -748,7 +748,13 @@ intel_miptree_check_level_layer(const struct 
> > > > intel_mipmap_tree *mt,
> > > >  
> > > > assert(level >= mt->first_level);
> > > > assert(level <= mt->last_level);
> > > > -   assert(layer < mt->level[level].depth);
> > > > +
> > > > +   if (mt->surf.size > 0)
> > > > +  assert(layer < (mt->surf.dim == ISL_SURF_DIM_3D ?
> > > > + mt->surf.phys_level0_sa.depth :
> > > > + mt->surf.phys_level0_sa.array_len));
> > > 
> > > Did you mean to access mt->surf.logical_level0_px here?
> > 
> > I was just about to say that "Actually no, mt->level[level].depth represents
> > the number of physical layers." 
> 
> You may be right. I just expected the other field would be accessed
> because in the previous patch you access logical_level0_px instead of
> phys_level0_sa. I'm not very experienced with this code so I may have
> missed some detail.

Funny, I was just double checking this myself :) Things actually are as they
used to be, "mt->level[level].depth" is still based on physical depth. It
will go away altogether once I'm done with color surfaces.

> 
> > But now reading the current logic I remembered
> > that Jason just recently changed all that. This is based on the way it was
> > before. Thanks Nanley!
> > 
> > > 
> > > -Nanley
> > > 
> > > > +   else
> > > > +  assert(layer < mt->level[level].depth);
> > > >  }
> > > >  
> > > >  void intel_miptree_reference(struct intel_mipmap_tree **dst,
> > > > -- 
> > > > 2.11.0
> > > > 
> > > > ___
> > > > mesa-dev mailing list
> > > > mesa-dev@lists.freedesktop.org
> > > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/15] i965: Prepare slice validator for isl based miptrees

2017-06-14 Thread Nanley Chery
On Wed, Jun 14, 2017 at 10:36:16AM +0300, Pohjolainen, Topi wrote:
> On Tue, Jun 13, 2017 at 05:26:52PM -0700, Nanley Chery wrote:
> > On Tue, Jun 13, 2017 at 05:50:04PM +0300, Topi Pohjolainen wrote:
> > > Signed-off-by: Topi Pohjolainen 
> > > ---
> > >  src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 8 +++-
> > >  1 file changed, 7 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 
> > > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> > > index 8479b285cb..0b85bc12ef 100644
> > > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> > > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> > > @@ -748,7 +748,13 @@ intel_miptree_check_level_layer(const struct 
> > > intel_mipmap_tree *mt,
> > >  
> > > assert(level >= mt->first_level);
> > > assert(level <= mt->last_level);
> > > -   assert(layer < mt->level[level].depth);
> > > +
> > > +   if (mt->surf.size > 0)
> > > +  assert(layer < (mt->surf.dim == ISL_SURF_DIM_3D ?
> > > + mt->surf.phys_level0_sa.depth :
> > > + mt->surf.phys_level0_sa.array_len));
> > 
> > Did you mean to access mt->surf.logical_level0_px here?
> 
> I was just about to say that "Actually no, mt->level[level].depth represents
> the number of physical layers." 

You may be right. I just expected the other field would be accessed
because in the previous patch you access logical_level0_px instead of
phys_level0_sa. I'm not very experienced with this code so I may have
missed some detail.

> But now reading the current logic I remembered
> that Jason just recently changed all that. This is based on the way it was
> before. Thanks Nanley!
> 
> > 
> > -Nanley
> > 
> > > +   else
> > > +  assert(layer < mt->level[level].depth);
> > >  }
> > >  
> > >  void intel_miptree_reference(struct intel_mipmap_tree **dst,
> > > -- 
> > > 2.11.0
> > > 
> > > ___
> > > mesa-dev mailing list
> > > mesa-dev@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] mesa/util: add util_dynarray_clear() helper

2017-06-14 Thread Marek Olšák
On Wed, Jun 14, 2017 at 1:55 PM, Samuel Pitoiset
 wrote:
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/util/u_dynarray.h | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/src/util/u_dynarray.h b/src/util/u_dynarray.h
> index 57f96ff79c0..cc316323f28 100644
> --- a/src/util/u_dynarray.h
> +++ b/src/util/u_dynarray.h
> @@ -68,6 +68,12 @@ util_dynarray_fini(struct util_dynarray *buf)
> }
>  }
>
> +static inline void
> +util_dynarray_clear(struct util_dynarray *buf)

util_dynarray_init? "clear" sounds like "free".

Marek

> +{
> +   buf->size = 0;
> +}
> +
>  #define DYN_ARRAY_INITIAL_SIZE 64
>
>  /* use util_dynarray_trim to reduce the allocated storage */
> --
> 2.13.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium/radeon: add a new HUD query for the number of resident handles

2017-06-14 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Wed, Jun 14, 2017 at 11:40 AM, Samuel Pitoiset
 wrote:
> Useful for debugging performance issues when ARB_bindless_texture
> is enabled. This query doesn't make a distinction between texture
> and image handles.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/drivers/radeon/r600_pipe_common.h | 1 +
>  src/gallium/drivers/radeon/r600_query.c   | 7 +++
>  src/gallium/drivers/radeon/r600_query.h   | 1 +
>  src/gallium/drivers/radeonsi/si_descriptors.c | 3 +++
>  4 files changed, 12 insertions(+)
>
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
> b/src/gallium/drivers/radeon/r600_pipe_common.h
> index 45ed5bab74b..006b795fe29 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -598,6 +598,7 @@ struct r600_common_context {
> unsignednum_fb_cache_flushes;
> unsignednum_L2_invalidates;
> unsignednum_L2_writebacks;
> +   unsignednum_resident_handles;
> uint64_tnum_alloc_tex_transfer_bytes;
> unsignedlast_tex_ps_draw_ratio; /* for query 
> */
>
> diff --git a/src/gallium/drivers/radeon/r600_query.c 
> b/src/gallium/drivers/radeon/r600_query.c
> index bce43171e30..8bd94e6a940 100644
> --- a/src/gallium/drivers/radeon/r600_query.c
> +++ b/src/gallium/drivers/radeon/r600_query.c
> @@ -134,6 +134,9 @@ static bool r600_query_sw_begin(struct 
> r600_common_context *rctx,
> case R600_QUERY_NUM_L2_WRITEBACKS:
> query->begin_result = rctx->num_L2_writebacks;
> break;
> +   case R600_QUERY_NUM_RESIDENT_HANDLES:
> +   query->begin_result = rctx->num_resident_handles;
> +   break;
> case R600_QUERY_TC_OFFLOADED_SLOTS:
> query->begin_result = rctx->tc ? 
> rctx->tc->num_offloaded_slots : 0;
> break;
> @@ -276,6 +279,9 @@ static bool r600_query_sw_end(struct r600_common_context 
> *rctx,
> case R600_QUERY_NUM_L2_WRITEBACKS:
> query->end_result = rctx->num_L2_writebacks;
> break;
> +   case R600_QUERY_NUM_RESIDENT_HANDLES:
> +   query->end_result = rctx->num_resident_handles;
> +   break;
> case R600_QUERY_TC_OFFLOADED_SLOTS:
> query->end_result = rctx->tc ? rctx->tc->num_offloaded_slots 
> : 0;
> break;
> @@ -1834,6 +1840,7 @@ static struct pipe_driver_query_info 
> r600_driver_query_list[] = {
> X("num-fb-cache-flushes",   NUM_FB_CACHE_FLUSHES,   UINT64, 
> AVERAGE),
> X("num-L2-invalidates", NUM_L2_INVALIDATES, UINT64, 
> AVERAGE),
> X("num-L2-writebacks",  NUM_L2_WRITEBACKS,  UINT64, 
> AVERAGE),
> +   X("num-resident-handles",   NUM_RESIDENT_HANDLES,   UINT64, 
> AVERAGE),
> X("tc-offloaded-slots", TC_OFFLOADED_SLOTS, UINT64, 
> AVERAGE),
> X("tc-direct-slots",TC_DIRECT_SLOTS,UINT64, 
> AVERAGE),
> X("tc-num-syncs",   TC_NUM_SYNCS,   UINT64, 
> AVERAGE),
> diff --git a/src/gallium/drivers/radeon/r600_query.h 
> b/src/gallium/drivers/radeon/r600_query.h
> index ed607ec199b..9e6617f342a 100644
> --- a/src/gallium/drivers/radeon/r600_query.h
> +++ b/src/gallium/drivers/radeon/r600_query.h
> @@ -54,6 +54,7 @@ enum {
> R600_QUERY_NUM_FB_CACHE_FLUSHES,
> R600_QUERY_NUM_L2_INVALIDATES,
> R600_QUERY_NUM_L2_WRITEBACKS,
> +   R600_QUERY_NUM_RESIDENT_HANDLES,
> R600_QUERY_TC_OFFLOADED_SLOTS,
> R600_QUERY_TC_DIRECT_SLOTS,
> R600_QUERY_TC_NUM_SYNCS,
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
> b/src/gallium/drivers/radeonsi/si_descriptors.c
> index 375bcaea937..41f6e054615 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -2595,6 +2595,9 @@ void si_all_resident_buffers_begin_new_cs(struct 
> si_context *sctx)
>RADEON_USAGE_READWRITE,
>false, false);
> }
> +
> +   sctx->b.num_resident_handles += num_resident_tex_handles +
> +   num_resident_img_handles;
>  }
>
>  /* INIT/DEINIT/UPLOAD */
> --
> 2.13.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 09/24] st/mesa: sink code needed for apply_texture_swizzle_to_border_color

2017-06-14 Thread Marek Olšák
On Wed, Jun 14, 2017 at 7:27 PM, Marek Olšák  wrote:
> On Tue, Jun 13, 2017 at 8:10 AM, Timothy Arceri  wrote:
>>
>>
>> On 13/06/17 04:18, Marek Olšák wrote:
>>>
>>> From: Marek Olšák 
>>>
>>> AMD SI-VI use this. GFX9 doesn't. We can stop doing this for SI-VI since
>>> border color swizzling is broken there anyway. The only other user of this
>>> code is nouveau.
>>
>>
>> Maybe move this comment into the code as a TODO? I was a little confused at
>> first as I thought this commit was meant to make the change. With that:
>
> I don't understand. What are you confused about?

The commit message talks about radeonsi, but this patch is for
st/mesa. st/mesa doesn't care which drivers use the codepath.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/24] cso: don't track the number of sampler states bound

2017-06-14 Thread Marek Olšák
On Wed, Jun 14, 2017 at 8:03 PM, Marek Olšák  wrote:
> On Wed, Jun 14, 2017 at 9:23 AM, Nicolai Hähnle  wrote:
>> On 12.06.2017 20:18, Marek Olšák wrote:
>>>
>>> From: Marek Olšák 
>>>
>>> This removes 2 loops from hot codepaths and adds 1 loop to a rare codepath
>>> (restore_sampler_states), and makes sanitize_hash() slightly worse.
>>>
>>> Sampler states, when bound, are not unbound for draw calls that don't need
>>> them. That's OK, because bound sampler states don't add any overhead.
>>
>>
>> Is this really always true? They might show up in texture decompression
>> checks.
>
> Sampler states are not sampler views. They don't have any resources
> attached to them.

Also, radeonsi never unbinds sampler states, so this change has no
effect on radeonsi behavior.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/24] cso: don't track the number of sampler states bound

2017-06-14 Thread Marek Olšák
On Wed, Jun 14, 2017 at 9:23 AM, Nicolai Hähnle  wrote:
> On 12.06.2017 20:18, Marek Olšák wrote:
>>
>> From: Marek Olšák 
>>
>> This removes 2 loops from hot codepaths and adds 1 loop to a rare codepath
>> (restore_sampler_states), and makes sanitize_hash() slightly worse.
>>
>> Sampler states, when bound, are not unbound for draw calls that don't need
>> them. That's OK, because bound sampler states don't add any overhead.
>
>
> Is this really always true? They might show up in texture decompression
> checks.

Sampler states are not sampler views. They don't have any resources
attached to them.

Marek

>
> Cheers,
> Nicolai
>
>
>
>>
>> This results in lower CPU overhead in most cases.
>> ---
>>   src/gallium/auxiliary/cso_cache/cso_context.c | 59
>> +++
>>   1 file changed, 23 insertions(+), 36 deletions(-)
>>
>> diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c
>> b/src/gallium/auxiliary/cso_cache/cso_context.c
>> index 5558385..4947b8e 100644
>> --- a/src/gallium/auxiliary/cso_cache/cso_context.c
>> +++ b/src/gallium/auxiliary/cso_cache/cso_context.c
>> @@ -50,21 +50,20 @@
>>   #include "cso_context.h"
>>   /**
>>* Per-shader sampler information.
>>*/
>>   struct sampler_info
>>   {
>>  struct cso_sampler *cso_samplers[PIPE_MAX_SAMPLERS];
>>  void *samplers[PIPE_MAX_SAMPLERS];
>> -   unsigned nr_samplers;
>>   };
>> struct cso_context {
>>  struct pipe_context *pipe;
>>  struct cso_cache *cache;
>>  struct u_vbuf *vbuf;
>>boolean has_geometry_shader;
>> @@ -76,20 +75,25 @@ struct cso_context {
>>struct pipe_sampler_view
>> *fragment_views[PIPE_MAX_SHADER_SAMPLER_VIEWS];
>>  unsigned nr_fragment_views;
>>struct pipe_sampler_view
>> *fragment_views_saved[PIPE_MAX_SHADER_SAMPLER_VIEWS];
>>  unsigned nr_fragment_views_saved;
>>struct sampler_info fragment_samplers_saved;
>>  struct sampler_info samplers[PIPE_SHADER_TYPES];
>>   +   /* Temporary number until cso_single_sampler_done is called.
>> +* It tracks the highest sampler seen in cso_single_sampler.
>> +*/
>> +   int max_sampler_seen;
>> +
>>  struct pipe_vertex_buffer aux_vertex_buffer_current;
>>  struct pipe_vertex_buffer aux_vertex_buffer_saved;
>>  unsigned aux_vertex_buffer_index;
>>struct pipe_constant_buffer
>> aux_constbuf_current[PIPE_SHADER_TYPES];
>>  struct pipe_constant_buffer aux_constbuf_saved[PIPE_SHADER_TYPES];
>>struct pipe_image_view fragment_image0_current;
>>  struct pipe_image_view fragment_image0_saved;
>>   @@ -233,21 +237,21 @@ sanitize_hash(struct cso_hash *hash, enum
>> cso_cache_type type,
>>  if (type == CSO_SAMPLER) {
>> int i, j;
>>   samplers_to_restore = MALLOC(PIPE_SHADER_TYPES *
>> PIPE_MAX_SAMPLERS *
>>  sizeof(*samplers_to_restore));
>>   /* Temporarily remove currently bound sampler states from the
>> hash
>>  * table, to prevent them from being deleted
>>  */
>> for (i = 0; i < PIPE_SHADER_TYPES; i++) {
>> - for (j = 0; j < ctx->samplers[i].nr_samplers; j++) {
>> + for (j = 0; j < PIPE_MAX_SAMPLERS; j++) {
>>   struct cso_sampler *sampler =
>> ctx->samplers[i].cso_samplers[j];
>> if (sampler && cso_hash_take(hash, sampler->hash_key))
>>  samplers_to_restore[to_restore++] = sampler;
>>}
>> }
>>  }
>>iter = cso_hash_first_node(hash);
>>  while (to_remove) {
>> @@ -327,20 +331,21 @@ cso_create_context(struct pipe_context *pipe,
>> unsigned u_vbuf_flags)
>>   PIPE_SHADER_CAP_SUPPORTED_IRS);
>> if (supported_irs & (1 << PIPE_SHADER_IR_TGSI)) {
>>ctx->has_compute_shader = TRUE;
>> }
>>  }
>>  if (pipe->screen->get_param(pipe->screen,
>>  PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS) != 0)
>> {
>> ctx->has_streamout = TRUE;
>>  }
>>   +   ctx->max_sampler_seen = -1;
>>  return ctx;
>> out:
>>  cso_destroy_context( ctx );
>>  return NULL;
>>   }
>> /**
>>* Free the CSO context.
>>*/
>> @@ -1223,116 +1228,98 @@ cso_single_sampler(struct cso_context *ctx, enum
>> pipe_shader_type shader_stage,
>>   FREE(cso);
>>   return PIPE_ERROR_OUT_OF_MEMORY;
>>}
>> }
>> else {
>>cso = cso_hash_iter_data(iter);
>> }
>>   ctx->samplers[shader_stage].cso_samplers[idx] = cso;
>> ctx->samplers[shader_stage].samplers[idx] = cso->data;
>> -   } else {
>> -  ctx->samplers[shader_stage].cso_samplers[idx] = NULL;
>> -  ctx->samplers[shader_stage].samplers[idx] = NULL;
>> +  ctx->max_sampler_seen = MAX2(ctx->max_sampler_seen, (int)idx);
>>  }
>>return PIPE_OK;
>>   }
>>   /**
>>* Send staged sampler state to the 

Re: [Mesa-dev] [PATCH 3/3] i965/gen4: Add support for single layer in alignment workaround

2017-06-14 Thread Ian Romanick
On 06/09/2017 07:04 AM, Topi Pohjolainen wrote:
> On gen < 6 one doesn't have level or layer specifiers available
> for render and depth targets. In order to support rendering to
> specific level/layer, driver needs to manually offset the surface
> to the desired slice.
> There are, however, alignment restrictions to respect as well and
> in some cases the only option is to use temporary single slice
> surface which driver copies after rendering to the full miptree.
> 
> Current alignment workaround introduces new texture images which
> are added to the parent texture object. Texture validation later
> on copies the additional levels back to the surface that contains
> the full mipmap.
> This only works for non-arrayed surfaces and driver currently
> creates new arrayed images in vain - individual layers within the
> newly created are still unaligned the same as before.
> 
> This patch drops this mechanism and instead attaches single
> temporary slice into the render buffer. This gets immediately
> copied back to the mipmapped and/or arrayed surface just after
> the render is done.
> 
> Sitting on top of earlier series cleaning up the depth buffer
> state, this patch additionally fixes the following piglit tests:
> 
> ext_texture_array.copyteximage 2d_array.g45m64
> ext_texture_array.copyteximage 1d_array.g45m64
> arb_framebuffer_object.fbo-blit-stretch.g33m64

I wish I had noticed this before I had Mark open a bug for it:

https://bugs.freedesktop.org/show_bug.cgi?id=101414

> ext_framebuffer_object.fbo-cubemap.g965m64
> arb_framebuffer_object.fbo-generatemipmap-cubemap.g965m64
> arb_texture_cube_map.copyteximage cube.g965m64
> ext_texture_array.copyteximage 1d_array.g965m64
> ext_texture_array.copyteximage 2d_array.g965m64
> ext_texture_array.fbo-array.g965m64
> ext_texture_array.gen-mipmap.g965m64
> ext_texture_array.fbo-generatemipmap-array.g965m64
> arb_pixel_buffer_object.texsubimage array pbo.g965m64
> ext_texture_array.copyteximage 2d_array.ilkm64
> ext_texture_array.copyteximage 1d_array.ilkm64
> arb_texture_cube_map.copyteximage cube.ilkm64
> 
> CC: Kenneth Graunke 
> CC: Jason Ekstrand 
> CC: Ian Romanick 
> Signed-off-by: Topi Pohjolainen 
> ---
>  src/mesa/drivers/dri/i965/brw_draw.c | 51 
> 
>  src/mesa/drivers/dri/i965/brw_misc_state.c   |  4 +-
>  src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  3 +-
>  src/mesa/drivers/dri/i965/intel_fbo.c| 19 +
>  src/mesa/drivers/dri/i965/intel_fbo.h| 24 +++
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c|  2 +-
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.h|  7 
>  7 files changed, 99 insertions(+), 11 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
> b/src/mesa/drivers/dri/i965/brw_draw.c
> index 611cb86..cb441c3 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw.c
> @@ -396,6 +396,56 @@ brw_postdraw_set_buffers_need_resolve(struct brw_context 
> *brw)
>  }
>  
>  static void
> +intel_renderbuffer_move_temp_back(struct brw_context *brw,
> +  struct intel_renderbuffer *irb)
> +{
> +   if (irb->align_wa_mt == NULL)
> +  return;
> +
> +   brw_render_cache_set_check_flush(brw, irb->align_wa_mt->bo);
> +
> +   intel_miptree_copy_slice(brw, irb->align_wa_mt, 0, 0,
> +irb->mt,
> +irb->Base.Base.TexImage->Level, irb->mt_layer);
> +
> +   intel_miptree_reference(&irb->align_wa_mt, NULL);
> +
> +   /* Finally restore the x,y to correspond to full miptree. */
> +   intel_renderbuffer_set_draw_offset(irb);
> +
> +   /* Make sure render surface state gets re-emitted with updated miptree. */
> +   brw->NewGLState |= _NEW_BUFFERS;
> +}
> +
> +static void
> +brw_postdraw_reconcile_align_wa_slices(struct brw_context *brw)
> +{
> +   struct gl_context *ctx = &brw->ctx;
> +   struct gl_framebuffer *fb = ctx->DrawBuffer;
> +
> +   struct intel_renderbuffer *depth_irb =
> +  intel_get_renderbuffer(fb, BUFFER_DEPTH);
> +   struct intel_renderbuffer *stencil_irb =
> +  intel_get_renderbuffer(fb, BUFFER_STENCIL);
> +
> +   if (depth_irb && depth_irb->align_wa_mt)
> +  intel_renderbuffer_move_temp_back(brw, depth_irb);
> +
> +   if (stencil_irb && stencil_irb->align_wa_mt)
> +  intel_renderbuffer_move_temp_back(brw, stencil_irb);
> +
> +   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
> +  struct intel_renderbuffer *irb =
> + intel_renderbuffer(fb->_ColorDrawBuffers[i]);
> +
> +  if (!irb || irb->align_wa_mt == NULL)
> + continue;
> +
> +  intel_renderbuffer_move_temp_back(brw, irb);
> +   }
> +}
> +
> +static void
>  brw_predraw_set_aux_buffers(struct brw_context *brw)
>  {
> if (brw->gen < 9)
> @@ -626,6 +676,7 @@ 

Re: [Mesa-dev] [PATCH 2/6] i965: When gl_PointSize is unwritten, default to 1.0 on Gen4-5.

2017-06-14 Thread Rafael Antognolli
Reviewed-by: Rafael Antognolli 

On Wed, May 10, 2017 at 11:47:26AM -0700, Kenneth Graunke wrote:
> Modern GL specifications say that the point size should be 1.0 when
> gl_PointSize is unwritten and the last enabled stage is a geometry
> or tessellation shader.  If it's a vertex shader, though, both the
> GL specs and ES 3.0 spec say that it's undefined - so since Gen4-5
> only support vertex shaders, there's no actual requirement to do this.
> 
> Since there is a cost associated (an extra dirty bit, which may cause
> SF_STATE to be emitted more often), it may not be a good idea.
> 
> The real benefit is that it makes all generations behave identically.
> And that seems somewhat nice...
> ---
>  src/mesa/drivers/dri/i965/brw_sf_state.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c 
> b/src/mesa/drivers/dri/i965/brw_sf_state.c
> index ff6b5ebf79b..d5e586d1bf3 100644
> --- a/src/mesa/drivers/dri/i965/brw_sf_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c
> @@ -263,9 +263,8 @@ static void upload_sf_unit( struct brw_context *brw )
> point_sz = CLAMP(point_sz, 0.125f, 255.875f);
> sf->sf7.point_size = U_FIXED(point_sz, 3);
>  
> -   /* _NEW_PROGRAM | _NEW_POINT */
> -   sf->sf7.use_point_size_state = !(ctx->VertexProgram.PointSizeEnabled ||
> - ctx->Point._Attenuated);
> +   /* _NEW_PROGRAM | _NEW_POINT, BRW_NEW_VUE_MAP_GEOM_OUT */
> +   sf->sf7.use_point_size_state = use_state_point_size(brw);
> sf->sf7.aa_line_distance_mode = brw->is_g4x || brw->gen == 5;
>  
> /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
> @@ -317,6 +316,7 @@ const struct brw_tracked_state brw_sf_unit = {
> BRW_NEW_PROGRAM_CACHE |
> BRW_NEW_SF_PROG_DATA |
> BRW_NEW_SF_VP |
> +   BRW_NEW_VUE_MAP_GEOM_OUT |
> BRW_NEW_URB_FENCE,
> },
> .emit = upload_sf_unit,
> -- 
> 2.12.2
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/24] st/mesa: optimize sampler state translation code

2017-06-14 Thread Marek Olšák
On Wed, Jun 14, 2017 at 9:15 AM, Nicolai Hähnle  wrote:
> On 12.06.2017 20:18, Marek Olšák wrote:
>>
>> From: Marek Olšák 
>>
>> ---
>>   src/mesa/state_tracker/st_atom_sampler.c | 79
>> +---
>>   1 file changed, 31 insertions(+), 48 deletions(-)
>>
>> diff --git a/src/mesa/state_tracker/st_atom_sampler.c
>> b/src/mesa/state_tracker/st_atom_sampler.c
>> index 9695069..ea231f3 100644
>> --- a/src/mesa/state_tracker/st_atom_sampler.c
>> +++ b/src/mesa/state_tracker/st_atom_sampler.c
>> @@ -51,85 +51,68 @@
>> #include "util/u_format.h"
>>   /**
>>* Convert GLenum texcoord wrap tokens to pipe tokens.
>>*/
>>   static GLuint
>>   gl_wrap_xlate(GLenum wrap)
>>   {
>> -   switch (wrap) {
>> -   case GL_REPEAT:
>> -  return PIPE_TEX_WRAP_REPEAT;
>> -   case GL_CLAMP:
>> -  return PIPE_TEX_WRAP_CLAMP;
>> -   case GL_CLAMP_TO_EDGE:
>> -  return PIPE_TEX_WRAP_CLAMP_TO_EDGE;
>> -   case GL_CLAMP_TO_BORDER:
>> -  return PIPE_TEX_WRAP_CLAMP_TO_BORDER;
>> -   case GL_MIRRORED_REPEAT:
>> -  return PIPE_TEX_WRAP_MIRROR_REPEAT;
>> -   case GL_MIRROR_CLAMP_EXT:
>> -  return PIPE_TEX_WRAP_MIRROR_CLAMP;
>> -   case GL_MIRROR_CLAMP_TO_EDGE_EXT:
>> -  return PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE;
>> -   case GL_MIRROR_CLAMP_TO_BORDER_EXT:
>> -  return PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER;
>> -   default:
>> -  assert(0);
>> -  return 0;
>> -   }
>> +   /* Take advantage of how the enums are defined. */
>> +   static const unsigned table[32] = {
>> +  PIPE_TEX_WRAP_CLAMP,
>> +  PIPE_TEX_WRAP_REPEAT,
>> +  PIPE_TEX_WRAP_MIRROR_CLAMP,
>> +  PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE,
>> +  0,
>> +  0,
>> +  0,
>> +  0,
>> +  0,
>> +  0,
>> +  0,
>> +  0,
>> +  0,
>> +  PIPE_TEX_WRAP_CLAMP_TO_BORDER,
>> +  0,
>> +  PIPE_TEX_WRAP_CLAMP_TO_EDGE,
>> +  PIPE_TEX_WRAP_MIRROR_REPEAT,
>> +  0,
>> +  PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER,
>
>
> Please use designated initializers, like
>
>  [GL_REPEAT & 0x1f] = PIPE_TEX_WRAP_CLAMP
>
> etc.

Do designated initializers work with MSVC?

Marek

>
> With this and Timothy's comment on patch 9 fixed, patches 1-10 are
>
> Reviewed-by: Nicolai Hähnle 
>
>
>
>> +   };
>> +
>> +   return table[wrap & 0x1f];
>>   }
>>   static GLuint
>>   gl_filter_to_mip_filter(GLenum filter)
>>   {
>> -   switch (filter) {
>> -   case GL_NEAREST:
>> -   case GL_LINEAR:
>> +   /* Take advantage of how the enums are defined. */
>> +   if (filter <= GL_LINEAR)
>> return PIPE_TEX_MIPFILTER_NONE;
>> -
>> -   case GL_NEAREST_MIPMAP_NEAREST:
>> -   case GL_LINEAR_MIPMAP_NEAREST:
>> +   if (filter <= GL_LINEAR_MIPMAP_NEAREST)
>> return PIPE_TEX_MIPFILTER_NEAREST;
>>   -   case GL_NEAREST_MIPMAP_LINEAR:
>> -   case GL_LINEAR_MIPMAP_LINEAR:
>> -  return PIPE_TEX_MIPFILTER_LINEAR;
>> -
>> -   default:
>> -  assert(0);
>> -  return PIPE_TEX_MIPFILTER_NONE;
>> -   }
>> +   return PIPE_TEX_MIPFILTER_LINEAR;
>>   }
>>   static GLuint
>>   gl_filter_to_img_filter(GLenum filter)
>>   {
>> -   switch (filter) {
>> -   case GL_NEAREST:
>> -   case GL_NEAREST_MIPMAP_NEAREST:
>> -   case GL_NEAREST_MIPMAP_LINEAR:
>> -  return PIPE_TEX_FILTER_NEAREST;
>> -
>> -   case GL_LINEAR:
>> -   case GL_LINEAR_MIPMAP_NEAREST:
>> -   case GL_LINEAR_MIPMAP_LINEAR:
>> +   /* Take advantage of how the enums are defined. */
>> +   if (filter & 1)
>> return PIPE_TEX_FILTER_LINEAR;
>>   -   default:
>> -  assert(0);
>> -  return PIPE_TEX_FILTER_NEAREST;
>> -   }
>> +   return PIPE_TEX_FILTER_NEAREST;
>>   }
>>   /**
>>* Convert a gl_sampler_object to a pipe_sampler_state object.
>>*/
>>   void
>>   st_convert_sampler(const struct st_context *st,
>>  const struct gl_texture_object *texobj,
>>  const struct gl_sampler_object *msamp,
>>
>
>
> --
> Lerne, wie die Welt wirklich ist,
> Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 09/24] st/mesa: sink code needed for apply_texture_swizzle_to_border_color

2017-06-14 Thread Marek Olšák
On Tue, Jun 13, 2017 at 8:10 AM, Timothy Arceri  wrote:
>
>
> On 13/06/17 04:18, Marek Olšák wrote:
>>
>> From: Marek Olšák 
>>
>> AMD SI-VI use this. GFX9 doesn't. We can stop doing this for SI-VI since
>> border color swizzling is broken there anyway. The only other user of this
>> code is nouveau.
>
>
> Maybe move this comment into the code as a TODO? I was a little confused at
> first as I thought this commit was meant to make the change. With that:

I don't understand. What are you confused about?

Marek

>
> Reviewed-by: Timothy Arceri 
>
>
>> ---
>>   src/mesa/state_tracker/st_atom_sampler.c | 61
>> +---
>>   1 file changed, 33 insertions(+), 28 deletions(-)
>>
>> diff --git a/src/mesa/state_tracker/st_atom_sampler.c
>> b/src/mesa/state_tracker/st_atom_sampler.c
>> index 9e5d940..9695069 100644
>> --- a/src/mesa/state_tracker/st_atom_sampler.c
>> +++ b/src/mesa/state_tracker/st_atom_sampler.c
>> @@ -170,51 +170,56 @@ st_convert_sampler(const struct st_context *st,
>> sampler->max_lod = sampler->min_lod;
>> sampler->min_lod = tmp;
>> assert(sampler->min_lod <= sampler->max_lod);
>>  }
>>/* For non-black borders... */
>>  if (msamp->BorderColor.ui[0] ||
>>  msamp->BorderColor.ui[1] ||
>>  msamp->BorderColor.ui[2] ||
>>  msamp->BorderColor.ui[3]) {
>> -  const struct st_texture_object *stobj =
>> st_texture_object_const(texobj);
>> const GLboolean is_integer = texobj->_IsIntegerFormat;
>> -  const struct pipe_sampler_view *sv = NULL;
>> -  union pipe_color_union border_color;
>> -  GLuint i;
>> -
>> -  /* Just search for the first used view. We can do this because the
>> - swizzle is per-texture, not per context. */
>> -  /* XXX: clean that up to not use the sampler view at all */
>> -  for (i = 0; i < stobj->num_sampler_views; ++i) {
>> - if (stobj->sampler_views[i]) {
>> -sv = stobj->sampler_views[i];
>> -break;
>> - }
>> -  }
>>   -  if (st->apply_texture_swizzle_to_border_color && sv) {
>> - const unsigned char swz[4] =
>> - {
>> -sv->swizzle_r,
>> -sv->swizzle_g,
>> -sv->swizzle_b,
>> -sv->swizzle_a,
>> - };
>> -
>> - st_translate_color(&msamp->BorderColor,
>> -&border_color,
>> -texBaseFormat, is_integer);
>> +  if (st->apply_texture_swizzle_to_border_color) {
>> + const struct st_texture_object *stobj =
>> st_texture_object_const(texobj);
>> + const struct pipe_sampler_view *sv = NULL;
>> +
>> + /* Just search for the first used view. We can do this because
>> the
>> +swizzle is per-texture, not per context. */
>> + /* XXX: clean that up to not use the sampler view at all */
>> + for (unsigned i = 0; i < stobj->num_sampler_views; ++i) {
>> +if (stobj->sampler_views[i]) {
>> +   sv = stobj->sampler_views[i];
>> +   break;
>> +}
>> + }
>>   - util_format_apply_color_swizzle(&sampler->border_color,
>> - &border_color, swz, is_integer);
>> + if (sv) {
>> +union pipe_color_union tmp;
>> +const unsigned char swz[4] =
>> +{
>> +   sv->swizzle_r,
>> +   sv->swizzle_g,
>> +   sv->swizzle_b,
>> +   sv->swizzle_a,
>> +};
>> +
>> +st_translate_color(&msamp->BorderColor, &tmp,
>> +   texBaseFormat, is_integer);
>> +
>> +util_format_apply_color_swizzle(&sampler->border_color,
>> +&tmp, swz, is_integer);
>> + } else {
>> +st_translate_color(&msamp->BorderColor,
>> +   &sampler->border_color,
>> +   texBaseFormat, is_integer);
>> + }
>> } else {
>>st_translate_color(&msamp->BorderColor,
>>   &sampler->border_color,
>>   texBaseFormat, is_integer);
>> }
>>  }
>>sampler->max_anisotropy = (msamp->MaxAnisotropy == 1.0 ?
>> 0 : (GLuint) msamp->MaxAnisotropy);
>>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: gen4_blorp_exec.h to the sources list

2017-06-14 Thread Juan A. Suarez Romero
On Wed, 2017-06-14 at 17:02 +0100, Emil Velikov wrote:
> From: Emil Velikov 
> 
> We tend to use the sources, as opposed to EXTRA_DIST to include the
> headers.
> 

Reviewed-by: Juan A. Suarez Romero 

> Cc: Juan A. Suarez Romero 
> Signed-off-by: Emil Velikov 
> ---
>  src/mesa/drivers/dri/i965/Makefile.am  | 1 -
>  src/mesa/drivers/dri/i965/Makefile.sources | 1 +
>  2 files changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/Makefile.am 
> b/src/mesa/drivers/dri/i965/Makefile.am
> index 9879bf79a57..c29cc594c8e 100644
> --- a/src/mesa/drivers/dri/i965/Makefile.am
> +++ b/src/mesa/drivers/dri/i965/Makefile.am
> @@ -102,7 +102,6 @@ BUILT_SOURCES = $(i965_oa_GENERATED_FILES)
>  CLEANFILES = $(BUILT_SOURCES)
>  
>  EXTRA_DIST = \
> - gen4_blorp_exec.h \
>   brw_oa_hsw.xml \
>   brw_oa.py
>  
> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
> b/src/mesa/drivers/dri/i965/Makefile.sources
> index 1e656eb65a4..041cd079884 100644
> --- a/src/mesa/drivers/dri/i965/Makefile.sources
> +++ b/src/mesa/drivers/dri/i965/Makefile.sources
> @@ -67,6 +67,7 @@ i965_FILES = \
>   brw_wm.h \
>   brw_wm_state.c \
>   brw_wm_surface_state.c \
> + gen4_blorp_exec.h \
>   gen6_clip_state.c \
>   gen6_constant_state.c \
>   gen6_depth_state.c \
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >