date:20170625

[Mesa-dev] [PATCH 2/4] mesa: create copyteximage_err() helper and always inline copyteximage()

2017-06-25 Thread Timothy Arceri

This will be useful in the following patch when we add KHR_no_error
support.
---
 src/mesa/main/teximage.c | 18 ++
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 796c8ad..29a795f 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -3803,7 +3803,7 @@ copy_texture_sub_image_no_error(struct gl_context *ctx, 
GLuint dims,
 /**
  * Implement the glCopyTexImage1/2D() functions.
  */
-static void
+static ALWAYS_INLINE void
 copyteximage(struct gl_context *ctx, GLuint dims,
  GLenum target, GLint level, GLenum internalFormat,
  GLint x, GLint y, GLsizei width, GLsizei height, GLint border )
@@ -3955,6 +3955,15 @@ copyteximage(struct gl_context *ctx, GLuint dims,
 }
 
 
+static void
+copyteximage_err(struct gl_context *ctx, GLuint dims, GLenum target,
+ GLint level, GLenum internalFormat, GLint x, GLint y,
+ GLsizei width, GLsizei height, GLint border)
+{
+   copyteximage(ctx, dims, target, level, internalFormat, x, y, width, height,
+border);
+}
+
 
 void GLAPIENTRY
 _mesa_CopyTexImage1D( GLenum target, GLint level,
@@ -3963,7 +3972,8 @@ _mesa_CopyTexImage1D( GLenum target, GLint level,
   GLsizei width, GLint border )
 {
GET_CURRENT_CONTEXT(ctx);
-   copyteximage(ctx, 1, target, level, internalFormat, x, y, width, 1, border);
+   copyteximage_err(ctx, 1, target, level, internalFormat, x, y, width, 1,
+border);
 }
 
 
@@ -3974,8 +3984,8 @@ _mesa_CopyTexImage2D( GLenum target, GLint level, GLenum 
internalFormat,
   GLint border )
 {
GET_CURRENT_CONTEXT(ctx);
-   copyteximage(ctx, 2, target, level, internalFormat,
-x, y, width, height, border);
+   copyteximage_err(ctx, 2, target, level, internalFormat,
+x, y, width, height, border);
 }
 
 
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/4] mesa: tidy up copyteximage()

2017-06-25 Thread Timothy Arceri

---
 src/mesa/main/teximage.c | 17 -
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index a9086a2..796c8ad 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -3808,11 +3808,7 @@ copyteximage(struct gl_context *ctx, GLuint dims,
  GLenum target, GLint level, GLenum internalFormat,
  GLint x, GLint y, GLsizei width, GLsizei height, GLint border )
 {
-   struct gl_texture_object *texObj;
struct gl_texture_image *texImage;
-   const GLuint face = _mesa_tex_target_to_face(target);
-   mesa_format texFormat;
-   struct gl_renderbuffer *rb;
 
FLUSH_VERTICES(ctx, 0);
 
@@ -3837,11 +3833,12 @@ copyteximage(struct gl_context *ctx, GLuint dims,
   return;
}
 
-   texObj = _mesa_get_current_tex_object(ctx, target);
+   struct gl_texture_object *texObj = _mesa_get_current_tex_object(ctx, 
target);
assert(texObj);
 
-   texFormat = _mesa_choose_texture_format(ctx, texObj, target, level,
-   internalFormat, GL_NONE, GL_NONE);
+   mesa_format texFormat =
+  _mesa_choose_texture_format(ctx, texObj, target, level, internalFormat,
+  GL_NONE, GL_NONE);
 
/* First check if reallocating the texture buffer can be avoided.
 * Without the realloc the copy can be 20x faster.
@@ -3861,9 +3858,10 @@ copyteximage(struct gl_context *ctx, GLuint dims,
_mesa_perf_debug(ctx, MESA_DEBUG_SEVERITY_LOW, "glCopyTexImage "
 "can't avoid reallocating texture storage\n");
 
-   rb = _mesa_get_read_renderbuffer_for_format(ctx, internalFormat);
-
if (_mesa_is_gles3(ctx)) {
+  struct gl_renderbuffer *rb =
+ _mesa_get_read_renderbuffer_for_format(ctx, internalFormat);
+
   if (_mesa_is_enum_format_unsized(internalFormat)) {
   /* Conversion from GL_RGB10_A2 source buffer format is not allowed in
* OpenGL ES 3.0. Khronos bug# 9807.
@@ -3923,6 +3921,7 @@ copyteximage(struct gl_context *ctx, GLuint dims,
   }
   else {
  GLint srcX = x, srcY = y, dstX = 0, dstY = 0, dstZ = 0;
+ const GLuint face = _mesa_tex_target_to_face(target);
 
  /* Free old texture image */
  ctx->Driver.FreeTextureImageBuffer(ctx, texImage);
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/4] mesa: add no error support to copyteximage()

2017-06-25 Thread Timothy Arceri

---
 src/mesa/main/teximage.c | 43 ++-
 1 file changed, 30 insertions(+), 13 deletions(-)

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 29a795f..a08f34b 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -3806,7 +3806,8 @@ copy_texture_sub_image_no_error(struct gl_context *ctx, 
GLuint dims,
 static ALWAYS_INLINE void
 copyteximage(struct gl_context *ctx, GLuint dims,
  GLenum target, GLint level, GLenum internalFormat,
- GLint x, GLint y, GLsizei width, GLsizei height, GLint border )
+ GLint x, GLint y, GLsizei width, GLsizei height, GLint border,
+ bool no_error)
 {
struct gl_texture_image *texImage;
 
@@ -3822,15 +3823,17 @@ copyteximage(struct gl_context *ctx, GLuint dims,
if (ctx->NewState & NEW_COPY_TEX_STATE)
   _mesa_update_state(ctx);
 
-   if (copytexture_error_check(ctx, dims, target, level, internalFormat,
-   width, height, border))
-  return;
+   if (!no_error) {
+  if (copytexture_error_check(ctx, dims, target, level, internalFormat,
+  width, height, border))
+ return;
 
-   if (!_mesa_legal_texture_dimensions(ctx, target, level, width, height,
-   1, border)) {
-  _mesa_error(ctx, GL_INVALID_VALUE,
-  "glCopyTexImage%uD(invalid width or height)", dims);
-  return;
+  if (!_mesa_legal_texture_dimensions(ctx, target, level, width, height,
+  1, border)) {
+ _mesa_error(ctx, GL_INVALID_VALUE,
+ "glCopyTexImage%uD(invalid width or height)", dims);
+ return;
+  }
}
 
struct gl_texture_object *texObj = _mesa_get_current_tex_object(ctx, 
target);
@@ -3849,8 +3852,13 @@ copyteximage(struct gl_context *ctx, GLuint dims,
   if (texImage && can_avoid_reallocation(texImage, internalFormat, 
texFormat,
  x, y, width, height, border)) {
  _mesa_unlock_texture(ctx, texObj);
- copy_texture_sub_image_err(ctx, dims, texObj, target, level, 0, 0, 0,
-x, y, width, height,"CopyTexImage");
+ if (no_error) {
+copy_texture_sub_image_no_error(ctx, dims, texObj, target, level, 
0,
+0, 0, x, y, width, height);
+ } else {
+copy_texture_sub_image_err(ctx, dims, texObj, target, level, 0, 0,
+   0, x, y, width, height,"CopyTexImage");
+ }
  return;
   }
}
@@ -3858,7 +3866,7 @@ copyteximage(struct gl_context *ctx, GLuint dims,
_mesa_perf_debug(ctx, MESA_DEBUG_SEVERITY_LOW, "glCopyTexImage "
 "can't avoid reallocating texture storage\n");
 
-   if (_mesa_is_gles3(ctx)) {
+   if (!no_error && _mesa_is_gles3(ctx)) {
   struct gl_renderbuffer *rb =
  _mesa_get_read_renderbuffer_for_format(ctx, internalFormat);
 
@@ -3961,7 +3969,16 @@ copyteximage_err(struct gl_context *ctx, GLuint dims, 
GLenum target,
  GLsizei width, GLsizei height, GLint border)
 {
copyteximage(ctx, dims, target, level, internalFormat, x, y, width, height,
-border);
+border, false);
+}
+
+static void
+copyteximage_no_error(struct gl_context *ctx, GLuint dims, GLenum target,
+  GLint level, GLenum internalFormat, GLint x, GLint y,
+  GLsizei width, GLsizei height, GLint border)
+{
+   copyteximage(ctx, dims, target, level, internalFormat, x, y, width, height,
+border, true);
 }
 
 
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/4] mesa: add KHR_no_error support for glCopyTexImage*D()

2017-06-25 Thread Timothy Arceri

---
 src/mapi/glapi/gen/gl_API.xml |  4 ++--
 src/mesa/main/teximage.c  | 20 
 src/mesa/main/teximage.h  | 11 +++
 3 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
index 9857e39..d878a04 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -3254,7 +3254,7 @@
 
 
 
-
+
 
 
 
@@ -3265,7 +3265,7 @@
 
 
 
-
+
 
 
 
diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index a08f34b..0cacb89 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -4006,6 +4006,26 @@ _mesa_CopyTexImage2D( GLenum target, GLint level, GLenum 
internalFormat,
 }
 
 
+void GLAPIENTRY
+_mesa_CopyTexImage1D_no_error(GLenum target, GLint level, GLenum 
internalFormat,
+  GLint x, GLint y, GLsizei width, GLint border)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   copyteximage_no_error(ctx, 1, target, level, internalFormat, x, y, width, 1,
+ border);
+}
+
+
+void GLAPIENTRY
+_mesa_CopyTexImage2D_no_error(GLenum target, GLint level, GLenum 
internalFormat,
+  GLint x, GLint y, GLsizei width, GLsizei height,
+  GLint border)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   copyteximage_no_error(ctx, 2, target, level, internalFormat,
+ x, y, width, height, border);
+}
+
 
 void GLAPIENTRY
 _mesa_CopyTexSubImage1D( GLenum target, GLint level,
diff --git a/src/mesa/main/teximage.h b/src/mesa/main/teximage.h
index c2fd451..04ddbb9 100644
--- a/src/mesa/main/teximage.h
+++ b/src/mesa/main/teximage.h
@@ -352,6 +352,17 @@ _mesa_CopyTexImage2D( GLenum target, GLint level,
 
 
 extern void GLAPIENTRY
+_mesa_CopyTexImage1D_no_error(GLenum target, GLint level, GLenum 
internalformat,
+  GLint x, GLint y, GLsizei width, GLint border);
+
+
+extern void GLAPIENTRY
+_mesa_CopyTexImage2D_no_error(GLenum target, GLint level, GLenum 
internalformat,
+  GLint x, GLint y, GLsizei width, GLsizei height,
+  GLint border );
+
+
+extern void GLAPIENTRY
 _mesa_CopyTexSubImage1D( GLenum target, GLint level, GLint xoffset,
  GLint x, GLint y, GLsizei width );
 
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 22/31] mesa: don't flag _NEW_TRANSFORM for st/mesa if possible

2017-06-25 Thread Michel Dänzer

On 24/06/17 09:50 AM, Marek Olšák wrote:
> Thanks. It can only be reproduced with process isolation enabled in
> piglit, which is not something I test.

I'm running piglit with process isolation disabled as well. Maybe this
issue depended on the order in which this test was run relative to other
tests in the same group.

-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/5] mesa: remove redundant NULL check

2017-06-25 Thread Timothy Arceri

This can never be NULL in any of the entry paths.
---
 src/mesa/main/teximage.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 9d39759..a039747 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -2504,6 +2504,8 @@ copytexsubimage_error_check(struct gl_context *ctx, 
GLuint dimensions,
 GLint xoffset, GLint yoffset, GLint zoffset,
 GLint width, GLint height, const char *caller)
 {
+   assert(texObj);
+
struct gl_texture_image *texImage;
 
/* Check that the source buffer is complete */
@@ -2530,12 +2532,6 @@ copytexsubimage_error_check(struct gl_context *ctx, 
GLuint dimensions,
   return GL_TRUE;
}
 
-   /* Get dest image pointers */
-   if (!texObj) {
-  _mesa_error(ctx, GL_OUT_OF_MEMORY, "%s()", caller);
-  return GL_TRUE;
-   }
-
texImage = _mesa_select_tex_image(texObj, target, level);
if (!texImage) {
   /* destination image does not exist */
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/5] mesa: make _mesa_copy_texture_sub_image() static

2017-06-25 Thread Timothy Arceri

---
 src/mesa/main/teximage.c | 175 +++
 src/mesa/main/teximage.h |   9 ---
 2 files changed, 85 insertions(+), 99 deletions(-)

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 128e010..88c76f0 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -3708,6 +3708,73 @@ can_avoid_reallocation(const struct gl_texture_image 
*texImage,
return true;
 }
 
+
+/**
+ * Implementation for glCopyTex(ture)SubImage1/2/3D() functions.
+ */
+static void
+copy_texture_sub_image(struct gl_context *ctx, GLuint dims,
+   struct gl_texture_object *texObj,
+   GLenum target, GLint level,
+   GLint xoffset, GLint yoffset, GLint zoffset,
+   GLint x, GLint y, GLsizei width, GLsizei height,
+   const char *caller)
+{
+   struct gl_texture_image *texImage;
+
+   FLUSH_VERTICES(ctx, 0);
+
+   if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
+  _mesa_debug(ctx, "%s %s %d %d %d %d %d %d %d %d\n", caller,
+  _mesa_enum_to_string(target),
+  level, xoffset, yoffset, zoffset, x, y, width, height);
+
+   if (ctx->NewState & NEW_COPY_TEX_STATE)
+  _mesa_update_state(ctx);
+
+   if (copytexsubimage_error_check(ctx, dims, texObj, target, level,
+   xoffset, yoffset, zoffset,
+   width, height, caller)) {
+  return;
+   }
+
+   _mesa_lock_texture(ctx, texObj);
+
+   texImage = _mesa_select_tex_image(texObj, target, level);
+
+   /* If we have a border, offset=-1 is legal.  Bias by border width. */
+   switch (dims) {
+   case 3:
+  if (target != GL_TEXTURE_2D_ARRAY)
+ zoffset += texImage->Border;
+  /* fall-through */
+   case 2:
+  if (target != GL_TEXTURE_1D_ARRAY)
+ yoffset += texImage->Border;
+  /* fall-through */
+   case 1:
+  xoffset += texImage->Border;
+   }
+
+   if (_mesa_clip_copytexsubimage(ctx, &xoffset, &yoffset, &x, &y,
+  &width, &height)) {
+  struct gl_renderbuffer *srcRb =
+ get_copy_tex_image_source(ctx, texImage->TexFormat);
+
+  copytexsubimage_by_slice(ctx, texImage, dims, xoffset, yoffset, zoffset,
+   srcRb, x, y, width, height);
+
+  check_gen_mipmap(ctx, target, texObj, level);
+
+  /* NOTE: Don't signal _NEW_TEXTURE_OBJECT since we've only changed
+   * the texel data, not the texture format, size, etc.
+   */
+   }
+
+   _mesa_unlock_texture(ctx, texObj);
+}
+
+
 /**
  * Implement the glCopyTexImage1/2D() functions.
  */
@@ -3760,9 +3827,8 @@ copyteximage(struct gl_context *ctx, GLuint dims,
   if (texImage && can_avoid_reallocation(texImage, internalFormat, 
texFormat,
  x, y, width, height, border)) {
  _mesa_unlock_texture(ctx, texObj);
- _mesa_copy_texture_sub_image(ctx, dims, texObj, target, level,
-  0, 0, 0, x, y, width, height,
-  "CopyTexImage");
+ copy_texture_sub_image(ctx, dims, texObj, target, level, 0, 0, 0, x, 
y,
+width, height,"CopyTexImage");
  return;
   }
}
@@ -3888,72 +3954,7 @@ _mesa_CopyTexImage2D( GLenum target, GLint level, GLenum 
internalFormat,
 x, y, width, height, border);
 }
 
-/**
- * Implementation for glCopyTex(ture)SubImage1/2/3D() functions.
- */
-void
-_mesa_copy_texture_sub_image(struct gl_context *ctx, GLuint dims,
- struct gl_texture_object *texObj,
- GLenum target, GLint level,
- GLint xoffset, GLint yoffset, GLint zoffset,
- GLint x, GLint y,
- GLsizei width, GLsizei height,
- const char *caller)
-{
-   struct gl_texture_image *texImage;
-
-   FLUSH_VERTICES(ctx, 0);
-
-   if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
-  _mesa_debug(ctx, "%s %s %d %d %d %d %d %d %d %d\n", caller,
-  _mesa_enum_to_string(target),
-  level, xoffset, yoffset, zoffset, x, y, width, height);
-
-   if (ctx->NewState & NEW_COPY_TEX_STATE)
-  _mesa_update_state(ctx);
-
-   if (copytexsubimage_error_check(ctx, dims, texObj, target, level,
-   xoffset, yoffset, zoffset,
-   width, height, caller)) {
-  return;
-   }
-
-   _mesa_lock_texture(ctx, texObj);
-   {
-  texImage = _mesa_select_tex_image(texObj, target, level);
 
-  /* If we have a border, offset=-1 is legal.  Bias by border width. */
-  switch (dims) {
-  case 3:
- if (target != GL_TEXTURE_2D_ARRAY)
-zoffset += texImage->Border;
- /* fall-through */
-  case 2:
- if (target != GL_TEXTURE

[Mesa-dev] [PATCH 2/5] mesa: create copy_texture_sub_image_err() helper

2017-06-25 Thread Timothy Arceri

---
 src/mesa/main/teximage.c | 84 +++-
 1 file changed, 48 insertions(+), 36 deletions(-)

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 88c76f0..9d39759 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -3717,27 +3717,10 @@ copy_texture_sub_image(struct gl_context *ctx, GLuint 
dims,
struct gl_texture_object *texObj,
GLenum target, GLint level,
GLint xoffset, GLint yoffset, GLint zoffset,
-   GLint x, GLint y, GLsizei width, GLsizei height,
-   const char *caller)
+   GLint x, GLint y, GLsizei width, GLsizei height)
 {
struct gl_texture_image *texImage;
 
-   FLUSH_VERTICES(ctx, 0);
-
-   if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
-  _mesa_debug(ctx, "%s %s %d %d %d %d %d %d %d %d\n", caller,
-  _mesa_enum_to_string(target),
-  level, xoffset, yoffset, zoffset, x, y, width, height);
-
-   if (ctx->NewState & NEW_COPY_TEX_STATE)
-  _mesa_update_state(ctx);
-
-   if (copytexsubimage_error_check(ctx, dims, texObj, target, level,
-   xoffset, yoffset, zoffset,
-   width, height, caller)) {
-  return;
-   }
-
_mesa_lock_texture(ctx, texObj);
 
texImage = _mesa_select_tex_image(texObj, target, level);
@@ -3775,6 +3758,35 @@ copy_texture_sub_image(struct gl_context *ctx, GLuint 
dims,
 }
 
 
+static void
+copy_texture_sub_image_err(struct gl_context *ctx, GLuint dims,
+   struct gl_texture_object *texObj,
+   GLenum target, GLint level,
+   GLint xoffset, GLint yoffset, GLint zoffset,
+   GLint x, GLint y, GLsizei width, GLsizei height,
+   const char *caller)
+{
+   FLUSH_VERTICES(ctx, 0);
+
+   if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
+  _mesa_debug(ctx, "%s %s %d %d %d %d %d %d %d %d\n", caller,
+  _mesa_enum_to_string(target),
+  level, xoffset, yoffset, zoffset, x, y, width, height);
+
+   if (ctx->NewState & NEW_COPY_TEX_STATE)
+  _mesa_update_state(ctx);
+
+   if (copytexsubimage_error_check(ctx, dims, texObj, target, level,
+   xoffset, yoffset, zoffset,
+   width, height, caller)) {
+  return;
+   }
+
+   copy_texture_sub_image(ctx, dims, texObj, target, level, xoffset, yoffset,
+  zoffset, x, y, width, height);
+}
+
+
 /**
  * Implement the glCopyTexImage1/2D() functions.
  */
@@ -3827,8 +3839,8 @@ copyteximage(struct gl_context *ctx, GLuint dims,
   if (texImage && can_avoid_reallocation(texImage, internalFormat, 
texFormat,
  x, y, width, height, border)) {
  _mesa_unlock_texture(ctx, texObj);
- copy_texture_sub_image(ctx, dims, texObj, target, level, 0, 0, 0, x, 
y,
-width, height,"CopyTexImage");
+ copy_texture_sub_image_err(ctx, dims, texObj, target, level, 0, 0, 0,
+x, y, width, height,"CopyTexImage");
  return;
   }
}
@@ -3977,8 +3989,8 @@ _mesa_CopyTexSubImage1D( GLenum target, GLint level,
if (!texObj)
   return;
 
-   copy_texture_sub_image(ctx, 1, texObj, target, level, xoffset, 0, 0,
-  x, y, width, 1, self);
+   copy_texture_sub_image_err(ctx, 1, texObj, target, level, xoffset, 0, 0,
+  x, y, width, 1, self);
 }
 
 void GLAPIENTRY
@@ -4003,8 +4015,8 @@ _mesa_CopyTexSubImage2D( GLenum target, GLint level,
if (!texObj)
   return;
 
-   copy_texture_sub_image(ctx, 2, texObj, target, level, xoffset, yoffset, 0,
-  x, y, width, height, self);
+   copy_texture_sub_image_err(ctx, 2, texObj, target, level, xoffset, yoffset,
+  0, x, y, width, height, self);
 }
 
 
@@ -4031,8 +4043,8 @@ _mesa_CopyTexSubImage3D( GLenum target, GLint level,
if (!texObj)
   return;
 
-   copy_texture_sub_image(ctx, 3, texObj, target, level, xoffset, yoffset,
-  zoffset, x, y, width, height, self);
+   copy_texture_sub_image_err(ctx, 3, texObj, target, level, xoffset, yoffset,
+  zoffset, x, y, width, height, self);
 }
 
 void GLAPIENTRY
@@ -4054,8 +4066,8 @@ _mesa_CopyTextureSubImage1D(GLuint texture, GLint level,
   return;
}
 
-   copy_texture_sub_image(ctx, 1, texObj, texObj->Target, level, xoffset, 0, 0,
-  x, y, width, 1, self);
+   copy_texture_sub_image_err(ctx, 1, texObj, texObj->Target, level, xoffset, 
0,
+  0, x, y, width, 1, self);
 }
 
 void GLAPIENTRY
@@ -4078,8 +4090,8 @@ _mesa_CopyTextureSubImage2D(GLuint texture,

[Mesa-dev] [PATCH 4/5] mesa: add copy_texture_sub_image_no_error() helper

2017-06-25 Thread Timothy Arceri

---
 src/mesa/main/teximage.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index a039747..ac25985 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -3783,6 +3783,23 @@ copy_texture_sub_image_err(struct gl_context *ctx, 
GLuint dims,
 }
 
 
+static void
+copy_texture_sub_image_no_error(struct gl_context *ctx, GLuint dims,
+struct gl_texture_object *texObj,
+GLenum target, GLint level,
+GLint xoffset, GLint yoffset, GLint zoffset,
+GLint x, GLint y, GLsizei width, GLsizei 
height)
+{
+   FLUSH_VERTICES(ctx, 0);
+
+   if (ctx->NewState & NEW_COPY_TEX_STATE)
+  _mesa_update_state(ctx);
+
+   copy_texture_sub_image(ctx, dims, texObj, target, level, xoffset, yoffset,
+  zoffset, x, y, width, height);
+}
+
+
 /**
  * Implement the glCopyTexImage1/2D() functions.
  */
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 5/5] mesa: add KHR_no_error support for glCopyTex{ture}SubImage*D()

2017-06-25 Thread Timothy Arceri

---
 src/mapi/glapi/gen/ARB_direct_state_access.xml |  6 +-
 src/mapi/glapi/gen/gl_API.xml  |  6 +-
 src/mesa/main/teximage.c   | 81 ++
 src/mesa/main/teximage.h   | 28 +
 4 files changed, 115 insertions(+), 6 deletions(-)

diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml 
b/src/mapi/glapi/gen/ARB_direct_state_access.xml
index d3d2246..c9031c1 100644
--- a/src/mapi/glapi/gen/ARB_direct_state_access.xml
+++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml
@@ -446,7 +446,7 @@
   

 
-   
+   
   
   
   
@@ -455,7 +455,7 @@
   

 
-   
+   
   
   
   
@@ -466,7 +466,7 @@
   

 
-   
+   
   
   
   
diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
index 550af08..9857e39 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -3277,7 +3277,7 @@
 
 
 
-
+
 
 
 
@@ -3287,7 +3287,7 @@
 
 
 
-
+
 
 
 
@@ -4041,7 +4041,7 @@
 
 
 
-
+
 
 
 
diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index ac25985..a9086a2 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -4141,6 +4141,87 @@ _mesa_CopyTextureSubImage3D(GLuint texture, GLint level,
  yoffset, zoffset, x, y, width, height, self);
 }
 
+
+void GLAPIENTRY
+_mesa_CopyTexSubImage1D_no_error(GLenum target, GLint level, GLint xoffset,
+ GLint x, GLint y, GLsizei width)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   struct gl_texture_object* texObj = _mesa_get_current_tex_object(ctx, 
target);
+   copy_texture_sub_image_no_error(ctx, 1, texObj, target, level, xoffset, 0, 
0,
+   x, y, width, 1);
+}
+
+void GLAPIENTRY
+_mesa_CopyTexSubImage2D_no_error(GLenum target, GLint level, GLint xoffset,
+ GLint yoffset, GLint x, GLint y, GLsizei 
width,
+ GLsizei height)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   struct gl_texture_object* texObj = _mesa_get_current_tex_object(ctx, 
target);
+   copy_texture_sub_image_no_error(ctx, 2, texObj, target, level, xoffset,
+   yoffset, 0, x, y, width, height);
+}
+
+void GLAPIENTRY
+_mesa_CopyTexSubImage3D_no_error(GLenum target, GLint level, GLint xoffset,
+ GLint yoffset, GLint zoffset, GLint x, GLint 
y,
+ GLsizei width, GLsizei height)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   struct gl_texture_object* texObj = _mesa_get_current_tex_object(ctx, 
target);
+   copy_texture_sub_image_no_error(ctx, 3, texObj, target, level, xoffset,
+   yoffset, zoffset, x, y, width, height);
+}
+
+void GLAPIENTRY
+_mesa_CopyTextureSubImage1D_no_error(GLuint texture, GLint level, GLint 
xoffset,
+ GLint x, GLint y, GLsizei width)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   struct gl_texture_object* texObj = _mesa_lookup_texture(ctx, texture);
+   copy_texture_sub_image_no_error(ctx, 1, texObj, texObj->Target, level,
+   xoffset, 0, 0, x, y, width, 1);
+}
+
+void GLAPIENTRY
+_mesa_CopyTextureSubImage2D_no_error(GLuint texture, GLint level, GLint 
xoffset,
+ GLint yoffset, GLint x, GLint y,
+ GLsizei width, GLsizei height)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   struct gl_texture_object* texObj = _mesa_lookup_texture(ctx, texture);
+   copy_texture_sub_image_no_error(ctx, 2, texObj, texObj->Target, level,
+   xoffset, yoffset, 0, x, y, width, height);
+}
+
+void GLAPIENTRY
+_mesa_CopyTextureSubImage3D_no_error(GLuint texture, GLint level, GLint 
xoffset,
+ GLint yoffset, GLint zoffset, GLint x,
+ GLint y, GLsizei width, GLsizei height)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   struct gl_texture_object* texObj = _mesa_lookup_texture(ctx, texture);
+   if (texObj->Target == GL_TEXTURE_CUBE_MAP) {
+  /* Act like CopyTexSubImage2D */
+  copy_texture_sub_image_no_error(ctx, 2, texObj,
+  GL_TEXTURE_CUBE_MAP_POSITIVE_X + zoffset,
+  level, xoffset, yoffset, 0, x, y, width,
+  height);
+   }
+   else
+  copy_texture_sub_image_no_error(ctx, 3, texObj, texObj->Target, level,
+  xoffset, yoffset, zoffset, x, y, width,
+  height);
+}
+
+
 static bool
 check_clear_tex_image(struct gl_context *ctx,
   const char *function,
diff --git a/src/mesa/main/teximage.h b/src/mesa/main/teximage.h
i

[Mesa-dev] [PATCH 1/3] radv/clear: add r32g32b32a32 fast clear support

2017-06-25 Thread Dave Airlie

From: Dave Airlie 

We can only fast clear 128-bit images if the r/g/b channels
are the same, and we are using DCC.

For DCC we'll bail out on translate if this isn't true,
and we catch cmask clears explicitly.

Signed-off-by: Dave Airlie 
---
 src/amd/vulkan/radv_formats.c| 7 +++
 src/amd/vulkan/radv_meta_clear.c | 5 +
 2 files changed, 12 insertions(+)

diff --git a/src/amd/vulkan/radv_formats.c b/src/amd/vulkan/radv_formats.c
index b13adb9..0ca88ef 100644
--- a/src/amd/vulkan/radv_formats.c
+++ b/src/amd/vulkan/radv_formats.c
@@ -969,6 +969,13 @@ bool radv_format_pack_clear_color(VkFormat format,
clear_vals[0] = fui(value->float32[0]);
clear_vals[1] = fui(value->float32[1]);
break;
+   case VK_FORMAT_R32G32B32A32_SFLOAT:
+   if (value->float32[0] != value->float32[1] ||
+   value->float32[0] != value->float32[2])
+   return false;
+   clear_vals[0] = fui(value->float32[0]);
+   clear_vals[1] = fui(value->float32[3]);
+   break;
case VK_FORMAT_R32_SFLOAT:
clear_vals[1] = 0;
clear_vals[0] = fui(value->float32[0]);
diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c
index 0648513..002e126 100644
--- a/src/amd/vulkan/radv_meta_clear.c
+++ b/src/amd/vulkan/radv_meta_clear.c
@@ -938,6 +938,11 @@ emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer,
 iview->image->offset + 
iview->image->dcc_offset,
 iview->image->surface.dcc_size, 0x20202020);
} else {
+
+   if (iview->image->surface.bpe > 8) {
+   /* 128 bit formats not supported */
+   return false;
+   }
radv_fill_buffer(cmd_buffer, iview->image->bo,
 iview->image->offset + 
iview->image->cmask.offset,
 iview->image->cmask.size, 0);
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/3] [rfc] radv: predicate cmask eliminate when using DCC.

2017-06-25 Thread Dave Airlie

From: Dave Airlie 

When using DCC, some clear values don't require a cmask eliminate
step. This patch adds support for black and black with alpha 1;
there are other values, but I don't have access to a comprehensive list.

This works by setting the cmask eliminate predicate when doing the
fast clear, and later when doing the cmask elimination making sure
the draws are predicated.

This increases the fps on Sascha Willems' deferred example.

Tonga: 580fps->670fps on a Tonga PRO card.
Polaris 730->850fps

Signed-off-by: Dave Airlie 
---
 src/amd/vulkan/radv_cmd_buffer.c  | 29 
 src/amd/vulkan/radv_image.c   | 13 --
 src/amd/vulkan/radv_meta_clear.c  | 87 ++-
 src/amd/vulkan/radv_meta_fast_clear.c | 22 +
 src/amd/vulkan/radv_private.h |  4 ++
 5 files changed, 150 insertions(+), 5 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index a257812..dd83fd0 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1117,6 +1117,35 @@ radv_load_depth_clear_regs(struct radv_cmd_buffer 
*cmd_buffer,
radeon_emit(cmd_buffer->cs, 0);
 }
 
+/*
+ * With DCC some colors don't require CMASK elimination before being
+ * used as a texture. This sets a predicate value to determine if the
+ * cmask eliminate is required.
+ */
+void
+radv_set_dcc_need_cmask_elim_pred(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_image *image,
+ bool value)
+{
+   uint64_t pred_val = value;
+   uint64_t va = cmd_buffer->device->ws->buffer_get_va(image->bo);
+   va += image->offset + image->dcc_pred_offset;
+
+   if (!image->surface.dcc_size)
+   return;
+
+   cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, image->bo, 8);
+
+   radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 4, 0));
+   radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+   S_370_WR_CONFIRM(1) |
+   S_370_ENGINE_SEL(V_370_PFP));
+   radeon_emit(cmd_buffer->cs, va);
+   radeon_emit(cmd_buffer->cs, va >> 32);
+   radeon_emit(cmd_buffer->cs, pred_val);
+   radeon_emit(cmd_buffer->cs, pred_val >> 32);
+}
+
 void
 radv_set_color_clear_regs(struct radv_cmd_buffer *cmd_buffer,
  struct radv_image *image,
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index 147ebed..43112d0 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -705,12 +705,16 @@ static void
 radv_image_alloc_cmask(struct radv_device *device,
   struct radv_image *image)
 {
+   uint32_t clear_value_size = 0;
radv_image_get_cmask_info(device, image, &image->cmask);
 
image->cmask.offset = align64(image->size, image->cmask.alignment);
/* + 8 for storing the clear values */
-   image->clear_value_offset = image->cmask.offset + image->cmask.size;
-   image->size = image->cmask.offset + image->cmask.size + 8;
+   if (!image->clear_value_offset) {
+   image->clear_value_offset = image->cmask.offset + 
image->cmask.size;
+   clear_value_size = 8;
+   }
+   image->size = image->cmask.offset + image->cmask.size + 
clear_value_size;
image->alignment = MAX2(image->alignment, image->cmask.alignment);
 }
 
@@ -719,9 +723,10 @@ radv_image_alloc_dcc(struct radv_device *device,
   struct radv_image *image)
 {
image->dcc_offset = align64(image->size, image->surface.dcc_alignment);
-   /* + 8 for storing the clear values */
+   /* + 16 for storing the clear values + dcc pred */
image->clear_value_offset = image->dcc_offset + image->surface.dcc_size;
-   image->size = image->dcc_offset + image->surface.dcc_size + 8;
+   image->dcc_pred_offset = image->clear_value_offset + 8;
+   image->size = image->dcc_offset + image->surface.dcc_size + 16;
image->alignment = MAX2(image->alignment, image->surface.dcc_alignment);
 }
 
diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c
index 002e126..c12a893 100644
--- a/src/amd/vulkan/radv_meta_clear.c
+++ b/src/amd/vulkan/radv_meta_clear.c
@@ -858,6 +858,83 @@ fail:
return res;
 }
 
+static void vi_get_fast_clear_parameters(VkFormat format,
+const VkClearColorValue *clear_value,
+uint32_t* reset_value,
+bool *can_avoid_fast_clear_elim)
+{
+   bool values[4] = {};
+   int extra_channel;
+   bool main_value = false;
+   bool extra_value = false;
+   int i;
+   *can_avoid_fast_clear_elim = false;
+
+   *reset_value = 0x20202020U;
+
+   const struct vk_format_description *desc = 
vk_format_description(format);
+   if (format == VK_FORMAT_

[Mesa-dev] [PATCH 2/3] radv: add support for cmd predication.

2017-06-25 Thread Dave Airlie

From: Dave Airlie 

This doesn't get used yet; it just adds support to various PKT3
emissions to enable it later.

Signed-off-by: Dave Airlie 
---
 src/amd/vulkan/radv_cmd_buffer.c | 11 ---
 src/amd/vulkan/radv_device.c |  3 ++
 src/amd/vulkan/radv_private.h|  5 +++
 src/amd/vulkan/radv_query.c  |  7 +++--
 src/amd/vulkan/si_cmd_buffer.c   | 68 ++--
 5 files changed, 64 insertions(+), 30 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 105384d..a257812 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1159,7 +1159,7 @@ radv_load_color_clear_regs(struct radv_cmd_buffer 
*cmd_buffer,
uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + idx * 0x3c;
cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, image->bo, 8);
 
-   radeon_emit(cmd_buffer->cs, PKT3(PKT3_COPY_DATA, 4, 0));
+   radeon_emit(cmd_buffer->cs, PKT3(PKT3_COPY_DATA, 4, 
cmd_buffer->state.predicating));
radeon_emit(cmd_buffer->cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
COPY_DATA_DST_SEL(COPY_DATA_REG) |
COPY_DATA_COUNT_SEL);
@@ -1168,7 +1168,7 @@ radv_load_color_clear_regs(struct radv_cmd_buffer 
*cmd_buffer,
radeon_emit(cmd_buffer->cs, reg >> 2);
radeon_emit(cmd_buffer->cs, 0);
 
-   radeon_emit(cmd_buffer->cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
+   radeon_emit(cmd_buffer->cs, PKT3(PKT3_PFP_SYNC_ME, 0, 
cmd_buffer->state.predicating));
radeon_emit(cmd_buffer->cs, 0);
 }
 
@@ -2639,10 +2639,10 @@ void radv_CmdDraw(
if (cmd_buffer->state.pipeline->graphics.vtx_emit_num == 3)
radeon_emit(cmd_buffer->cs, 0);
 
-   radeon_emit(cmd_buffer->cs, PKT3(PKT3_NUM_INSTANCES, 0, 0));
+   radeon_emit(cmd_buffer->cs, PKT3(PKT3_NUM_INSTANCES, 0, 
cmd_buffer->state.predicating));
radeon_emit(cmd_buffer->cs, instanceCount);
 
-   radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, 0));
+   radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, 
cmd_buffer->state.predicating));
radeon_emit(cmd_buffer->cs, vertexCount);
radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX |
S_0287F0_USE_OPAQUE(0));
@@ -3294,6 +3294,7 @@ static void write_event(struct radv_cmd_buffer 
*cmd_buffer,
 * the stage mask. */
 
si_cs_emit_write_event_eop(cs,
+  cmd_buffer->state.predicating,
   
cmd_buffer->device->physical_device->rad_info.chip_class,
   false,
   EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0,
@@ -3345,7 +3346,7 @@ void radv_CmdWaitEvents(VkCommandBuffer commandBuffer,
 
MAYBE_UNUSED unsigned cdw_max = 
radeon_check_space(cmd_buffer->device->ws, cs, 7);
 
-   si_emit_wait_fence(cs, va, 1, 0xffffffff);
+   si_emit_wait_fence(cs, false, va, 1, 0xffffffff);
assert(cmd_buffer->cs->cdw <= cdw_max);
}
 
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 427d357..abbdfdd 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1095,6 +1095,7 @@ VkResult radv_CreateDevice(
case RADV_QUEUE_GENERAL:
case RADV_QUEUE_COMPUTE:
si_cs_emit_cache_flush(device->flush_cs[family],
+  false,
   
device->physical_device->rad_info.chip_class,
   NULL, 0,
   family == RADV_QUEUE_COMPUTE && 
device->physical_device->rad_info.chip_class >= CIK,
@@ -1111,6 +1112,7 @@ VkResult radv_CreateDevice(
case RADV_QUEUE_GENERAL:
case RADV_QUEUE_COMPUTE:
si_cs_emit_cache_flush(device->flush_shader_cs[family],
+  false,
   
device->physical_device->rad_info.chip_class,
   NULL, 0,
   family == RADV_QUEUE_COMPUTE && 
device->physical_device->rad_info.chip_class >= CIK,
@@ -1761,6 +1763,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
 
if (!i) {
si_cs_emit_cache_flush(cs,
+  false,
   
queue->device->physical_device->rad_info.chip_class,
   NULL, 0,
   queue->queue_family_index == 
RING_COMPUTE &&
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index ac89fc1..a167409 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/

Re: [Mesa-dev] 10bit HEVC decoding for RadeonSI v2

2017-06-25 Thread Peter Frühberger

Hi all,

just as information:
https://github.com/FernetMenta/kodi-agile/commit/ca8119b4e11a52415125af959f220b280f56ecae
Rainer moved the specific parts of the buffer sharing into a separate
infrastructure see the VAAPIEGL.cpp and VAAPIEGL.h in the above patch.

This basically encapsulates the fourcc_code('R', '8', ' ', ' '); specific
to intel / mesa and makes VAAPI.cpp - the decoder - more generic.

That means an AMD implementation of above interface can now happen easily
by implementing Init / Map / Unmap.

Have a nice weekend
Peter

2017-03-20 17:00 GMT+01:00 Marek Olšák :

> On Sun, Mar 19, 2017 at 2:49 PM, Christian König
>  wrote:
> > Hi Peter,
> >
> > Adding Michel and Marek for the Mesa interop side and Harry for the
> display
> > side.
> >
> > How do you want us to display the decoded surfaces?
> >
> > Well to make a long story short: I don't have the slightest idea.
> Ideally we
> > would have the same handling as Intel so that you guys don't have anything
> > vendor dependent in your code.
> >
> > The first step would be to get the VA-API DRM extension to work with
> EGL. So
> > that Kodi is able to export the YUV surfaces and import parts of them as
> > separate R8/R16 or R8G8/R16G16 surfaces, right?
> >
> > What EGL/GL extension do you guys use to import the surfaces? Marek is
> that
> > stuff fully supported, e.g. do we also handle the offsets correctly? I've
> > added the backend code for this while doing VDPAU interop, but the EGL/GL
> > frontend code needs to handle it gracefully as well.
>
> Mesa/EGL imports an FD with an offset, but it always exports an FD
> with offset=0 (the driver offset is ignored). It also always returns
> num_planes = 1 on export, is that bad?
>
> Marek
>



-- 
   Key-ID: 0x1A995A9B
   keyserver: pgp.mit.edu
==
Fingerprint: 4606 DA19 EC2E 9A0B 0157  C81B DA07 CF63 1A99 5A9B
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radeonsi: don't flush and wait for CB after depth-only rendering

2017-06-25 Thread Marek Olšák

From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_state.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 0f39ede..b236bed 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2528,22 +2528,25 @@ static void si_set_framebuffer_state(struct 
pipe_context *ctx,
 * Wait for compute shaders because of possible transitions:
 * - FB write -> shader read
 * - shader write -> FB read
 *
 * DB caches are flushed on demand (using si_decompress_textures).
 *
 * When MSAA is enabled, CB and TC caches are flushed on demand
 * (after FMASK decompression). Shader write -> FB read transitions
 * cannot happen for MSAA textures, because MSAA shader images are
 * not supported.
+*
+* Only flush and wait for CB if there is actually a bound color buffer.
 */
-   if (sctx->framebuffer.nr_samples <= 1) {
+   if (sctx->framebuffer.nr_samples <= 1 &&
+   sctx->framebuffer.state.nr_cbufs) {
sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
 SI_CONTEXT_INV_GLOBAL_L2 |
 SI_CONTEXT_FLUSH_AND_INV_CB;
}
sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
 
/* u_blitter doesn't invoke depth decompression when it does multiple
 * blits in a row, but the only case when it matters for DB is when
 * doing generate_mipmap. So here we flush DB manually between
 * individual generate_mipmap blits.
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] mesa: optimize GL_PRIMITIVE_RESTART_NV more

2017-06-25 Thread Marek Olšák

From: Marek Olšák 

And other client state changes don't have to call
update_derived_primitive_restart_state.
---
 src/mesa/main/enable.c | 19 +--
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c
index 18a288d..2e5fb00 100644
--- a/src/mesa/main/enable.c
+++ b/src/mesa/main/enable.c
@@ -59,21 +59,20 @@ update_derived_primitive_restart_state(struct gl_context 
*ctx)
 
 /**
  * Helper to enable/disable client-side state.
  */
 static void
 client_state(struct gl_context *ctx, GLenum cap, GLboolean state)
 {
struct gl_vertex_array_object *vao = ctx->Array.VAO;
GLbitfield64 flag;
GLboolean *var;
-   uint64_t new_state = _NEW_ARRAY;
 
switch (cap) {
   case GL_VERTEX_ARRAY:
  var = &vao->VertexAttrib[VERT_ATTRIB_POS].Enabled;
  flag = VERT_BIT_POS;
  break;
   case GL_NORMAL_ARRAY:
  var = &vao->VertexAttrib[VERT_ATTRIB_NORMAL].Enabled;
  flag = VERT_BIT_NORMAL;
  break;
@@ -104,41 +103,41 @@ client_state(struct gl_context *ctx, GLenum cap, 
GLboolean state)
 
   case GL_POINT_SIZE_ARRAY_OES:
  var = &vao->VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled;
  flag = VERT_BIT_POINT_SIZE;
  FLUSH_VERTICES(ctx, _NEW_PROGRAM);
  ctx->VertexProgram.PointSizeEnabled = state;
  break;
 
   /* GL_NV_primitive_restart */
   case GL_PRIMITIVE_RESTART_NV:
- if (!ctx->Extensions.NV_primitive_restart) {
+ if (!ctx->Extensions.NV_primitive_restart)
 goto invalid_enum_error;
- }
- var = &ctx->Array.PrimitiveRestart;
- flag = 0;
- new_state = 0; /* primitive restart is not a vertex array state */
- break;
+ if (ctx->Array.PrimitiveRestart == state)
+return;
+
+ FLUSH_VERTICES(ctx, 0);
+ ctx->Array.PrimitiveRestart = state;
+ update_derived_primitive_restart_state(ctx);
+ return;
 
   default:
  goto invalid_enum_error;
}
 
if (*var == state)
   return;
 
-   FLUSH_VERTICES(ctx, new_state);
+   FLUSH_VERTICES(ctx, _NEW_ARRAY);
 
*var = state;
 
-   update_derived_primitive_restart_state(ctx);
-
if (state)
   vao->_Enabled |= flag;
else
   vao->_Enabled &= ~flag;
 
vao->NewArrays |= flag;
 
if (ctx->Driver.Enable) {
   ctx->Driver.Enable( ctx, cap, state );
}
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] mesa: fix clip plane enable breakage

2017-06-25 Thread Marek Olšák

From: Marek Olšák 

Broken by:

commit 00173d91b70ae4dcea7c6324ee4858c498cae14b
Author: Marek Olšák 
Date:   Sat Jun 10 12:09:43 2017 +0200

mesa: don't flag _NEW_TRANSFORM for st/mesa if possible

It also optimizes the case slightly for GL core.

It doesn't try to fix that glEnable might be a bad place to do the
clip plane transformation.
---
 src/mesa/main/enable.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c
index e1293f9..18a288d 100644
--- a/src/mesa/main/enable.c
+++ b/src/mesa/main/enable.c
@@ -354,22 +354,27 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, 
GLboolean state)
 if (ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGLES ||
 !ctx->DriverFlags.NewClipPlaneEnable) {
FLUSH_VERTICES(ctx, _NEW_TRANSFORM);
 } else {
FLUSH_VERTICES(ctx, 0);
 }
 ctx->NewDriverState |= ctx->DriverFlags.NewClipPlaneEnable;
 
 if (state) {
ctx->Transform.ClipPlanesEnabled |= (1 << p);
-   if (ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGLES)
+
+   /* The projection matrix transforms the clip plane. */
+   /* TODO: glEnable might not be the best place to do it. */
+   if (ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGLES) {
   _mesa_update_clip_plane(ctx, p);
+  ctx->NewDriverState |= ctx->DriverFlags.NewClipPlane;
+   }
 }
 else {
ctx->Transform.ClipPlanesEnabled &= ~(1 << p);
 }
  }
  break;
   case GL_COLOR_MATERIAL:
  if (ctx->API != API_OPENGL_COMPAT && ctx->API != API_OPENGLES)
 goto invalid_enum_error;
  if (ctx->Light.ColorMaterialEnabled == state)
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/3] mesa: add KHR_no_error support for gl{Compressed}TexImage*D()

2017-06-25 Thread Timothy Arceri

---
 src/mapi/glapi/gen/gl_API.xml | 14 
 src/mesa/main/teximage.c  | 82 +++
 src/mesa/main/teximage.h  | 34 ++
 3 files changed, 124 insertions(+), 6 deletions(-)

diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
index 2cc66ac..550af08 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -2149,7 +2149,7 @@
 
 
 
-
+
 
 
 
@@ -2161,7 +2161,7 @@
 
 
 
-
+
 
 
 
@@ -4011,7 +4011,7 @@
 
 
 
-
+
 
 
 
@@ -4507,7 +4507,8 @@
 
 
 
-
+
 
 
 
@@ -4520,7 +4521,8 @@
 
 
 
-
+
 
 
 
@@ -4532,7 +4534,7 @@
 
 
 
-
+
 
 
 
diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 4ff7d33..128e010 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -3090,6 +3090,18 @@ teximage_err(struct gl_context *ctx, GLboolean 
compressed, GLuint dims,
 }
 
 
+static void
+teximage_no_error(struct gl_context *ctx, GLboolean compressed, GLuint dims,
+  GLenum target, GLint level, GLint internalFormat,
+  GLsizei width, GLsizei height, GLsizei depth,
+  GLint border, GLenum format, GLenum type,
+  GLsizei imageSize, const GLvoid *pixels)
+{
+   teximage(ctx, compressed, dims, target, level, internalFormat, width, 
height,
+depth, border, format, type, imageSize, pixels, true);
+}
+
+
 /*
  * Called from the API.  Note that width includes the border.
  */
@@ -3144,6 +3156,40 @@ _mesa_TexImage3DEXT( GLenum target, GLint level, GLenum 
internalFormat,
 
 
 void GLAPIENTRY
+_mesa_TexImage1D_no_error(GLenum target, GLint level, GLint internalFormat,
+  GLsizei width, GLint border, GLenum format,
+  GLenum type, const GLvoid *pixels)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   teximage_no_error(ctx, GL_FALSE, 1, target, level, internalFormat, width, 1,
+ 1, border, format, type, 0, pixels);
+}
+
+
+void GLAPIENTRY
+_mesa_TexImage2D_no_error(GLenum target, GLint level, GLint internalFormat,
+  GLsizei width, GLsizei height, GLint border,
+  GLenum format, GLenum type, const GLvoid *pixels)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   teximage_no_error(ctx, GL_FALSE, 2, target, level, internalFormat, width,
+ height, 1, border, format, type, 0, pixels);
+}
+
+
+void GLAPIENTRY
+_mesa_TexImage3D_no_error(GLenum target, GLint level, GLint internalFormat,
+  GLsizei width, GLsizei height, GLsizei depth,
+  GLint border, GLenum format, GLenum type,
+  const GLvoid *pixels )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   teximage_no_error(ctx, GL_FALSE, 3, target, level, internalFormat,
+ width, height, depth, border, format, type, 0, pixels);
+}
+
+
+void GLAPIENTRY
 _mesa_EGLImageTargetTexture2DOES (GLenum target, GLeglImageOES image)
 {
struct gl_texture_object *texObj;
@@ -4571,6 +4617,42 @@ _mesa_CompressedTexImage3D(GLenum target, GLint level,
 }
 
 
+void GLAPIENTRY
+_mesa_CompressedTexImage1D_no_error(GLenum target, GLint level,
+GLenum internalFormat, GLsizei width,
+GLint border, GLsizei imageSize,
+const GLvoid *data)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   teximage_no_error(ctx, GL_TRUE, 1, target, level, internalFormat, width, 1,
+ 1, border, GL_NONE, GL_NONE, imageSize, data);
+}
+
+
+void GLAPIENTRY
+_mesa_CompressedTexImage2D_no_error(GLenum target, GLint level,
+GLenum internalFormat, GLsizei width,
+GLsizei height, GLint border,
+GLsizei imageSize, const GLvoid *data)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   teximage_no_error(ctx, GL_TRUE, 2, target, level, internalFormat, width,
+ height, 1, border, GL_NONE, GL_NONE, imageSize, data);
+}
+
+
+void GLAPIENTRY
+_mesa_CompressedTexImage3D_no_error(GLenum target, GLint level,
+GLenum internalFormat, GLsizei width,
+GLsizei height, GLsizei depth, GLint 
border,
+GLsizei imageSize, const GLvoid *data)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   teximage_no_error(ctx, GL_TRUE, 3, target, level, internalFormat, width,
+ height, depth, border, GL_NONE, GL_NONE, imageSize, data);
+}
+
+
 /**
  * Common helper for glCompressedTexSubImage1/2/3D() and
  * glCompressedTextureSubImage1/2/3D().
diff --git a/src/mesa/main/teximage.h b/src/m

[Mesa-dev] [PATCH 2/3] mesa: add no error support to teximage()

2017-06-25 Thread Timothy Arceri

---
 src/mesa/main/teximage.c | 61 +---
 1 file changed, 32 insertions(+), 29 deletions(-)

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 4301070..4ff7d33 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -2870,14 +2870,14 @@ teximage(struct gl_context *ctx, GLboolean compressed, 
GLuint dims,
  GLenum target, GLint level, GLint internalFormat,
  GLsizei width, GLsizei height, GLsizei depth,
  GLint border, GLenum format, GLenum type,
- GLsizei imageSize, const GLvoid *pixels)
+ GLsizei imageSize, const GLvoid *pixels, bool no_error)
 {
const char *func = compressed ? "glCompressedTexImage" : "glTexImage";
struct gl_pixelstore_attrib unpack_no_border;
const struct gl_pixelstore_attrib *unpack = &ctx->Unpack;
struct gl_texture_object *texObj;
mesa_format texFormat;
-   GLboolean dimensionsOK, sizeOK;
+   bool dimensionsOK = true, sizeOK = true;
 
FLUSH_VERTICES(ctx, 0);
 
@@ -2902,26 +2902,27 @@ teximage(struct gl_context *ctx, GLboolean compressed, 
GLuint dims,
 
internalFormat = override_internal_format(internalFormat, width, height);
 
-   /* target error checking */
-   if (!legal_teximage_target(ctx, dims, target)) {
-  _mesa_error(ctx, GL_INVALID_ENUM, "%s%uD(target=%s)",
-  func, dims, _mesa_enum_to_string(target));
-  return;
-   }
-
-   /* general error checking */
-   if (compressed) {
-  if (compressed_texture_error_check(ctx, dims, target, level,
- internalFormat,
- width, height, depth,
- border, imageSize, pixels))
- return;
-   }
-   else {
-  if (texture_error_check(ctx, dims, target, level, internalFormat,
-  format, type, width, height, depth, border,
-  pixels))
+   if (!no_error) {
+  /* target error checking */
+  if (!legal_teximage_target(ctx, dims, target)) {
+ _mesa_error(ctx, GL_INVALID_ENUM, "%s%uD(target=%s)",
+ func, dims, _mesa_enum_to_string(target));
  return;
+  }
+
+  /* general error checking */
+  if (compressed) {
+ if (compressed_texture_error_check(ctx, dims, target, level,
+internalFormat,
+width, height, depth,
+border, imageSize, pixels))
+return;
+  } else {
+ if (texture_error_check(ctx, dims, target, level, internalFormat,
+ format, type, width, height, depth, border,
+ pixels))
+return;
+  }
}
 
/* Here we convert a cpal compressed image into a regular glTexImage2D
@@ -2976,14 +2977,16 @@ teximage(struct gl_context *ctx, GLboolean compressed, 
GLuint dims,
 
assert(texFormat != MESA_FORMAT_NONE);
 
-   /* check that width, height, depth are legal for the mipmap level */
-   dimensionsOK = _mesa_legal_texture_dimensions(ctx, target, level, width,
- height, depth, border);
+   if (!no_error) {
+  /* check that width, height, depth are legal for the mipmap level */
+  dimensionsOK = _mesa_legal_texture_dimensions(ctx, target, level, width,
+height, depth, border);
 
-   /* check that the texture won't take too much memory, etc */
-   sizeOK = ctx->Driver.TestProxyTexImage(ctx, proxy_target(target),
-  0, level, texFormat, 1,
-  width, height, depth);
+  /* check that the texture won't take too much memory, etc */
+  sizeOK = ctx->Driver.TestProxyTexImage(ctx, proxy_target(target),
+ 0, level, texFormat, 1,
+ width, height, depth);
+   }
 
if (_mesa_is_proxy_texture(target)) {
   /* Proxy texture: just clear or set state depending on error checking */
@@ -3083,7 +3086,7 @@ teximage_err(struct gl_context *ctx, GLboolean 
compressed, GLuint dims,
  GLsizei imageSize, const GLvoid *pixels)
 {
teximage(ctx, compressed, dims, target, level, internalFormat, width, 
height,
-depth, border, format, type, imageSize, pixels);
+depth, border, format, type, imageSize, pixels, false);
 }
 
 
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/3] mesa: create wrapper around teximage()

2017-06-25 Thread Timothy Arceri

This is used to inline KHR_no_error logic without inlining
the function into all its callers.
---
 src/mesa/main/teximage.c | 40 ++--
 1 file changed, 26 insertions(+), 14 deletions(-)

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 69ad882..4301070 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -2865,7 +2865,7 @@ strip_texture_border(GLenum target,
  * \param type  the user's image type (only used if !compressed)
  * \param imageSize  only used for glCompressedTexImage1D/2D/3D calls.
  */
-static void
+static ALWAYS_INLINE void
 teximage(struct gl_context *ctx, GLboolean compressed, GLuint dims,
  GLenum target, GLint level, GLint internalFormat,
  GLsizei width, GLsizei height, GLsizei depth,
@@ -3072,6 +3072,19 @@ teximage(struct gl_context *ctx, GLboolean compressed, 
GLuint dims,
}
 }
 
+/* This is a wrapper around teximage() so that we can force the KHR_no_error
+ * logic to be inlined without inlining the function into all the callers.
+ */
+static void
+teximage_err(struct gl_context *ctx, GLboolean compressed, GLuint dims,
+ GLenum target, GLint level, GLint internalFormat,
+ GLsizei width, GLsizei height, GLsizei depth,
+ GLint border, GLenum format, GLenum type,
+ GLsizei imageSize, const GLvoid *pixels)
+{
+   teximage(ctx, compressed, dims, target, level, internalFormat, width, 
height,
+depth, border, format, type, imageSize, pixels);
+}
 
 
 /*
@@ -3083,8 +3096,8 @@ _mesa_TexImage1D( GLenum target, GLint level, GLint 
internalFormat,
   GLenum type, const GLvoid *pixels )
 {
GET_CURRENT_CONTEXT(ctx);
-   teximage(ctx, GL_FALSE, 1, target, level, internalFormat, width, 1, 1,
-border, format, type, 0, pixels);
+   teximage_err(ctx, GL_FALSE, 1, target, level, internalFormat, width, 1, 1,
+border, format, type, 0, pixels);
 }
 
 
@@ -3095,8 +3108,8 @@ _mesa_TexImage2D( GLenum target, GLint level, GLint 
internalFormat,
   const GLvoid *pixels )
 {
GET_CURRENT_CONTEXT(ctx);
-   teximage(ctx, GL_FALSE, 2, target, level, internalFormat, width, height, 1,
-border, format, type, 0, pixels);
+   teximage_err(ctx, GL_FALSE, 2, target, level, internalFormat, width, 
height, 1,
+border, format, type, 0, pixels);
 }
 
 
@@ -3111,9 +3124,8 @@ _mesa_TexImage3D( GLenum target, GLint level, GLint 
internalFormat,
   const GLvoid *pixels )
 {
GET_CURRENT_CONTEXT(ctx);
-   teximage(ctx, GL_FALSE, 3, target, level, internalFormat,
-width, height, depth,
-border, format, type, 0, pixels);
+   teximage_err(ctx, GL_FALSE, 3, target, level, internalFormat,
+width, height, depth, border, format, type, 0, pixels);
 }
 
 
@@ -4527,8 +4539,8 @@ _mesa_CompressedTexImage1D(GLenum target, GLint level,
   const GLvoid *data)
 {
GET_CURRENT_CONTEXT(ctx);
-   teximage(ctx, GL_TRUE, 1, target, level, internalFormat,
-width, 1, 1, border, GL_NONE, GL_NONE, imageSize, data);
+   teximage_err(ctx, GL_TRUE, 1, target, level, internalFormat,
+width, 1, 1, border, GL_NONE, GL_NONE, imageSize, data);
 }
 
 
@@ -4539,8 +4551,8 @@ _mesa_CompressedTexImage2D(GLenum target, GLint level,
   const GLvoid *data)
 {
GET_CURRENT_CONTEXT(ctx);
-   teximage(ctx, GL_TRUE, 2, target, level, internalFormat,
-width, height, 1, border, GL_NONE, GL_NONE, imageSize, data);
+   teximage_err(ctx, GL_TRUE, 2, target, level, internalFormat,
+width, height, 1, border, GL_NONE, GL_NONE, imageSize, data);
 }
 
 
@@ -4551,8 +4563,8 @@ _mesa_CompressedTexImage3D(GLenum target, GLint level,
   GLsizei imageSize, const GLvoid *data)
 {
GET_CURRENT_CONTEXT(ctx);
-   teximage(ctx, GL_TRUE, 3, target, level, internalFormat,
-width, height, depth, border, GL_NONE, GL_NONE, imageSize, data);
+   teximage_err(ctx, GL_TRUE, 3, target, level, internalFormat, width, height,
+depth, border, GL_NONE, GL_NONE, imageSize, data);
 }
 
 
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [AppVeyor] mesa master #4738 failed

2017-06-25 Thread AppVeyor




Build mesa 4738 failed


Commit 4a34f3244a by Dave Airlie on 6/14/2017 10:46 PM:

radv/meta: don't need vertex info for resolve shader.\n\nReviewed-by: Bas Nieuwenhuizen \nSigned-off-by: Dave Airlie 


Configure your notification preferences

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Help! CopyImageSubData is defeating me

2017-06-25 Thread Jason Ekstrand


On June 25, 2017 3:47:09 PM Ilia Mirkin  wrote:


On Sun, Jun 25, 2017 at 6:24 PM, Nicolai Hähnle  wrote:

On 25.06.2017 23:42, Ilia Mirkin wrote:


On Sun, Jun 25, 2017 at 5:39 PM, Jason Ekstrand 
wrote:


On June 25, 2017 2:31:56 PM Ilia Mirkin  wrote:


On Sun, Jun 25, 2017 at 5:25 PM, Jason Ekstrand 
wrote:



On June 25, 2017 1:40:09 PM Ilia Mirkin  wrote:


On Sun, Jun 25, 2017 at 4:33 PM, Jason Ekstrand 
wrote:




Anyone have thoughts on this? Am I being foolish for trying to hold
onto GL_RGBA4 texturing? (Any decision here also affects st/nine.)






Yes.  Chances are that you're also doing texture views wrong.  Are
there
any
 4444 formats supported on nvidia hardware?





Well, any permutation of a 4444 format is easily supportable for
texturing.

What's wrong with the texture views? Should be fine... unless I'm
missing something. It's really just this issue with RB <-> Texture
copies where the RB's internal format is GL_RGBA4 while the *real*
internal format is RGBA8. Texture views between e.g. RGB565 and
RGBA4444 should work fine (and the fb will fail to validate as one
might expect if one were to attach a GL_RGBA4 texture view of a
GL_RGB565 texture).





But what if you have a  and you render to as  and then try to
texture from it as 565?




If you have a 4444 texture and attach it to a FB, then that FB will
not be complete. So you can't render to it in the first place.




Ok, then I'm confused as to how you have a copy_image problem.  Is the
issue
just that gallium won't let you CopyImageSubData into a texture you can't
render into?  Why not just treat it as R16 when using it as a copy_image
destination?



No, the issue is copying between a GL_RGBA4 RB and GL_RGBA4 texture.
Behind the scenes, the GL_RGBA4 RB gets upgraded to a
8-bit-per-channel format, while the GL_RGBA4 texture has a
4-bit-per-channel format. Then when trying to copy-image between them,
you have two totally differently-formatted textures.


Oh, you have different formats for texture vs. renderbuffer.  That's 
awkward but I understand the problem now.



Bind the GL_RGBA4 destination texture as a GL_RG8UI or GL_R16UI surface and
render to it using a shader that does the conversion manually?


Right, so there are 2 directions here... RGBA4 -> RGBA8 I can do with
a plain blit. RGBA8 -> RGBA4 I need to do something custom, like you
said.


Sounds about right.


Do I also need to worry about component ordering? The RGBA8 thing is
guaranteed to be the renderbuffer side, which I think has a lot less
opportunity for weirdness -- most of the texture weirdness comes from
glTexImage format/type options.


I think you're probably ok there assuming the fallbacks work in a 
reasonable way.



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] mesa: fix unused variable warning in release builds

2017-06-25 Thread Timothy Arceri

---
 src/mesa/main/uniforms.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c
index 91c3bf6..1c3d89b 100644
--- a/src/mesa/main/uniforms.c
+++ b/src/mesa/main/uniforms.c
@@ -104,7 +104,7 @@ _mesa_update_shader_textures_used(struct gl_shader_program 
*shProg,
GLbitfield mask = prog->SamplersUsed;
gl_shader_stage prog_stage =
   _mesa_program_enum_to_shader_stage(prog->Target);
-   struct gl_linked_shader *shader = shProg->_LinkedShaders[prog_stage];
+   UNUSED struct gl_linked_shader *shader = shProg->_LinkedShaders[prog_stage];
GLuint s;
 
assert(shader);
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] mesa/marshal: add custom marshalling for glNamedBuffer(Sub)Data

2017-06-25 Thread Timothy Arceri


On 25/06/17 18:31, Grigori Goronzy wrote:


On 2017-06-25 02:37, Timothy Arceri wrote:

Please try the series from Marek which reduces the batch size [1], the
reduced size helps reduce the impact of syncs. MARSHAL_MAX_CMD_SIZE is
also greatly reduced to help reduce thrashing the cache so it's
possible this patch won't be as effective anymore. However you might
not even need it.



Sorry, I forgot to mention, the 30% improvement measured is with this 
patch on top of Marek's series compared to just Marek's series. That 
series alone is improving glthread with Alien Isolation as well, but I 
didn't measure exactly how much. It wouldn't surprise me if it is in 
the 40-50% region with both, though.


Ok, thanks for confirming. I've fixed the indentation issues, reworded 
the commit message and pushed. Thanks for the patch :)





Best regards
Grigori

[1] 
https://lists.freedesktop.org/archives/mesa-dev/2017-June/160329.html


On 25/06/17 02:59, Grigori Goronzy wrote:

These entry points are used by Alien Isolation and caused
synchronization with glthread. The async marshalling implementation
is similar to glBuffer(Sub)Data.

Results in an approximately 6x drop in glthread synchronizations and a
~30% FPS jump in Alien Isolation (Medium preset, Athlon 860K, RX 480).

This does not care about the EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD special
case like the Buffer(Sub)Data marshalling functions.
---
I'm not a fan of the code duplication and I'll try to address that in
further changes to glthread/marshalling, but the improvement is so
noticeable that I'd like to share it. Alien Isolation is now 
playable on

my system while it wasn't before.

  src/mapi/glapi/gen/ARB_direct_state_access.xml |   4 +-
  src/mesa/main/marshal.c| 108 
+

  src/mesa/main/marshal.h|  18 +
  3 files changed, 128 insertions(+), 2 deletions(-)

diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml 
b/src/mapi/glapi/gen/ARB_direct_state_access.xml

index cb24d79..d3d2246 100644
--- a/src/mapi/glapi/gen/ARB_direct_state_access.xml
+++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml
@@ -61,14 +61,14 @@

 
  -   
+   




 
  -   
+   marshal="custom">




diff --git a/src/mesa/main/marshal.c b/src/mesa/main/marshal.c
index 4840f32..1fddf8e 100644
--- a/src/mesa/main/marshal.c
+++ b/src/mesa/main/marshal.c
@@ -408,6 +408,114 @@ _mesa_marshal_BufferSubData(GLenum target, 
GLintptr offset, GLsizeiptr size,

 }
  }
  +/* NamedBufferData: marshalled asynchronously */
+struct marshal_cmd_NamedBufferData
+{
+   struct marshal_cmd_base cmd_base;
+   GLuint name;
+   GLsizei size;
+   GLenum usage;
+   /* Next size bytes are GLubyte data[size] */
+};
+
+void
+_mesa_unmarshal_NamedBufferData(struct gl_context *ctx,
+const struct 
marshal_cmd_NamedBufferData *cmd)

+{
+   const GLuint name = cmd->name;
+   const GLsizei size = cmd->size;
+   const GLenum usage = cmd->usage;
+   const void *data = (const void *) (cmd + 1);
+
+   CALL_NamedBufferData(ctx->CurrentServerDispatch,
+  (name, size, data, usage));
+}
+
+void GLAPIENTRY
+_mesa_marshal_NamedBufferData(GLuint buffer, GLsizeiptr size,
+  const GLvoid * data, GLenum usage)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   size_t cmd_size = sizeof(struct marshal_cmd_NamedBufferData) + 
size;

+
+   debug_print_marshal("NamedBufferData");
+   if (unlikely(size < 0)) {
+  _mesa_glthread_finish(ctx);
+  _mesa_error(ctx, GL_INVALID_VALUE, "NamedBufferData(size < 0)");
+  return;
+   }
+
+   if (buffer > 0 && cmd_size <= MARSHAL_MAX_CMD_SIZE) {
+  struct marshal_cmd_NamedBufferData *cmd =
+ _mesa_glthread_allocate_command(ctx, 
DISPATCH_CMD_NamedBufferData,

+ cmd_size);
+  cmd->name = buffer;
+  cmd->size = size;
+  cmd->usage = usage;
+  char *variable_data = (char *) (cmd + 1);
+  memcpy(variable_data, data, size);
+  _mesa_post_marshal_hook(ctx);
+   } else {
+  _mesa_glthread_finish(ctx);
+  CALL_NamedBufferData(ctx->CurrentServerDispatch,
+ (buffer, size, data, usage));
+   }
+}
+
+/* NamedBufferSubData: marshalled asynchronously */
+struct marshal_cmd_NamedBufferSubData
+{
+   struct marshal_cmd_base cmd_base;
+   GLuint name;
+   GLintptr offset;
+   GLsizei size;
+   /* Next size bytes are GLubyte data[size] */
+};
+
+void
+_mesa_unmarshal_NamedBufferSubData(struct gl_context *ctx,
+   const struct 
marshal_cmd_NamedBufferSubData *cmd)

+{
+   const GLuint name = cmd->name;
+   const GLintptr offset = cmd->offset;
+   const GLsizei size = cmd->size;
+   const void *data = (const void *) (cmd + 1);
+
+   CALL_NamedBufferSubData(ctx->CurrentServerDispatch,
+  (name, offset, size, data));
+}
+
+void GLAPIENTRY
+_

Re: [Mesa-dev] Help! CopyImageSubData is defeating me

2017-06-25 Thread Ilia Mirkin

On Sun, Jun 25, 2017 at 6:24 PM, Nicolai Hähnle  wrote:
> On 25.06.2017 23:42, Ilia Mirkin wrote:
>>
>> On Sun, Jun 25, 2017 at 5:39 PM, Jason Ekstrand 
>> wrote:
>>>
>>> On June 25, 2017 2:31:56 PM Ilia Mirkin  wrote:
>>>
 On Sun, Jun 25, 2017 at 5:25 PM, Jason Ekstrand 
 wrote:
>
>
> On June 25, 2017 1:40:09 PM Ilia Mirkin  wrote:
>
>> On Sun, Jun 25, 2017 at 4:33 PM, Jason Ekstrand 
>> wrote:



 Anyone have thoughts on this? Am I being foolish for trying to hold
 onto GL_RGBA4 texturing? (Any decision here also affects st/nine.)
>>>
>>>
>>>
>>>
>>>
>>> Yes.  Chances are that you're also doing texture views wrong.  Are
>>> there
>>> any
>>>  formats supported on nvidia hardware?
>>
>>
>>
>>
>> Well, any permutation of a  format is easily supportable for
>> texturing.
>>
>> What's wrong with the texture views? Should be fine... unless I'm
>> missing something. It's really just this issue with RB <-> Texture
>> copies where the RB's internal format is GL_RGBA4 while the *real*
>> internal format is RGBA8. Texture views between e.g. RGB565 and
>> RGBA should work fine (and the fb will fail to validate as one
>> might expect if one were to attach a GL_RGBA4 texture view of a
>> GL_RGB565 texture).
>
>
>
>
> But what if you have a  and you render to as  and then try to
> texture from it as 565?



 If you have a  texture and attach it to a FB, then that FB will
 not be complete. So you can't render to it in the first place.
>>>
>>>
>>>
>>> Ok, then I'm confused as to how you have a copy_image problem.  Is the
>>> issue
>>> just that gallium won't let you CopyImageSubData into a texture you can't
>>> render into?  Why not just treat it as R16 when using it as a copy_image
>>> destination?
>>
>>
>> No, the issue is copying between a GL_RGBA4 RB and GL_RGBA4 texture.
>> Behind the scenes, the GL_RGBA4 RB gets upgraded to a
>> 8-bit-per-channel format, while the GL_RGBA4 texture has a
>> 4-bit-per-channel format. Then when trying to copy-image between them,
>> you have two totally differently-formatted textures.
>
>
> Bind the GL_RGBA4 destination texture as a GL_RG8UI or GL_R16UI surface and
> render to it using a shader that does the conversion manually?

Right, so there are 2 directions here... RGBA4 -> RGBA8 I can do with
a plain blit. RGBA8 -> RGBA4 I need to do something custom, like you
said.

Do I also need to worry about component ordering? The RGBA8 thing is
guaranteed to be the renderbuffer side, which I think has a lot less
opportunity for weirdness -- most of the texture weirdness comes from
glTexImage format/type options.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] radv: Use correct image layout for blit based copies.

2017-06-25 Thread Dave Airlie

On 26 June 2017 at 06:55, Bas Nieuwenhuizen  wrote:
> On Sun, Jun 25, 2017 at 10:29 PM, Dave Airlie  wrote:
>> On 26 June 2017 at 06:19, Bas Nieuwenhuizen  wrote:
>>> v2: Don't pass layout to image view usage mask.
>>>
>>> Signed-off-by: Bas Nieuwenhuizen 
>>> Fixes: 0628580eff6 "radv: Specify semantics of HTILE layout helpers."
>>
>> Just wondering what this actually fixes, I've mostly left GENERAL in
>> use in meta as it seems like it would avoid transitions in meta paths,
>> which mostly seems like the correct thing to do, the app should be
>> transitioning things before/after the entrypoint that causes the meta
>> path to be hit.
>>
>> Though of course there are problems knowing exactly what to flush
>> before we know which meta path we want to use.
>
> Because HTILE is enabled for TRANSFER_DST_OPTIMAL, but not for
> GENERAL. Ignoring a compressed HTILE is not a good idea.
>
> So I think it makes sense to at least keep these as
> TRANSFER_DST_OPTIMAL to keep them in sync with whatever we decide the
> compression to be. This doesn't result in any transitions either, as
> the initial_layout/final_layout are also set to TRANSFER_DST_OPTIMAL.

Cool thanks for explaining!

Reviewed-by: Dave Airlie 

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: Remove unused args of radv_image_view_init.

2017-06-25 Thread Dave Airlie

On 26 June 2017 at 07:01, Bas Nieuwenhuizen  wrote:
> Signed-off-by: Bas Nieuwenhuizen 

Reviewed-by: Dave Airlie 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Help! CopyImageSubData is defeating me

2017-06-25 Thread Nicolai Hähnle


On 25.06.2017 23:42, Ilia Mirkin wrote:

On Sun, Jun 25, 2017 at 5:39 PM, Jason Ekstrand  wrote:

On June 25, 2017 2:31:56 PM Ilia Mirkin  wrote:


On Sun, Jun 25, 2017 at 5:25 PM, Jason Ekstrand 
wrote:


On June 25, 2017 1:40:09 PM Ilia Mirkin  wrote:


On Sun, Jun 25, 2017 at 4:33 PM, Jason Ekstrand 
wrote:



Anyone have thoughts on this? Am I being foolish for trying to hold
onto GL_RGBA4 texturing? (Any decision here also affects st/nine.)





Yes.  Chances are that you're also doing texture views wrong.  Are
there
any
 formats supported on nvidia hardware?




Well, any permutation of a  format is easily supportable for
texturing.

What's wrong with the texture views? Should be fine... unless I'm
missing something. It's really just this issue with RB <-> Texture
copies where the RB's internal format is GL_RGBA4 while the *real*
internal format is RGBA8. Texture views between e.g. RGB565 and
RGBA should work fine (and the fb will fail to validate as one
might expect if one were to attach a GL_RGBA4 texture view of a
GL_RGB565 texture).




But what if you have a  and you render to as  and then try to
texture from it as 565?



If you have a  texture and attach it to a FB, then that FB will
not be complete. So you can't render to it in the first place.



Ok, then I'm confused as to how you have a copy_image problem.  Is the issue
just that gallium won't let you CopyImageSubData into a texture you can't
render into?  Why not just treat it as R16 when using it as a copy_image
destination?


No, the issue is copying between a GL_RGBA4 RB and GL_RGBA4 texture.
Behind the scenes, the GL_RGBA4 RB gets upgraded to a
8-bit-per-channel format, while the GL_RGBA4 texture has a
4-bit-per-channel format. Then when trying to copy-image between them,
you have two totally differently-formatted textures.


Bind the GL_RGBA4 destination texture as a GL_RG8UI or GL_R16UI surface 
and render to it using a shader that does the conversion manually?


Cheers,
Nicolai




   -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Help! CopyImageSubData is defeating me

2017-06-25 Thread Ilia Mirkin

On Sun, Jun 25, 2017 at 5:39 PM, Jason Ekstrand  wrote:
> On June 25, 2017 2:31:56 PM Ilia Mirkin  wrote:
>
>> On Sun, Jun 25, 2017 at 5:25 PM, Jason Ekstrand 
>> wrote:
>>>
>>> On June 25, 2017 1:40:09 PM Ilia Mirkin  wrote:
>>>
 On Sun, Jun 25, 2017 at 4:33 PM, Jason Ekstrand 
 wrote:
>>
>>
>> Anyone have thoughts on this? Am I being foolish for trying to hold
>> onto GL_RGBA4 texturing? (Any decision here also affects st/nine.)
>
>
>
>
> Yes.  Chances are that you're also doing texture views wrong.  Are
> there
> any
>  formats supported on nvidia hardware?



 Well, any permutation of a  format is easily supportable for
 texturing.

 What's wrong with the texture views? Should be fine... unless I'm
 missing something. It's really just this issue with RB <-> Texture
 copies where the RB's internal format is GL_RGBA4 while the *real*
 internal format is RGBA8. Texture views between e.g. RGB565 and
 RGBA should work fine (and the fb will fail to validate as one
 might expect if one were to attach a GL_RGBA4 texture view of a
 GL_RGB565 texture).
>>>
>>>
>>>
>>> But what if you have a  and you render to as  and then try to
>>> texture from it as 565?
>>
>>
>> If you have a  texture and attach it to a FB, then that FB will
>> not be complete. So you can't render to it in the first place.
>
>
> Ok, then I'm confused as to how you have a copy_image problem.  Is the issue
> just that gallium won't let you CopyImageSubData into a texture you can't
> render into?  Why not just treat it as R16 when using it as a copy_image
> destination?

No, the issue is copying between a GL_RGBA4 RB and GL_RGBA4 texture.
Behind the scenes, the GL_RGBA4 RB gets upgraded to a
8-bit-per-channel format, while the GL_RGBA4 texture has a
4-bit-per-channel format. Then when trying to copy-image between them,
you have two totally differently-formatted textures.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Help! CopyImageSubData is defeating me

2017-06-25 Thread Jason Ekstrand


On June 25, 2017 2:31:56 PM Ilia Mirkin  wrote:


On Sun, Jun 25, 2017 at 5:25 PM, Jason Ekstrand  wrote:

On June 25, 2017 1:40:09 PM Ilia Mirkin  wrote:


On Sun, Jun 25, 2017 at 4:33 PM, Jason Ekstrand 
wrote:


Anyone have thoughts on this? Am I being foolish for trying to hold
onto GL_RGBA4 texturing? (Any decision here also affects st/nine.)




Yes.  Chances are that you're also doing texture views wrong.  Are there
any
 formats supported on nvidia hardware?



Well, any permutation of a  format is easily supportable for
texturing.

What's wrong with the texture views? Should be fine... unless I'm
missing something. It's really just this issue with RB <-> Texture
copies where the RB's internal format is GL_RGBA4 while the *real*
internal format is RGBA8. Texture views between e.g. RGB565 and
RGBA should work fine (and the fb will fail to validate as one
might expect if one were to attach a GL_RGBA4 texture view of a
GL_RGB565 texture).



But what if you have a  and you render to as  and then try to
texture from it as 565?


If you have a  texture and attach it to a FB, then that FB will
not be complete. So you can't render to it in the first place.


Ok, then I'm confused as to how you have a copy_image problem.  Is the 
issue just that gallium won't let you CopyImageSubData into a texture you 
can't render into?  Why not just treat it as R16 when using it as a 
copy_image destination?



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Help! CopyImageSubData is defeating me

2017-06-25 Thread Ilia Mirkin

On Sun, Jun 25, 2017 at 5:25 PM, Jason Ekstrand  wrote:
> On June 25, 2017 1:40:09 PM Ilia Mirkin  wrote:
>
>> On Sun, Jun 25, 2017 at 4:33 PM, Jason Ekstrand 
>> wrote:

 Anyone have thoughts on this? Am I being foolish for trying to hold
 onto GL_RGBA4 texturing? (Any decision here also affects st/nine.)
>>>
>>>
>>>
>>> Yes.  Chances are that you're also doing texture views wrong.  Are there
>>> any
>>>  formats supported on nvidia hardware?
>>
>>
>> Well, any permutation of a  format is easily supportable for
>> texturing.
>>
>> What's wrong with the texture views? Should be fine... unless I'm
>> missing something. It's really just this issue with RB <-> Texture
>> copies where the RB's internal format is GL_RGBA4 while the *real*
>> internal format is RGBA8. Texture views between e.g. RGB565 and
>> RGBA should work fine (and the fb will fail to validate as one
>> might expect if one were to attach a GL_RGBA4 texture view of a
>> GL_RGB565 texture).
>
>
> But what if you have a  and you render to as  and then try to
> texture from it as 565?

If you have a 4444 texture and attach it to a FB, then that FB will
not be complete. So you can't render to it in the first place.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Help! CopyImageSubData is defeating me

2017-06-25 Thread Jason Ekstrand


On June 25, 2017 1:40:09 PM Ilia Mirkin  wrote:


On Sun, Jun 25, 2017 at 4:33 PM, Jason Ekstrand  wrote:

Anyone have thoughts on this? Am I being foolish for trying to hold
onto GL_RGBA4 texturing? (Any decision here also affects st/nine.)



Yes.  Chances are that you're also doing texture views wrong.  Are there any
 formats supported on nvidia hardware?


Well, any permutation of a  format is easily supportable for texturing.

What's wrong with the texture views? Should be fine... unless I'm
missing something. It's really just this issue with RB <-> Texture
copies where the RB's internal format is GL_RGBA4 while the *real*
internal format is RGBA8. Texture views between e.g. RGB565 and
RGBA should work fine (and the fb will fail to validate as one
might expect if one were to attach a GL_RGBA4 texture view of a
GL_RGB565 texture).


But what if you have a  and you render to as  and then try to 
texture from it as 565?



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radv: Remove unused args of radv_image_view_init.

2017-06-25 Thread Bas Nieuwenhuizen

Signed-off-by: Bas Nieuwenhuizen 
---
 src/amd/vulkan/radv_image.c   |  6 ++
 src/amd/vulkan/radv_meta_blit.c   | 12 ++--
 src/amd/vulkan/radv_meta_blit2d.c | 16 +++-
 src/amd/vulkan/radv_meta_bufimage.c   | 13 ++---
 src/amd/vulkan/radv_meta_clear.c  |  3 +--
 src/amd/vulkan/radv_meta_decompress.c |  3 +--
 src/amd/vulkan/radv_meta_fast_clear.c |  3 +--
 src/amd/vulkan/radv_meta_resolve.c|  6 ++
 src/amd/vulkan/radv_meta_resolve_cs.c |  6 ++
 src/amd/vulkan/radv_meta_resolve_fs.c |  6 ++
 src/amd/vulkan/radv_private.h |  4 +---
 11 files changed, 23 insertions(+), 55 deletions(-)

diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index 91c7e5ff79f..147ebed7341 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -837,9 +837,7 @@ radv_image_create(VkDevice _device,
 void
 radv_image_view_init(struct radv_image_view *iview,
 struct radv_device *device,
-const VkImageViewCreateInfo* pCreateInfo,
-struct radv_cmd_buffer *cmd_buffer,
-VkImageUsageFlags usage_mask)
+const VkImageViewCreateInfo* pCreateInfo)
 {
RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
@@ -1013,7 +1011,7 @@ radv_CreateImageView(VkDevice _device,
if (view == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
-   radv_image_view_init(view, device, pCreateInfo, NULL, ~0);
+   radv_image_view_init(view, device, pCreateInfo);
 
*pView = radv_image_view_to_handle(view);
 
diff --git a/src/amd/vulkan/radv_meta_blit.c b/src/amd/vulkan/radv_meta_blit.c
index f63fa6795c0..89ff82ec68d 100644
--- a/src/amd/vulkan/radv_meta_blit.c
+++ b/src/amd/vulkan/radv_meta_blit.c
@@ -526,8 +526,7 @@ void radv_CmdBlitImage(
 .baseArrayLayer = 
src_res->baseArrayLayer,
 .layerCount = 1
 },
-},
-cmd_buffer, VK_IMAGE_USAGE_SAMPLED_BIT);
+});
 
unsigned dst_start, dst_end;
if (dest_image->type == VK_IMAGE_TYPE_3D) {
@@ -575,12 +574,6 @@ void radv_CmdBlitImage(
dest_box.extent.height = abs(dst_y1 - dst_y0);
 
struct radv_image_view dest_iview;
-   unsigned usage;
-   if (dst_res->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT)
-   usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
-   else
-   usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
-
const unsigned num_layers = dst_end - dst_start;
for (unsigned i = 0; i < num_layers; i++) {
const VkOffset3D dest_offset_0 = {
@@ -620,8 +613,7 @@ void radv_CmdBlitImage(
 .baseArrayLayer = 
dest_array_slice,
 .layerCount = 1
 },
-},
-cmd_buffer, usage);
+});
meta_emit_blit(cmd_buffer,
   src_image, &src_iview,
   src_offset_0, src_offset_1,
diff --git a/src/amd/vulkan/radv_meta_blit2d.c 
b/src/amd/vulkan/radv_meta_blit2d.c
index 7d12d6cb559..fb14cfbcdda 100644
--- a/src/amd/vulkan/radv_meta_blit2d.c
+++ b/src/amd/vulkan/radv_meta_blit2d.c
@@ -53,7 +53,6 @@ enum blit2d_src_type {
 static void
 create_iview(struct radv_cmd_buffer *cmd_buffer,
  struct radv_meta_blit2d_surf *surf,
- VkImageUsageFlags usage,
  struct radv_image_view *iview, VkFormat depth_format)
 {
VkFormat format;
@@ -76,7 +75,7 @@ create_iview(struct radv_cmd_buffer *cmd_buffer,
 .baseArrayLayer = surf->layer,
 .layerCount = 1
 },
-}, cmd_buffer, usage);
+});
 }
 
 static void
@@ -139,8 +138,7 @@ blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer,
  VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4,
  &src_buf->pitch);
} else {
-   create_iview(cmd_buffer, src_img, VK_IMAGE_USAGE_SAMPLED_BIT, 
&tmp->iview,
-depth_format);
+   create_iview(cmd_buffer, src_img, &tmp->iview, depth_format);
 
radv_meta_p

Re: [Mesa-dev] [PATCH v2] radv: Use correct image layout for blit based copies.

2017-06-25 Thread Bas Nieuwenhuizen

On Sun, Jun 25, 2017 at 10:29 PM, Dave Airlie  wrote:
> On 26 June 2017 at 06:19, Bas Nieuwenhuizen  wrote:
>> v2: Don't pass layout to image view usage mask.
>>
>> Signed-off-by: Bas Nieuwenhuizen 
>> Fixes: 0628580eff6 "radv: Specify semantics of HTILE layout helpers."
>
> Just wondering what this actually fixes, I've mostly left GENERAL in
> use in meta as it seems like it would avoid transitions in meta paths,
> which mostly seems like the correct thing to do, the app should be
> transitioning things before/after the entrypoint that causes the meta
> path to be hit.
>
> Though of course there are problems knowing exactly what to flush
> before we know which meta path we want to use.

Because HTILE is enabled for TRANSFER_DST_OPTIMAL, but not for
GENERAL. Ignoring a compressed HTILE is not a good idea.

So I think it makes sense to at least keep these as
TRANSFER_DST_OPTIMAL to keep them in sync with whatever we decide the
compression to be. This doesn't result in any transitions either, as
the initial_layout/final_layout are also set to TRANSFER_DST_OPTIMAL.
>
> Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Help! CopyImageSubData is defeating me

2017-06-25 Thread Ilia Mirkin

On Sun, Jun 25, 2017 at 4:33 PM, Jason Ekstrand  wrote:
>> Anyone have thoughts on this? Am I being foolish for trying to hold
>> onto GL_RGBA4 texturing? (Any decision here also affects st/nine.)
>
>
> Yes.  Chances are that you're also doing texture views wrong.  Are there any
>  formats supported on nvidia hardware?

Well, any permutation of a 4444 format is easily supportable for texturing.

What's wrong with the texture views? Should be fine... unless I'm
missing something. It's really just this issue with RB <-> Texture
copies where the RB's internal format is GL_RGBA4 while the *real*
internal format is RGBA8. Texture views between e.g. RGB565 and
RGBA4444 should work fine (and the fb will fail to validate as one
might expect if one were to attach a GL_RGBA4 texture view of a
GL_RGB565 texture).

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Help! CopyImageSubData is defeating me

2017-06-25 Thread Jason Ekstrand


On June 25, 2017 1:21:33 PM Ilia Mirkin  wrote:


I was hoping I could solicit some ideas from the group.

As you're likely aware, CopyImageSubData supports copying between a
texture and a renderbuffer that were created with sized formats.

NVIDIA hardware supports texturing from BGRA4 but not rendering to it.
So when a renderbuffer is created, st/mesa automatically chooses BGRA8
as the TexFormat, while maintaining the "official" internal format of
GL_RGBA4. However a texture created with that internal format would
get a BGRA4 TexFormat (and then fail to render to if attached to a
framebuffer).

The issue I'm running into is when someone tries to do a
CopyImageSubData between two such items -- while the GL-level internal
format matches, which means that the copy must be supported, the
"real" internal format differs by more than just component order.
(There's a CTS test for this, probably dEQP too.)

So, assuming I don't want to drop BGRA4 texturing support from
nouveau, what are my options? I basically have to do a blit between
the two, while preserving component order. (I can't see any other
way.) st/mesa would be the natural place to have something like this.
However handling the full set of possibilities is pretty daunting. I'm
thinking that it should be easier to deal with a reduced set of
possibilities that one is likely to run into in real life.

Anyone have thoughts on this? Am I being foolish for trying to hold
onto GL_RGBA4 texturing? (Any decision here also affects st/nine.)


Yes.  Chances are that you're also doing texture views wrong.  Are there
any 4444 formats supported on nvidia hardware?  One thing we do in Vulkan
is to fake certain formats with others that have the same bit layout but a
different channel order and then just swizzle behind the client's back.


--Jason


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] radv: Use correct image layout for blit based copies.

2017-06-25 Thread Dave Airlie

On 26 June 2017 at 06:19, Bas Nieuwenhuizen  wrote:
> v2: Don't pass layout to image view usage mask.
>
> Signed-off-by: Bas Nieuwenhuizen 
> Fixes: 0628580eff6 "radv: Specify semantics of HTILE layout helpers."

Just wondering what this actually fixes, I've mostly left GENERAL in
use in meta as it seems like it would avoid transitions in meta paths,
which mostly seems like the correct thing to do, the app should be
transitioning things before/after the entrypoint that causes the meta
path to be hit.

Though of course there are problems knowing exactly what to flush
before we know which meta path we want to use.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] Help! CopyImageSubData is defeating me

2017-06-25 Thread Ilia Mirkin

I was hoping I could solicit some ideas from the group.

As you're likely aware, CopyImageSubData supports copying between a
texture and a renderbuffer that were created with sized formats.

NVIDIA hardware supports texturing from BGRA4 but not rendering to it.
So when a renderbuffer is created, st/mesa automatically chooses BGRA8
as the TexFormat, while maintaining the "official" internal format of
GL_RGBA4. However a texture created with that internal format would
get a BGRA4 TexFormat (and then fail to render to if attached to a
framebuffer).

The issue I'm running into is when someone tries to do a
CopyImageSubData between two such items -- while the GL-level internal
format matches, which means that the copy must be supported, the
"real" internal format differs by more than just component order.
(There's a CTS test for this, probably dEQP too.)

So, assuming I don't want to drop BGRA4 texturing support from
nouveau, what are my options? I basically have to do a blit between
the two, while preserving component order. (I can't see any other
way.) st/mesa would be the natural place to have something like this.
However handling the full set of possibilities is pretty daunting. I'm
thinking that it should be easier to deal with a reduced set of
possibilities that one is likely to run into in real life.

Anyone have thoughts on this? Am I being foolish for trying to hold
onto GL_RGBA4 texturing? (Any decision here also affects st/nine.)

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2] radv: Use correct image layout for blit based copies.

2017-06-25 Thread Bas Nieuwenhuizen

v2: Don't pass layout to image view usage mask.

Signed-off-by: Bas Nieuwenhuizen 
Fixes: 0628580eff6 "radv: Specify semantics of HTILE layout helpers."
---
 src/amd/vulkan/radv_meta_blit2d.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/amd/vulkan/radv_meta_blit2d.c 
b/src/amd/vulkan/radv_meta_blit2d.c
index aae35d2a79e..7d12d6cb559 100644
--- a/src/amd/vulkan/radv_meta_blit2d.c
+++ b/src/amd/vulkan/radv_meta_blit2d.c
@@ -713,8 +713,8 @@ blit2d_init_color_pipeline(struct radv_device *device,
   .format = format,
   .loadOp = 
VK_ATTACHMENT_LOAD_OP_LOAD,
   .storeOp = 
VK_ATTACHMENT_STORE_OP_STORE,
-  .initialLayout = 
VK_IMAGE_LAYOUT_GENERAL,
-  .finalLayout = 
VK_IMAGE_LAYOUT_GENERAL,
+  .initialLayout = 
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+  .finalLayout = 
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
   },
   .subpassCount = 1,
   .pSubpasses = 
&(VkSubpassDescription) {
@@ -723,12 +723,12 @@ blit2d_init_color_pipeline(struct radv_device *device,
   .colorAttachmentCount = 
1,
   .pColorAttachments = 
&(VkAttachmentReference) {
   .attachment = 0,
-  .layout = 
VK_IMAGE_LAYOUT_GENERAL,
+  .layout = 
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
},
   .pResolveAttachments = NULL,
   .pDepthStencilAttachment = 
&(VkAttachmentReference) {
   .attachment = 
VK_ATTACHMENT_UNUSED,
-  .layout = 
VK_IMAGE_LAYOUT_GENERAL,
+  .layout = 
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
   },
   .preserveAttachmentCount = 1,
   .pPreserveAttachments = 
(uint32_t[]) { 0 },
@@ -871,8 +871,8 @@ blit2d_init_depth_only_pipeline(struct radv_device *device,
   .format = 0,
   .loadOp = 
VK_ATTACHMENT_LOAD_OP_LOAD,
   .storeOp = 
VK_ATTACHMENT_STORE_OP_STORE,
-  .initialLayout = 
VK_IMAGE_LAYOUT_GENERAL,
-  .finalLayout = 
VK_IMAGE_LAYOUT_GENERAL,
+  .initialLayout = 
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+  .finalLayout = 
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
   },
   .subpassCount = 1,
   .pSubpasses = 
&(VkSubpassDescription) {
@@ -883,7 +883,7 @@ blit2d_init_depth_only_pipeline(struct radv_device *device,
   .pResolveAttachments = 
NULL,
   .pDepthStencilAttachment 
= &(VkAttachmentReference) {
   .attachment = 0,
-  .layout = 
VK_IMAGE_LAYOUT_GENERAL,
+  .layout = 
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
   },
   .preserveAttachmentCount 
= 1,
   .pPreserveAttachments = 
(uint32_t[]) { 0 },
@@ -1026,8 +1026,8 @@ blit2d_init_stencil_only_pipeline(struct radv_device 
*device,
   .format = 0,
   .loadOp = 
VK_ATTACHMENT_LOAD_OP_LOAD,
   .storeOp = 
VK_ATTACHMENT_STORE_OP_STORE,
-

Re: [Mesa-dev] [PATCH] radv: Use correct image layout for blit based copies.

2017-06-25 Thread Bas Nieuwenhuizen

On Sun, Jun 25, 2017 at 9:42 PM, Dave Airlie  wrote:
> On 25 June 2017 at 04:58, Bas Nieuwenhuizen  wrote:
>> Signed-off-by: Bas Nieuwenhuizen 
>> Fixes: 0628580eff6 "radv: Specify semantics of HTILE layout helpers."
>> ---
>>  src/amd/vulkan/radv_meta_blit2d.c | 29 +++--
>>  1 file changed, 11 insertions(+), 18 deletions(-)
>>
>> diff --git a/src/amd/vulkan/radv_meta_blit2d.c 
>> b/src/amd/vulkan/radv_meta_blit2d.c
>> index aae35d2a79e..80bcf370d41 100644
>> --- a/src/amd/vulkan/radv_meta_blit2d.c
>> +++ b/src/amd/vulkan/radv_meta_blit2d.c
>> @@ -179,14 +179,7 @@ blit2d_bind_dst(struct radv_cmd_buffer *cmd_buffer,
>> VkFormat depth_format,
>>  struct blit2d_dst_temps *tmp)
>>  {
>> -   VkImageUsageFlagBits bits;
>> -
>> -   if (dst->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT)
>> -   bits = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
>> -   else
>> -   bits = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
>> -
>> -   create_iview(cmd_buffer, dst, bits,
>> +   create_iview(cmd_buffer, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
>>  &tmp->iview, depth_format);
>
> were we mixing up enums here before or after this? bits is usage, you
> changed it to
> pass LAYOUT.

oops, you're right, it was usage, not layouts. Will revert this chunk
then. btw radv_image_view_init doesn't use the arg, so will send a
follow up patch to remove it, since apps won't be passing it in
anyways.
>
> Dave.
>
>>
>> radv_CreateFramebuffer(radv_device_to_handle(cmd_buffer->device),
>> @@ -713,8 +706,8 @@ blit2d_init_color_pipeline(struct radv_device *device,
>>.format = format,
>>.loadOp = 
>> VK_ATTACHMENT_LOAD_OP_LOAD,
>>.storeOp = 
>> VK_ATTACHMENT_STORE_OP_STORE,
>> -  .initialLayout = 
>> VK_IMAGE_LAYOUT_GENERAL,
>> -  .finalLayout = 
>> VK_IMAGE_LAYOUT_GENERAL,
>> +  .initialLayout = 
>> VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
>> +  .finalLayout = 
>> VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
>>},
>>.subpassCount = 1,
>>.pSubpasses = 
>> &(VkSubpassDescription) {
>> @@ -723,12 +716,12 @@ blit2d_init_color_pipeline(struct radv_device *device,
>>.colorAttachmentCount 
>> = 1,
>>.pColorAttachments = 
>> &(VkAttachmentReference) {
>>.attachment = 
>> 0,
>> -  .layout = 
>> VK_IMAGE_LAYOUT_GENERAL,
>> +  .layout = 
>> VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
>> },
>>.pResolveAttachments = NULL,
>>.pDepthStencilAttachment = 
>> &(VkAttachmentReference) {
>>.attachment = 
>> VK_ATTACHMENT_UNUSED,
>> -  .layout = 
>> VK_IMAGE_LAYOUT_GENERAL,
>> +  .layout = 
>> VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
>>},
>>.preserveAttachmentCount = 1,
>>.pPreserveAttachments = 
>> (uint32_t[]) { 0 },
>> @@ -871,8 +864,8 @@ blit2d_init_depth_only_pipeline(struct radv_device 
>> *device,
>>.format = 0,
>>.loadOp = 
>> VK_ATTACHMENT_LOAD_OP_LOAD,
>>.storeOp = 
>> VK_ATTACHMENT_STORE_OP_STORE,
>> -  
>> .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
>> -  .finalLayout 
>> = VK_IMAGE_LAYOUT_GENERAL,
>> +  
>> .initialLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
>> +  .finalLayout 
>> = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
>>},
>>.subpassCount = 1,
>>

Re: [Mesa-dev] [PATCH] blorp: Use normalized coordinates on Gen6

2017-06-25 Thread Eduardo Lima Mitev

Looks good to me.

Reviewed-by: Eduardo Lima Mitev 

On 06/23/2017 10:51 PM, Ian Romanick wrote:
> From: Ian Romanick 
> 
> Apparently, the sampler has some sort of precision issues for
> non-normalized texture coordinates with linear filtering.  This caused
> some small precision issues in scaled blits.  Work around this by using
> normalized coordinates.  There is some extra work necessary because Gen6
> uses TEX (instead of TXF) for some multisample resolve blits.
> 
> Fixes piglit.spec.arb_framebuffer_object.fbo-blit-stretch on SNB.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68365
> Signed-off-by: Ian Romanick 
> ---
>  src/intel/blorp/blorp_blit.c  | 11 +++
>  src/intel/blorp/blorp_genX_exec.h |  2 +-
>  2 files changed, 8 insertions(+), 5 deletions(-)
> 
> diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
> index f552302..0850473 100644
> --- a/src/intel/blorp/blorp_blit.c
> +++ b/src/intel/blorp/blorp_blit.c
> @@ -1195,9 +1195,10 @@ brw_blorp_build_nir_shader(struct blorp_context 
> *blorp, void *mem_ctx,
>* representing the four samples that maxe up a pixel.  So we need
>* to multiply our X and Y coordinates each by 2 and then add 1.
>*/
> - src_pos = nir_ishl(&b, src_pos, nir_imm_int(&b, 1));
> - src_pos = nir_iadd(&b, src_pos, nir_imm_int(&b, 1));
> - src_pos = nir_i2f32(&b, src_pos);
> + assert(key->src_coords_normalized);
> + src_pos = nir_fadd(&b,
> +nir_i2f32(&b, src_pos),
> +nir_imm_float(&b, 0.5f));
>   color = blorp_nir_tex(&b, &v, key, src_pos);
>} else {
>   /* Gen7+ hardware doesn't automaticaly blend. */
> @@ -1825,7 +1826,9 @@ try_blorp_blit(struct blorp_batch *batch,
>  
> params->num_samples = params->dst.surf.samples;
>  
> -   if (wm_prog_key->bilinear_filter && batch->blorp->isl_dev->info->gen < 6) 
> {
> +   if ((wm_prog_key->bilinear_filter ||
> +(wm_prog_key->blend && !wm_prog_key->blit_scaled)) &&
> +   batch->blorp->isl_dev->info->gen <= 6) {
>/* Gen4-5 don't support non-normalized texture coordinates */
>wm_prog_key->src_coords_normalized = true;
>params->wm_inputs.src_inv_size[0] =
> diff --git a/src/intel/blorp/blorp_genX_exec.h 
> b/src/intel/blorp/blorp_genX_exec.h
> index 1d77877..91c0756 100644
> --- a/src/intel/blorp/blorp_genX_exec.h
> +++ b/src/intel/blorp/blorp_genX_exec.h
> @@ -535,7 +535,7 @@ blorp_emit_sampler_state(struct blorp_batch *batch,
>sampler.VAddressMagFilterRoundingEnable = true;
>sampler.UAddressMinFilterRoundingEnable = true;
>sampler.UAddressMagFilterRoundingEnable = true;
> -#if GEN_GEN >= 6
> +#if GEN_GEN > 6
>sampler.NonnormalizedCoordinateEnable = true;
>  #endif
> }
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: Use correct image layout for blit based copies.

2017-06-25 Thread Dave Airlie

On 25 June 2017 at 04:58, Bas Nieuwenhuizen  wrote:
> Signed-off-by: Bas Nieuwenhuizen 
> Fixes: 0628580eff6 "radv: Specify semantics of HTILE layout helpers."
> ---
>  src/amd/vulkan/radv_meta_blit2d.c | 29 +++--
>  1 file changed, 11 insertions(+), 18 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_meta_blit2d.c 
> b/src/amd/vulkan/radv_meta_blit2d.c
> index aae35d2a79e..80bcf370d41 100644
> --- a/src/amd/vulkan/radv_meta_blit2d.c
> +++ b/src/amd/vulkan/radv_meta_blit2d.c
> @@ -179,14 +179,7 @@ blit2d_bind_dst(struct radv_cmd_buffer *cmd_buffer,
> VkFormat depth_format,
>  struct blit2d_dst_temps *tmp)
>  {
> -   VkImageUsageFlagBits bits;
> -
> -   if (dst->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT)
> -   bits = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
> -   else
> -   bits = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
> -
> -   create_iview(cmd_buffer, dst, bits,
> +   create_iview(cmd_buffer, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
>  &tmp->iview, depth_format);

were we mixing up enums here before or after this? bits is usage, you
changed it to
pass LAYOUT.

Dave.

>
> radv_CreateFramebuffer(radv_device_to_handle(cmd_buffer->device),
> @@ -713,8 +706,8 @@ blit2d_init_color_pipeline(struct radv_device *device,
>.format = format,
>.loadOp = 
> VK_ATTACHMENT_LOAD_OP_LOAD,
>.storeOp = 
> VK_ATTACHMENT_STORE_OP_STORE,
> -  .initialLayout = 
> VK_IMAGE_LAYOUT_GENERAL,
> -  .finalLayout = 
> VK_IMAGE_LAYOUT_GENERAL,
> +  .initialLayout = 
> VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
> +  .finalLayout = 
> VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
>},
>.subpassCount = 1,
>.pSubpasses = 
> &(VkSubpassDescription) {
> @@ -723,12 +716,12 @@ blit2d_init_color_pipeline(struct radv_device *device,
>.colorAttachmentCount 
> = 1,
>.pColorAttachments = 
> &(VkAttachmentReference) {
>.attachment = 
> 0,
> -  .layout = 
> VK_IMAGE_LAYOUT_GENERAL,
> +  .layout = 
> VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
> },
>.pResolveAttachments = NULL,
>.pDepthStencilAttachment = 
> &(VkAttachmentReference) {
>.attachment = 
> VK_ATTACHMENT_UNUSED,
> -  .layout = 
> VK_IMAGE_LAYOUT_GENERAL,
> +  .layout = 
> VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
>},
>.preserveAttachmentCount = 1,
>.pPreserveAttachments = 
> (uint32_t[]) { 0 },
> @@ -871,8 +864,8 @@ blit2d_init_depth_only_pipeline(struct radv_device 
> *device,
>.format = 0,
>.loadOp = 
> VK_ATTACHMENT_LOAD_OP_LOAD,
>.storeOp = 
> VK_ATTACHMENT_STORE_OP_STORE,
> -  .initialLayout 
> = VK_IMAGE_LAYOUT_GENERAL,
> -  .finalLayout = 
> VK_IMAGE_LAYOUT_GENERAL,
> +  .initialLayout 
> = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
> +  .finalLayout = 
> VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
>},
>.subpassCount = 1,
>.pSubpasses = 
> &(VkSubpassDescription) {
> @@ -883,7 +876,7 @@ blit2d_init_depth_only_pipeline(struct radv_device 
> *device,
>.pResolveAttachments = 
> NULL,
>
> .pDepthStencilAttachment = &(VkAttachmentReference)

[Mesa-dev] [Bug 99856] OpenCL Hello world returns "unsupported call to function get_local_size"

2017-06-25 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=99856

tuks...@gmail.com changed:

   What|Removed |Added

 CC||tuks...@gmail.com

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Updating Khronos-supplied headers (and gl.xml)

2017-06-25 Thread Matt Turner

On Sun, Jun 25, 2017 at 7:32 AM, Nicolai Hähnle  wrote:
> Hi all,
>
> For the ARB_gl_spirv work I need some new definitions from the Khronos
> headers, and I'd like to push the corresponding update to our master branch
> already.
>
> The patch is rather big, so I'm posting it here:
> https://cgit.freedesktop.org/~nh/mesa/commit/?h=khronos-update&id=aa846e6b55179736b99493fcdc45d80cdabbcaf2
>
> I'm not sure why some of the files grew by quite so much, but my
> understanding is that we take the files verbatim from Khronos, so that's
> just the way it is.
>
> I've had the patch in my local tree without issues for a while. Please
> review!

in glxext.h:

-#ifdef BUILDING_MESA
-/* Avoid uint <-> void* warnings */
-typedef unsigned long GLhandleARB;
-#else
typedef void *GLhandleARB;
-#endif

Other than that, I didn't see that we have any changes to files.

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radeonsi: enable LLVM sisched for Unigine Superposition

2017-06-25 Thread Marek Olšák

On Sun, Jun 25, 2017 at 12:55 PM, Grigori Goronzy  wrote:
> On 2017-06-22 17:10, Marek Olšák wrote:
>>
>> From: Marek Olšák 
>>
>> +2.3% better score on Fiji. It might be better without HBM.
>
>
> Is this really useful? Superposition is a benchmark. It would make more
> sense if this also targeted some actual games.
> Optimizations specific to only benchmarks are considered "cheating"
> sometimes.

The change isn't just meant to be limited to 1 app. However, nobody
has nominated any other app for switching to sisched yet. This is the
first official nomination for sisched in the history of the driver. It
will hopefully inspire other people to make more nominations. If you
want better overall performance, you have to start somewhere.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] mesa/glthread: track buffer creation/destruction

2017-06-25 Thread Gregory Hainaut

Hello Fredrik,

Yes the shadow hash feels useless now. I will update the patch in a couple
of days (vacation currently).

Cheers,
Gregory

Le 22 juin 2017 2:24 PM, "Fredrik Höglund"  a écrit :

> On Thursday 22 June 2017, Timothy Arceri wrote:
> > From: Gregory Hainaut 
> >
> > It would be used in next commit to allow asynchronous PBO transfer.
> >
> > The tracking saves the buffer name into a hash. Saving pointer
> > will be more complex as the buffer is created in BindBuffer due to
> IsBuffer
> > insanity.
> >
> > Perf wise DeleteBuffers is now synchronous for robustness.
> >
> > v5: properly delete hash element with the help of _mesa_HashDeleteAll
> >
> > v6: rebase
> > Signed-off-by: Gregory Hainaut 
> > ---
> >  src/mapi/glapi/gen/ARB_direct_state_access.xml |   2 +-
> >  src/mapi/glapi/gen/gl_API.xml  |   4 +-
> >  src/mesa/main/glthread.h   |  10 +++
> >  src/mesa/main/marshal.c| 113
> +
> >  src/mesa/main/marshal.h|  24 ++
> >  src/mesa/main/mtypes.h |   5 ++
> >  src/mesa/main/shared.c |  14 +++
> >  7 files changed, 169 insertions(+), 3 deletions(-)
> >
> > diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml
> b/src/mapi/glapi/gen/ARB_direct_state_access.xml
> > index cb24d79..b75c772a 100644
> > --- a/src/mapi/glapi/gen/ARB_direct_state_access.xml
> > +++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml
> > @@ -42,21 +42,21 @@
> >
> > 
> >
> >
> >
> >
> > 
> >
> > 
> >
> > -   
> > +   
> >
> >
> > 
> >
> > 
> >
> >
> >
> >
> > 
> > diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.
> xml
> > index 8784f05..aa34c04 100644
> > --- a/src/mapi/glapi/gen/gl_API.xml
> > +++ b/src/mapi/glapi/gen/gl_API.xml
> > @@ -5053,27 +5053,27 @@
> >
> >   >no_error="true">
> >  
> >  
> >  
> >  
> >  
> >  
> >
> > -
> > +
> >  
> >  
> >  
> >  
> >
> > -
> > +
> >  
> >  
> >  
> >  
> >
> >  
> >  
> >  
> >   variable_param="pname"/>
> >  
> > diff --git a/src/mesa/main/glthread.h b/src/mesa/main/glthread.h
> > index dd65931..351be6c 100644
> > --- a/src/mesa/main/glthread.h
> > +++ b/src/mesa/main/glthread.h
> > @@ -89,20 +89,30 @@ struct glthread_state
> >  * Tracks on the main thread side whether the current vertex array
> binding
> >  * is in a VBO.
> >  */
> > bool vertex_array_is_vbo;
> >
> > /**
> >  * Tracks on the main thread side whether the current element array
> (index
> >  * buffer) binding is in a VBO.
> >  */
> > bool element_array_is_vbo;
> > +
> > +   /**
> > +* Tracks on the main thread side the bound unpack pixel buffer
> > +*/
> > +   GLint pixel_unpack_buffer_bound;
> > +
> > +   /**
> > +* Tracks on the main thread side the bound pack pixel buffer
> > +*/
> > +   GLint pixel_pack_buffer_bound;
> >  };
>
> I suggest naming these bound_pixel_unpack_buffer and
> bound_pixel_pack_buffer, respectively.  The name pixel_pack_buffer_bound
> suggests that this is a boolean value.
>
> The type should also be GLuint.
>
> One more comment below:
>
> >
> >  void _mesa_glthread_init(struct gl_context *ctx);
> >  void _mesa_glthread_destroy(struct gl_context *ctx);
> >
> >  void _mesa_glthread_restore_dispatch(struct gl_context *ctx);
> >  void _mesa_glthread_flush_batch(struct gl_context *ctx);
> >  void _mesa_glthread_finish(struct gl_context *ctx);
> >
> >  #endif /* _GLTHREAD_H*/
> > diff --git a/src/mesa/main/marshal.c b/src/mesa/main/marshal.c
> > index 4840f32..b1731ab 100644
> > --- a/src/mesa/main/marshal.c
> > +++ b/src/mesa/main/marshal.c
> > @@ -25,20 +25,21 @@
> >   *
> >   * Custom functions for marshalling GL calls from the main thread to a
> worker
> >   * thread when automatic code generation isn't appropriate.
> >   */
> >
> >  #include "main/enums.h"
> >  #include "main/macros.h"
> >  #include "marshal.h"
> >  #include "dispatch.h"
> >  #include "marshal_generated.h"
> > +#include "hash.h"
> >
> >  struct marshal_cmd_Flush
> >  {
> > struct marshal_cmd_base cmd_base;
> >  };
> >
> >
> >  void
> >  _mesa_unmarshal_Flush(struct gl_context *ctx,
> >const struct marshal_cmd_Flush *cmd)
> > @@ -187,20 +188,132 @@ _mesa_marshal_ShaderSource(GLuint shader,
> GLsizei count,
> >}
> >_mesa_post_marshal_hook(ctx);
> > } else {
> >_mesa_glthread_finish(ctx);
> >CALL_ShaderSource(ctx->CurrentServerDispatch,
> >  (shader, count, string, length_tmp));
> > }
> > free(length_tmp);
> >  }
> >
> > +/**
> > + * Used as a placeholder for track_buffers_creation/track_
> buffers_destruction
> > + * so w

[Mesa-dev] Updating Khronos-supplied headers (and gl.xml)

2017-06-25 Thread Nicolai Hähnle


Hi all,

For the ARB_gl_spirv work I need some new definitions from the Khronos 
headers, and I'd like to push the corresponding update to our master 
branch already.


The patch is rather big, so I'm posting it here:
https://cgit.freedesktop.org/~nh/mesa/commit/?h=khronos-update&id=aa846e6b55179736b99493fcdc45d80cdabbcaf2

I'm not sure why some of the files grew by quite so much, but my 
understanding is that we take the files verbatim from Khronos, so that's 
just the way it is.


I've had the patch in my local tree without issues for a while. Please 
review!


Thanks,
Nicolai
--
 include/GL/glcorearb.h | 2220 --
 include/GL/glext.h |  302 +++-
 include/GL/glxext.h|   13 +-
 include/GL/wglext.h|   20 +-
 include/GLES/egl.h |   18 +-
 include/GLES/gl.h  |  448 ++
 include/GLES/glext.h   | 1519 
 include/GLES/glplatform.h  |   26 +-
 include/GLES2/gl2.h|   14 +-
 include/GLES2/gl2ext.h |  250 +++-
 include/GLES2/gl2platform.h|   26 +-
 include/GLES3/gl3.h|   16 +-
 include/GLES3/gl31.h   |   12 +-
 include/GLES3/gl32.h   |   14 +-
 include/GLES3/gl3platform.h|   26 +-
 src/mapi/glapi/registry/gl.xml | 2160 +
 16 files changed, 5060 insertions(+), 2024 deletions(-)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radeonsi: enable LLVM sisched for Unigine Superposition

2017-06-25 Thread Grigori Goronzy


On 2017-06-22 17:10, Marek Olšák wrote:

From: Marek Olšák 

+2.3% better score on Fiji. It might be better without HBM.


Is this really useful? Superposition is a benchmark. It would make more 
sense if this also targeted some actual games.
Optimizations specific to only benchmarks are considered "cheating" 
sometimes.


Best regards
Grigori


---
 src/gallium/drivers/radeonsi/si_pipe.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c
b/src/gallium/drivers/radeonsi/si_pipe.c
index ff787ad..4088849 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -935,20 +935,27 @@ struct pipe_screen
*radeonsi_screen_create(struct radeon_winsys *ws,

si_init_screen_state_functions(sscreen);

if (!r600_common_screen_init(&sscreen->b, ws, flags) ||
!si_init_gs_info(sscreen) ||
!si_init_shader_cache(sscreen)) {
FREE(sscreen);
return NULL;
}

+   /* Enable sisched where it helps. */
+   char process[128];
+   if (os_get_process_name(process, sizeof(process)) &&
+   /* Unigine Superposition */
+   !strcmp(process, "superposition"))
+   sscreen->b.debug_flags |= DBG_SI_SCHED;
+
 	/* Only enable as many threads as we have target machines, but at 
most

 * the number of CPUs - 1 if there is more than one.
 */
num_threads = sysconf(_SC_NPROCESSORS_ONLN);
num_threads = MAX2(1, num_threads - 1);
num_compiler_threads = MIN2(num_threads, ARRAY_SIZE(sscreen->tm));
num_compiler_threads_lowprio =
MIN2(num_threads, ARRAY_SIZE(sscreen->tm_low_priority));

if (!util_queue_init(&sscreen->shader_compiler_queue, "si_shader",

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radeonsi: enable LLVM sisched for Unigine Superposition

2017-06-25 Thread Marek Olšák

On Jun 25, 2017 9:23 AM, "Nicolai Hähnle"  wrote:

On 24.06.2017 13:35, Marek Olšák wrote:

>
>
> On Jun 24, 2017 12:01 PM, "Nicolai Hähnle"  nhaeh...@gmail.com>> wrote:
>
> On 22.06.2017 17:10, Marek Olšák wrote:
>
> From: Marek Olšák mailto:marek.ol...@amd.com
> >>
>
>
> +2.3% better score on Fiji. It might be better without HBM.
>
>
> Can this be a drirc option?
>
>
> The answer is the same as for: Can sisched be a flag defined in
> p_defines.h, drirc, and plumbed by st/dri? Nobody has ever put
> driver-specific stuff into p_defines.h.
>

Obviously the answer to these is no. However, some interface that provides
direct generic access to drirc could be plumbed through.


How?

Marek



Nicolai




> Marek
>
>
>
>
>
> ---
>src/gallium/drivers/radeonsi/si_pipe.c | 7 +++
>1 file changed, 7 insertions(+)
>
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c
> b/src/gallium/drivers/radeonsi/si_pipe.c
> index ff787ad..4088849 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -935,20 +935,27 @@ struct pipe_screen
> *radeonsi_screen_create(struct radeon_winsys *ws,
>  si_init_screen_state_functions(sscreen);
>  if (!r600_common_screen_init(&sscreen->b, ws, flags) ||
>  !si_init_gs_info(sscreen) ||
>  !si_init_shader_cache(sscreen)) {
>  FREE(sscreen);
>  return NULL;
>  }
>+ /* Enable sisched where it helps. */
> +   char process[128];
> +   if (os_get_process_name(process, sizeof(process)) &&
> +   /* Unigine Superposition */
> +   !strcmp(process, "superposition"))
> +   sscreen->b.debug_flags |= DBG_SI_SCHED;
> +
>  /* Only enable as many threads as we have target
> machines, but at most
>   * the number of CPUs - 1 if there is more than one.
>   */
>  num_threads = sysconf(_SC_NPROCESSORS_ONLN);
>  num_threads = MAX2(1, num_threads - 1);
>  num_compiler_threads = MIN2(num_threads,
> ARRAY_SIZE(sscreen->tm));
>  num_compiler_threads_lowprio =
>  MIN2(num_threads,
> ARRAY_SIZE(sscreen->tm_low_priority));
>  if (!util_queue_init(&sscreen->shader_compiler_queue,
> "si_shader",
>
>
>
> -- Lerne, wie die Welt wirklich ist,
> Aber vergiss niemals, wie sie sein sollte.
>
>
>

-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] mesa/marshal: add custom marshalling for glNamedBuffer(Sub)Data

2017-06-25 Thread Grigori Goronzy


On 2017-06-25 02:37, Timothy Arceri wrote:

Please try the series from Marek which reduces the batch size [1], the
reduced size helps reduce the impact of syncs. MARSHAL_MAX_CMD_SIZE is
also greatly reduced to help reduce thrashing the cache so it's
possible this patch won't be as effective anymore. However you might
not even need it.



Sorry, I forgot to mention, the 30% improvement measured is with this 
patch on top of Marek's series compared to just Marek's series. That 
series alone is improving glthread with Alien Isolation as well, but I 
didn't measure exactly how much. It wouldn't surprise me if it is in the 
40-50% region with both, though.


Best regards
Grigori

[1] 
https://lists.freedesktop.org/archives/mesa-dev/2017-June/160329.html


On 25/06/17 02:59, Grigori Goronzy wrote:

These entry points are used by Alien Isolation and caused
synchronization with glthread. The async marshalling implementation
is similar to glBuffer(Sub)Data.

Results in an approximately 6x drop in glthread synchronizations and a
~30% FPS jump in Alien Isolation (Medium preset, Athlon 860K, RX 480).

This does not care about the EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD 
special

case like the Buffer(Sub)Data marshalling functions.
---
I'm not a fan of the code duplication and I'll try to address that in
further changes to glthread/marshalling, but the improvement is so
noticeable that I'd like to share it. Alien Isolation is now playable 
on

my system while it wasn't before.

  src/mapi/glapi/gen/ARB_direct_state_access.xml |   4 +-
  src/mesa/main/marshal.c| 108 
+

  src/mesa/main/marshal.h|  18 +
  3 files changed, 128 insertions(+), 2 deletions(-)

diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml 
b/src/mapi/glapi/gen/ARB_direct_state_access.xml

index cb24d79..d3d2246 100644
--- a/src/mapi/glapi/gen/ARB_direct_state_access.xml
+++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml
@@ -61,14 +61,14 @@

 
  -   
+   




 
  -   
+   marshal="custom">




diff --git a/src/mesa/main/marshal.c b/src/mesa/main/marshal.c
index 4840f32..1fddf8e 100644
--- a/src/mesa/main/marshal.c
+++ b/src/mesa/main/marshal.c
@@ -408,6 +408,114 @@ _mesa_marshal_BufferSubData(GLenum target, 
GLintptr offset, GLsizeiptr size,

 }
  }
  +/* NamedBufferData: marshalled asynchronously */
+struct marshal_cmd_NamedBufferData
+{
+   struct marshal_cmd_base cmd_base;
+   GLuint name;
+   GLsizei size;
+   GLenum usage;
+   /* Next size bytes are GLubyte data[size] */
+};
+
+void
+_mesa_unmarshal_NamedBufferData(struct gl_context *ctx,
+const struct 
marshal_cmd_NamedBufferData *cmd)

+{
+   const GLuint name = cmd->name;
+   const GLsizei size = cmd->size;
+   const GLenum usage = cmd->usage;
+   const void *data = (const void *) (cmd + 1);
+
+   CALL_NamedBufferData(ctx->CurrentServerDispatch,
+  (name, size, data, usage));
+}
+
+void GLAPIENTRY
+_mesa_marshal_NamedBufferData(GLuint buffer, GLsizeiptr size,
+  const GLvoid * data, GLenum usage)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   size_t cmd_size = sizeof(struct marshal_cmd_NamedBufferData) + 
size;

+
+   debug_print_marshal("NamedBufferData");
+   if (unlikely(size < 0)) {
+  _mesa_glthread_finish(ctx);
+  _mesa_error(ctx, GL_INVALID_VALUE, "NamedBufferData(size < 
0)");

+  return;
+   }
+
+   if (buffer > 0 && cmd_size <= MARSHAL_MAX_CMD_SIZE) {
+  struct marshal_cmd_NamedBufferData *cmd =
+ _mesa_glthread_allocate_command(ctx, 
DISPATCH_CMD_NamedBufferData,

+ cmd_size);
+  cmd->name = buffer;
+  cmd->size = size;
+  cmd->usage = usage;
+  char *variable_data = (char *) (cmd + 1);
+  memcpy(variable_data, data, size);
+  _mesa_post_marshal_hook(ctx);
+   } else {
+  _mesa_glthread_finish(ctx);
+  CALL_NamedBufferData(ctx->CurrentServerDispatch,
+ (buffer, size, data, usage));
+   }
+}
+
+/* NamedBufferSubData: marshalled asynchronously */
+struct marshal_cmd_NamedBufferSubData
+{
+   struct marshal_cmd_base cmd_base;
+   GLuint name;
+   GLintptr offset;
+   GLsizei size;
+   /* Next size bytes are GLubyte data[size] */
+};
+
+void
+_mesa_unmarshal_NamedBufferSubData(struct gl_context *ctx,
+   const struct 
marshal_cmd_NamedBufferSubData *cmd)

+{
+   const GLuint name = cmd->name;
+   const GLintptr offset = cmd->offset;
+   const GLsizei size = cmd->size;
+   const void *data = (const void *) (cmd + 1);
+
+   CALL_NamedBufferSubData(ctx->CurrentServerDispatch,
+  (name, offset, size, data));
+}
+
+void GLAPIENTRY
+_mesa_marshal_NamedBufferSubData(GLuint buffer, GLintptr offset,
+ GLsizeiptr size, const GLvoid * 
data)

+{
+   GET_CURRENT_CONTEXT(ctx);
+

Re: [Mesa-dev] RFC uniform packing for gallium V2

2017-06-25 Thread Timothy Arceri

On Sun, Jun 25, 2017, at 12:30 AM, Nicolai Hähnle wrote:
> On 25.06.2017 09:18, Nicolai Hähnle wrote:
> > On 25.06.2017 03:31, Timothy Arceri wrote:
> >> There are still a handful of piglit tests failing and I'm yet to test
> >> that there are no regressions in the non-packed path, but I'd really
> >> like some feedback on the approach as Dave has flagged it as a possible
> >> controversial tgsi change.
> >>
> >> In order to avoid complicated swizzling and array element adjustments
> >> when dealing with arrays, this series simply adjusts the constant buffer
> >> index to point to the right location. There are some small changes to
> >> deal with indirect indexing but these also remain very simple and easy
> >> to follow.
> >>
> >> Dave has raised concerns that others might not like this as it doesn't
> >> strictly follow the tgsi approach that everything is a vec4. I would
> >> argue however that this is by far the simplest approach.
> >> Doing this with swizzles and array adjustments is going to require
> >> something like lower_packed_varyings.cpp which would be unnecessarily
> >> complicated IMO, I started off down that track and soon changed
> >> direction.
> > 
> > Yeah, I don't like the approach either. All register files are by vec4 
> > in TGSI, and changing that feels pretty wrong.
> 
> Actually, just thinking about the fact that there are more sources of 
> TGSI than just GLSL means that this is a definite no-go. Think of nine 
> and some of the blitter shaders, for example. Just use LOAD, please.

That could be easily worked around with a CAP. 

Anyway if we really don't want to do it this way, I'll leave this for
now. It's really not as simple as it seems to just use LOAD. Working
with the param list is awkward, and we are adding things to it all over
the place, tracking offsets is going to get overly complicated. I'm not
interested in going down that road right now.

> 
> Cheers,
> Nicolai
> 
> > 
> > I would suggest lowering loads from CONST[0] to LOAD instructions, in 
> > the same way that is used for SSBOs. This has the additional advantage 
> > that we could then use the same code paths to support std430 packing for 
> > UBOs (via a GL extension, I suppose).
> > 
> > 
> >> The main goal of this series is to reduce the cpu overhead caused by
> >> _mesa_propagate_uniforms_to_driver_storage(). The function is slow 
> >> since we
> >> need to deal with strides etc because we are copying packed data to an
> >> unpacked destination. It's also copying data that we have only just 
> >> copied
> >> to another duplicate uniform storage that gets created by the linker.
> > 
> > The duplicate copy is necessary unless we start using the same constant 
> > buffer for all shaders in a program, which actually might not be such a 
> > bad idea.
> > 
> > Cheers,
> > Nicolai
> > 
> > 
> >> This series fixes both of these issues and also reduces the size of the
> >> drivers const buffer as a side effect.
> >>
> >> Patches 2-3 just rework the way we use the param list.
> >>
> >> The remaining add the packing support enabled by the
> >> PackedDriverUniformStorage const.
> >>
> >> You can get the series in my test4 branch [1].
> >>
> >> [1] https://github.com/tarceri/Mesa.git
> >>
> >> ___
> >> mesa-dev mailing list
> >> mesa-dev@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >>
> > 
> > 
> 
> 
> -- 
> Lerne, wie die Welt wirklich ist,
> Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] RFC uniform packing for gallium V2

2017-06-25 Thread Timothy Arceri

On Sun, Jun 25, 2017, at 12:18 AM, Nicolai Hähnle wrote:
> On 25.06.2017 03:31, Timothy Arceri wrote:
> > There are still a handful of piglit tests failing and I'm yet to test
> > that there are no regressions in the non-packed path, but I'd really
> > like some feedback on the approach as Dave has flagged it as a possible
> > controversial tgsi change.
> > 
> > In order to avoid complicated swizzling and array element adjustments
> > when dealing with arrays, this series simply adjusts the constant buffer
> > index to point to the right location. There are some small changes to
> > deal with indirect indexing but these also remain very simple and easy
> > to follow.
> >
> > Dave has raised concerns that others might not like this as it doesn't
> > strictly follow the tgsi approach that everything is a vec4. I would
> > argue however that this is by far the simplest approach.
> > Doing this with swizzles and array adjustments is going to require
> > something like lower_packed_varyings.cpp which would be unnecessarily
> > complicated IMO, I started off down that track and soon changed
> > direction.
> 
> Yeah, I don't like the approach either. All register files are by vec4 
> in TGSI, and changing that feels pretty wrong.

Sorry, can I ask if you looked at the patches? The change is fairly
limited, we just change the index so that it points to the exact buffer
location rather than something that needs to be multiplied by 4 later
on. 

> 
> I would suggest lowering loads from CONST[0] to LOAD instructions, in 
> the same way that is used for SSBOs. This has the additional advantage 
> that we could then use the same code paths to support std430 packing for 
> UBOs (via a GL extension, I suppose).

It's not really that simple. For example, if you have an array you could
end up having to create something like CONST[idx - 2].yz to get what you
want, you also need to handle structs etc. If we must go this way I'd
say this task will move to the very end of my TODO list :P

> 
> 
> > The main goal of this series is to reduce the CPU overhead caused by
> > _mesa_propagate_uniforms_to_driver_storage(). The function is slow since we
> > need to deal with strides etc because we are copying packed data to an
> > unpacked destination. It's also copying data that we have only just copied
> > to another duplicate uniform storage that gets created by the linker.
> 
> The duplicate copy is necessary unless we start using the same constant 
> buffer for all shaders in a program, which actually might not be such a 
> bad idea.

I think we are talking about different things. I'm not talking about the
copies in the driver, I'm talking about [1] followed by [2]. There is no
need to do both. 

[1]
https://cgit.freedesktop.org/mesa/mesa/tree/src/mesa/main/uniform_query.cpp#n1081
[2]
https://cgit.freedesktop.org/mesa/mesa/tree/src/mesa/main/uniform_query.cpp#n1107

> 
> Cheers,
> Nicolai
> 
> 
> > This series fixes both of these issues and also reduces the size of the
> > drivers const buffer as a side effect.
> > 
> > Patches 2-3 just rework the way we use the param list.
> > 
> > The remaining add the packing support enabled by the
> > PackedDriverUniformStorage const.
> > 
> > You can get the series in my test4 branch [1].
> > 
> > [1] https://github.com/tarceri/Mesa.git
> > 
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> > 
> 
> 
> -- 
> Lerne, wie die Welt wirklich ist,
> Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] RFC uniform packing for gallium V2

2017-06-25 Thread Nicolai Hähnle


On 25.06.2017 09:18, Nicolai Hähnle wrote:

On 25.06.2017 03:31, Timothy Arceri wrote:

There are still a handful of piglit tests failing and I'm yet to test
that there are no regressions in the non-packed path, but I'd really
like some feedback on the approach as Dave has flagged it as a possible
controversial tgsi change.

In order to avoid complicated swizzling and array element adjustments
when dealing with arrays, this series simply adjusts the constant buffer
index to point to the right location. There are some small changes to
deal with indirect indexing but these also remain very simple and easy
to follow.

Dave has raised concerns that others might not like this as it doesn't
strictly follow the tgsi approach that everything is a vec4. I would
argue however that this is by far the simplest approach.
Doing this with swizzles and array adjustments is going to require
something like lower_packed_varyings.cpp which would be unnecessarily
complicated IMO, I started off down that track and soon changed
direction.


Yeah, I don't like the approach either. All register files are by vec4 
in TGSI, and changing that feels pretty wrong.


Actually, just thinking about the fact that there are more sources of 
TGSI than just GLSL means that this is a definite no-go. Think of nine 
and some of the blitter shaders, for example. Just use LOAD, please.


Cheers,
Nicolai



I would suggest lowering loads from CONST[0] to LOAD instructions, in 
the same way that is used for SSBOs. This has the additional advantage 
that we could then use the same code paths to support std430 packing for 
UBOs (via a GL extension, I suppose).




The main goal of this series is to reduce the CPU overhead caused by
_mesa_propagate_uniforms_to_driver_storage(). The function is slow 
since we

need to deal with strides etc because we are copying packed data to an
unpacked destination. It's also copying data that we have only just 
copied

to another duplicate uniform storage that gets created by the linker.


The duplicate copy is necessary unless we start using the same constant 
buffer for all shaders in a program, which actually might not be such a 
bad idea.


Cheers,
Nicolai



This series fixes both of these issues and also reduces the size of the
drivers const buffer as a side effect.

Patches 2-3 just rework the way we use the param list.

The remaining add the packing support enabled by the
PackedDriverUniformStorage const.

You can get the series in my test4 branch [1].

[1] https://github.com/tarceri/Mesa.git

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev







--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radeonsi: enable LLVM sisched for Unigine Superposition

2017-06-25 Thread Nicolai Hähnle


On 24.06.2017 13:35, Marek Olšák wrote:



On Jun 24, 2017 12:01 PM, "Nicolai Hähnle" > wrote:


On 22.06.2017 17:10, Marek Olšák wrote:

From: Marek Olšák mailto:marek.ol...@amd.com>>

+2.3% better score on Fiji. It might be better without HBM.


Can this be a drirc option?


The answer is the same as for: Can sisched be a flag defined in 
p_defines.h, drirc, and plumbed by st/dri? Nobody has ever put 
driver-specific stuff into p_defines.h.


Obviously the answer to these is no. However, some interface that 
provides direct generic access to drirc could be plumbed through.


Nicolai




Marek





---
   src/gallium/drivers/radeonsi/si_pipe.c | 7 +++
   1 file changed, 7 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c
b/src/gallium/drivers/radeonsi/si_pipe.c
index ff787ad..4088849 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -935,20 +935,27 @@ struct pipe_screen
*radeonsi_screen_create(struct radeon_winsys *ws,
 si_init_screen_state_functions(sscreen);
 if (!r600_common_screen_init(&sscreen->b, ws, flags) ||
 !si_init_gs_info(sscreen) ||
 !si_init_shader_cache(sscreen)) {
 FREE(sscreen);
 return NULL;
 }
   + /* Enable sisched where it helps. */
+   char process[128];
+   if (os_get_process_name(process, sizeof(process)) &&
+   /* Unigine Superposition */
+   !strcmp(process, "superposition"))
+   sscreen->b.debug_flags |= DBG_SI_SCHED;
+
 /* Only enable as many threads as we have target
machines, but at most
  * the number of CPUs - 1 if there is more than one.
  */
 num_threads = sysconf(_SC_NPROCESSORS_ONLN);
 num_threads = MAX2(1, num_threads - 1);
 num_compiler_threads = MIN2(num_threads,
ARRAY_SIZE(sscreen->tm));
 num_compiler_threads_lowprio =
 MIN2(num_threads,
ARRAY_SIZE(sscreen->tm_low_priority));
 if (!util_queue_init(&sscreen->shader_compiler_queue,
"si_shader",



-- 
Lerne, wie die Welt wirklich ist,

Aber vergiss niemals, wie sie sein sollte.





--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 1/6] mesa/st: glsl_to_tgsi move some helper classes to extra files

2017-06-25 Thread Gert Wollny

To prepare the implementation of a temp register lifetime tracker
some of the classes are moved into separate header/implementation
files to make them accessible from other files.

Specifically these are:

class st_src_reg;
class st_dst_reg;
class glsl_to_tgsi_instruction;
struct rename_reg_pair;

int swizzle_for_type(const glsl_type *type, int component);

  as inline:

bool is_resource_instruction(unsigned opcode);
unsigned num_inst_dst_regs(const glsl_to_tgsi_instruction *op);
unsigned num_inst_src_regs(const glsl_to_tgsi_instruction *op);
---
 src/mesa/Makefile.sources  |   2 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 288 +
 src/mesa/state_tracker/st_glsl_to_tgsi_private.cpp | 205 +++
 src/mesa/state_tracker/st_glsl_to_tgsi_private.h   | 164 
 4 files changed, 374 insertions(+), 285 deletions(-)
 create mode 100644 src/mesa/state_tracker/st_glsl_to_tgsi_private.cpp
 create mode 100644 src/mesa/state_tracker/st_glsl_to_tgsi_private.h

diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index b80882fb8d..21f9167bda 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -507,6 +507,8 @@ STATETRACKER_FILES = \
state_tracker/st_glsl_to_nir.cpp \
state_tracker/st_glsl_to_tgsi.cpp \
state_tracker/st_glsl_to_tgsi.h \
+   state_tracker/st_glsl_to_tgsi_private.cpp \
+   state_tracker/st_glsl_to_tgsi_private.h \
state_tracker/st_glsl_types.cpp \
state_tracker/st_glsl_types.h \
state_tracker/st_manager.c \
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 7852941acd..528fc4cc64 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -55,6 +55,7 @@
 #include "st_glsl_types.h"
 #include "st_nir.h"
 #include "st_shader_cache.h"
+#include "st_glsl_to_tgsi_private.h"
 
 #include "util/hash_table.h"
 #include 
@@ -65,251 +66,7 @@
 
 #define MAX_GLSL_TEXTURE_OFFSET 4
 
-class st_src_reg;
-class st_dst_reg;
-
-static int swizzle_for_size(int size);
-
-static int swizzle_for_type(const glsl_type *type, int component = 0)
-{
-   unsigned num_elements = 4;
-
-   if (type) {
-  type = type->without_array();
-  if (type->is_scalar() || type->is_vector() || type->is_matrix())
- num_elements = type->vector_elements;
-   }
-
-   int swizzle = swizzle_for_size(num_elements);
-   assert(num_elements + component <= 4);
-
-   swizzle += component * MAKE_SWIZZLE4(1, 1, 1, 1);
-   return swizzle;
-}
-
-/**
- * This struct is a corresponding struct to TGSI ureg_src.
- */
-class st_src_reg {
-public:
-   st_src_reg(gl_register_file file, int index, const glsl_type *type,
-  int component = 0, unsigned array_id = 0)
-   {
-  assert(file != PROGRAM_ARRAY || array_id != 0);
-  this->file = file;
-  this->index = index;
-  this->swizzle = swizzle_for_type(type, component);
-  this->negate = 0;
-  this->abs = 0;
-  this->index2D = 0;
-  this->type = type ? type->base_type : GLSL_TYPE_ERROR;
-  this->reladdr = NULL;
-  this->reladdr2 = NULL;
-  this->has_index2 = false;
-  this->double_reg2 = false;
-  this->array_id = array_id;
-  this->is_double_vertex_input = false;
-   }
-
-   st_src_reg(gl_register_file file, int index, enum glsl_base_type type)
-   {
-  assert(file != PROGRAM_ARRAY); /* need array_id > 0 */
-  this->type = type;
-  this->file = file;
-  this->index = index;
-  this->index2D = 0;
-  this->swizzle = SWIZZLE_XYZW;
-  this->negate = 0;
-  this->abs = 0;
-  this->reladdr = NULL;
-  this->reladdr2 = NULL;
-  this->has_index2 = false;
-  this->double_reg2 = false;
-  this->array_id = 0;
-  this->is_double_vertex_input = false;
-   }
-
-   st_src_reg(gl_register_file file, int index, enum glsl_base_type type, int 
index2D)
-   {
-  assert(file != PROGRAM_ARRAY); /* need array_id > 0 */
-  this->type = type;
-  this->file = file;
-  this->index = index;
-  this->index2D = index2D;
-  this->swizzle = SWIZZLE_XYZW;
-  this->negate = 0;
-  this->abs = 0;
-  this->reladdr = NULL;
-  this->reladdr2 = NULL;
-  this->has_index2 = false;
-  this->double_reg2 = false;
-  this->array_id = 0;
-  this->is_double_vertex_input = false;
-   }
-
-   st_src_reg()
-   {
-  this->type = GLSL_TYPE_ERROR;
-  this->file = PROGRAM_UNDEFINED;
-  this->index = 0;
-  this->index2D = 0;
-  this->swizzle = 0;
-  this->negate = 0;
-  this->abs = 0;
-  this->reladdr = NULL;
-  this->reladdr2 = NULL;
-  this->has_index2 = false;
-  this->double_reg2 = false;
-  this->array_id = 0;
-  this->is_double_vertex_input = false;
-   }
-
-   explicit st_src_reg(st_dst_reg reg);
-
-   int32_t index; /**< temporary index

[Mesa-dev] [PATCH v5 6/6] mesa/st: glsl_to_tgsi: tie in new temporary register merge approach

2017-06-25 Thread Gert Wollny

This patch ties in the new temporary register lifetime estimation and
rename mapping evaluation. In order to enable it, the environment
variable MESA_GLSL_TO_TGSI_NEW_MERGE must be set.

Performance numbers comparing the current and the new implementation
were measured by running the shader-db in one thread; numbers are in
% of total run.

---
 old new(qsort)   new(std::sort)

 valgrind -
merge0.21   0.20  0.13
estimate lifetime0.03   0.05  0.05
evaluate mapping  (incl=0.16)   0.12  0.06
apply mapping0.02   0.02  0.02

---   perf (approximate because of statistic sampling) ---
merge0.24   0.20  0.14
estimate lifetime0.03   0.05  0.05
evaluate mapping  (incl=0.16)   0.10  0.04
apply mapping0.05   0.05  0.05
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 29 ++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 528fc4cc64..d4abee9d02 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -55,7 +55,7 @@
 #include "st_glsl_types.h"
 #include "st_nir.h"
 #include "st_shader_cache.h"
-#include "st_glsl_to_tgsi_private.h"
+#include "st_glsl_to_tgsi_temprename.h"
 
 #include "util/hash_table.h"
 #include 
@@ -322,6 +322,7 @@ public:
 
void merge_two_dsts(void);
void merge_registers(void);
+   void merge_registers_alternative(void);
void renumber_registers(void);
 
void emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
@@ -5139,6 +5140,23 @@ glsl_to_tgsi_visitor::merge_two_dsts(void)
}
 }
 
+void
+glsl_to_tgsi_visitor::merge_registers_alternative(void)
+{
+   struct rename_reg_pair *renames =
+ rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
+   struct lifetime *lifetimes =
+ rzalloc_array(mem_ctx, struct lifetime, this->next_temp);
+
+   get_temp_registers_required_lifetimes(mem_ctx, &this->instructions,
+ this->next_temp, lifetimes);
+   get_temp_registers_remapping(mem_ctx, this->next_temp, lifetimes, renames);
+   rename_temp_registers(renames);
+
+   ralloc_free(lifetimes);
+   ralloc_free(renames);
+}
+
 /* Merges temporary registers together where possible to reduce the number of
  * registers needed to run a program.
  *
@@ -6603,8 +6621,13 @@ get_mesa_program_tgsi(struct gl_context *ctx,
while (v->eliminate_dead_code());
 
v->merge_two_dsts();
-   if (!skip_merge_registers)
-  v->merge_registers();
+   if (!skip_merge_registers) {
+  if (getenv("MESA_GLSL_TO_TGSI_NEW_MERGE") != NULL)
+ v->merge_registers_alternative();
+  else
+ v->merge_registers();
+   }
+
v->renumber_registers();
 
/* Write the END instruction. */
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 5/6] mesa/st: glsl_to_tgsi: Add test set for evaluation of rename mapping

2017-06-25 Thread Gert Wollny

The patch adds tests for the register rename mapping evaluation.
---
 .../tests/test_glsl_to_tgsi_lifetime.cpp   | 94 ++
 1 file changed, 94 insertions(+)

diff --git a/src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp 
b/src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp
index 5f3378637a..f53b5c23a1 100644
--- a/src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp
+++ b/src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp
@@ -89,6 +89,13 @@ protected:
void check(const vector& result, const expectation& e);
 };
 
+/* With this test class the renaming mapping estimation is tested */
+class RegisterRemapping : public MesaTestWithMemCtx {
+protected:
+   void run(const vector& lt, const vector& expect);
+};
+
+
 /* This test class checks that the life time covers at least
  * in the expected range. It is used for cases where we know that
  * a the implementation could be improved on estimating the minimal
@@ -466,6 +473,29 @@ TEST_F(LifetimeEvaluatorExactTest, 
LoopWithReadWriteInSwitchDifferentCase)
run (code, expectation({{-1,-1},{0, 9}}));
 }
 
+/* Here we read and write to the same temp, but it is conditional,
+ * so the lifetime must start with the first read */
+TEST_F(LifetimeEvaluatorExactTest, WriteConditionallyFromSelf)
+{
+   const vector code = {
+  {TGSI_OPCODE_USEQ, {0}, {in0, in1}, {}},
+  {TGSI_OPCODE_UCMP, {1}, {0, in1, 1},  {}},
+  {TGSI_OPCODE_UCMP, {1}, {0, in1, 1},  {}},
+  {TGSI_OPCODE_UCMP, {1}, {0, in1, 1},  {}},
+  {TGSI_OPCODE_UCMP, {1}, {0, in1, 1},  {}},
+  {TGSI_OPCODE_FSLT, {2}, {1, in1},  {}},
+  {TGSI_OPCODE_UIF, {2},  {},  {}},
+  {TGSI_OPCODE_MOV, {3},  {in1}, {}},
+  {TGSI_OPCODE_ELSE},
+  {TGSI_OPCODE_MOV, {4}, {in1}, {}},
+  {TGSI_OPCODE_MOV, {4}, {4}, {}},
+  {TGSI_OPCODE_MOV, {3}, {4}, {}},
+  {TGSI_OPCODE_ENDIF},
+  {TGSI_OPCODE_MOV,{out1}, {3}, {}},
+  {TGSI_OPCODE_END}
+   };
+   run (code, expectation({{-1,-1},{1, 5}, {5, 6}, {7, 13}, {9, 11}}));
+}
 
 TEST_F(LifetimeEvaluatorExactTest, LoopRWInSwitchCaseLastCaseWithoutBreak)
 {
@@ -831,6 +861,47 @@ TEST_F(LifetimeEvaluatorExactTest, 
NestedLoopWithWriteAfterBreak)
run (code, expectation({{-1,-1},{0, 8}}));
 }
 
+TEST_F(RegisterRemapping, RegisterRemapping1)
+{
+   vector lt({{-1,-1},
+{0, 1},
+{0, 2},
+{1, 2},
+{2, 10},
+{3, 5},
+{5, 10}
+   });
+
+   vector expect({0, 1, 2, 1, 1, 2, 2});
+   run(lt, expect);
+}
+
+
+TEST_F(RegisterRemapping, RegisterRemapping2)
+{
+   vector lt({{-1,-1},
+{0, 1},
+{0, 2},
+{3, 3},
+{4, 4},
+   });
+   vector expect({0, 1, 2, 1, 1});
+   run(lt, expect);
+}
+
+TEST_F(RegisterRemapping, RegisterRemappingMergeAll)
+{
+   vector lt({{-1,-1},
+{0, 1},
+{1, 2},
+{2, 3},
+{3, 4},
+   });
+   vector expect({0, 1, 1, 1, 1});
+   run(lt, expect);
+}
+
+
 /* Implementation of helper and test classes */
 
 MockShader::~MockShader()
@@ -974,3 +1045,26 @@ void LifetimeEvaluatorAtLeastTest::check( const 
vector& lifetimes,
   EXPECT_GE(lifetimes[i].end, e[i][1]);
}
 }
+
+void RegisterRemapping::run(const vector& lt,
+const vector& expect)
+{
+   rename_reg_pair proto{false, 0};
+   vector result(lt.size(), proto);
+
+   get_temp_registers_remapping(mem_ctx, lt.size(), &lt[0], &result[0]);
+
+   vector remap(lt.size());
+   for (unsigned i = 0; i < lt.size(); ++i) {
+  remap[i] = result[i].valid ? result[i].new_reg : i;
+   }
+
+   std::transform(remap.begin(), remap.end(), result.begin(), remap.begin(),
+  [](int x, const rename_reg_pair& rn) {
+ return rn.valid ? rn.new_reg : x;
+  });
+
+   for(unsigned  i = 1; i < remap.size(); ++i) {
+  EXPECT_EQ(remap[i], expect[i]);
+   }
+}
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 3/6] mesa/st: glsl_to_tgsi: add tests for the new temporary lifetime tracker

2017-06-25 Thread Gert Wollny

This patch adds a set of unit tests for the new lifetime tracker.
---
 configure.ac   |   1 +
 src/mesa/Makefile.am   |   2 +-
 src/mesa/state_tracker/tests/Makefile.am   |  36 +
 .../tests/test_glsl_to_tgsi_lifetime.cpp   | 976 +
 4 files changed, 1014 insertions(+), 1 deletion(-)
 create mode 100644 src/mesa/state_tracker/tests/Makefile.am
 create mode 100644 src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp

diff --git a/configure.ac b/configure.ac
index da7b2f8f81..5279b231ed 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2839,6 +2839,7 @@ AC_CONFIG_FILES([Makefile
src/mesa/drivers/osmesa/osmesa.pc
src/mesa/drivers/x11/Makefile
src/mesa/main/tests/Makefile
+   src/mesa/state_tracker/tests/Makefile
src/util/Makefile
src/util/tests/hash_table/Makefile
src/vulkan/Makefile])
diff --git a/src/mesa/Makefile.am b/src/mesa/Makefile.am
index 53f311d2a9..a88a94165d 100644
--- a/src/mesa/Makefile.am
+++ b/src/mesa/Makefile.am
@@ -19,7 +19,7 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
 
-SUBDIRS = . main/tests
+SUBDIRS = . main/tests state_tracker/tests
 
 if HAVE_XLIB_GLX
 SUBDIRS += drivers/x11
diff --git a/src/mesa/state_tracker/tests/Makefile.am 
b/src/mesa/state_tracker/tests/Makefile.am
new file mode 100644
index 00..fb64cf9dc2
--- /dev/null
+++ b/src/mesa/state_tracker/tests/Makefile.am
@@ -0,0 +1,36 @@
+AM_CFLAGS = \
+   $(PTHREAD_CFLAGS)
+
+AM_CXXFLAGS = \
+   $(LLVM_CXXFLAGS)
+
+AM_CPPFLAGS = \
+   -I$(top_srcdir)/src/gtest/include \
+   -I$(top_srcdir)/src \
+   -I$(top_srcdir)/src/mapi \
+   -I$(top_builddir)/src/mesa \
+   -I$(top_srcdir)/src/mesa \
+   -I$(top_srcdir)/include \
+   -I$(top_srcdir)/src/gallium/include \
+   -I$(top_srcdir)/src/gallium/auxiliary \
+   $(DEFINES)
+
+TESTS = st-renumerate-test
+check_PROGRAMS = st-renumerate-test
+
+st_renumerate_test_SOURCES =   \
+   test_glsl_to_tgsi_lifetime.cpp
+
+st_renumerate_test_LDFLAGS = \
+   $(LLVM_LDFLAGS)
+
+st_renumerate_test_LDADD = \
+   $(top_builddir)/src/mesa/libmesagallium.la \
+   $(top_builddir)/src/mapi/shared-glapi/libglapi.la \
+   $(top_builddir)/src/gallium/auxiliary/libgallium.la \
+   $(top_builddir)/src/util/libmesautil.la \
+   $(top_builddir)/src/gtest/libgtest.la \
+   $(GALLIUM_COMMON_LIB_DEPS) \
+   $(LLVM_LIBS) \
+   $(PTHREAD_LIBS) \
+   $(DLOPEN_LIBS)
diff --git a/src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp 
b/src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp
new file mode 100644
index 00..5f3378637a
--- /dev/null
+++ b/src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp
@@ -0,0 +1,976 @@
+/*
+ * Copyright © 2017 Gert Wollny
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+using std::vector;
+using std::pair;
+
+/* A line to describe a TGSI instruction for building mock shaders */
+struct MockCodeline {
+   MockCodeline(unsigned  _op): op(_op) {}
+   MockCodeline(unsigned _op, const vector& _dst, const vector& 
_src, const vector&_to):
+  op(_op), dst(_dst), src(_src), tex_offsets(_to){}
+   unsigned op;
+   vector dst;
+   vector src;
+   vector tex_offsets;
+};
+
+const int in0 = 0;
+const int in1 = -1;
+const int in2 = -2;
+
+const int out0 = 0;
+const int out1 = -1;
+
+class MockShader {
+public:
+   MockShader(const vector& source);
+   ~MockShader();
+
+   void free();
+
+   exec_list* get_program();
+   int get_num_temps();
+private:
+   st_src_reg create_src_register(int src_idx);
+   st_dst_reg create_dst_register(int dst_idx);
+   exec_list* program;
+

[Mesa-dev] [PATCH v5 4/6] mesa/st: glsl_to_tgsi: add register renamame mapping evaluator

2017-06-25 Thread Gert Wollny

The remapping evaluator first sorts the temporary registers ascending
based on their first life time instruction, and then uses a binary search
to find merge candidates.
For the initial sorting it uses std::sort because qsort is quite slow in
comparison. By removing the define USE_STL_SORT in
  src/mesa/state_tracker/st_glsl_to_tgsi_temprename.cpp
one can enable the alternative code path that uses qsort.
---
 .../state_tracker/st_glsl_to_tgsi_temprename.cpp   | 124 +
 .../state_tracker/st_glsl_to_tgsi_temprename.h |   3 +
 2 files changed, 127 insertions(+)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.cpp
index 729d77130e..d52d912951 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.cpp
@@ -27,6 +27,12 @@
 #include 
 #include 
 
+/* std::sort is significantly faster than qsort */
+#define USE_STL_SORT
+#ifdef USE_STL_SORT
+#include 
+#endif
+
 /* Without c++11 define the nullptr for forward-compatibility
  * and better readibility */
 #if __cplusplus < 201103L
@@ -660,3 +666,121 @@ prog_scope_storage::create(prog_scope *p, e_scope_type 
type, int id,
storage[current_slot] = prog_scope(p, type, id, lvl, s_begin);
return &storage[current_slot++];
 }
+
+/* helper class for sorting and searching the registers based
+ * on life times. */
+struct access_record {
+   int begin;
+   int end;
+   int reg;
+   bool erase;
+
+   bool operator < (const access_record& rhs) {
+  return begin < rhs.begin;
+   }
+};
+
+/* Find the next register between [start, end) that has a life time starting
+ * at or after bound by using a binary search.
+ * start points at the beginning of the search range,
+ * end points at the element past the end of the search range, and
+ * the array comprising [start, end) must be sorted in ascending order.
+ */
+access_record*
+find_next_rename(access_record* start, access_record* end, int bound)
+{
+   int delta = (end - start);
+
+   while (delta > 0)  {
+
+  int half = delta >> 1;
+  access_record* middle = start + half;
+
+  if (bound <= middle->begin) {
+ delta = half;
+  } else {
+ start = middle;
+ ++start;
+ delta -= half + 1;
+  }
+   }
+
+   return start;
+}
+
+#ifndef USE_STL_SORT
+int access_record_compare (const void *a, const void *b) {
+   const access_record *aa = static_cast<const access_record *>(a);
+   const access_record *bb = static_cast<const access_record *>(b);
+   return aa->begin < bb->begin ? -1 : (aa->begin > bb->begin ? 1 : 0);
+}
+#endif
+
+/* This functions evaluates the register merges by using an O(n log n)
+ * algorithm to find suitable merge candidates. */
+void get_temp_registers_remapping(void *mem_ctx, int ntemps,
+  const struct lifetime* lifetimes,
+  struct rename_reg_pair *result)
+{
+   access_record *m = ralloc_array(mem_ctx, access_record, ntemps - 1);
+
+   for (int i = 1; i < ntemps; ++i) {
+  m[i-1].begin =  lifetimes[i].begin;
+  m[i-1].end = lifetimes[i].end;
+  m[i-1].reg = i;
+  m[i-1].erase = false;
+   }
+
+#ifdef USE_STL_SORT
+   std::sort(m, m + ntemps - 1);
+#else
+   std::qsort(m, ntemps - 1, sizeof(access_record), access_record_compare);
+#endif
+
+   access_record *trgt = m;
+   access_record *mend = m + ntemps - 1;
+   access_record *first_erase = mend;
+   access_record *search_start = trgt + 1;
+
+   while (trgt != mend) {
+
+  access_record *src = find_next_rename(search_start, mend, trgt->end);
+
+  if (src !=  mend) {
+ result[src->reg].new_reg = trgt->reg;
+ result[src->reg].valid = true;
+ trgt->end = src->end;
+
+ /* Since we only search forward, don't remove the renamed
+  * register just now, only mark it. */
+ src->erase = true;
+
+ if (first_erase == mend)
+first_erase = src;
+
+ search_start = src + 1;
+  } else {
+ /* Moving to the next target register it is time to remove
+  * the already merged registers from the search range */
+ if (first_erase != mend) {
+
+access_record *out = first_erase;
+access_record *in_start = first_erase + 1;
+
+while (in_start != mend) {
+
+   if (!in_start->erase)
+  *out++ = *in_start;
+
+   ++in_start;
+}
+mend = out;
+first_erase = mend;
+ }
+
+ ++trgt;
+ search_start = trgt + 1;
+  }
+   }
+   ralloc_free(m);
+}
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.h 
b/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.h
index a4124b4659..f6a89ed0d3 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.h
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.h
@@ -31,3 +31,6 @@ struct lifetime {
 void
 get_temp_registers_required_lifetimes(void *mem_ctx, exec_l

[Mesa-dev] [PATCH v5 0/6] mesa/st: glsl_to_tgsi: improved temp-reg lifetime estimation

2017-06-25 Thread Gert Wollny

Dear all,

this is a minor update to the patch set. Changes are:

- correct formatting following Emil's suggestions
- remove un-needed libraries for the tests
- rebase to master (e25950808f4eee)

I didn't change anything to the code logic and I'm using mesa with the
patch applied for a few days now without noting any regressions.

As noted before, I don't have write access to mesa-git, so I'll need someone
who sponsors this patch. 

Many thanks for any additional comments,
Gert 


Gert Wollny (6):
  mesa/st: glsl_to_tgsi move some helper classes to  extra  files
  mesa/st: glsl_to_tgsi: implement new temporary  register lifetime
tracker
  mesa/st: glsl_to_tgsi: add tests for the new temporary lifetime
tracker
  mesa/st: glsl_to_tgsi: add register renamame mapping  evaluator
  mesa/st: glsl_to_tgsi: Add test set for evaluation of  rename mapping
  mesa/st: glsl_to_tgsi: tie in new temporary register  merge approach

 configure.ac   |1 +
 src/mesa/Makefile.am   |2 +-
 src/mesa/Makefile.sources  |4 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  315 +-
 src/mesa/state_tracker/st_glsl_to_tgsi_private.cpp |  207 
 src/mesa/state_tracker/st_glsl_to_tgsi_private.h   |  165 +++
 .../state_tracker/st_glsl_to_tgsi_temprename.cpp   |  786 ++
 .../state_tracker/st_glsl_to_tgsi_temprename.h |   36 +
 src/mesa/state_tracker/tests/Makefile.am   |   37 +
 .../tests/test_glsl_to_tgsi_lifetime.cpp   | 1070 
 10 files changed, 2335 insertions(+), 288 deletions(-)
 create mode 100644 src/mesa/state_tracker/st_glsl_to_tgsi_private.cpp
 create mode 100644 src/mesa/state_tracker/st_glsl_to_tgsi_private.h
 create mode 100644 src/mesa/state_tracker/st_glsl_to_tgsi_temprename.cpp
 create mode 100644 src/mesa/state_tracker/st_glsl_to_tgsi_temprename.h
 create mode 100644 src/mesa/state_tracker/tests/Makefile.am
 create mode 100644 src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp

-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v5 2/6] mesa/st: glsl_to_tgsi: implement new temporary register lifetime tracker

2017-06-25 Thread Gert Wollny

This patch adds a class for tracking the life times of temporary registers
in the glsl to tgsi translation. The algorithm runs in three steps:
First, in order to minimize the number of needed memory allocations the
program is scanned to evaluate the number of scopes.
Then, the program is scanned a second time to record the important register
access time points: first and last reads and writes and their link to the
execution scope (loop, if/else branch, switch case).
In the third step, for each register the actual minimal life time is
evaluated.
---
 src/mesa/Makefile.sources  |   2 +
 .../state_tracker/st_glsl_to_tgsi_temprename.cpp   | 662 +
 .../state_tracker/st_glsl_to_tgsi_temprename.h |  33 +
 3 files changed, 697 insertions(+)
 create mode 100644 src/mesa/state_tracker/st_glsl_to_tgsi_temprename.cpp
 create mode 100644 src/mesa/state_tracker/st_glsl_to_tgsi_temprename.h

diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 21f9167bda..2359ec3c7d 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -509,6 +509,8 @@ STATETRACKER_FILES = \
state_tracker/st_glsl_to_tgsi.h \
state_tracker/st_glsl_to_tgsi_private.cpp \
state_tracker/st_glsl_to_tgsi_private.h \
+   state_tracker/st_glsl_to_tgsi_temprename.cpp \
+   state_tracker/st_glsl_to_tgsi_temprename.h \
state_tracker/st_glsl_types.cpp \
state_tracker/st_glsl_types.h \
state_tracker/st_manager.c \
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.cpp
new file mode 100644
index 00..729d77130e
--- /dev/null
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.cpp
@@ -0,0 +1,662 @@
+/*
+ * Copyright © 2017 Gert Wollny
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "st_glsl_to_tgsi_temprename.h"
+#include 
+#include 
+#include 
+
+/* Without c++11 define the nullptr for forward-compatibility
+ * and better readability */
+#if __cplusplus < 201103L
+#define nullptr 0
+#endif
+
+using std::numeric_limits;
+
+enum e_scope_type {
+   sct_outer,
+   sct_loop,
+   sct_if,
+   sct_else,
+   sct_switch,
+   sct_switch_case,
+   sct_switch_default,
+   sct_unknown
+};
+
+enum e_acc_type {
+   acc_read,
+   acc_write,
+   acc_write_cond_from_self
+};
+
+class prog_scope {
+
+public:
+   prog_scope(prog_scope *parent, e_scope_type type, int id, int depth,
+  int begin);
+
+   e_scope_type type() const;
+   prog_scope *parent() const;
+   int nesting_depth() const;
+   int id() const;
+   int end() const;
+   int begin() const;
+   int loop_continue_line() const;
+
+   const prog_scope *in_ifelse_scope() const;
+   const prog_scope *in_switchcase_scope() const;
+   const prog_scope *innermost_loop() const;
+   const prog_scope *outermost_loop() const;
+
+   bool in_loop() const;
+   bool is_conditional() const;
+   bool break_is_for_switchcase() const;
+   bool contains(const prog_scope& other) const;
+
+   void set_end(int end);
+   void set_previous_case_scope(prog_scope *prev);
+   void set_continue_line(int line);
+
+private:
+   e_scope_type scope_type;
+   int scope_id;
+   int scope_nesting_depth;
+   int scope_begin;
+   int scope_end;
+   int loop_cont_line;
+   prog_scope *previous_case_scope;
+   prog_scope *parent_scope;
+};
+
+class temp_access {
+public:
+   temp_access();
+   void record(int line, e_acc_type rw, prog_scope *scope);
+   lifetime get_required_lifetime();
+private:
+   prog_scope *last_read_scope;
+   prog_scope *first_read_scope;
+   prog_scope *first_write_scope;
+   int first_dominant_write;
+   int last_read;
+   int last_write;
+   int first_read;
+   bool keep_for_full_loop;
+};
+
+/* Some storage class to encapsulate the prog_scope (de-)allocations */
+class prog_scope_storage {
+public:
+   prog_scope_storage(void *mem_

Re: [Mesa-dev] RFC uniform packing for gallium V2

2017-06-25 Thread Nicolai Hähnle


On 25.06.2017 03:31, Timothy Arceri wrote:

There are still a handful of piglit tests failing and I'm yet to test
that there are no regressions in the non-packed path, but I'd really
like some feedback on the approach as Dave has flagged it as a possibly
controversial tgsi change.

In order to avoid complicated swizzling and array element adjustments
when dealing with arrays, this series simply adjusts the constant buffer
index to point to the right location. There are some small changes to
deal with indirect indexing but these also remain very simple and easy
to follow.

Dave has raised concerns that others might not like this as it doesn't
strictly follow the tgsi approach that everything is a vec4. I would
argue however that this is by far the simplest approach.
Doing this with swizzles and array adjustments is going to require
something like lower_packed_varyings.cpp which would be unnecessarily
complicated IMO, I started off down that track and soon changed
direction.


Yeah, I don't like the approach either. All register files are by vec4 
in TGSI, and changing that feels pretty wrong.


I would suggest lowering loads from CONST[0] to LOAD instructions, in 
the same way that is used for SSBOs. This has the additional advantage 
that we could then use the same code paths to support std430 packing for 
UBOs (via a GL extension, I suppose).




The main goal of this series is to reduce the cpu overhead caused by
_mesa_propagate_uniforms_to_driver_storage(). The function is slow since we
need to deal with strides etc because we are copying packed data to an
unpacked destination. It's also copying data that we have only just copied
to another duplicate uniform storage that gets created by the linker.


The duplicate copy is necessary unless we start using the same constant 
buffer for all shaders in a program, which actually might not be such a 
bad idea.


Cheers,
Nicolai



This series fixes both of these issues and also reduces the size of the
driver's const buffer as a side effect.

Patches 2-3 just rework the way we use the param list.

The remaining patches add the packing support enabled by the
PackedDriverUniformStorage const.

You can get the series in my test4 branch [1].

[1] https://github.com/tarceri/Mesa.git

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

62 matches

Mail list logo