[Mesa-dev] [PATCH] egl: EXT_pixel_format_float plumbing

2017-11-08 Thread Tapani Pälli
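This patch adds support for the new config attribute introduced by
EGL_EXT_pixel_format_float, the color component type
(EGL_COLOR_COMPONENT_TYPE_EXT), and the ability to match against it when
choosing configs. Currently no configs with a floating-point component type
are exposed.

With this change, the following dEQP tests start to pass: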

   dEQP-EGL.functional.choose_config.color_component_type_ext.dont_care
   dEQP-EGL.functional.choose_config.color_component_type_ext.fixed
   dEQP-EGL.functional.choose_config.color_component_type_ext.float

Signed-off-by: Tapani Pälli 
---
 src/egl/drivers/dri2/egl_dri2.c | 2 ++
 src/egl/main/eglapi.c   | 1 +
 src/egl/main/eglconfig.c| 9 +
 src/egl/main/eglconfig.h| 2 ++
 src/egl/main/egldisplay.h   | 1 +
 5 files changed, 15 insertions(+)

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index b486be7487..a511081df8 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -695,6 +695,8 @@ dri2_setup_screen(_EGLDisplay *disp)
   dri2_renderer_query_integer(dri2_dpy,
   __DRI2_RENDERER_HAS_CONTEXT_PRIORITY);
 
+   disp->Extensions.EXT_pixel_format_float = EGL_TRUE;
+
if (dri2_renderer_query_integer(dri2_dpy,
__DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB))
   disp->Extensions.KHR_gl_colorspace = EGL_TRUE;
diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c
index 76dabba2eb..c1bf5bbfe1 100644
--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -514,6 +514,7 @@ _eglCreateExtensionsString(_EGLDisplay *dpy)
_EGL_CHECK_EXTENSION(KHR_surfaceless_context);
if (dpy->Extensions.EXT_swap_buffers_with_damage)
   _eglAppendExtension(, "EGL_KHR_swap_buffers_with_damage");
+   _EGL_CHECK_EXTENSION(EXT_pixel_format_float);
_EGL_CHECK_EXTENSION(KHR_wait_sync);
 
if (dpy->Extensions.KHR_no_config_context)
diff --git a/src/egl/main/eglconfig.c b/src/egl/main/eglconfig.c
index f057b0bfe8..2d3b3ddd90 100644
--- a/src/egl/main/eglconfig.c
+++ b/src/egl/main/eglconfig.c
@@ -68,6 +68,7 @@ _eglInitConfig(_EGLConfig *conf, _EGLDisplay *dpy, EGLint id)
conf->TransparentType = EGL_NONE;
conf->NativeVisualType = EGL_NONE;
conf->ColorBufferType = EGL_RGB_BUFFER;
+   conf->ComponentType = EGL_COLOR_COMPONENT_TYPE_FIXED_EXT;
 }
 
 
@@ -254,6 +255,9 @@ static const struct {
{ EGL_RECORDABLE_ANDROID,ATTRIB_TYPE_BOOLEAN,
 ATTRIB_CRITERION_EXACT,
 EGL_DONT_CARE },
+   { EGL_COLOR_COMPONENT_TYPE_EXT,  ATTRIB_TYPE_ENUM,
+ATTRIB_CRITERION_EXACT,
+EGL_COLOR_COMPONENT_TYPE_FIXED_EXT },
 };
 
 
@@ -316,6 +320,11 @@ _eglValidateConfig(const _EGLConfig *conf, EGLBoolean 
for_matching)
 if (val != EGL_RGB_BUFFER && val != EGL_LUMINANCE_BUFFER)
valid = EGL_FALSE;
 break;
+ case EGL_COLOR_COMPONENT_TYPE_EXT:
+if (val != EGL_COLOR_COMPONENT_TYPE_FIXED_EXT &&
+val != EGL_COLOR_COMPONENT_TYPE_FLOAT_EXT)
+   valid = EGL_FALSE;
+break;
  default:
 assert(0);
 break;
diff --git a/src/egl/main/eglconfig.h b/src/egl/main/eglconfig.h
index 22da697e83..21f0a28412 100644
--- a/src/egl/main/eglconfig.h
+++ b/src/egl/main/eglconfig.h
@@ -88,6 +88,7 @@ struct _egl_config
EGLint YInvertedNOK;
EGLint FramebufferTargetAndroid;
EGLint RecordableAndroid;
+   EGLint ComponentType;
 };
 
 
@@ -137,6 +138,7 @@ _eglOffsetOfConfig(EGLint attr)
ATTRIB_MAP(EGL_Y_INVERTED_NOK,YInvertedNOK);
ATTRIB_MAP(EGL_FRAMEBUFFER_TARGET_ANDROID, FramebufferTargetAndroid);
ATTRIB_MAP(EGL_RECORDABLE_ANDROID,RecordableAndroid);
+   ATTRIB_MAP(EGL_COLOR_COMPONENT_TYPE_EXT,  ComponentType);
 #undef ATTRIB_MAP
default:
   return -1;
diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h
index 981cbf4ca0..0f57680b21 100644
--- a/src/egl/main/egldisplay.h
+++ b/src/egl/main/egldisplay.h
@@ -103,6 +103,7 @@ struct _egl_extensions
EGLBoolean EXT_create_context_robustness;
EGLBoolean EXT_image_dma_buf_import;
EGLBoolean EXT_image_dma_buf_import_modifiers;
+   EGLBoolean EXT_pixel_format_float;
EGLBoolean EXT_swap_buffers_with_damage;
 
unsigned int IMG_context_priority;
-- 
2.13.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Enable disk shader cache by default

2017-11-08 Thread Jordan Justen
On 2017-11-08 17:26:47, Timothy Arceri wrote:
> Reviewed-by: Timothy Arceri 
> 
> Mark may want to consider adding some of the once a day type CI runs for 
> this. For example running the test suite for two consecutive runs on the 
> same build so that the second run uses the shader cache and also a 
> second run that uses MESA_GLSL=cache_fb to force testing of the cache 
> fallback path.

Yeah. We discussed this previously, but I don't think it's been
implemented yet. My opinion is that it could perhaps be a weekly test.

We also discussed a nir serialization test, similar to our current nir
clone daily test. I don't think this is implemented yet either.

-Jordan

> 
> On 09/11/17 11:58, Jordan Justen wrote:
> > f9d5a7add42af5a2e4410526d1480a08f41317ae along with
> > a16dc04ad51c32e5c7d136e4dd6273d983385d3f appears to have fixed the one
> > known regression with shader cache. (Deus Ex instability.)
> > 
> > We should enable the shader cache by default to stabilize it before
> > the next major Mesa release.
> > 
> > Signed-off-by: Jordan Justen 
> > ---
> >   docs/relnotes/17.4.0.html  | 2 +-
> >   src/mesa/drivers/dri/i965/brw_disk_cache.c | 3 ---
> >   2 files changed, 1 insertion(+), 4 deletions(-)
> > 
> > diff --git a/docs/relnotes/17.4.0.html b/docs/relnotes/17.4.0.html
> > index f81b5bd62d3..48dcd5cce38 100644
> > --- a/docs/relnotes/17.4.0.html
> > +++ b/docs/relnotes/17.4.0.html
> > @@ -44,7 +44,7 @@ Note: some of the new features are only available with 
> > certain drivers.
> >   
> >   
> >   
> > -Disk shader cache support for i965 when MESA_GLSL_CACHE_DISABLE 
> > environment variable is set to "0" or "false"
> > +Disk shader cache support for i965
> >   
> >   
> >   Bug fixes
> > diff --git a/src/mesa/drivers/dri/i965/brw_disk_cache.c 
> > b/src/mesa/drivers/dri/i965/brw_disk_cache.c
> > index 853ea98af03..cd0524c5cbf 100644
> > --- a/src/mesa/drivers/dri/i965/brw_disk_cache.c
> > +++ b/src/mesa/drivers/dri/i965/brw_disk_cache.c
> > @@ -420,9 +420,6 @@ void
> >   brw_disk_cache_init(struct brw_context *brw)
> >   {
> >   #ifdef ENABLE_SHADER_CACHE
> > -   if (env_var_as_boolean("MESA_GLSL_CACHE_DISABLE", true))
> > -  return;
> > -
> >  char renderer[10];
> >  MAYBE_UNUSED int len = snprintf(renderer, sizeof(renderer), 
> > "i965_%04x",
> >  brw->screen->deviceID);
> > 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3] glsl/linker: location aliasing requires types to have the same width

2017-11-08 Thread Iago Toral
Hi Ilia, are you okay with this version of the patch?

Iago

On Tue, 2017-11-07 at 10:50 +0100, Iago Toral Quiroga wrote:
> Regarding location aliasing requirements, the OpenGL spec says:
> 
>   "Further, when location aliasing, the aliases sharing the location
>    must have the same underlying numerical type  (floating-point or
>    integer)."
> 
> Khronos has further clarified that this also requires the underlying
> types to have the same width, so we can't put a float and a double
> in the same location slot for example. Future versions of the spec
> will
> be corrected to make this clear.
> 
> This patch amends our implementation to account for this restriction.
> 
> In the process of doing this, I also noticed that we would attempt
> to check aliasing requirements for record variables (including the
> test
> for the numerical type) which is not allowed, instead, we should be
> producing a linker error as soon as we see any attempt to do location
> aliasing on non-numerical variables. For the particular case of
> structs,
> we were producing a linker error in this case, but only because we
> assumed that struct fields use all components in each location, so
> any attempt to alias locations consumed by struct fields would
> produce
> a link error due to component aliasing, which does not accurately
> describe the actual problem. This patch would make it produce an error for
> attempting
> to alias a non-numerical variable instead, which is always accurate.
> 
> v2:
>   - Do not assert if we see invalid numerical types. These come
> straight from shader code, so we should produce linker errors if
> shaders attempt to do location aliasing on variables that are not
> numerical such as records.
>   - While we are at it, improve error reporting for the case of
> numerical type mismatch to include the shader stage.
> 
> v3:
>   - Allow location aliasing of images and samplers. If we get these
> it means bindless support is active and they should be handled
> as 64-bit integers (Ilia)
>   - Make sure we produce link errors for any non-numerical type
> for which we attempt location aliasing, not just structs.
> ---
>  src/compiler/glsl/link_varyings.cpp | 64 ++-
> --
>  1 file changed, 46 insertions(+), 18 deletions(-)
> 
> diff --git a/src/compiler/glsl/link_varyings.cpp
> b/src/compiler/glsl/link_varyings.cpp
> index 1a9894baab..e0d757eaaf 100644
> --- a/src/compiler/glsl/link_varyings.cpp
> +++ b/src/compiler/glsl/link_varyings.cpp
> @@ -405,15 +405,15 @@ compute_variable_location_slot(ir_variable
> *var, gl_shader_stage stage)
>  
>  struct explicit_location_info {
> ir_variable *var;
> -   unsigned numerical_type;
> +   int numerical_type;
> unsigned interpolation;
> bool centroid;
> bool sample;
> bool patch;
>  };
>  
> -static inline unsigned
> -get_numerical_type(const glsl_type *type)
> +static inline int
> +get_numerical_sized_type(const glsl_type *type)
>  {
> /* From the OpenGL 4.6 spec, section 4.4.1 Input Layout
> Qualifiers, Page 68,
>  * (Location aliasing):
> @@ -421,10 +421,25 @@ get_numerical_type(const glsl_type *type)
>  *"Further, when location aliasing, the aliases sharing the
> location
>  * must have the same underlying numerical type  (floating-
> point or
>  * integer)
> +*
> +* Khronos has further clarified that this also requires the
> underlying
> +* types to have the same width, so we can't put a float and a
> double
> +* in the same location slot for example. Future versions of the
> spec will
> +* be corrected to make this clear.
> +*
> +* Notice that we allow location aliasing for bindless
> image/samplers too
> +* since these are defined as 64-bit integers.
>  */
> -   if (type->is_float() || type->is_double())
> +   if (type->is_float())
>    return GLSL_TYPE_FLOAT;
> -   return GLSL_TYPE_INT;
> +   else if (type->is_integer())
> +  return GLSL_TYPE_INT;
> +   else if (type->is_double())
> +  return GLSL_TYPE_DOUBLE;
> +   else if (type->is_integer_64() || type->is_sampler() || type-
> >is_image())
> +  return GLSL_TYPE_INT64;
> +
> +   return -1; /* Not a numerical type */
>  }
>  
>  static bool
> @@ -442,14 +457,17 @@ check_location_aliasing(struct
> explicit_location_info explicit_locations[][4],
>  gl_shader_stage stage)
>  {
> unsigned last_comp;
> -   if (type->without_array()->is_record()) {
> -  /* The component qualifier can't be used on structs so just
> treat
> -   * all component slots as used.
> +   const glsl_type *type_without_array = type->without_array();
> +   int numerical_type =
> get_numerical_sized_type(type_without_array);
> +   if (numerical_type == -1) {
> +  /* The component qualifier can't be used on non-numerical
> types so just
> +   * treat all component slots as used. This will also make it
> so that
> +   * any location aliasing attempt on 

[Mesa-dev] [PATCH 11/17] main: Support 1 Mesa format with get for GL_PROGRAM_BINARY_FORMATS

2017-11-08 Thread Jordan Justen
Mesa supports either 0 or 1 formats. If 1 format is supported, it is
GL_PROGRAM_BINARY_FORMAT_MESA as defined in the
GL_MESA_program_binary_formats extension spec.

Signed-off-by: Jordan Justen 
---
 src/mesa/main/get.c  | 9 +
 src/mesa/main/get_hash_params.py | 2 +-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index e68a93b10ee..8b28f583018 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -1153,6 +1153,15 @@ find_custom_value(struct gl_context *ctx, const struct 
value_desc *d, union valu
  }
   }
   break;
+
+   /* GL_ARB_get_program_binary */
+   case GL_PROGRAM_BINARY_FORMATS:
+  assert(ctx->Const.NumProgramBinaryFormats <= 1);
+  v->value_int_n.n = MIN2(ctx->Const.NumProgramBinaryFormats, 1);
+  if (ctx->Const.NumProgramBinaryFormats > 0) {
+ v->value_int_n.ints[0] = GL_PROGRAM_BINARY_FORMAT_MESA;
+  }
+  break;
}
 }
 
diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index 8c6193d761f..be0c68e4181 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -325,7 +325,7 @@ descriptor=[
 
 # GL_ARB_get_program_binary / GL_OES_get_program_binary
   [ "NUM_PROGRAM_BINARY_FORMATS", 
"CONTEXT_UINT(Const.NumProgramBinaryFormats), NO_EXTRA" ],
-  [ "PROGRAM_BINARY_FORMATS", "LOC_CUSTOM, TYPE_INVALID, 0, NO_EXTRA" ],
+  [ "PROGRAM_BINARY_FORMATS", "LOC_CUSTOM, TYPE_INT_N, 0, NO_EXTRA" ],
 
 # GL_INTEL_performance_query
   [ "PERFQUERY_QUERY_NAME_LENGTH_MAX_INTEL", 
"CONST(MAX_PERFQUERY_QUERY_NAME_LENGTH), extra_INTEL_performance_query" ],
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/17] main: Support calling driver for ProgramBinary

2017-11-08 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/mesa/main/dd.h|  4 
 src/mesa/main/shaderapi.c | 38 +-
 2 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index c20d8b80e1d..b46f2693b83 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -1141,6 +1141,10 @@ struct dd_function_table {
 struct gl_shader_program *shProg,
 GLsizei bufSize, GLsizei *length,
 GLenum *binaryFormat, GLvoid *binary);
+   void (*ProgramBinary)(struct gl_context *ctx,
+ struct gl_shader_program *shProg,
+ GLenum binaryFormat, const GLvoid *binary,
+ GLsizei length);
/*@}*/
 };
 
diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 28711a67f03..47a51279353 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -2224,9 +2224,6 @@ _mesa_ProgramBinary(GLuint program, GLenum binaryFormat,
if (!shProg)
   return;
 
-   (void) binaryFormat;
-   (void) binary;
-
/* Section 2.3.1 (Errors) of the OpenGL 4.5 spec says:
 *
 * "If a negative number is provided where an argument of type sizei or
@@ -2237,20 +2234,27 @@ _mesa_ProgramBinary(GLuint program, GLenum binaryFormat,
   return;
}
 
-   /* The ARB_get_program_binary spec says:
-*
-* "<binaryFormat> and <binary> must be those returned by a previous
-* call to GetProgramBinary, and <length> must be the length of the
-* program binary as returned by GetProgramBinary or GetProgramiv with
-* <pname> PROGRAM_BINARY_LENGTH. Loading the program binary will fail,
-* setting the LINK_STATUS of <program> to FALSE, if these conditions
-* are not met."
-*
-* Since any value of binaryFormat passed "is not one of those specified as
-* allowable for [this] command, an INVALID_ENUM error is generated."
-*/
-   shProg->data->LinkStatus = linking_failure;
-   _mesa_error(ctx, GL_INVALID_ENUM, "glProgramBinary");
+   assert(ctx->Const.NumProgramBinaryFormats == 0 ||
+  ctx->Driver.ProgramBinary);
+   if (ctx->Const.NumProgramBinaryFormats == 0 ||
+   binaryFormat != GL_PROGRAM_BINARY_FORMAT_MESA) {
+  /* The ARB_get_program_binary spec says:
+   *
+   * "<binaryFormat> and <binary> must be those returned by a previous
+   * call to GetProgramBinary, and <length> must be the length of the
+   * program binary as returned by GetProgramBinary or GetProgramiv with
+   * <pname> PROGRAM_BINARY_LENGTH. Loading the program binary will fail,
+   * setting the LINK_STATUS of <program> to FALSE, if these conditions
+   * are not met."
+   *
+   * Since any value of binaryFormat passed "is not one of those specified 
as
+   * allowable for [this] command, an INVALID_ENUM error is generated."
+   */
+  shProg->data->LinkStatus = linking_failure;
+  _mesa_error(ctx, GL_INVALID_ENUM, "glProgramBinary");
+   } else {
+  ctx->Driver.ProgramBinary(ctx, shProg, binaryFormat, binary, length);
+   }
 }
 
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/17] main: Support calling driver for GetProgramBinary

2017-11-08 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/mesa/main/dd.h|  4 
 src/mesa/main/shaderapi.c | 17 +++--
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 91eff55f84d..c20d8b80e1d 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -1137,6 +1137,10 @@ struct dd_function_table {
void (*GetProgramBinaryLength)(struct gl_context *ctx,
   struct gl_shader_program *shProg,
   GLint *length);
+   void (*GetProgramBinary)(struct gl_context *ctx,
+struct gl_shader_program *shProg,
+GLsizei bufSize, GLsizei *length,
+GLenum *binaryFormat, GLvoid *binary);
/*@}*/
 };
 
diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index f12825d2536..28711a67f03 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -2200,12 +2200,17 @@ _mesa_GetProgramBinary(GLuint program, GLsizei bufSize, 
GLsizei *length,
   return;
}
 
-   *length = 0;
-   _mesa_error(ctx, GL_INVALID_OPERATION,
-   "glGetProgramBinary(driver supports zero binary formats)");
-
-   (void) binaryFormat;
-   (void) binary;
+   assert(ctx->Const.NumProgramBinaryFormats == 0 ||
+  ctx->Driver.GetProgramBinary);
+   if (ctx->Const.NumProgramBinaryFormats == 0) {
+  *length = 0;
+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "glGetProgramBinary(driver supports zero binary formats)");
+   } else {
+  ctx->Driver.GetProgramBinary(ctx, shProg, bufSize, length,
+   binaryFormat, binary);
+  assert(*length == 0 || *binaryFormat == GL_PROGRAM_BINARY_FORMAT_MESA);
+   }
 }
 
 void GLAPIENTRY
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/17] i965: Add brw_program_serialize_nir

2017-11-08 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_link.cpp  |  9 ++---
 src/mesa/drivers/dri/i965/brw_program.c | 12 
 src/mesa/drivers/dri/i965/brw_program.h |  3 +++
 3 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp 
b/src/mesa/drivers/dri/i965/brw_link.cpp
index 24545d52ecb..878b3240841 100644
--- a/src/mesa/drivers/dri/i965/brw_link.cpp
+++ b/src/mesa/drivers/dri/i965/brw_link.cpp
@@ -300,13 +300,8 @@ brw_link_shader(struct gl_context *ctx, struct 
gl_shader_program *shProg)
   NIR_PASS_V(prog->nir, nir_lower_samplers, shProg);
   NIR_PASS_V(prog->nir, nir_lower_atomics, shProg);
 
-  if (brw->ctx.Cache) {
- struct blob writer;
- blob_init();
- nir_serialize(, prog->nir);
- prog->driver_cache_blob = ralloc_size(NULL, writer.size);
- memcpy(prog->driver_cache_blob, writer.data, writer.size);
- prog->driver_cache_blob_size = writer.size;
+  if (ctx->Cache) {
+ brw_program_serialize_nir(ctx, prog, (gl_shader_stage)stage);
   }
 
   infos[stage] = >nir->info;
diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
b/src/mesa/drivers/dri/i965/brw_program.c
index 809766574f8..798b7d24dd6 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -781,6 +781,18 @@ brw_assign_common_binding_table_offsets(const struct 
gen_device_info *devinfo,
return next_binding_table_offset;
 }
 
+void
+brw_program_serialize_nir(struct gl_context *ctx, struct gl_program *prog,
+  gl_shader_stage stage)
+{
+   struct blob writer;
+   blob_init();
+   nir_serialize(, prog->nir);
+   prog->driver_cache_blob = ralloc_size(NULL, writer.size);
+   memcpy(prog->driver_cache_blob, writer.data, writer.size);
+   prog->driver_cache_blob_size = writer.size;
+}
+
 void
 brw_program_deserialize_nir(struct gl_context *ctx, struct gl_program *prog,
 gl_shader_stage stage)
diff --git a/src/mesa/drivers/dri/i965/brw_program.h 
b/src/mesa/drivers/dri/i965/brw_program.h
index bd9b4ad168a..a5e41522841 100644
--- a/src/mesa/drivers/dri/i965/brw_program.h
+++ b/src/mesa/drivers/dri/i965/brw_program.h
@@ -82,6 +82,9 @@ brw_assign_common_binding_table_offsets(const struct 
gen_device_info *devinfo,
 uint32_t next_binding_table_offset);
 
 void
+brw_program_serialize_nir(struct gl_context *ctx, struct gl_program *prog,
+  gl_shader_stage stage);
+void
 brw_program_deserialize_nir(struct gl_context *ctx, struct gl_program *prog,
 gl_shader_stage stage);
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/17] main: Allow non-zero NUM_PROGRAM_BINARY_FORMATS

2017-11-08 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/mesa/main/get_hash_params.py | 2 +-
 src/mesa/main/mtypes.h   | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index acd5cd1f011..8c6193d761f 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -324,7 +324,7 @@ descriptor=[
   [ "SHADER_BINARY_FORMATS", "LOC_CUSTOM, TYPE_INVALID, 0, 
extra_ARB_ES2_compatibility_api_es2" ],
 
 # GL_ARB_get_program_binary / GL_OES_get_program_binary
-  [ "NUM_PROGRAM_BINARY_FORMATS", "CONST(0), NO_EXTRA" ],
+  [ "NUM_PROGRAM_BINARY_FORMATS", 
"CONTEXT_UINT(Const.NumProgramBinaryFormats), NO_EXTRA" ],
   [ "PROGRAM_BINARY_FORMATS", "LOC_CUSTOM, TYPE_INVALID, 0, NO_EXTRA" ],
 
 # GL_INTEL_performance_query
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 023692cc0e1..f64bf0a2ad4 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -4002,6 +4002,9 @@ struct gl_constants
 
/** When drivers are OK with mapped buffers during draw and other calls. */
bool AllowMappedBuffersDuringExecution;
+
+   /** GL_ARB_get_program_binary */
+   GLuint NumProgramBinaryFormats;
 };
 
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/17] i965: Add brw_program_deserialize_nir

2017-11-08 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_disk_cache.c | 31 --
 src/mesa/drivers/dri/i965/brw_program.c| 16 +++
 src/mesa/drivers/dri/i965/brw_program.h|  4 
 3 files changed, 28 insertions(+), 23 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_disk_cache.c 
b/src/mesa/drivers/dri/i965/brw_disk_cache.c
index 853ea98af03..65bb52726eb 100644
--- a/src/mesa/drivers/dri/i965/brw_disk_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_disk_cache.c
@@ -24,7 +24,6 @@
 #include "compiler/blob.h"
 #include "compiler/glsl/ir_uniform.h"
 #include "compiler/glsl/shader_cache.h"
-#include "compiler/nir/nir_serialize.h"
 #include "main/mtypes.h"
 #include "util/build_id.h"
 #include "util/debug.h"
@@ -61,27 +60,6 @@ gen_shader_sha1(struct brw_context *brw, struct gl_program 
*prog,
_mesa_sha1_compute(manifest, strlen(manifest), out_sha1);
 }
 
-static void
-restore_serialized_nir_shader(struct brw_context *brw, struct gl_program *prog,
-  gl_shader_stage stage)
-{
-   prog->program_written_to_cache = false;
-   if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
-  fprintf(stderr, "falling back to nir %s.\n",
-  _mesa_shader_stage_to_abbrev(prog->info.stage));
-   }
-
-   if (!prog->nir) {
-  assert(prog->driver_cache_blob && prog->driver_cache_blob_size > 0);
-  const struct nir_shader_compiler_options *options =
- brw->ctx.Const.ShaderCompilerOptions[stage].NirOptions;
-  struct blob_reader reader;
-  blob_reader_init(, prog->driver_cache_blob,
-   prog->driver_cache_blob_size);
-  prog->nir = nir_deserialize(NULL, options, );
-   }
-}
-
 static void
 write_blob_program_data(struct blob *binary, gl_shader_stage stage,
 const void *program,
@@ -298,7 +276,14 @@ brw_disk_cache_upload_program(struct brw_context *brw, 
gl_shader_stage stage)
return true;
 
 fail:
-   restore_serialized_nir_shader(brw, prog, stage);
+   prog->program_written_to_cache = false;
+   if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
+  fprintf(stderr, "falling back to nir %s.\n",
+  _mesa_shader_stage_to_abbrev(prog->info.stage));
+   }
+
+   brw_program_deserialize_nir(>ctx, prog, stage);
+
return false;
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
b/src/mesa/drivers/dri/i965/brw_program.c
index 7607bc38840..39308f306df 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -40,6 +40,7 @@
 #include "util/ralloc.h"
 #include "compiler/glsl/ir.h"
 #include "compiler/glsl/glsl_to_nir.h"
+#include "compiler/nir/nir_serialize.h"
 
 #include "brw_program.h"
 #include "brw_context.h"
@@ -779,3 +780,18 @@ brw_assign_common_binding_table_offsets(const struct 
gen_device_info *devinfo,
assert(next_binding_table_offset <= BRW_MAX_SURFACES);
return next_binding_table_offset;
 }
+
+void
+brw_program_deserialize_nir(struct gl_context *ctx, struct gl_program *prog,
+gl_shader_stage stage)
+{
+   if (!prog->nir) {
+  assert(prog->driver_cache_blob && prog->driver_cache_blob_size > 0);
+  const struct nir_shader_compiler_options *options =
+ ctx->Const.ShaderCompilerOptions[stage].NirOptions;
+  struct blob_reader reader;
+  blob_reader_init(, prog->driver_cache_blob,
+   prog->driver_cache_blob_size);
+  prog->nir = nir_deserialize(NULL, options, );
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_program.h 
b/src/mesa/drivers/dri/i965/brw_program.h
index 701b8da482e..bd9b4ad168a 100644
--- a/src/mesa/drivers/dri/i965/brw_program.h
+++ b/src/mesa/drivers/dri/i965/brw_program.h
@@ -81,6 +81,10 @@ brw_assign_common_binding_table_offsets(const struct 
gen_device_info *devinfo,
 struct brw_stage_prog_data 
*stage_prog_data,
 uint32_t next_binding_table_offset);
 
+void
+brw_program_deserialize_nir(struct gl_context *ctx, struct gl_program *prog,
+gl_shader_stage stage);
+
 void
 brw_stage_prog_data_free(const void *prog_data);
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/17] compiler: Fold shader_cache in with libglsl sources

2017-11-08 Thread Jordan Justen
It appears that we include the shader cache sources into libglsl
regardless.

The Meson build already does this.

Signed-off-by: Jordan Justen 
---
 src/compiler/Android.glsl.mk  | 3 +--
 src/compiler/Makefile.glsl.am | 3 +--
 src/compiler/Makefile.sources | 6 ++
 3 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/src/compiler/Android.glsl.mk b/src/compiler/Android.glsl.mk
index 0aabafa2673..8533830833f 100644
--- a/src/compiler/Android.glsl.mk
+++ b/src/compiler/Android.glsl.mk
@@ -35,8 +35,7 @@ include $(CLEAR_VARS)
 
 LOCAL_SRC_FILES := \
$(LIBGLCPP_FILES) \
-   $(LIBGLSL_FILES) \
-   $(LIBGLSL_SHADER_CACHE_FILES)
+   $(LIBGLSL_FILES)
 
 LOCAL_C_INCLUDES := \
$(MESA_TOP)/src/mapi \
diff --git a/src/compiler/Makefile.glsl.am b/src/compiler/Makefile.glsl.am
index 179f415fe6f..11ff14b2852 100644
--- a/src/compiler/Makefile.glsl.am
+++ b/src/compiler/Makefile.glsl.am
@@ -134,8 +134,7 @@ glsl_libglsl_la_LIBADD = \
 
 glsl_libglsl_la_SOURCES =  \
$(LIBGLSL_GENERATED_FILES)  \
-   $(LIBGLSL_FILES)\
-   $(LIBGLSL_SHADER_CACHE_FILES)
+   $(LIBGLSL_FILES)
 
 glsl_libstandalone_la_SOURCES = \
$(GLSL_COMPILER_CXX_FILES)
diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index abcd8c6641f..f7eb69dc2bc 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -141,13 +141,11 @@ LIBGLSL_FILES = \
glsl/propagate_invariance.cpp \
glsl/s_expression.cpp \
glsl/s_expression.h \
+   glsl/shader_cache.cpp \
+   glsl/shader_cache.h \
glsl/string_to_uint_map.cpp \
glsl/string_to_uint_map.h
 
-LIBGLSL_SHADER_CACHE_FILES = \
-   glsl/shader_cache.cpp \
-   glsl/shader_cache.h
-
 # glsl_compiler
 
 GLSL_COMPILER_CXX_FILES = \
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/17] glsl: Split out shader program serialization

2017-11-08 Thread Jordan Justen
This will allow us to use the program serialization to implement
ARB_get_program_binary.

Signed-off-by: Jordan Justen 
---
 src/compiler/Makefile.sources  |2 +
 src/compiler/glsl/meson.build  |2 +
 src/compiler/glsl/serialize.cpp| 1238 
 src/compiler/glsl/serialize.h  |   50 ++
 src/compiler/glsl/shader_cache.cpp | 1185 +-
 src/compiler/shader_info.h |1 +
 6 files changed, 1297 insertions(+), 1181 deletions(-)
 create mode 100644 src/compiler/glsl/serialize.cpp
 create mode 100644 src/compiler/glsl/serialize.h

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index f7eb69dc2bc..2d3b8aecad8 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -141,6 +141,8 @@ LIBGLSL_FILES = \
glsl/propagate_invariance.cpp \
glsl/s_expression.cpp \
glsl/s_expression.h \
+   glsl/serialize.cpp \
+   glsl/serialize.h \
glsl/shader_cache.cpp \
glsl/shader_cache.h \
glsl/string_to_uint_map.cpp \
diff --git a/src/compiler/glsl/meson.build b/src/compiler/glsl/meson.build
index aa0e7153f42..a200b9f1458 100644
--- a/src/compiler/glsl/meson.build
+++ b/src/compiler/glsl/meson.build
@@ -182,6 +182,8 @@ files_libglsl = files(
   's_expression.h',
   'string_to_uint_map.cpp',
   'string_to_uint_map.h',
+  'serialize.cpp',
+  'serialize.h',
   'shader_cache.cpp',
   'shader_cache.h',
 )
diff --git a/src/compiler/glsl/serialize.cpp b/src/compiler/glsl/serialize.cpp
new file mode 100644
index 000..b4c9545702e
--- /dev/null
+++ b/src/compiler/glsl/serialize.cpp
@@ -0,0 +1,1238 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file serialize.cpp
+ *
+ * GLSL serialization
+ *
+ * Supports serializing and deserializing glsl programs using a blob.
+ */
+
+#include "compiler/glsl_types.h"
+#include "compiler/shader_info.h"
+#include "ir_uniform.h"
+#include "main/mtypes.h"
+#include "string_to_uint_map.h"
+
+extern "C" {
+#include "main/shaderobj.h"
+#include "program/program.h"
+}
+
+static void
+write_subroutines(struct blob *metadata, struct gl_shader_program *prog)
+{
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+  struct gl_linked_shader *sh = prog->_LinkedShaders[i];
+  if (!sh)
+ continue;
+
+  struct gl_program *glprog = sh->Program;
+
+  blob_write_uint32(metadata, glprog->sh.NumSubroutineUniforms);
+  blob_write_uint32(metadata, glprog->sh.MaxSubroutineFunctionIndex);
+  blob_write_uint32(metadata, glprog->sh.NumSubroutineFunctions);
+  for (unsigned j = 0; j < glprog->sh.NumSubroutineFunctions; j++) {
+ int num_types = glprog->sh.SubroutineFunctions[j].num_compat_types;
+
+ blob_write_string(metadata, glprog->sh.SubroutineFunctions[j].name);
+ blob_write_uint32(metadata, glprog->sh.SubroutineFunctions[j].index);
+ blob_write_uint32(metadata, num_types);
+
+ for (int k = 0; k < num_types; k++) {
+encode_type_to_blob(metadata,
+glprog->sh.SubroutineFunctions[j].types[k]);
+ }
+  }
+   }
+}
+
+static void
+read_subroutines(struct blob_reader *metadata, struct gl_shader_program *prog)
+{
+   struct gl_subroutine_function *subs;
+
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+  struct gl_linked_shader *sh = prog->_LinkedShaders[i];
+  if (!sh)
+ continue;
+
+  struct gl_program *glprog = sh->Program;
+
+  glprog->sh.NumSubroutineUniforms = blob_read_uint32(metadata);
+  glprog->sh.MaxSubroutineFunctionIndex = blob_read_uint32(metadata);
+  glprog->sh.NumSubroutineFunctions = blob_read_uint32(metadata);
+
+  subs = rzalloc_array(prog, struct gl_subroutine_function,
+

[Mesa-dev] [PATCH 07/17] i965: Free serialized nir after deserializing

2017-11-08 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_program.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
b/src/mesa/drivers/dri/i965/brw_program.c
index 39308f306df..809766574f8 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -794,4 +794,10 @@ brw_program_deserialize_nir(struct gl_context *ctx, struct 
gl_program *prog,
prog->driver_cache_blob_size);
   prog->nir = nir_deserialize(NULL, options, );
}
+
+   if (prog->driver_cache_blob) {
+  ralloc_free(prog->driver_cache_blob);
+  prog->driver_cache_blob = NULL;
+  prog->driver_cache_blob_size = 0;
+   }
 }
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 16/17] util: Add Mesa ARB_get_program_binary helper functions

2017-11-08 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/util/Makefile.sources |   2 +
 src/util/meson.build  |   2 +
 src/util/program_binary.c | 322 ++
 src/util/program_binary.h |  91 +
 4 files changed, 417 insertions(+)
 create mode 100644 src/util/program_binary.c
 create mode 100644 src/util/program_binary.h

diff --git a/src/util/Makefile.sources b/src/util/Makefile.sources
index c7f6516a992..d9048bbd182 100644
--- a/src/util/Makefile.sources
+++ b/src/util/Makefile.sources
@@ -21,6 +21,8 @@ MESA_UTIL_FILES := \
macros.h \
mesa-sha1.c \
mesa-sha1.h \
+   program_binary.c \
+   program_binary.h \
sha1/sha1.c \
sha1/sha1.h \
ralloc.c \
diff --git a/src/util/meson.build b/src/util/meson.build
index c9cb3e861e9..9bc10222a72 100644
--- a/src/util/meson.build
+++ b/src/util/meson.build
@@ -45,6 +45,8 @@ files_mesa_util = files(
   'macros.h',
   'mesa-sha1.c',
   'mesa-sha1.h',
+  'program_binary.c',
+  'program_binary.h',
   'sha1/sha1.c',
   'sha1/sha1.h',
   'ralloc.c',
diff --git a/src/util/program_binary.c b/src/util/program_binary.c
new file mode 100644
index 000..4447dd632d9
--- /dev/null
+++ b/src/util/program_binary.c
@@ -0,0 +1,322 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (c) 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file program_binary.c
+ *
+ * Helper functions for serializing a binary program.
+ */
+
+
+#include "main/mtypes.h"
+#include "crc32.h"
+#include "program_binary.h"
+#include "zlib.h"
+
+/**
+ * Mesa supports one binary format, but it must differentiate between formats
+ * produced by different drivers and different Mesa versions.
+ *
+ * Mesa uses a uint32_t value to specify an internal format. The only format
+ * defined has one uint32_t value of 0, followed by 20 bytes specifying a sha1
+ * that uniquely identifies the Mesa driver type and version.
+ */
+
+struct program_binary_header {
+   /* If internal_format is 0, it must be followed by the 20 byte sha1 that
+* identifies the Mesa driver and version supported. If we want to support
+* something besides a sha1, then a new internal_format value can be added.
+*/
+   uint32_t internal_format;
+   uint8_t sha1[20];
+   /* Fields following sha1 can be changed since the sha1 will guarantee that
+* the binary only works with the same Mesa version.
+*/
+   uint32_t deflated_size;
+   uint32_t inflated_size;
+   uint32_t crc32;
+};
+
+unsigned
+get_program_binary_max_size(unsigned payload_size)
+{
+   return sizeof(struct program_binary_header) + payload_size;
+}
+
+/* From the zlib docs:
+ *"If the memory is available, buffers sizes on the order of 128K or 256K
+ *bytes should be used."
+ */
+#define BUFSIZE 256 * 1024
+
+/**
+ * Compresses buffer
+ */
+static size_t
+deflate_and_write_to_buf(const void *in_data, size_t in_data_size,
+ uint8_t *dest, size_t max_dest_size)
+{
+   unsigned char out[BUFSIZE];
+   size_t written = 0;
+
+   /* allocate deflate state */
+   z_stream strm;
+   strm.zalloc = Z_NULL;
+   strm.zfree = Z_NULL;
+   strm.opaque = Z_NULL;
+   strm.next_in = (uint8_t *) in_data;
+   strm.avail_in = in_data_size;
+
+   int ret = deflateInit(, Z_BEST_COMPRESSION);
+   if (ret != Z_OK)
+   return 0;
+
+   /* compress until end of in_data */
+   size_t compressed_size = 0;
+   int flush;
+   do {
+  int remaining = in_data_size - BUFSIZE;
+  flush = remaining > 0 ? Z_NO_FLUSH : Z_FINISH;
+  in_data_size -= BUFSIZE;
+
+  /* Run deflate() on input until the output buffer is not full (which
+   * means there is no more data to deflate).
+   */
+  do {
+ strm.avail_out = BUFSIZE;
+ strm.next_out = out;
+
+ ret = deflate(, flush);/* no bad return value */
+ assert(ret != Z_STREAM_ERROR);  

[Mesa-dev] [PATCH 09/17] i965: Fix memory leak when serializing nir

2017-11-08 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_program.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
b/src/mesa/drivers/dri/i965/brw_program.c
index 798b7d24dd6..f795fc1dbc3 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -791,6 +791,7 @@ brw_program_serialize_nir(struct gl_context *ctx, struct 
gl_program *prog,
prog->driver_cache_blob = ralloc_size(NULL, writer.size);
memcpy(prog->driver_cache_blob, writer.data, writer.size);
prog->driver_cache_blob_size = writer.size;
+   blob_finish();
 }
 
 void
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 15/17] main: Clear shader program data whenever ProgramBinary is called

2017-11-08 Thread Jordan Justen
The GL_ARB_get_program_binary extension spec says:

 "If ProgramBinary fails to load a binary, no error is generated, but
  any information about a previous link or load of that program object
  is lost."

Signed-off-by: Jordan Justen 
---
 src/mesa/main/shaderapi.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 47a51279353..03e8488ccee 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -2224,6 +2224,8 @@ _mesa_ProgramBinary(GLuint program, GLenum binaryFormat,
if (!shProg)
   return;
 
+   _mesa_clear_shader_program_data(ctx, shProg);
+
/* Section 2.3.1 (Errors) of the OpenGL 4.5 spec says:
 *
 * "If a negative number is provided where an argument of type sizei or
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 17/17] i965: Add ARB_get_program_binary support using nir_serialization

2017-11-08 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/Makefile.sources |   1 +
 src/mesa/drivers/dri/i965/brw_context.c|   9 ++
 src/mesa/drivers/dri/i965/brw_context.h|  16 ++
 src/mesa/drivers/dri/i965/brw_program_binary.c | 200 +
 src/mesa/drivers/dri/i965/meson.build  |   1 +
 5 files changed, 227 insertions(+)
 create mode 100644 src/mesa/drivers/dri/i965/brw_program_binary.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index 2980cdb3c54..3fba8dc17ef 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -37,6 +37,7 @@ i965_FILES = \
brw_performance_query.c \
brw_program.c \
brw_program.h \
+   brw_program_binary.c \
brw_program_cache.c \
brw_primitive_restart.c \
brw_queryobj.c \
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 0ebd4329935..b685e53852b 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -329,6 +329,12 @@ brw_init_driver_functions(struct brw_context *brw,
 
if (devinfo->gen >= 6)
   functions->GetSamplePosition = gen6_get_sample_position;
+
+   /* GL_ARB_get_program_binary */
+   brw_program_binary_init(brw->screen->deviceID);
+   functions->GetProgramBinaryLength = brw_get_program_binary_length;
+   functions->GetProgramBinary = brw_get_program_binary;
+   functions->ProgramBinary = brw_program_binary;
 }
 
 static void
@@ -697,6 +703,9 @@ brw_initialize_context_constants(struct brw_context *brw)
 
if (!(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT))
   ctx->Const.AllowMappedBuffersDuringExecution = true;
+
+   /* GL_ARB_get_program_binary */
+   ctx->Const.NumProgramBinaryFormats = 1;
 }
 
 static void
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 8aa0c5ff64c..0c10db0ef34 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1556,6 +1556,22 @@ brw_check_for_reset(struct brw_context *brw);
 extern void
 brw_init_compute_functions(struct dd_function_table *functions);
 
+/* brw_program_binary.c */
+extern void
+brw_program_binary_init(unsigned device_id);
+extern void
+brw_get_program_binary_length(struct gl_context *ctx,
+  struct gl_shader_program *sh_prog,
+  GLint *length);
+extern void
+brw_get_program_binary(struct gl_context *ctx,
+   struct gl_shader_program *sh_prog,
+   GLsizei bufSize, GLsizei *length,
+   GLenum *binary_format, GLvoid *binary);
+extern void
+brw_program_binary(struct gl_context *ctx, struct gl_shader_program *sh_prog,
+   GLenum binary_format, const GLvoid *binary, GLsizei length);
+
 /*==
  * Inline conversion functions.  These are better-typed than the
  * macros used previously:
diff --git a/src/mesa/drivers/dri/i965/brw_program_binary.c 
b/src/mesa/drivers/dri/i965/brw_program_binary.c
new file mode 100644
index 000..55a2d097b8c
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_program_binary.c
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "compiler/blob.h"
+#include "compiler/glsl/serialize.h"
+#include "compiler/nir/nir_serialize.h"
+#include "main/mtypes.h"
+#include "util/build_id.h"
+#include "util/debug.h"
+#include "util/mesa-sha1.h"
+#include "util/program_binary.h"
+#include "program/prog_parameter.h"
+
+#include "brw_context.h"
+#include "brw_program.h"
+
+static uint8_t driver_sha1[20];
+
+void

[Mesa-dev] [PATCH 12/17] main: Support calling driver for GL_PROGRAM_BINARY_LENGTH

2017-11-08 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/mesa/main/dd.h| 12 
 src/mesa/main/shaderapi.c |  8 +++-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index da03b2e8b94..91eff55f84d 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -1126,6 +1126,18 @@ struct dd_function_table {
 GLuint64 size,
 int fd);
/*@}*/
+
+   /**
+* \name GL_ARB_get_program_binary
+*/
+   /*@{*/
+   /**
+* Called to retrieve a binary serialized copy of the current program.
+*/
+   void (*GetProgramBinaryLength)(struct gl_context *ctx,
+  struct gl_shader_program *shProg,
+  GLint *length);
+   /*@}*/
 };
 
 
diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 72824355838..f12825d2536 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -834,7 +834,13 @@ get_programiv(struct gl_context *ctx, GLuint program, 
GLenum pname,
   *params = shProg->BinaryRetreivableHint;
   return;
case GL_PROGRAM_BINARY_LENGTH:
-  *params = 0;
+  assert(ctx->Const.NumProgramBinaryFormats == 0 ||
+ ctx->Driver.GetProgramBinaryLength);
+  if (ctx->Const.NumProgramBinaryFormats == 0) {
+ *params = 0;
+  } else {
+ ctx->Driver.GetProgramBinaryLength(ctx, shProg, params);
+  }
   return;
case GL_ACTIVE_ATOMIC_COUNTER_BUFFERS:
   if (!ctx->Extensions.ARB_shader_atomic_counters)
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/17] main, glsl: Add UniformDataDefaults which stores uniform defaults

2017-11-08 Thread Jordan Justen
The ARB_get_program_binary extension requires that uniform values in a
program be restored to their initial value just after linking.

This patch saves off the initial values just after linking. When the
program is restored by glProgramBinary, we can use this to copy the
initial value of uniforms into UniformDataSlots.

Signed-off-by: Jordan Justen 
---
 src/compiler/glsl/link_uniform_initializers.cpp |  2 ++
 src/compiler/glsl/link_uniforms.cpp |  3 +++
 src/compiler/glsl/serialize.cpp | 18 --
 src/mesa/main/mtypes.h  |  1 +
 4 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl/link_uniform_initializers.cpp 
b/src/compiler/glsl/link_uniform_initializers.cpp
index f70d9100e12..2395f5cf695 100644
--- a/src/compiler/glsl/link_uniform_initializers.cpp
+++ b/src/compiler/glsl/link_uniform_initializers.cpp
@@ -354,5 +354,7 @@ link_set_uniform_initializers(struct gl_shader_program 
*prog,
   }
}
 
+   memcpy(prog->data->UniformDataDefaults, prog->data->UniformDataSlots,
+  sizeof(union gl_constant_value) * prog->data->NumUniformDataSlots);
ralloc_free(mem_ctx);
 }
diff --git a/src/compiler/glsl/link_uniforms.cpp 
b/src/compiler/glsl/link_uniforms.cpp
index 7d141549f55..51e02bcf840 100644
--- a/src/compiler/glsl/link_uniforms.cpp
+++ b/src/compiler/glsl/link_uniforms.cpp
@@ -1338,6 +1338,9 @@ link_assign_uniform_storage(struct gl_context *ctx,
  
prog->data->NumUniformStorage);
   data = rzalloc_array(prog->data->UniformStorage,
union gl_constant_value, num_data_slots);
+  prog->data->UniformDataDefaults =
+ rzalloc_array(prog->data->UniformStorage,
+   union gl_constant_value, num_data_slots);
} else {
   data = prog->data->UniformDataSlots;
}
diff --git a/src/compiler/glsl/serialize.cpp b/src/compiler/glsl/serialize.cpp
index b4c9545702e..e55f1680ffc 100644
--- a/src/compiler/glsl/serialize.cpp
+++ b/src/compiler/glsl/serialize.cpp
@@ -449,7 +449,12 @@ write_uniforms(struct blob *metadata, struct 
gl_shader_program *prog)
  unsigned vec_size =
 prog->data->UniformStorage[i].type->component_slots() *
 MAX2(prog->data->UniformStorage[i].array_elements, 1);
- blob_write_bytes(metadata, prog->data->UniformStorage[i].storage,
+ unsigned slot =
+prog->data->UniformStorage[i].storage -
+prog->data->UniformDataSlots;
+ blob_write_bytes(metadata, >data->UniformDataSlots[slot],
+  sizeof(union gl_constant_value) * vec_size);
+ blob_write_bytes(metadata, >data->UniformDataDefaults[slot],
   sizeof(union gl_constant_value) * vec_size);
   }
}
@@ -472,6 +477,9 @@ read_uniforms(struct blob_reader *metadata, struct 
gl_shader_program *prog)
data = rzalloc_array(uniforms, union gl_constant_value,
 prog->data->NumUniformDataSlots);
prog->data->UniformDataSlots = data;
+   prog->data->UniformDataDefaults =
+  rzalloc_array(uniforms, union gl_constant_value,
+prog->data->NumUniformDataSlots);
 
prog->UniformHash = new string_to_uint_map;
 
@@ -512,8 +520,14 @@ read_uniforms(struct blob_reader *metadata, struct 
gl_shader_program *prog)
  unsigned vec_size =
 prog->data->UniformStorage[i].type->component_slots() *
 MAX2(prog->data->UniformStorage[i].array_elements, 1);
+ unsigned slot =
+prog->data->UniformStorage[i].storage -
+prog->data->UniformDataSlots;
+ blob_copy_bytes(metadata,
+ (uint8_t *) >data->UniformDataSlots[slot],
+ sizeof(union gl_constant_value) * vec_size);
  blob_copy_bytes(metadata,
- (uint8_t *) prog->data->UniformStorage[i].storage,
+ (uint8_t *) >data->UniformDataDefaults[slot],
  sizeof(union gl_constant_value) * vec_size);
 
 assert(vec_size + prog->data->UniformStorage[i].storage <=
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 2acf64eb56d..023692cc0e1 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2871,6 +2871,7 @@ struct gl_shader_program_data
/* Shader cache variables used during restore */
unsigned NumUniformDataSlots;
union gl_constant_value *UniformDataSlots;
+   union gl_constant_value *UniformDataDefaults;
 
bool cache_fallback;
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/17] docs/specs: Add GL_MESA_program_binary_formats extension spec

2017-11-08 Thread Jordan Justen
Similar idea to Tim's "spec: MESA_program_binary", but simplified and
written to support both ARB_get_program_binary and
OES_get_program_binary.

Signed-off-by: Jordan Justen 
Cc: Ian Romanick 
Cc: Timothy Arceri 
---
 docs/specs/MESA_program_binary_formats.txt | 59 ++
 docs/specs/enums.txt   |  3 ++
 src/mapi/glapi/registry/gl.xml |  2 +-
 3 files changed, 63 insertions(+), 1 deletion(-)
 create mode 100644 docs/specs/MESA_program_binary_formats.txt

diff --git a/docs/specs/MESA_program_binary_formats.txt 
b/docs/specs/MESA_program_binary_formats.txt
new file mode 100644
index 000..65bd1a1a739
--- /dev/null
+++ b/docs/specs/MESA_program_binary_formats.txt
@@ -0,0 +1,59 @@
+Name
+
+MESA_program_binary_formats
+
+Name Strings
+
+GL_MESA_program_binary_formats
+
+Contact
+
+Jordan Justen (jordan.l.justen 'at' intel.com)
+
+Status
+
+Complete.
+
+Version
+
+Last Modified Date: October 28, 2017
+Revision: #1
+
+Number
+
+TBD
+
+Dependencies
+
+For use with the OpenGL ARB_get_program_binary extension, or the
+OpenGL ES OES_get_program_binary extension.
+
+Overview
+
+The get_program_binary extensions require a GLenum binaryFormat.
+This extension documents that format for use with Mesa.
+
+New Procedures and Functions
+
+None.
+
+New Tokens
+
+GL_PROGRAM_BINARY_FORMAT_MESA   0x875F
+
+For ARB_get_program_binary, GL_PROGRAM_BINARY_FORMAT_MESA may be
+returned from GetProgramBinary calls in the <binaryFormat>
+parameter and when retrieving the value of PROGRAM_BINARY_FORMATS.
+
+For OES_get_program_binary, GL_PROGRAM_BINARY_FORMAT_MESA may be
+returned from GetProgramBinaryOES calls in the <binaryFormat>
+parameter and when retrieving the value of
+PROGRAM_BINARY_FORMATS_OES.
+
+New State
+
+None.
+
+Revision History
+
+#01    10/28/2017    Jordan Justen   First draft.
diff --git a/docs/specs/enums.txt b/docs/specs/enums.txt
index 4b0485f3490..8109cc50f97 100644
--- a/docs/specs/enums.txt
+++ b/docs/specs/enums.txt
@@ -63,6 +63,9 @@ GL_MESAX_texture_stack:
GL_TEXTURE_1D_STACK_BINDING_MESAX0x875D
GL_TEXTURE_2D_STACK_BINDING_MESAX0x875E
 
+GL_MESA_program_binary_formats:
+GL_PROGRAM_BINARY_FORMAT_MESA   0x875F
+
 GL_MESA_program_debug
GL_FRAGMENT_PROGRAM_POSITION_MESA   0x8BB0
GL_FRAGMENT_PROGRAM_CALLBACK_MESA   0x8BB1
diff --git a/src/mapi/glapi/registry/gl.xml b/src/mapi/glapi/registry/gl.xml
index cbabe11b398..7360414bb35 100644
--- a/src/mapi/glapi/registry/gl.xml
+++ b/src/mapi/glapi/registry/gl.xml
@@ -5505,7 +5505,7 @@ typedef unsigned int GLhandleARB;
 
 
 
-
+
 
 
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/17] include/GL: Add GL_MESA_program_binary_formats

2017-11-08 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 include/GL/gl.h | 8 
 1 file changed, 8 insertions(+)

diff --git a/include/GL/gl.h b/include/GL/gl.h
index 5b284802885..6ae8088f6cb 100644
--- a/include/GL/gl.h
+++ b/include/GL/gl.h
@@ -2101,6 +2101,14 @@ typedef void (APIENTRYP 
PFNGLEGLIMAGETARGETRENDERBUFFERSTORAGEOESPROC) (GLenum t
 #endif
 
 
+#ifndef GL_MESA_program_binary_formats
+#define GL_MESA_program_binary_formats 1
+
+#define GL_PROGRAM_BINARY_FORMAT_MESA  0x875F
+
+#endif /* GL_MESA_program_binary_formats */
+
+
 /**
  ** NOTE!  If you add new functions to this file, or update
  ** glext.h be sure to regenerate the gl_mangle.h file.  See comments
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 00/17] i965 ARB_get_program_binary support

2017-11-08 Thread Jordan Justen
git://people.freedesktop.org/~jljusten/mesa i965-get-program-binary-v1

This series adds i965 support for ARB_get_program_binary with greater
than 0 supported formats. Today we support this extension, but
advertise support for 0 formats. This series allows i965 to advertise
support for 1 format.

This series defines a common Mesa format for ARB_get_program_binary,
along with helper functions to read and write the format. We also
define an OpenGL Mesa spec to be used with this binary format. The
binary saved can only be reloaded on the exact same Mesa build using
the exact same hardware.

The i965 implementation saves out a serialized nir representation of the
program. Later we can add support for saving the gen binary program as
well. (We will still need the nir program for state based recompiles.)

This implementation passes piglit, deqp and glcts functions. It also
works with Dota 2, which appears to make use of the extension.

Jordan Justen (17):
  docs/specs: Add GL_MESA_program_binary_formats extension spec
  include/GL: Add GL_MESA_program_binary_formats
  compiler: Fold shader_cache in with libglsl sources
  glsl: Split out shader program serialization
  main, glsl: Add UniformDataDefaults which stores uniform defaults
  i965: Add brw_program_deserialize_nir
  i965: Free serialized nir after deserializing
  i965: Add brw_program_serialize_nir
  i965: Fix memory leak when serializing nir
  main: Allow non-zero NUM_PROGRAM_BINARY_FORMATS
  main: Support 1 Mesa format with get for GL_PROGRAM_BINARY_FORMATS
  main: Support calling driver for GL_PROGRAM_BINARY_LENGTH
  main: Support calling driver for GetProgramBinary
  main: Support calling driver for ProgramBinary
  main: Clear shader program data whenever ProgramBinary is called
  util: Add Mesa ARB_get_program_binary helper functions
  i965: Add ARB_get_program_binary support using nir_serialization

 docs/specs/MESA_program_binary_formats.txt  |   59 ++
 docs/specs/enums.txt|3 +
 include/GL/gl.h |8 +
 src/compiler/Android.glsl.mk|3 +-
 src/compiler/Makefile.glsl.am   |3 +-
 src/compiler/Makefile.sources   |8 +-
 src/compiler/glsl/link_uniform_initializers.cpp |2 +
 src/compiler/glsl/link_uniforms.cpp |3 +
 src/compiler/glsl/meson.build   |2 +
 src/compiler/glsl/serialize.cpp | 1252 +++
 src/compiler/glsl/serialize.h   |   50 +
 src/compiler/glsl/shader_cache.cpp  | 1185 +
 src/compiler/shader_info.h  |1 +
 src/mapi/glapi/registry/gl.xml  |2 +-
 src/mesa/drivers/dri/i965/Makefile.sources  |1 +
 src/mesa/drivers/dri/i965/brw_context.c |9 +
 src/mesa/drivers/dri/i965/brw_context.h |   16 +
 src/mesa/drivers/dri/i965/brw_disk_cache.c  |   31 +-
 src/mesa/drivers/dri/i965/brw_link.cpp  |9 +-
 src/mesa/drivers/dri/i965/brw_program.c |   35 +
 src/mesa/drivers/dri/i965/brw_program.h |7 +
 src/mesa/drivers/dri/i965/brw_program_binary.c  |  200 
 src/mesa/drivers/dri/i965/meson.build   |1 +
 src/mesa/main/dd.h  |   20 +
 src/mesa/main/get.c |9 +
 src/mesa/main/get_hash_params.py|4 +-
 src/mesa/main/mtypes.h  |4 +
 src/mesa/main/shaderapi.c   |   63 +-
 src/util/Makefile.sources   |2 +
 src/util/meson.build|2 +
 src/util/program_binary.c   |  322 ++
 src/util/program_binary.h   |   91 ++
 32 files changed, 2162 insertions(+), 1245 deletions(-)
 create mode 100644 docs/specs/MESA_program_binary_formats.txt
 create mode 100644 src/compiler/glsl/serialize.cpp
 create mode 100644 src/compiler/glsl/serialize.h
 create mode 100644 src/mesa/drivers/dri/i965/brw_program_binary.c
 create mode 100644 src/util/program_binary.c
 create mode 100644 src/util/program_binary.h

-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: replace GLenum with GLenum16 in common structures

2017-11-08 Thread Brian Paul

On 11/08/2017 08:16 PM, Marek Olšák wrote:

From: Marek Olšák 

For lower CPU cache usage. All enums fit within 2 bytes.

gl_context = 152400 -> 136824 bytes


Wow.


vbo_context = 22696 -> 21520 bytes
---
  src/mesa/drivers/dri/nouveau/nv04_state_frag.c |   4 +-
  src/mesa/drivers/dri/nouveau/nv10_state_frag.c |   4 +-
  src/mesa/main/glheader.h   |   1 +
  src/mesa/main/mtypes.h | 210 -
  src/mesa/vbo/vbo_exec.h|   2 +-
  src/mesa/vbo/vbo_save.h|   4 +-
  src/mesa/vbo/vbo_save_draw.c   |   2 +-
  7 files changed, 114 insertions(+), 113 deletions(-)

diff --git a/src/mesa/drivers/dri/nouveau/nv04_state_frag.c 
b/src/mesa/drivers/dri/nouveau/nv04_state_frag.c
index 248a7d2..bfe8eae 100644
--- a/src/mesa/drivers/dri/nouveau/nv04_state_frag.c
+++ b/src/mesa/drivers/dri/nouveau/nv04_state_frag.c
@@ -42,22 +42,22 @@
NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ALPHA0

  struct combiner_state {
struct gl_context *ctx;
int unit;
GLboolean alpha;
GLboolean premodulate;

/* GL state */
GLenum mode;
-   GLenum *source;
-   GLenum *operand;
+   GLenum16 *source;
+   GLenum16 *operand;
GLuint logscale;

/* Derived HW state */
uint32_t hw;
  };

  #define __INIT_COMBINER_ALPHA_A GL_TRUE
  #define __INIT_COMBINER_ALPHA_RGB GL_FALSE

  /* Initialize a combiner_state struct from the texture unit
diff --git a/src/mesa/drivers/dri/nouveau/nv10_state_frag.c 
b/src/mesa/drivers/dri/nouveau/nv10_state_frag.c
index c6e4bb0..42dff08 100644
--- a/src/mesa/drivers/dri/nouveau/nv10_state_frag.c
+++ b/src/mesa/drivers/dri/nouveau/nv10_state_frag.c
@@ -60,22 +60,22 @@
  /* spare0_i = A_i * B_i + C_i * D_i */
  #define RC_OUT_SUMNV10_3D_RC_OUT_RGB_SUM_OUTPUT_SPARE0

  struct combiner_state {
struct gl_context *ctx;
int unit;
GLboolean premodulate;

/* GL state */
GLenum mode;
-   GLenum *source;
-   GLenum *operand;
+   GLenum16 *source;
+   GLenum16 *operand;
GLuint logscale;

/* Derived HW state */
uint64_t in;
uint32_t out;
  };

  /* Initialize a combiner_state struct from the texture unit
   * context. */
  #define INIT_COMBINER(chan, ctx, rc, i) do {  \
diff --git a/src/mesa/main/glheader.h b/src/mesa/main/glheader.h
index 3f2a923..3729604 100644
--- a/src/mesa/main/glheader.h
+++ b/src/mesa/main/glheader.h
@@ -36,20 +36,21 @@
  #define GL_GLEXT_PROTOTYPES
  #include "GL/gl.h"
  #include "GL/glext.h"


  #ifdef __cplusplus
  extern "C" {
  #endif


+typedef unsigned short GLenum16; /* custom Mesa type to save space */
  typedef int GLclampx;


  #ifndef GL_OES_EGL_image
  typedef void *GLeglImageOES;
  #endif


  #ifndef GL_OES_EGL_image_external
  #define GL_TEXTURE_EXTERNAL_OES 0x8D65
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index fd5306c..078cf20 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -375,21 +375,21 @@ struct gl_light


  /**
   * Light model state.
   */
  struct gl_lightmodel
  {
 GLfloat Ambient[4];/**< ambient color */
 GLboolean LocalViewer; /**< Local (or infinite) view point? */
 GLboolean TwoSide; /**< Two (or one) sided lighting? */
-   GLenum ColorControl;/**< either GL_SINGLE_COLOR
+   GLenum16 ColorControl;  /**< either GL_SINGLE_COLOR
 *or GL_SEPARATE_SPECULAR_COLOR */
  };


  /**
   * Accumulation buffer attribute group (GL_ACCUM_BUFFER_BIT)
   */
  struct gl_accum_attrib
  {
 GLfloat ClearColor[4]; /**< Accumulation buffer clear color */
@@ -411,53 +411,53 @@ union gl_color_union
  /**
   * Color buffer attribute group (GL_COLOR_BUFFER_BIT).
   */
  struct gl_colorbuffer_attrib
  {
 GLuint ClearIndex;  /**< Index for glClear */
 union gl_color_union ClearColor;/**< Color for glClear, unclamped 
*/
 GLuint IndexMask;   /**< Color index write mask */
 GLubyte ColorMask[MAX_DRAW_BUFFERS][4]; /**< Each flag is 0xff or 0x0 */

-   GLenum DrawBuffer[MAX_DRAW_BUFFERS];/**< Which buffer to draw into 
*/
+   GLenum DrawBuffer[MAX_DRAW_BUFFERS];/**< Which buffer to draw into */


GLenum16 ?




 /**
  * \name alpha testing
  */
 /*@{*/
 GLboolean AlphaEnabled;/**< Alpha test enabled flag */
-   GLenum AlphaFunc;   /**< Alpha test function */
+   GLenum16 AlphaFunc; /**< Alpha test function */
 GLfloat AlphaRefUnclamped;
 GLclampf AlphaRef; /**< Alpha reference value */
 /*@}*/

 /**
  * \name Blending
  */
 /*@{*/
 GLbitfield BlendEnabled;   /**< Per-buffer blend enable flags */

 /* NOTE: this 

[Mesa-dev] [PATCH] r600/query: drop rest of vi workaround code.

2017-11-08 Thread Dave Airlie
From: Dave Airlie 

This isn't needed in r600 anymore.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/r600_query.c | 46 ++-
 src/gallium/drivers/r600/r600_query.h |  4 ---
 2 files changed, 13 insertions(+), 37 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_query.c 
b/src/gallium/drivers/r600/r600_query.c
index aa3e36f..0003e2b 100644
--- a/src/gallium/drivers/r600/r600_query.c
+++ b/src/gallium/drivers/r600/r600_query.c
@@ -506,7 +506,6 @@ void r600_query_hw_destroy(struct r600_common_screen 
*rscreen,
}
 
r600_resource_reference(>buffer.buf, NULL);
-   r600_resource_reference(>workaround_buf, NULL);
FREE(rquery);
 }
 
@@ -932,23 +931,19 @@ static void r600_emit_query_predication(struct 
r600_common_context *ctx,
flag_wait = ctx->render_cond_mode == PIPE_RENDER_COND_WAIT ||
ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT;
 
-   if (query->workaround_buf) {
-   op = PRED_OP(PREDICATION_OP_BOOL64);
-   } else {
-   switch (query->b.type) {
-   case PIPE_QUERY_OCCLUSION_COUNTER:
-   case PIPE_QUERY_OCCLUSION_PREDICATE:
-   op = PRED_OP(PREDICATION_OP_ZPASS);
-   break;
-   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
-   op = PRED_OP(PREDICATION_OP_PRIMCOUNT);
-   invert = !invert;
-   break;
-   default:
-   assert(0);
-   return;
-   }
+   switch (query->b.type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+   op = PRED_OP(PREDICATION_OP_ZPASS);
+   break;
+   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+   op = PRED_OP(PREDICATION_OP_PRIMCOUNT);
+   invert = !invert;
+   break;
+   default:
+   assert(0);
+   return;
}
 
/* if true then invert, see GL_ARB_conditional_render_inverted */
@@ -957,19 +952,6 @@ static void r600_emit_query_predication(struct 
r600_common_context *ctx,
else
op |= PREDICATION_DRAW_VISIBLE; /* Draw if visible or no 
overflow */
 
-   /* Use the value written by compute shader as a workaround. Note that
-* the wait flag does not apply in this predication mode.
-*
-* The shader outputs the result value to L2. Workarounds only affect VI
-* and later, where the CP reads data from L2, so we don't need an
-* additional flush.
-*/
-   if (query->workaround_buf) {
-   uint64_t va = query->workaround_buf->gpu_address + 
query->workaround_offset;
-   emit_set_predicate(ctx, query->workaround_buf, va, op);
-   return;
-   }
-
op |= flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW;
 
/* emit predicate packets for all data blocks */
@@ -1067,8 +1049,6 @@ bool r600_query_hw_begin(struct r600_common_context *rctx,
if (!(query->flags & R600_QUERY_HW_FLAG_BEGIN_RESUMES))
r600_query_hw_reset_buffers(rctx, query);
 
-   r600_resource_reference(>workaround_buf, NULL);
-
r600_query_hw_emit_start(rctx, query);
if (!query->buffer.buf)
return false;
diff --git a/src/gallium/drivers/r600/r600_query.h 
b/src/gallium/drivers/r600/r600_query.h
index 1a3c683..acba1a6 100644
--- a/src/gallium/drivers/r600/r600_query.h
+++ b/src/gallium/drivers/r600/r600_query.h
@@ -193,10 +193,6 @@ struct r600_query_hw {
struct list_head list;
/* For transform feedback: which stream the query is for */
unsigned stream;
-
-   /* Workaround via compute shader */
-   struct r600_resource *workaround_buf;
-   unsigned workaround_offset;
 };
 
 bool r600_query_hw_init(struct r600_common_screen *rscreen,
-- 
2.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4] i965 : optimized bucket index calculation

2017-11-08 Thread aravindan . muthukumar
From: Aravindan Muthukumar 

Reducing Bucket index calculation to O(1).

This algorithm calculates the index using matrix method.
Matrix arrangement is as below:
Assuming PAGE_SIZE is 4096.

   1*4096    2*4096    3*4096    4*4096
   5*4096    6*4096    7*4096    8*4096
  10*4096   12*4096   14*4096   16*4096
  20*4096   24*4096   28*4096   32*4096
     ...       ...       ...       ...
     ...       ...       ...       ...
     ...       ...       ...   max_cache_size

From this matrix it is clearly seen that every row
follows the pattern below, where n is the last (largest)
size of the previous row:
     ...        ...        ...         n
  n+(1/4)n   n+(1/2)n   n+(3/4)n      2n

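The row is derived from log2(size/PAGE_SIZE), computed with a
count-leading-zeros builtin.
The column is calculated by taking the difference between the
requested size and the previous row's maximum, dividing it by the
row's column size (a power of two, so the division is a shift),
and rounding up.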

Final Index is (row*4)+(col-1)

Tested with Intel Mesa CI.

Improves performance of 3DMark on BXT by 0.705966% +/- 0.229767% (n=20)

v4: Review comments on style and code comments implemented (Ian).
v3: Review comments implemented (Ian).
v2: Review comments implemented (Jason).

Signed-off-by: Aravindan Muthukumar 
Signed-off-by: Kedar Karanje 
Reviewed-by: Yogesh Marathe 
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 47 --
 1 file changed, 39 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 17036b5..f21df5a 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -86,6 +86,8 @@
 
 #define memclear(s) memset(&s, 0, sizeof(s))
 
+#define PAGE_SIZE 4096
+
 #define FILE_DEBUG_FLAG DEBUG_BUFMGR
 
 static inline int
@@ -180,19 +182,44 @@ bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t pitch, 
uint32_t tiling)
return ALIGN(pitch, tile_width);
 }
 
+/**
+ * This function finds the correct bucket fit for the input size.
+ * The bucket index is computed directly from the requested size in
+ * O(1), instead of iterating through all the buckets.
+ */
 static struct bo_cache_bucket *
 bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)
 {
-   int i;
+   /* Calculating the pages and rounding up to the page size. */
+   const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+
+   /* Row  Bucket sizes    clz((x-1) | 3)   Row    Column
+    *        in pages                      stride   size
+    *   0:   1  2  3  4 -> 30 30 30 30        4       1
+    *   1:   5  6  7  8 -> 29 29 29 29        4       1
+    *   2:  10 12 14 16 -> 28 28 28 28        8       2
+    *   3:  20 24 28 32 -> 27 27 27 27       16       4
+    */
+   const unsigned row = 30 - __builtin_clz((pages - 1) | 3);
+   const unsigned row_max_pages = 4 << row;
+   
+   /* The '& ~2' is the special case for row 0 (the first row). In row 0,
+    * max pages / 2 is 2, but the previous row maximum is zero (because
+    * there is no previous row). All row maximum sizes are power of 2, so
+    * that is the only case where that bit will be set.
+    */
+   const unsigned prev_row_max_pages = (row_max_pages / 2) & ~2;
+   int col_size_log2 = row - 1;
+   col_size_log2 += (col_size_log2 < 0);
 
-   for (i = 0; i < bufmgr->num_buckets; i++) {
-      struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];
-  if (bucket->size >= size) {
- return bucket;
-  }
-   }
+   const unsigned col = (pages - prev_row_max_pages +
+((1 << col_size_log2) - 1)) >> col_size_log2;
 
-   return NULL;
+   /* Calculating the index based on the row and column. */
+   const unsigned index = (row * 4) + (col - 1);
+
+   return (index < bufmgr->num_buckets) ?
+  &bufmgr->cache_bucket[index] : NULL;
 }
 
 int
@@ -1254,6 +1281,10 @@ add_bucket(struct brw_bufmgr *bufmgr, int size)
list_inithead(>cache_bucket[i].head);
bufmgr->cache_bucket[i].size = size;
bufmgr->num_buckets++;
+
+   assert(bucket_for_size(bufmgr, size) == &bufmgr->cache_bucket[i]);
+   assert(bucket_for_size(bufmgr, size - 2048) == &bufmgr->cache_bucket[i]);
+   assert(bucket_for_size(bufmgr, size + 1) != &bufmgr->cache_bucket[i]);
 }
 
 static void
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/9] st/mesa: add support for hw atomics to glsl->tgsi. (v4)

2017-11-08 Thread Dave Airlie
On 9 November 2017 at 11:54, Dave Airlie  wrote:
> From: Dave Airlie 
>
> This adds support for creating the hw atomic tgsi from
> the glsl codepaths.
>
> v2: drop the atomic index and move to backend.
> v3: drop buffer decls. (Marek)
> v4: fix off by one (Gert)

Found a bug in my fix for this one,
https://cgit.freedesktop.org/~airlied/mesa/log/?h=r600-atomic-submit
has the correct version,

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3] i965 : optimized bucket index calculation

2017-11-08 Thread Muthukumar, Aravindan
> On 11/06/2017 08:30 PM, aravindan.muthuku...@intel.com wrote:
> > From: Aravindan Muthukumar 
> >
> > Now the complexity has been reduced to O(1)
> >
> > Algorithm calculates the index using matrix method.
> > Matrix arrangement is as below:
> > Assuming PAGE_SIZE is 4096.
> >
> >   1*4096   2*40963*40964*4096
> >   5*4096   6*40967*40968*4096
> >   10*4096  12*4096   14*4096   16*4096
> >   20*4096  24*4096   28*4096   32*4096
> >...  ...   ...   ...
> >...  ...   ...   ...
> >...  ...   ...   max_cache_size
> >
> > > From this matrix it's clearly seen that every row follows the
> > > pattern below:
> > >      ...        ...        ...         n
> > >   n+(1/4)n   n+(1/2)n   n+(3/4)n      2n
> >
> > Row is calculated as log2(size/PAGE_SIZE) Column is calculated as
> > converting the difference between the elements to fit into power size
> > of two and indexing it.
> >
> > Final Index is (row*4)+(col-1)
> >
> > Tested with Intel Mesa CI.
> >
> > Improves performance of 3DMark on BXT by 0.705966% +/- 0.229767%
> > (n=20)
> >
> > v3: review comments implemented (Ian).
> > v2: review comments implemented (Jason).
> >
> > Signed-off-by: Aravindan Muthukumar 
> > Signed-off-by: Kedar Karanje 
> > Reviewed-by: Yogesh Marathe 
> > ---
> >  src/mesa/drivers/dri/i965/brw_bufmgr.c | 38
> > +++---
> >  1 file changed, 30 insertions(+), 8 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> > b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> > index 17036b5..9a423da 100644
> > --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> > +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> > @@ -86,6 +86,8 @@
> >
> >  #define memclear(s) memset(, 0, sizeof(s))
> >
> > +#define PAGE_SIZE 4096
> > +
> >  #define FILE_DEBUG_FLAG DEBUG_BUFMGR
> >
> >  static inline int
> > @@ -180,19 +182,35 @@ bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t
> pitch, uint32_t tiling)
> > return ALIGN(pitch, tile_width);
> >  }
> >
> > +/*
> > + * This function finds the correct bucket fit for the input size.
> > + * The function works with O(1) complexity when the requested size
> > + * was queried instead of iterating the size through all the buckets.
> > + */
> >  static struct bo_cache_bucket *
> >  bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)  {
> > -   int i;
> > +   /* Calculating the pages and rounding up to the page size. */
> > +   const unsigned int pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
> >
> > -   for (i = 0; i < bufmgr->num_buckets; i++) {
> > -  struct bo_cache_bucket *bucket = >cache_bucket[i];
> > -  if (bucket->size >= size) {
> > - return bucket;
> > -  }
> > -   }
> > +   /* Finding the row number based on the calculated pages. */
> > +   const unsigned int rows = 30 - __builtin_clz((pages - 1) | 3);
> >
> > -   return NULL;
> 
> Why did you make random (and incorrect) style changes and delete
> (useful) comments from the code I sent?
>

> > > > Thanks Ian. I added comments based on my understanding, and I get the
> > > > point. I'll push v4 with your comments.
 
> > +   const unsigned int row_max_pages = 4 << rows;
> > +   const unsigned int prev_row_max_pages = (row_max_pages / 2) & ~2;
> > +
> > +   /* Finding the column number using column interval. */
> > +   int col_size_log2 = rows - 1;
> > +   col_size_log2 += (col_size_log2 < 0);
> > +
> > +   const unsigned int col = ( (pages - prev_row_max_pages +
> > +( (1 << col_size_log2) - 1) ) >>
> > + col_size_log2 );
> > +
> > +   /* Calculating the index based on the row and column. */
> > +   const unsigned int index = (rows * 4) + (col - 1);
> > +
> > +   return (index < bufmgr->num_buckets) ?
> > +  >cache_bucket[index] : NULL;
> >  }
> >
> >  int
> > @@ -1254,6 +1272,10 @@ add_bucket(struct brw_bufmgr *bufmgr, int size)
> > list_inithead(>cache_bucket[i].head);
> > bufmgr->cache_bucket[i].size = size;
> > bufmgr->num_buckets++;
> > +
> > +   assert(bucket_for_size(bufmgr, size) == >cache_bucket[i]);
> > +   assert(bucket_for_size(bufmgr, size - 2048) == 
> > >cache_bucket[i]);
> > +   assert(bucket_for_size(bufmgr, size + 1) !=
> > + >cache_bucket[i]);
> >  }
> >
> >  static void
> >

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/4] glsl: s/unsigned/glsl_base_type/ in glsl type code (v2)

2017-11-08 Thread Brian Paul

On 11/08/2017 08:12 PM, Brian Paul wrote:

On 11/08/2017 06:28 PM, Ian Romanick wrote:

Any thoughts about my data using __attribute__((__packed__))?


Sorry, I didn't have time to dig into it.  I took a look this evening.

I think the ENUM_8BIT idea will work for GCC and MSVC but only for C++
sources.  MSVC doesn't like the sized enum syntax in C compilation units
(unless there's some compiler flag I haven't found yet).  So, we could
use it in the GLSL compiler, but not over in the gallium headers.

Does that matter to you?

Could I address this issue in a follow-on after the current series?



FWIW: here's what it would basically look like:

#ifndef __cplusplus
#error This only works with C++
#endif

#if defined(_MSC_VER)
#define ENUM_8BIT(NAME) enum NAME : unsigned char
#elif defined(__GNUC__)
#define ENUM_8BIT(NAME) enum __attribute__((__packed__)) NAME
#else
#define ENUM_8BIT(NAME) enum NAME
#endif

ENUM_8BIT(glsl_base_type) {
   GLSL_TYPE_UINT = 0,
   GLSL_TYPE_INT,
[...]
};


-Brian




-Brian



On 11/07/2017 04:07 PM, Brian Paul wrote:

Declare glsl_type::sampled_type as glsl_base_type as we do for the
base_type field.  And make base_type a bitfield to save a few bytes.

Update glsl_type constructor to take glsl_base_type instead of unsigned
and pass GLSL_TYPE_VOID instead of zero.

No Piglit regressions with llvmpipe.

v2:
- Declare both base_type and sampled_type as 8-bit fields
- Use the new ASSERT_BITFIELD_SIZE() macro.
---
  src/compiler/glsl_types.cpp | 30 +++---
  src/compiler/glsl_types.h   | 28 +---
  2 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp
index 704b63c..107a81f 100644
--- a/src/compiler/glsl_types.cpp
+++ b/src/compiler/glsl_types.cpp
@@ -50,9 +50,9 @@ glsl_type::glsl_type(GLenum gl_type,
   glsl_base_type base_type, unsigned
vector_elements,
   unsigned matrix_columns, const char *name) :
 gl_type(gl_type),
-   base_type(base_type),
+   base_type(base_type), sampled_type(GLSL_TYPE_VOID),
 sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
-   sampled_type(0), interface_packing(0), interface_row_major(0),
+   interface_packing(0), interface_row_major(0),
 vector_elements(vector_elements), matrix_columns(matrix_columns),
 length(0)
  {
@@ -79,11 +79,11 @@ glsl_type::glsl_type(GLenum gl_type,

  glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type,
   enum glsl_sampler_dim dim, bool shadow, bool
array,
- unsigned type, const char *name) :
+ glsl_base_type type, const char *name) :
 gl_type(gl_type),
-   base_type(base_type),
+   base_type(base_type), sampled_type(type),
 sampler_dimensionality(dim), sampler_shadow(shadow),
-   sampler_array(array), sampled_type(type), interface_packing(0),
+   sampler_array(array), interface_packing(0),
 interface_row_major(0), length(0)
  {
 mtx_lock(_type::mem_mutex);
@@ -102,9 +102,9 @@ glsl_type::glsl_type(GLenum gl_type,
glsl_base_type base_type,
  glsl_type::glsl_type(const glsl_struct_field *fields, unsigned
num_fields,
   const char *name) :
 gl_type(0),
-   base_type(GLSL_TYPE_STRUCT),
+   base_type(GLSL_TYPE_STRUCT), sampled_type(GLSL_TYPE_VOID),
 sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
-   sampled_type(0), interface_packing(0), interface_row_major(0),
+   interface_packing(0), interface_row_major(0),
 vector_elements(0), matrix_columns(0),
 length(num_fields)
  {
@@ -131,9 +131,9 @@ glsl_type::glsl_type(const glsl_struct_field
*fields, unsigned num_fields,
   enum glsl_interface_packing packing,
   bool row_major, const char *name) :
 gl_type(0),
-   base_type(GLSL_TYPE_INTERFACE),
+   base_type(GLSL_TYPE_INTERFACE), sampled_type(GLSL_TYPE_VOID),
 sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
-   sampled_type(0), interface_packing((unsigned) packing),
+   interface_packing((unsigned) packing),
 interface_row_major((unsigned) row_major),
 vector_elements(0), matrix_columns(0),
 length(num_fields)
@@ -159,9 +159,9 @@ glsl_type::glsl_type(const glsl_struct_field
*fields, unsigned num_fields,
  glsl_type::glsl_type(const glsl_type *return_type,
   const glsl_function_param *params, unsigned
num_params) :
 gl_type(0),
-   base_type(GLSL_TYPE_FUNCTION),
+   base_type(GLSL_TYPE_FUNCTION), sampled_type(GLSL_TYPE_VOID),
 sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
-   sampled_type(0), interface_packing(0), interface_row_major(0),
+   interface_packing(0), interface_row_major(0),
 vector_elements(0), matrix_columns(0),
 length(num_params)
  {
@@ -191,9 +191,9 @@ glsl_type::glsl_type(const glsl_type *return_type,

  glsl_type::glsl_type(const char *subroutine_name) :
 gl_type(0),

[Mesa-dev] [PATCH 7/6] radeonsi: don't call r600_can_dma_copy_buffer for DISCARD_RANGE

2017-11-08 Thread Marek Olšák
From: Marek Olšák 

we don't use dma_data in this codepath.
---
 src/gallium/drivers/radeon/r600_buffer_common.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c 
b/src/gallium/drivers/radeon/r600_buffer_common.c
index cdcd37b..2e0f71a 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -426,22 +426,21 @@ static void *r600_buffer_transfer_map(struct pipe_context 
*ctx,
/* At this point, the buffer is always idle. */
usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
} else {
/* Fall back to a temporary buffer. */
usage |= PIPE_TRANSFER_DISCARD_RANGE;
}
}
 
if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
((!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
-PIPE_TRANSFER_PERSISTENT)) &&
- r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) ||
+PIPE_TRANSFER_PERSISTENT))) ||
 (rbuffer->flags & RADEON_FLAG_SPARSE))) {
assert(usage & PIPE_TRANSFER_WRITE);
 
/* Check if mapping this buffer would cause waiting for the GPU.
 */
if (rbuffer->flags & RADEON_FLAG_SPARSE ||
force_discard_range ||
si_rings_is_buffer_referenced(rctx, rbuffer->buf, 
RADEON_USAGE_READWRITE) ||
!rctx->ws->buffer_wait(rbuffer->buf, 0, 
RADEON_USAGE_READWRITE)) {
/* Do a wait-free write-only transfer using a temporary 
buffer. */
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/6] radeonsi: remove has_cp_dma, has_streamout flags

2017-11-08 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_buffer_common.c | 5 +
 src/gallium/drivers/radeon/r600_pipe_common.h   | 2 --
 src/gallium/drivers/radeonsi/si_pipe.c  | 3 ---
 3 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c 
b/src/gallium/drivers/radeon/r600_buffer_common.c
index f0cfd09..cdcd37b 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -347,24 +347,21 @@ static void *r600_buffer_get_transfer(struct pipe_context 
*ctx,
transfer->staging = staging;
*ptransfer = >b.b;
return data;
 }
 
 static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx,
 unsigned dstx, unsigned srcx, unsigned 
size)
 {
bool dword_aligned = !(dstx % 4) && !(srcx % 4) && !(size % 4);
 
-   return rctx->screen->has_cp_dma ||
-  (dword_aligned && (rctx->dma.cs ||
- rctx->screen->has_streamout));
-
+   return dword_aligned && rctx->dma.cs;
 }
 
 static void *r600_buffer_transfer_map(struct pipe_context *ctx,
   struct pipe_resource *resource,
   unsigned level,
   unsigned usage,
   const struct pipe_box *box,
   struct pipe_transfer **ptransfer)
 {
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 61560ac..2ac 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -384,22 +384,20 @@ struct r600_memory_object {
uint32_toffset;
 };
 
 struct r600_common_screen {
struct pipe_screen  b;
struct radeon_winsys*ws;
enum radeon_family  family;
enum chip_class chip_class;
struct radeon_info  info;
uint64_tdebug_flags;
-   boolhas_cp_dma;
-   boolhas_streamout;
boolhas_rbplus; /* if RB+ registers 
exist */
boolrbplus_allowed; /* if RB+ is allowed */
 
struct disk_cache   *disk_shader_cache;
 
struct slab_parent_pool pool_transfers;
 
/* Texture filter settings. */
int force_aniso; /* -1 = disabled */
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index e84de36..55760fa 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -1100,23 +1100,20 @@ struct pipe_screen *radeonsi_screen_create(struct 
radeon_winsys *ws,
sscreen->dfsm_allowed = sscreen->dpbb_allowed &&
!(sscreen->b.debug_flags & 
DBG(NO_DFSM));
}
 
/* While it would be nice not to have this flag, we are constrained
 * by the reality that LLVM 5.0 doesn't have working VGPR indexing
 * on GFX9.
 */
sscreen->llvm_has_working_vgpr_indexing = sscreen->b.chip_class <= VI;
 
-   sscreen->b.has_cp_dma = true;
-   sscreen->b.has_streamout = true;
-
/* Some chips have RB+ registers, but don't support RB+. Those must
 * always disable it.
 */
if (sscreen->b.family == CHIP_STONEY ||
sscreen->b.chip_class >= GFX9) {
sscreen->b.has_rbplus = true;
 
sscreen->b.rbplus_allowed =
!(sscreen->b.debug_flags & DBG(NO_RB_PLUS)) &&
(sscreen->b.family == CHIP_STONEY ||
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/6] radeonsi: pack r600_surface better

2017-11-08 Thread Marek Olšák
From: Marek Olšák 

160 -> 136 bytes
---
 src/gallium/drivers/radeon/r600_pipe_common.h | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 6b0a743..61560ac 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -293,41 +293,41 @@ struct r600_texture {
 */
unsignedps_draw_ratio;
/* The number of clears since the last DCC usage analysis. */
unsignednum_slow_clears;
 };
 
 struct r600_surface {
struct pipe_surface base;
 
/* These can vary with block-compressed textures. */
-   unsigned width0;
-   unsigned height0;
+   uint16_t width0;
+   uint16_t height0;
 
-   bool color_initialized;
-   bool depth_initialized;
+   bool color_initialized:1;
+   bool depth_initialized:1;
 
/* Misc. color flags. */
-   bool color_is_int8;
-   bool color_is_int10;
-   bool dcc_incompatible;
+   bool color_is_int8:1;
+   bool color_is_int10:1;
+   bool dcc_incompatible:1;
 
/* Color registers. */
unsigned cb_color_info;
unsigned cb_color_view;
unsigned cb_color_attrib;
unsigned cb_color_attrib2;  /* GFX9 and later */
unsigned cb_dcc_control;/* VI and later */
-   unsigned spi_shader_col_format; /* no blending, no 
alpha-to-coverage. */
-   unsigned spi_shader_col_format_alpha;   /* alpha-to-coverage */
-   unsigned spi_shader_col_format_blend;   /* blending without alpha. */
-   unsigned spi_shader_col_format_blend_alpha; /* blending with alpha. */
+   unsigned spi_shader_col_format:8;   /* no blending, no 
alpha-to-coverage. */
+   unsigned spi_shader_col_format_alpha:8; /* alpha-to-coverage */
+   unsigned spi_shader_col_format_blend:8; /* blending without alpha. */
+   unsigned spi_shader_col_format_blend_alpha:8; /* blending with alpha. */
 
/* DB registers. */
uint64_t db_depth_base; /* DB_Z_READ/WRITE_BASE */
uint64_t db_stencil_base;
uint64_t db_htile_data_base;
unsigned db_depth_info;
unsigned db_z_info;
unsigned db_z_info2;/* GFX9+ */
unsigned db_depth_view;
unsigned db_depth_size;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/6] radeonsi: pack r600_texture better

2017-11-08 Thread Marek Olšák
From: Marek Olšák 

1752 -> 1736 bytes
---
 src/gallium/drivers/radeon/r600_pipe_common.h | 53 +--
 1 file changed, 26 insertions(+), 27 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 43b11262..6b0a743 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -226,82 +226,81 @@ struct r600_cmask_info {
uint64_t offset;
uint64_t size;
unsigned alignment;
unsigned slice_tile_max;
uint64_t base_address_reg;
 };
 
 struct r600_texture {
struct r600_resourceresource;
 
+   struct radeon_surf  surface;
uint64_tsize;
-   unsignednum_level0_transfers;
-   enum pipe_formatdb_render_format;
-   boolis_depth;
-   booldb_compatible;
-   boolcan_sample_z;
-   boolcan_sample_s;
-   unsigneddirty_level_mask; /* each bit says if 
that mipmap is compressed */
-   unsignedstencil_dirty_level_mask; /* each bit 
says if that mipmap is compressed */
struct r600_texture *flushed_depth_texture;
-   struct radeon_surf  surface;
 
/* Colorbuffer compression and fast clear. */
struct r600_fmask_info  fmask;
struct r600_cmask_info  cmask;
struct r600_resource*cmask_buffer;
uint64_tdcc_offset; /* 0 = disabled */
unsignedcb_color_info; /* fast clear enable bit 
*/
unsignedcolor_clear_value[2];
unsignedlast_msaa_resolve_target_micro_mode;
+   unsignednum_level0_transfers;
 
/* Depth buffer compression and fast clear. */
uint64_thtile_offset;
-   booltc_compatible_htile;
-   booldepth_cleared; /* if it was cleared at 
least once */
float   depth_clear_value;
-   boolstencil_cleared; /* if it was cleared 
at least once */
+   uint16_tdirty_level_mask; /* each bit says if 
that mipmap is compressed */
+   uint16_tstencil_dirty_level_mask; /* each bit 
says if that mipmap is compressed */
+   enum pipe_formatdb_render_format:16;
uint8_t stencil_clear_value;
-   boolupgraded_depth; /* upgraded from unorm 
to Z32_FLOAT */
+   booltc_compatible_htile:1;
+   booldepth_cleared:1; /* if it was cleared 
at least once */
+   boolstencil_cleared:1; /* if it was cleared 
at least once */
+   boolupgraded_depth:1; /* upgraded from 
unorm to Z32_FLOAT */
+   boolis_depth:1;
+   booldb_compatible:1;
+   boolcan_sample_z:1;
+   boolcan_sample_s:1;
 
+   /* We need to track DCC dirtiness, because st/dri usually calls
+* flush_resource twice per frame (not a bug) and we don't wanna
+* decompress DCC twice. Also, the dirty tracking must be done even
+* if DCC isn't used, because it's required by the DCC usage analysis
+* for a possible future enablement.
+*/
+   boolseparate_dcc_dirty:1;
+   /* Statistics gathering for the DCC enablement heuristic. */
+   booldcc_gather_statistics:1;
+   /* Counter that should be non-zero if the texture is bound to a
+* framebuffer. Implemented in radeonsi only.
+*/
+   unsignedframebuffers_bound;
/* Whether the texture is a displayable back buffer and needs DCC
 * decompression, which is expensive. Therefore, it's enabled only
 * if statistics suggest that it will pay off and it's allocated
 * separately. It can't be bound as a sampler by apps. Limited to
 * target == 2D and last_level == 0. If enabled, dcc_offset contains
 * the absolute GPUVM address, not the relative one.
 */
struct r600_resource*dcc_separate_buffer;
/* When DCC is temporarily disabled, the separate buffer is here. */
struct r600_resource*last_dcc_separate_buffer;
-   /* We need to track DCC dirtiness, because st/dri usually calls
-* flush_resource twice per frame (not 

[Mesa-dev] [PATCH] mesa: replace GLenum with GLenum16 in common structures

2017-11-08 Thread Marek Olšák
From: Marek Olšák 

For lower CPU cache usage. All enums fit within 2 bytes.

gl_context = 152400 -> 136824 bytes
vbo_context = 22696 -> 21520 bytes
---
 src/mesa/drivers/dri/nouveau/nv04_state_frag.c |   4 +-
 src/mesa/drivers/dri/nouveau/nv10_state_frag.c |   4 +-
 src/mesa/main/glheader.h   |   1 +
 src/mesa/main/mtypes.h | 210 -
 src/mesa/vbo/vbo_exec.h|   2 +-
 src/mesa/vbo/vbo_save.h|   4 +-
 src/mesa/vbo/vbo_save_draw.c   |   2 +-
 7 files changed, 114 insertions(+), 113 deletions(-)

diff --git a/src/mesa/drivers/dri/nouveau/nv04_state_frag.c 
b/src/mesa/drivers/dri/nouveau/nv04_state_frag.c
index 248a7d2..bfe8eae 100644
--- a/src/mesa/drivers/dri/nouveau/nv04_state_frag.c
+++ b/src/mesa/drivers/dri/nouveau/nv04_state_frag.c
@@ -42,22 +42,22 @@
NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ALPHA0
 
 struct combiner_state {
struct gl_context *ctx;
int unit;
GLboolean alpha;
GLboolean premodulate;
 
/* GL state */
GLenum mode;
-   GLenum *source;
-   GLenum *operand;
+   GLenum16 *source;
+   GLenum16 *operand;
GLuint logscale;
 
/* Derived HW state */
uint32_t hw;
 };
 
 #define __INIT_COMBINER_ALPHA_A GL_TRUE
 #define __INIT_COMBINER_ALPHA_RGB GL_FALSE
 
 /* Initialize a combiner_state struct from the texture unit
diff --git a/src/mesa/drivers/dri/nouveau/nv10_state_frag.c 
b/src/mesa/drivers/dri/nouveau/nv10_state_frag.c
index c6e4bb0..42dff08 100644
--- a/src/mesa/drivers/dri/nouveau/nv10_state_frag.c
+++ b/src/mesa/drivers/dri/nouveau/nv10_state_frag.c
@@ -60,22 +60,22 @@
 /* spare0_i = A_i * B_i + C_i * D_i */
 #define RC_OUT_SUM NV10_3D_RC_OUT_RGB_SUM_OUTPUT_SPARE0
 
 struct combiner_state {
struct gl_context *ctx;
int unit;
GLboolean premodulate;
 
/* GL state */
GLenum mode;
-   GLenum *source;
-   GLenum *operand;
+   GLenum16 *source;
+   GLenum16 *operand;
GLuint logscale;
 
/* Derived HW state */
uint64_t in;
uint32_t out;
 };
 
 /* Initialize a combiner_state struct from the texture unit
  * context. */
 #define INIT_COMBINER(chan, ctx, rc, i) do {   \
diff --git a/src/mesa/main/glheader.h b/src/mesa/main/glheader.h
index 3f2a923..3729604 100644
--- a/src/mesa/main/glheader.h
+++ b/src/mesa/main/glheader.h
@@ -36,20 +36,21 @@
 #define GL_GLEXT_PROTOTYPES
 #include "GL/gl.h"
 #include "GL/glext.h"
 
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 
+typedef unsigned short GLenum16; /* custom Mesa type to save space */
 typedef int GLclampx;
 
 
 #ifndef GL_OES_EGL_image
 typedef void *GLeglImageOES;
 #endif
 
 
 #ifndef GL_OES_EGL_image_external
 #define GL_TEXTURE_EXTERNAL_OES 0x8D65
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index fd5306c..078cf20 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -375,21 +375,21 @@ struct gl_light
 
 
 /**
  * Light model state.
  */
 struct gl_lightmodel
 {
GLfloat Ambient[4]; /**< ambient color */
GLboolean LocalViewer;  /**< Local (or infinite) view point? */
GLboolean TwoSide;  /**< Two (or one) sided lighting? */
-   GLenum ColorControl;/**< either GL_SINGLE_COLOR
+   GLenum16 ColorControl;  /**< either GL_SINGLE_COLOR
 *or GL_SEPARATE_SPECULAR_COLOR */
 };
 
 
 /**
  * Accumulation buffer attribute group (GL_ACCUM_BUFFER_BIT)
  */
 struct gl_accum_attrib
 {
GLfloat ClearColor[4];  /**< Accumulation buffer clear color */
@@ -411,53 +411,53 @@ union gl_color_union
 /**
  * Color buffer attribute group (GL_COLOR_BUFFER_BIT).
  */
 struct gl_colorbuffer_attrib
 {
GLuint ClearIndex;  /**< Index for glClear */
union gl_color_union ClearColor;/**< Color for glClear, unclamped */
GLuint IndexMask;   /**< Color index write mask */
GLubyte ColorMask[MAX_DRAW_BUFFERS][4]; /**< Each flag is 0xff or 0x0 */
 
-   GLenum DrawBuffer[MAX_DRAW_BUFFERS];/**< Which buffer to draw into 
*/
+   GLenum DrawBuffer[MAX_DRAW_BUFFERS];/**< Which buffer to draw into */
 
/** 
 * \name alpha testing
 */
/*@{*/
GLboolean AlphaEnabled; /**< Alpha test enabled flag */
-   GLenum AlphaFunc;   /**< Alpha test function */
+   GLenum16 AlphaFunc; /**< Alpha test function */
GLfloat AlphaRefUnclamped;
GLclampf AlphaRef;  /**< Alpha reference value */
/*@}*/
 
/** 
 * \name Blending
 */
/*@{*/
GLbitfield BlendEnabled;/**< Per-buffer blend enable flags */
 
/* NOTE: this does _not_ depend on fragment clamping or any other clamping
 * control, only on the fixed-pointness of the 

[Mesa-dev] [PATCH 3/6] radeonsi: clean up r600_surface

2017-11-08 Thread Marek Olšák
From: Marek Olšák 

216 -> 160 bytes
---
 src/gallium/drivers/radeon/r600_pipe_common.h | 37 ---
 src/gallium/drivers/radeon/r600_texture.c |  3 ---
 2 files changed, 11 insertions(+), 29 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 48501a1..43b11262 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -301,63 +301,48 @@ struct r600_surface {
struct pipe_surface base;
 
/* These can vary with block-compressed textures. */
unsigned width0;
unsigned height0;
 
bool color_initialized;
bool depth_initialized;
 
/* Misc. color flags. */
-   bool alphatest_bypass;
-   bool export_16bpc;
bool color_is_int8;
bool color_is_int10;
bool dcc_incompatible;
 
/* Color registers. */
unsigned cb_color_info;
-   unsigned cb_color_base;
unsigned cb_color_view;
-   unsigned cb_color_size; /* R600 only */
-   unsigned cb_color_dim;  /* EG only */
-   unsigned cb_color_pitch;/* EG and later */
-   unsigned cb_color_slice;/* EG and later */
-   unsigned cb_color_attrib;   /* EG and later */
+   unsigned cb_color_attrib;
unsigned cb_color_attrib2;  /* GFX9 and later */
unsigned cb_dcc_control;/* VI and later */
-   unsigned cb_color_fmask;/* CB_COLORn_FMASK (EG and later) or 
CB_COLORn_FRAG (r600) */
-   unsigned cb_color_fmask_slice;  /* EG and later */
-   unsigned cb_color_cmask;/* CB_COLORn_TILE (r600 only) */
-   unsigned cb_color_mask; /* R600 only */
-   unsigned spi_shader_col_format; /* SI+, no blending, no 
alpha-to-coverage. */
-   unsigned spi_shader_col_format_alpha;   /* SI+, alpha-to-coverage */
-   unsigned spi_shader_col_format_blend;   /* SI+, blending without alpha. 
*/
-   unsigned spi_shader_col_format_blend_alpha; /* SI+, blending with 
alpha. */
-   struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. 
R600 only */
-   struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. 
R600 only */
+   unsigned spi_shader_col_format; /* no blending, no 
alpha-to-coverage. */
+   unsigned spi_shader_col_format_alpha;   /* alpha-to-coverage */
+   unsigned spi_shader_col_format_blend;   /* blending without alpha. */
+   unsigned spi_shader_col_format_blend_alpha; /* blending with alpha. */
 
/* DB registers. */
-   uint64_t db_depth_base; /* DB_Z_READ/WRITE_BASE (EG and later) 
or DB_DEPTH_BASE (r600) */
-   uint64_t db_stencil_base;   /* EG and later */
+   uint64_t db_depth_base; /* DB_Z_READ/WRITE_BASE */
+   uint64_t db_stencil_base;
uint64_t db_htile_data_base;
-   unsigned db_depth_info; /* R600 only, then SI and later */
-   unsigned db_z_info; /* EG and later */
+   unsigned db_depth_info;
+   unsigned db_z_info;
unsigned db_z_info2;/* GFX9+ */
unsigned db_depth_view;
unsigned db_depth_size;
-   unsigned db_depth_slice;/* EG and later */
-   unsigned db_stencil_info;   /* EG and later */
+   unsigned db_depth_slice;
+   unsigned db_stencil_info;
unsigned db_stencil_info2;  /* GFX9+ */
-   unsigned db_prefetch_limit; /* R600 only */
unsigned db_htile_surface;
-   unsigned db_preload_control;/* EG and later */
 };
 
 struct r600_mmio_counter {
unsigned busy;
unsigned idle;
 };
 
 union r600_mmio_counters {
struct {
/* For global GPU load including SDMA. */
diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index a1b6ed4..8c33923 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1966,23 +1966,20 @@ static struct pipe_surface *r600_create_surface(struct 
pipe_context *pipe,
}
 
return si_create_surface_custom(pipe, tex, templ,
  width0, height0,
  width, height);
 }
 
 static void r600_surface_destroy(struct pipe_context *pipe,
 struct pipe_surface *surface)
 {
-   struct r600_surface *surf = (struct r600_surface*)surface;
-   r600_resource_reference(>cb_buffer_fmask, NULL);
-   r600_resource_reference(>cb_buffer_cmask, NULL);
pipe_resource_reference(>texture, NULL);
FREE(surface);
 }
 
 static void r600_clear_texture(struct pipe_context *pipe,
   struct pipe_resource *tex,
   unsigned level,
   const struct pipe_box *box,
   

[Mesa-dev] [PATCH 1/6] radeonsi: remove DBG_NO_DISCARD_RANGE

2017-11-08 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_buffer_common.c | 2 --
 src/gallium/drivers/radeon/r600_pipe_common.c   | 2 --
 src/gallium/drivers/radeon/r600_pipe_common.h   | 1 -
 3 files changed, 5 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c 
b/src/gallium/drivers/radeon/r600_buffer_common.c
index 519e52e..f0cfd09 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -361,21 +361,20 @@ static bool r600_can_dma_copy_buffer(struct 
r600_common_context *rctx,
 }
 
 static void *r600_buffer_transfer_map(struct pipe_context *ctx,
   struct pipe_resource *resource,
   unsigned level,
   unsigned usage,
   const struct pipe_box *box,
   struct pipe_transfer **ptransfer)
 {
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
-   struct r600_common_screen *rscreen = (struct 
r600_common_screen*)ctx->screen;
struct r600_resource *rbuffer = r600_resource(resource);
uint8_t *data;
 
assert(box->x + box->width <= resource->width0);
 
/* From GL_AMD_pinned_memory issues:
 *
 * 4) Is glMapBuffer on a shared buffer guaranteed to return the
 *same system address which was specified at creation time?
 *
@@ -429,21 +428,20 @@ static void *r600_buffer_transfer_map(struct pipe_context 
*ctx,
if (r600_invalidate_buffer(rctx, rbuffer)) {
/* At this point, the buffer is always idle. */
usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
} else {
/* Fall back to a temporary buffer. */
usage |= PIPE_TRANSFER_DISCARD_RANGE;
}
}
 
if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
-   !(rscreen->debug_flags & DBG(NO_DISCARD_RANGE)) &&
((!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
 PIPE_TRANSFER_PERSISTENT)) &&
  r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) ||
 (rbuffer->flags & RADEON_FLAG_SPARSE))) {
assert(usage & PIPE_TRANSFER_WRITE);
 
/* Check if mapping this buffer would cause waiting for the GPU.
 */
if (rbuffer->flags & RADEON_FLAG_SPARSE ||
force_discard_range ||
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index aa72187..e0d50d0 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -821,22 +821,20 @@ static const struct debug_named_value 
common_debug_options[] = {
{ "nooptvariant", DBG(NO_OPT_VARIANT), "Disable compiling optimized 
shader variants." },
 
{ "testdma", DBG(TEST_DMA), "Invoke SDMA tests and exit." },
{ "testvmfaultcp", DBG(TEST_VMFAULT_CP), "Invoke a CP VM fault test and 
exit." },
{ "testvmfaultsdma", DBG(TEST_VMFAULT_SDMA), "Invoke a SDMA VM fault 
test and exit." },
{ "testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM 
fault test and exit." },
 
/* features */
{ "nodma", DBG(NO_ASYNC_DMA), "Disable asynchronous DMA" },
{ "nohyperz", DBG(NO_HYPERZ), "Disable Hyper-Z" },
-   /* GL uses the word INVALIDATE, gallium uses the word DISCARD */
-   { "noinvalrange", DBG(NO_DISCARD_RANGE), "Disable handling of 
INVALIDATE_RANGE map flags" },
{ "no2d", DBG(NO_2D_TILING), "Disable 2D tiling" },
{ "notiling", DBG(NO_TILING), "Disable tiling" },
{ "switch_on_eop", DBG(SWITCH_ON_EOP), "Program WD/IA to switch on 
end-of-packet." },
{ "forcedma", DBG(FORCE_DMA), "Use asynchronous DMA for all operations 
when possible." },
{ "precompile", DBG(PRECOMPILE), "Compile one shader variant at shader 
creation." },
{ "nowc", DBG(NO_WC), "Disable GTT write combining" },
{ "check_vm", DBG(CHECK_VM), "Check VM faults and dump debug info." },
{ "nodcc", DBG(NO_DCC), "Disable DCC." },
{ "nodccclear", DBG(NO_DCC_CLEAR), "Disable DCC fast clear." },
{ "norbplus", DBG(NO_RB_PLUS), "Disable RB+." },
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index f803ee4..47306c6 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -94,21 +94,20 @@ enum {
 
/* Information logging options: */
DBG_INFO,
DBG_TEX,
DBG_COMPUTE,
DBG_VM,
 
/* Driver options: */
DBG_FORCE_DMA,
DBG_NO_ASYNC_DMA,
-   DBG_NO_DISCARD_RANGE,
DBG_NO_WC,
DBG_CHECK_VM,

[Mesa-dev] [PATCH 2/6] radeonsi: remove r600_texture::non_disp_tiling

2017-11-08 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_pipe_common.h | 2 --
 src/gallium/drivers/radeon/r600_texture.c | 7 ---
 2 files changed, 9 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 47306c6..48501a1 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -256,22 +256,20 @@ struct r600_texture {
 
/* Depth buffer compression and fast clear. */
uint64_thtile_offset;
booltc_compatible_htile;
booldepth_cleared; /* if it was cleared at 
least once */
float   depth_clear_value;
boolstencil_cleared; /* if it was cleared 
at least once */
uint8_t stencil_clear_value;
boolupgraded_depth; /* upgraded from unorm 
to Z32_FLOAT */
 
-   boolnon_disp_tiling; /* R600-Cayman only */
-
/* Whether the texture is a displayable back buffer and needs DCC
 * decompression, which is expensive. Therefore, it's enabled only
 * if statistics suggest that it will pay off and it's allocated
 * separately. It can't be bound as a sampler by apps. Limited to
 * target == 2D and last_level == 0. If enabled, dcc_offset contains
 * the absolute GPUVM address, not the relative one.
 */
struct r600_resource*dcc_separate_buffer;
/* When DCC is temporarily disabled, the separate buffer is here. */
struct r600_resource*last_dcc_separate_buffer;
diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 0800c0f..a1b6ed4 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -546,21 +546,20 @@ static void r600_reallocate_texture_inplace(struct 
r600_common_context *rctx,
rtex->can_sample_s = new_tex->can_sample_s;
rtex->surface = new_tex->surface;
rtex->fmask = new_tex->fmask;
rtex->cmask = new_tex->cmask;
rtex->cb_color_info = new_tex->cb_color_info;
rtex->last_msaa_resolve_target_micro_mode = 
new_tex->last_msaa_resolve_target_micro_mode;
rtex->htile_offset = new_tex->htile_offset;
rtex->tc_compatible_htile = new_tex->tc_compatible_htile;
rtex->depth_cleared = new_tex->depth_cleared;
rtex->stencil_cleared = new_tex->stencil_cleared;
-   rtex->non_disp_tiling = new_tex->non_disp_tiling;
rtex->dcc_gather_statistics = new_tex->dcc_gather_statistics;
rtex->framebuffers_bound = new_tex->framebuffers_bound;
 
if (new_bind_flag == PIPE_BIND_LINEAR) {
assert(!rtex->htile_offset);
assert(!rtex->cmask.size);
assert(!rtex->fmask.size);
assert(!rtex->dcc_offset);
assert(!rtex->is_depth);
}
@@ -1148,24 +1147,20 @@ r600_texture_create_object(struct pipe_screen *screen,
rtex->db_render_format = base->format;
else {
rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT;
rtex->upgraded_depth = base->format != 
PIPE_FORMAT_Z32_FLOAT &&
   base->format != 
PIPE_FORMAT_Z32_FLOAT_S8X24_UINT;
}
} else {
rtex->db_render_format = base->format;
}
 
-   /* Tiled depth textures utilize the non-displayable tile order.
-* This must be done after r600_setup_surface.
-* Applies to R600-Cayman. */
-   rtex->non_disp_tiling = rtex->is_depth && 
rtex->surface.u.legacy.level[0].mode >= RADEON_SURF_MODE_1D;
/* Applies to GCN. */
rtex->last_msaa_resolve_target_micro_mode = 
rtex->surface.micro_tile_mode;
 
/* Disable separate DCC at the beginning. DRI2 doesn't reuse buffers
 * between frames, so the only thing that can enable separate DCC
 * with DRI2 is multiple slow clears within a frame.
 */
rtex->ps_draw_ratio = 0;
 
if (rtex->is_depth) {
@@ -1484,22 +1479,20 @@ bool si_init_flushed_depth_texture(struct pipe_context 
*ctx,
resource.flags = texture->flags | R600_RESOURCE_FLAG_FLUSHED_DEPTH;
 
if (staging)
resource.flags |= R600_RESOURCE_FLAG_TRANSFER;
 
*flushed_depth_texture = (struct r600_texture 
*)ctx->screen->resource_create(ctx->screen, );
if (*flushed_depth_texture == NULL) {
R600_ERR("failed to create temporary texture to hold flushed 
depth\n");
return false;
}
-
-   (*flushed_depth_texture)->non_disp_tiling = false;
return true;
 }
 
 /**
  * Initialize the 

[Mesa-dev] [Bug 103586] OpenCL/Clover: AMD Turks: corrupt output buffer (depending on dimension order?)

2017-11-08 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=103586

--- Comment #10 from Dave Gilbert  ---
I believe I'm still seeing this:

dg@hath:~/ocl2$ clinfo 
Number of platforms   1
  Platform Name   Clover
  Platform Vendor Mesa
  Platform VersionOpenCL 1.1 Mesa 17.4.0-devel
(git-a16dc04ad5)

dg@hath:~/ocl2$ echo $LD_LIBRARY_PATH 
/home/dg/mesa/try/lib:

so I *think* it's using my build.

and I believe I'm still seeing it.
Is my test valid or do I really need that finish?

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/4] glsl: s/unsigned/glsl_base_type/ in glsl type code (v2)

2017-11-08 Thread Brian Paul

On 11/08/2017 06:28 PM, Ian Romanick wrote:

Any thoughts about my data using __attribute__((__packed__))?


Sorry, I didn't have time to dig into it.  I took a look this evening.

I think the ENUM_8BIT idea will work for GCC and MSVC but only for C++ 
sources.  MSVC doesn't like the sized enum syntax in C compilation units 
(unless there's some compiler flag I haven't found yet).  So, we could 
use it in the GLSL compiler, but not over in the gallium headers.


Does that matter to you?

Could I address this issue in a follow-on after the current series?

-Brian



On 11/07/2017 04:07 PM, Brian Paul wrote:

Declare glsl_type::sampled_type as glsl_base_type as we do for the
base_type field.  And make base_type a bitfield to save a few bytes.

Update glsl_type constructor to take glsl_base_type instead of unsigned
and pass GLSL_TYPE_VOID instead of zero.

No Piglit regressions with llvmpipe.

v2:
- Declare both base_type and sampled_type as 8-bit fields
- Use the new ASSERT_BITFIELD_SIZE() macro.
---
  src/compiler/glsl_types.cpp | 30 +++---
  src/compiler/glsl_types.h   | 28 +---
  2 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp
index 704b63c..107a81f 100644
--- a/src/compiler/glsl_types.cpp
+++ b/src/compiler/glsl_types.cpp
@@ -50,9 +50,9 @@ glsl_type::glsl_type(GLenum gl_type,
   glsl_base_type base_type, unsigned vector_elements,
   unsigned matrix_columns, const char *name) :
 gl_type(gl_type),
-   base_type(base_type),
+   base_type(base_type), sampled_type(GLSL_TYPE_VOID),
 sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
-   sampled_type(0), interface_packing(0), interface_row_major(0),
+   interface_packing(0), interface_row_major(0),
 vector_elements(vector_elements), matrix_columns(matrix_columns),
 length(0)
  {
@@ -79,11 +79,11 @@ glsl_type::glsl_type(GLenum gl_type,

  glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type,
   enum glsl_sampler_dim dim, bool shadow, bool array,
- unsigned type, const char *name) :
+ glsl_base_type type, const char *name) :
 gl_type(gl_type),
-   base_type(base_type),
+   base_type(base_type), sampled_type(type),
 sampler_dimensionality(dim), sampler_shadow(shadow),
-   sampler_array(array), sampled_type(type), interface_packing(0),
+   sampler_array(array), interface_packing(0),
 interface_row_major(0), length(0)
  {
 mtx_lock(_type::mem_mutex);
@@ -102,9 +102,9 @@ glsl_type::glsl_type(GLenum gl_type, glsl_base_type 
base_type,
  glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
   const char *name) :
 gl_type(0),
-   base_type(GLSL_TYPE_STRUCT),
+   base_type(GLSL_TYPE_STRUCT), sampled_type(GLSL_TYPE_VOID),
 sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
-   sampled_type(0), interface_packing(0), interface_row_major(0),
+   interface_packing(0), interface_row_major(0),
 vector_elements(0), matrix_columns(0),
 length(num_fields)
  {
@@ -131,9 +131,9 @@ glsl_type::glsl_type(const glsl_struct_field *fields, 
unsigned num_fields,
   enum glsl_interface_packing packing,
   bool row_major, const char *name) :
 gl_type(0),
-   base_type(GLSL_TYPE_INTERFACE),
+   base_type(GLSL_TYPE_INTERFACE), sampled_type(GLSL_TYPE_VOID),
 sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
-   sampled_type(0), interface_packing((unsigned) packing),
+   interface_packing((unsigned) packing),
 interface_row_major((unsigned) row_major),
 vector_elements(0), matrix_columns(0),
 length(num_fields)
@@ -159,9 +159,9 @@ glsl_type::glsl_type(const glsl_struct_field *fields, 
unsigned num_fields,
  glsl_type::glsl_type(const glsl_type *return_type,
   const glsl_function_param *params, unsigned num_params) :
 gl_type(0),
-   base_type(GLSL_TYPE_FUNCTION),
+   base_type(GLSL_TYPE_FUNCTION), sampled_type(GLSL_TYPE_VOID),
 sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
-   sampled_type(0), interface_packing(0), interface_row_major(0),
+   interface_packing(0), interface_row_major(0),
 vector_elements(0), matrix_columns(0),
 length(num_params)
  {
@@ -191,9 +191,9 @@ glsl_type::glsl_type(const glsl_type *return_type,

  glsl_type::glsl_type(const char *subroutine_name) :
 gl_type(0),
-   base_type(GLSL_TYPE_SUBROUTINE),
+   base_type(GLSL_TYPE_SUBROUTINE), sampled_type(GLSL_TYPE_VOID),
 sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
-   sampled_type(0), interface_packing(0), interface_row_major(0),
+   interface_packing(0), interface_row_major(0),
 vector_elements(1), matrix_columns(1),
 length(0)
  {
@@ -442,9 +442,9 @@ _mesa_glsl_release_types(void)


  glsl_type::glsl_type(const 

Re: [Mesa-dev] [PATCH 2/4] r600: use mysterious DX10_CLAMP bit in pixel shader setup

2017-11-08 Thread Roland Scheidegger
FWIW I'd really appreciate it if someone could shed some light on that
mystery bit there...

Roland

Am 09.11.2017 um 03:58 schrieb srol...@vmware.com:
> From: Roland Scheidegger 
> 
> I don't know what this bit really does. The docs are somewhere between
> misleading and wrong however, as at least the newer ones (that bit exists with
> GCN as well) imply all NaNs would get converted to zeros, which is definitely
> NOT the case (and that would not be dx10 compliant either), the r600 ones are
> also talking about "dx10 style" vs "dx9 style" clamp, whatever that means for
> dx9... Makes no difference at all with piglit's isinf-and-isnan tests, so very
> obviously NaNs are still generated just fine.
> radeonsi also seems to set this bit nowadays (the llvm amdgpu code comment
> now says "Make clamp modifier on NaN input returns 0" instead of "Do not
> clamp NAN to 0" since it was changed).
> 
> This prevents misrenderings in This War of Mine since using ieee
> muls (ce7a045feeef8cad155f1c9aa07f166e146e3d00), without having to use
> clamped rcp opcode, which would also fix this.
> 
> AMD, it would be really really nice if there would be useful/correct/accurate
> information about this bit...
> The bit can be set for all shader stages, and maybe it should be set but I
> really have no idea...
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103544
> ---
>  src/gallium/drivers/r600/evergreen_state.c | 1 +
>  src/gallium/drivers/r600/r600_state.c  | 1 +
>  2 files changed, 2 insertions(+)
> 
> diff --git a/src/gallium/drivers/r600/evergreen_state.c 
> b/src/gallium/drivers/r600/evergreen_state.c
> index 96eb35a981..ddd59dc0b5 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -3235,6 +3235,7 @@ void evergreen_update_ps_state(struct pipe_context 
> *ctx, struct r600_pipe_shader
>   r600_store_value(cb, /* R_028844_SQ_PGM_RESOURCES_PS */
>S_028844_NUM_GPRS(rshader->bc.ngpr) |
>S_028844_PRIME_CACHE_ON_DRAW(1) |
> +  S_028844_DX10_CLAMP(1) |
>S_028844_STACK_SIZE(rshader->bc.nstack));
>   /* After that, the NOP relocation packet must be emitted (shader->bo, 
> RADEON_USAGE_READ). */
>  
> diff --git a/src/gallium/drivers/r600/r600_state.c 
> b/src/gallium/drivers/r600/r600_state.c
> index c21e8dabb1..c0d0b1667a 100644
> --- a/src/gallium/drivers/r600/r600_state.c
> +++ b/src/gallium/drivers/r600/r600_state.c
> @@ -2548,6 +2548,7 @@ void r600_update_ps_state(struct pipe_context *ctx, 
> struct r600_pipe_shader *sha
>   r600_store_context_reg_seq(cb, R_028850_SQ_PGM_RESOURCES_PS, 2);
>   r600_store_value(cb, /* R_028850_SQ_PGM_RESOURCES_PS*/
>S_028850_NUM_GPRS(rshader->bc.ngpr) |
> +  S_028850_DX10_CLAMP(1) |
>S_028850_STACK_SIZE(rshader->bc.nstack) |
>S_028850_UNCACHED_FIRST_INST(ufi));
>   r600_store_value(cb, exports_ps); /* R_028854_SQ_PGM_EXPORTS_PS */
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] r600: use the clamped versions of rcp/rsq for eg/cayman.

2017-11-08 Thread Roland Scheidegger
All on Juniper.

But anyway, I've got another solution, with the only drawback that I
don't really know what it does due to docs being lackluster/misleading
there :-).
But that would let us keep the ieee opcodes. And while I don't know what
it does, I suspect it's a better idea regardless ;-). hw supported by
nouveau drivers might just do the same without anyone even knowing with
some luck...

Roland



Am 08.11.2017 um 07:20 schrieb Ilia Mirkin:
> Actually cayman gets half of it - it gets the abs, but not clamped. I
> wonder what happens if you go the other way -- use the IEEE version of
> the op for RSQ() (presumably you're not testing this on cayman).
> 
> On Wed, Nov 8, 2017 at 1:18 AM, Ilia Mirkin  wrote:
>> tgsi_rsq appears to ignore the passed-in op and always puts in
>> ALU_OP1_RECIPSQRT_CLAMPED anyways. It also sticks an absolute value on
>> the RSQ() argument. This only happens for eg, not cayman. (Probably
>> why only the rcp_clamped change appeared to be necessary.)
>>
>> This is odd though, because there's no clamping like that in other
>> drivers. The trace you made looks fine on both nvc0 and nv50.
>>
>> On Tue, Nov 7, 2017 at 11:01 PM,   wrote:
>>> From: Roland Scheidegger 
>>>
>>> r600 already used the clamped versions, but for some reason this was
>>> different to eg/cayman.
>>> (Note that it has been different since essentially forever, 7 years, since
>>> df62338c491f2cace1a48f99de78e83b5edd82fd in particular, which changed
>>> this for r600 but not eg (cayman wasn't supported back then, but probably
>>> copied this from the eg part later). The commit does not mention any reason
>>> why this difference should exist.)
>>> This seems a bit unfortunate, since it would be nice to use ieee arithmetic,
>>> I have no idea what this could potentially break and no idea if it really
>>> makes sense going back to legacy-style rcp/rsq...
>>> This however prevents misrenderings in This War of Mine since using ieee
>>> muls (ce7a045feeef8cad155f1c9aa07f166e146e3d00), albeit strictly speaking
>>> only rcp_clamped is necessary for this. It seems likely the root cause is
>>> some x * rcp(y) calculation where both x and y evaluate to 0. Albeit it
>>> apparently works with other drivers, not sure what's up with that...
>>>
>>> Bugzilla: 
>>> https://bugs.freedesktop.org/show_bug.cgi?id=103544
>>> ---
>>>  src/gallium/drivers/r600/r600_shader.c | 16 
>>>  1 file changed, 12 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/src/gallium/drivers/r600/r600_shader.c 
>>> b/src/gallium/drivers/r600/r600_shader.c
>>> index 6a755bb3fd..62fc4da901 100644
>>> --- a/src/gallium/drivers/r600/r600_shader.c
>>> +++ b/src/gallium/drivers/r600/r600_shader.c
>>> @@ -9033,8 +9033,12 @@ static const struct r600_shader_tgsi_instruction 
>>> eg_shader_tgsi_instruction[] =
>>> [TGSI_OPCODE_ARL]   = { ALU_OP0_NOP, tgsi_eg_arl},
>>> [TGSI_OPCODE_MOV]   = { ALU_OP1_MOV, tgsi_op2},
>>> [TGSI_OPCODE_LIT]   = { ALU_OP0_NOP, tgsi_lit},
>>> -   [TGSI_OPCODE_RCP]   = { ALU_OP1_RECIP_IEEE, 
>>> tgsi_trans_srcx_replicate},
>>> -   [TGSI_OPCODE_RSQ]   = { ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq},
>>> +   /* XXX:
>>> +* For state trackers other than OpenGL, we'll want to use
>>> +* _RECIP_IEEE/_RECIPSQRT_IEEE instead.
>>> +*/
>>> +   [TGSI_OPCODE_RCP]   = { ALU_OP1_RECIP_CLAMPED, 
>>> tgsi_trans_srcx_replicate},
>>> +   [TGSI_OPCODE_RSQ]   = { ALU_OP1_RECIPSQRT_CLAMPED, tgsi_rsq},
>>> [TGSI_OPCODE_EXP]   = { ALU_OP0_NOP, tgsi_exp},
>>> [TGSI_OPCODE_LOG]   = { ALU_OP0_NOP, tgsi_log},
>>> [TGSI_OPCODE_MUL]   = { ALU_OP2_MUL_IEEE, tgsi_op2},
>>> @@ -9256,8 +9260,12 @@ static const struct r600_shader_tgsi_instruction 
>>> cm_shader_tgsi_instruction[] =
>>> [TGSI_OPCODE_ARL]   = { ALU_OP0_NOP, tgsi_eg_arl},
>>> [TGSI_OPCODE_MOV]   = { ALU_OP1_MOV, tgsi_op2},
>>> [TGSI_OPCODE_LIT]   = { ALU_OP0_NOP, tgsi_lit},
>>> -   [TGSI_OPCODE_RCP]   = { ALU_OP1_RECIP_IEEE, 
>>> cayman_emit_float_instr},
>>> -   [TGSI_OPCODE_RSQ]   = { ALU_OP1_RECIPSQRT_IEEE, 
>>> cayman_emit_float_instr},
>>> +   /* XXX:
>>> +* For state trackers other than OpenGL, we'll want to use
>>> +* _RECIP_IEEE/_RECIPSQRT_IEEE instead.
>>> +*/
>>> +   [TGSI_OPCODE_RCP]   = { ALU_OP1_RECIP_CLAMPED, 
>>> cayman_emit_float_instr},
>>> +   [TGSI_OPCODE_RSQ]   = { ALU_OP1_RECIPSQRT_CLAMPED, 
>>> cayman_emit_float_instr},
>>> [TGSI_OPCODE_EXP]   = { ALU_OP0_NOP, tgsi_exp},
>>> [TGSI_OPCODE_LOG]   = { ALU_OP0_NOP, tgsi_log},
>>> [TGSI_OPCODE_MUL]   = { 

Re: [Mesa-dev] [PATCH 2/2] r600: use the clamped versions of rcp/rsq for eg/cayman.

2017-11-08 Thread Roland Scheidegger
Am 08.11.2017 um 07:18 schrieb Ilia Mirkin:
> tgsi_rsq appears to ignore the passed-in op and always puts in
> ALU_OP1_RECIPSQRT_CLAMPED anyways. It also sticks an absolute value on
> the RSQ() argument. This only happens for eg, not cayman. (Probably
> why only the rcp_clamped change appeared to be necessary.)
Right, I noticed that afterwards, it is using the clamped version for
rsq for eg.

> 
> This is odd though, because there's no clamping like that in other
> drivers. The trace you made looks fine on both nvc0 and nv50.
Yes, it also looks fine on llvmpipe, which adheres to strict ieee rules
(or rather, strict dx10 rules for this, but they are mostly identical,
with min/max returning the non-nan, which I'd nearly bet nvc0/nv50 do too).

Roland


> 
> On Tue, Nov 7, 2017 at 11:01 PM,   wrote:
>> From: Roland Scheidegger 
>>
>> r600 already used the clamped versions, but for some reason this was
>> different to eg/cayman.
>> (Note that it has been different since essentially forever, 7 years, since
>> df62338c491f2cace1a48f99de78e83b5edd82fd in particular, which changed
>> this for r600 but not eg (cayman wasn't supported back then, but probably
>> copied this from the eg part later). The commit does not mention any reason
>> why this difference should exist.)
>> This seems a bit unfortunate, since it would be nice to use ieee arithmetic,
>> I have no idea what this could potentially break and no idea if it really
>> makes sense going back to legacy-style rcp/rsq...
>> This however prevents misrenderings in This War of Mine since using ieee
>> muls (ce7a045feeef8cad155f1c9aa07f166e146e3d00), albeit strictly speaking
>> only rcp_clamped is necessary for this. It seems likely the root cause is
>> some x * rcp(y) calculation where both x and y evaluate to 0. Albeit it
>> apparently works with other drivers, not sure what's up with that...
>>
>> Bugzilla: 
>> https://bugs.freedesktop.org/show_bug.cgi?id=103544
>> ---
>>  src/gallium/drivers/r600/r600_shader.c | 16 
>>  1 file changed, 12 insertions(+), 4 deletions(-)
>>
>> diff --git a/src/gallium/drivers/r600/r600_shader.c 
>> b/src/gallium/drivers/r600/r600_shader.c
>> index 6a755bb3fd..62fc4da901 100644
>> --- a/src/gallium/drivers/r600/r600_shader.c
>> +++ b/src/gallium/drivers/r600/r600_shader.c
>> @@ -9033,8 +9033,12 @@ static const struct r600_shader_tgsi_instruction 
>> eg_shader_tgsi_instruction[] =
>> [TGSI_OPCODE_ARL]   = { ALU_OP0_NOP, tgsi_eg_arl},
>> [TGSI_OPCODE_MOV]   = { ALU_OP1_MOV, tgsi_op2},
>> [TGSI_OPCODE_LIT]   = { ALU_OP0_NOP, tgsi_lit},
>> -   [TGSI_OPCODE_RCP]   = { ALU_OP1_RECIP_IEEE, 
>> tgsi_trans_srcx_replicate},
>> -   [TGSI_OPCODE_RSQ]   = { ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq},
>> +   /* XXX:
>> +* For state trackers other than OpenGL, we'll want to use
>> +* _RECIP_IEEE/_RECIPSQRT_IEEE instead.
>> +*/
>> +   [TGSI_OPCODE_RCP]   = { ALU_OP1_RECIP_CLAMPED, 
>> tgsi_trans_srcx_replicate},
>> +   [TGSI_OPCODE_RSQ]   = { ALU_OP1_RECIPSQRT_CLAMPED, tgsi_rsq},
>> [TGSI_OPCODE_EXP]   = { ALU_OP0_NOP, tgsi_exp},
>> [TGSI_OPCODE_LOG]   = { ALU_OP0_NOP, tgsi_log},
>> [TGSI_OPCODE_MUL]   = { ALU_OP2_MUL_IEEE, tgsi_op2},
>> @@ -9256,8 +9260,12 @@ static const struct r600_shader_tgsi_instruction 
>> cm_shader_tgsi_instruction[] =
>> [TGSI_OPCODE_ARL]   = { ALU_OP0_NOP, tgsi_eg_arl},
>> [TGSI_OPCODE_MOV]   = { ALU_OP1_MOV, tgsi_op2},
>> [TGSI_OPCODE_LIT]   = { ALU_OP0_NOP, tgsi_lit},
>> -   [TGSI_OPCODE_RCP]   = { ALU_OP1_RECIP_IEEE, 
>> cayman_emit_float_instr},
>> -   [TGSI_OPCODE_RSQ]   = { ALU_OP1_RECIPSQRT_IEEE, 
>> cayman_emit_float_instr},
>> +   /* XXX:
>> +* For state trackers other than OpenGL, we'll want to use
>> +* _RECIP_IEEE/_RECIPSQRT_IEEE instead.
>> +*/
>> +   [TGSI_OPCODE_RCP]   = { ALU_OP1_RECIP_CLAMPED, 
>> cayman_emit_float_instr},
>> +   [TGSI_OPCODE_RSQ]   = { ALU_OP1_RECIPSQRT_CLAMPED, 
>> cayman_emit_float_instr},
>> [TGSI_OPCODE_EXP]   = { ALU_OP0_NOP, tgsi_exp},
>> [TGSI_OPCODE_LOG]   = { ALU_OP0_NOP, tgsi_log},
>> [TGSI_OPCODE_MUL]   = { ALU_OP2_MUL_IEEE, tgsi_op2},
>> --
>> 2.12.3
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] r600: use min_dx10/max_dx10 instead of min/max

2017-11-08 Thread sroland
From: Roland Scheidegger 

I believe this is the safe thing to do, especially ever since the driver
actually generates NaNs for muls too.
Albeit since the radeon ISA docs are inaccurate/wrong there, I'm not
entirely sure what the non-dx10 versions do, but (as required by dx10)
the dx10 versions should pick a non-nan source over a nan source.
Other drivers presumably do the same (radeonsi, llvmpipe).
This was shown to make some difference for bug 103544, albeit it is not
required to fix it.
---
 src/gallium/drivers/r600/r600_shader.c  | 12 ++--
 src/gallium/drivers/r600/sb/sb_expr.cpp |  2 ++
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 188fbc9d47..6a755bb3fd 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -8844,8 +8844,8 @@ static const struct r600_shader_tgsi_instruction 
r600_shader_tgsi_instruction[]
[TGSI_OPCODE_DP3]   = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DP4]   = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DST]   = { ALU_OP0_NOP, tgsi_opdst},
-   [TGSI_OPCODE_MIN]   = { ALU_OP2_MIN, tgsi_op2},
-   [TGSI_OPCODE_MAX]   = { ALU_OP2_MAX, tgsi_op2},
+   [TGSI_OPCODE_MIN]   = { ALU_OP2_MIN_DX10, tgsi_op2},
+   [TGSI_OPCODE_MAX]   = { ALU_OP2_MAX_DX10, tgsi_op2},
[TGSI_OPCODE_SLT]   = { ALU_OP2_SETGT, tgsi_op2_swap},
[TGSI_OPCODE_SGE]   = { ALU_OP2_SETGE, tgsi_op2},
[TGSI_OPCODE_MAD]   = { ALU_OP3_MULADD_IEEE, tgsi_op3},
@@ -9042,8 +9042,8 @@ static const struct r600_shader_tgsi_instruction 
eg_shader_tgsi_instruction[] =
[TGSI_OPCODE_DP3]   = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DP4]   = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DST]   = { ALU_OP0_NOP, tgsi_opdst},
-   [TGSI_OPCODE_MIN]   = { ALU_OP2_MIN, tgsi_op2},
-   [TGSI_OPCODE_MAX]   = { ALU_OP2_MAX, tgsi_op2},
+   [TGSI_OPCODE_MIN]   = { ALU_OP2_MIN_DX10, tgsi_op2},
+   [TGSI_OPCODE_MAX]   = { ALU_OP2_MAX_DX10, tgsi_op2},
[TGSI_OPCODE_SLT]   = { ALU_OP2_SETGT, tgsi_op2_swap},
[TGSI_OPCODE_SGE]   = { ALU_OP2_SETGE, tgsi_op2},
[TGSI_OPCODE_MAD]   = { ALU_OP3_MULADD_IEEE, tgsi_op3},
@@ -9265,8 +9265,8 @@ static const struct r600_shader_tgsi_instruction 
cm_shader_tgsi_instruction[] =
[TGSI_OPCODE_DP3]   = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DP4]   = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DST]   = { ALU_OP0_NOP, tgsi_opdst},
-   [TGSI_OPCODE_MIN]   = { ALU_OP2_MIN, tgsi_op2},
-   [TGSI_OPCODE_MAX]   = { ALU_OP2_MAX, tgsi_op2},
+   [TGSI_OPCODE_MIN]   = { ALU_OP2_MIN_DX10, tgsi_op2},
+   [TGSI_OPCODE_MAX]   = { ALU_OP2_MAX_DX10, tgsi_op2},
[TGSI_OPCODE_SLT]   = { ALU_OP2_SETGT, tgsi_op2_swap},
[TGSI_OPCODE_SGE]   = { ALU_OP2_SETGE, tgsi_op2},
[TGSI_OPCODE_MAD]   = { ALU_OP3_MULADD_IEEE, tgsi_op3},
diff --git a/src/gallium/drivers/r600/sb/sb_expr.cpp 
b/src/gallium/drivers/r600/sb/sb_expr.cpp
index 3dd3a4815b..7a5d62c8e8 100644
--- a/src/gallium/drivers/r600/sb/sb_expr.cpp
+++ b/src/gallium/drivers/r600/sb/sb_expr.cpp
@@ -753,7 +753,9 @@ bool expr_handler::fold_alu_op2(alu_node& n) {
n.bc.src[0].abs == n.bc.src[1].abs) {
switch (n.bc.op) {
case ALU_OP2_MIN: // (MIN x, x) => (MOV x)
+   case ALU_OP2_MIN_DX10:
case ALU_OP2_MAX:
+   case ALU_OP2_MAX_DX10:
convert_to_mov(n, v0, n.bc.src[0].neg, 
n.bc.src[0].abs);
return fold_alu_op1(n);
case ALU_OP2_ADD:  // (ADD x, x) => (MUL x, 2)
-- 
2.12.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] r600: set the number type correctly for float rts in cb setup

2017-11-08 Thread sroland
From: Roland Scheidegger 

Float rts were always set as unorm instead of float.
Not sure of the consequences, but at least it looks like the blend clamp
would have been enabled, which is against the rules (only eg really bothered
to even attempt to specify this correctly, r600 always used clamp anyway).
Albeit r600 (not r700) setup still looks bugged to me due to never setting
BLEND_FLOAT32 which must be set according to docs...
Not sure if the hw really cares, no piglit change.
---
 src/gallium/drivers/r600/evergreen_state.c |  7 ++-
 src/gallium/drivers/r600/r600_state.c  | 10 +-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index ddd59dc0b5..ba08f38f8c 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1042,7 +1042,7 @@ static void evergreen_set_color_surface_buffer(struct 
r600_context *rctx,
}
}
ntype = V_028C70_NUMBER_UNORM;
-   if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
+   if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
ntype = V_028C70_NUMBER_SRGB;
else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
if (desc->channel[i].normalized)
@@ -1054,7 +1054,10 @@ static void evergreen_set_color_surface_buffer(struct 
r600_context *rctx,
ntype = V_028C70_NUMBER_UNORM;
else if (desc->channel[i].pure_integer)
ntype = V_028C70_NUMBER_UINT;
+   } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
+   ntype = V_028C70_NUMBER_FLOAT;
}
+
pitch = (pitch / 8) - 1;
color->pitch = S_028C64_PITCH_TILE_MAX(pitch);
 
@@ -1180,6 +1183,8 @@ static void evergreen_set_color_surface_common(struct 
r600_context *rctx,
ntype = V_028C70_NUMBER_UNORM;
else if (desc->channel[i].pure_integer)
ntype = V_028C70_NUMBER_UINT;
+   } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
+   ntype = V_028C70_NUMBER_FLOAT;
}
 
if (R600_BIG_ENDIAN)
diff --git a/src/gallium/drivers/r600/r600_state.c 
b/src/gallium/drivers/r600/r600_state.c
index c0d0b1667a..0bda8d5b3f 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -817,7 +817,7 @@ static void r600_init_color_surface(struct r600_context 
*rctx,
unsigned offset;
const struct util_format_description *desc;
int i;
-   bool blend_bypass = 0, blend_clamp = 1, do_endian_swap = FALSE;
+   bool blend_bypass = 0, blend_clamp = 0, do_endian_swap = FALSE;
 
if (rtex->db_compatible && !r600_can_sample_zs(rtex, false)) {
r600_init_flushed_depth_texture(>b.b, surf->base.texture, 
NULL);
@@ -869,6 +869,8 @@ static void r600_init_color_surface(struct r600_context 
*rctx,
ntype = V_0280A0_NUMBER_UNORM;
else if (desc->channel[i].pure_integer)
ntype = V_0280A0_NUMBER_UINT;
+   } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
+   ntype = V_0280A0_NUMBER_FLOAT;
}
 
if (R600_BIG_ENDIAN)
@@ -883,6 +885,11 @@ static void r600_init_color_surface(struct r600_context 
*rctx,
 
endian = r600_colorformat_endian_swap(format, do_endian_swap);
 
+   /* blend clamp should be set for all NORM/SRGB types */
+   if (ntype == V_0280A0_NUMBER_UNORM || ntype == V_0280A0_NUMBER_SNORM ||
+   ntype == V_0280A0_NUMBER_SRGB)
+   blend_clamp = 1;
+
/* set blend bypass according to docs if SINT/UINT or
   8/24 COLOR variants */
if (ntype == V_0280A0_NUMBER_UINT || ntype == V_0280A0_NUMBER_SINT ||
@@ -916,6 +923,7 @@ static void r600_init_color_surface(struct r600_context 
*rctx,
 ntype != V_0280A0_NUMBER_UINT &&
 ntype != V_0280A0_NUMBER_SINT) &&
G_0280A0_BLEND_CLAMP(color_info) &&
+   /* XXX this condition is always true since BLEND_FLOAT32 is 
never set (bug?). */
!G_0280A0_BLEND_FLOAT32(color_info)) {
color_info |= 
S_0280A0_SOURCE_FORMAT(V_0280A0_EXPORT_NORM);
surf->export_16bpc = true;
-- 
2.12.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] r600: use mysterious DX10_CLAMP bit in pixel shader setup

2017-11-08 Thread sroland
From: Roland Scheidegger 

I don't know what this bit really does. The docs are somewhere between
misleading and wrong however, as at least the newer ones (that bit exists with
GCN as well) imply all NaNs would get converted to zeros, which is definitely
NOT the case (and that would not be dx10 compliant either), the r600 ones are
also talking about "dx10 style" vs "dx9 style" clamp, whatever that means for
dx9... Makes no difference at all with piglit's isinf-and-isnan tests, so very
obviously NaNs are still generated just fine.
radeonsi also seems to set this bit nowadays (the llvm amdgpu code comment
now says "Make clamp modifier on NaN input returns 0" instead of "Do not
clamp NAN to 0" since it was changed).

This prevents misrenderings in This War of Mine since using ieee
muls (ce7a045feeef8cad155f1c9aa07f166e146e3d00), without having to use
clamped rcp opcode, which would also fix this.

AMD, it would be really really nice if there would be useful/correct/accurate
information about this bit...
The bit can be set for all shader stages, and maybe it should be set but I
really have no idea...

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103544
---
 src/gallium/drivers/r600/evergreen_state.c | 1 +
 src/gallium/drivers/r600/r600_state.c  | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 96eb35a981..ddd59dc0b5 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -3235,6 +3235,7 @@ void evergreen_update_ps_state(struct pipe_context *ctx, 
struct r600_pipe_shader
r600_store_value(cb, /* R_028844_SQ_PGM_RESOURCES_PS */
 S_028844_NUM_GPRS(rshader->bc.ngpr) |
 S_028844_PRIME_CACHE_ON_DRAW(1) |
+S_028844_DX10_CLAMP(1) |
 S_028844_STACK_SIZE(rshader->bc.nstack));
/* After that, the NOP relocation packet must be emitted (shader->bo, 
RADEON_USAGE_READ). */
 
diff --git a/src/gallium/drivers/r600/r600_state.c 
b/src/gallium/drivers/r600/r600_state.c
index c21e8dabb1..c0d0b1667a 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2548,6 +2548,7 @@ void r600_update_ps_state(struct pipe_context *ctx, 
struct r600_pipe_shader *sha
r600_store_context_reg_seq(cb, R_028850_SQ_PGM_RESOURCES_PS, 2);
r600_store_value(cb, /* R_028850_SQ_PGM_RESOURCES_PS*/
 S_028850_NUM_GPRS(rshader->bc.ngpr) |
+S_028850_DX10_CLAMP(1) |
 S_028850_STACK_SIZE(rshader->bc.nstack) |
 S_028850_UNCACHED_FIRST_INST(ufi));
r600_store_value(cb, exports_ps); /* R_028854_SQ_PGM_EXPORTS_PS */
-- 
2.12.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] r600: use ieee version of rcp

2017-11-08 Thread sroland
From: Roland Scheidegger 

r600 used the clamped version for rcp, whereas both evergreen and cayman
used the ieee version. I don't know why that discrepancy exists (it does so
since day 1) but there does not seem to be a valid reason for this, so make
it consistent. This seems now safer than before the previous commit (using
the mystery dx10 clamp).
Note that rsq still uses clamped version (as before even though the table
may have suggested otherwise for evergreen) for r600/eg, but not for cayman.
I just don't feel lucky enough to change this (it should also be noted r600
supports sqrt natively, which is always ieee, therefore might not really see
rsqrt with glsl often presumably).
Compile tested only...
---
 src/gallium/drivers/r600/r600_shader.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 6a755bb3fd..628c33787e 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -8830,11 +8830,7 @@ static const struct r600_shader_tgsi_instruction 
r600_shader_tgsi_instruction[]
[TGSI_OPCODE_MOV]   = { ALU_OP1_MOV, tgsi_op2},
[TGSI_OPCODE_LIT]   = { ALU_OP0_NOP, tgsi_lit},
 
-   /* XXX:
-* For state trackers other than OpenGL, we'll want to use
-* _RECIP_IEEE instead.
-*/
-   [TGSI_OPCODE_RCP]   = { ALU_OP1_RECIP_CLAMPED, 
tgsi_trans_srcx_replicate},
+   [TGSI_OPCODE_RCP]   = { ALU_OP1_RECIP_IEEE, 
tgsi_trans_srcx_replicate},
 
[TGSI_OPCODE_RSQ]   = { ALU_OP0_NOP, tgsi_rsq},
[TGSI_OPCODE_EXP]   = { ALU_OP0_NOP, tgsi_exp},
@@ -9034,7 +9030,7 @@ static const struct r600_shader_tgsi_instruction 
eg_shader_tgsi_instruction[] =
[TGSI_OPCODE_MOV]   = { ALU_OP1_MOV, tgsi_op2},
[TGSI_OPCODE_LIT]   = { ALU_OP0_NOP, tgsi_lit},
[TGSI_OPCODE_RCP]   = { ALU_OP1_RECIP_IEEE, 
tgsi_trans_srcx_replicate},
-   [TGSI_OPCODE_RSQ]   = { ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq},
+   [TGSI_OPCODE_RSQ]   = { ALU_OP0_NOP, tgsi_rsq},
[TGSI_OPCODE_EXP]   = { ALU_OP0_NOP, tgsi_exp},
[TGSI_OPCODE_LOG]   = { ALU_OP0_NOP, tgsi_log},
[TGSI_OPCODE_MUL]   = { ALU_OP2_MUL_IEEE, tgsi_op2},
-- 
2.12.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/9] gallium/tgsi: start adding hw atomics (v3.1)

2017-11-08 Thread Ilia Mirkin
Just some naming trivia, not a proper review:

On Wed, Nov 8, 2017 at 8:54 PM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> This adds support for hw atomic counters to TGSI.
>
> A new register file for storing atomic counters is added,
> along with a new atomic counter semantic, along with docs
> for both.
>
> v2: drop semantic, move hw counter to backend,
> Ilia pointed out SSO would have busted my plan, and he
> was right.
> v3: drop BUFFER decls. (Marek)
> v3.1: minor fixups for whitespace, set ureg error
> if we overflow the hw atomic limits. (nha)
>
> Reviewed-by: Marek Olšák 
> Reviewed-by: Nicolai Hähnle 
> Signed-off-by: Dave Airlie 
> ---
>  src/gallium/auxiliary/tgsi/tgsi_strings.c  |  1 +
>  src/gallium/auxiliary/tgsi/tgsi_ureg.c | 81 
> ++
>  src/gallium/auxiliary/tgsi/tgsi_ureg.h |  7 +++
>  src/gallium/docs/source/tgsi.rst   | 33 ++--
>  src/gallium/include/pipe/p_shader_tokens.h |  1 +
>  src/gallium/include/pipe/p_state.h |  1 +
>  6 files changed, 121 insertions(+), 3 deletions(-)
>
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c 
> b/src/gallium/auxiliary/tgsi/tgsi_strings.c
> index 0872db9..4f28b49 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_strings.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c
> @@ -58,6 +58,7 @@ static const char *tgsi_file_names[] =
> "BUFFER",
> "MEMORY",
> "CONSTBUF",
> +   "HWATOMIC",

HW_ATOMIC? (or rename the file?)

>  };
>
>  const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] =
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c 
> b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
> index b26434c..4f3ac97 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
> @@ -80,6 +80,7 @@ struct ureg_tokens {
>  #define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS
>  #define UREG_MAX_OUTPUT (4 * PIPE_MAX_SHADER_OUTPUTS)
>  #define UREG_MAX_CONSTANT_RANGE 32
> +#define UREG_MAX_HW_ATOMIC_RANGE 32
>  #define UREG_MAX_IMMEDIATE 4096
>  #define UREG_MAX_ADDR 3
>  #define UREG_MAX_ARRAY_TEMPS 256
> @@ -92,6 +93,15 @@ struct const_decl {
> unsigned nr_constant_ranges;
>  };
>
> +struct hw_atomic_decl {
> +   struct {
> +  unsigned first;
> +  unsigned last;
> +  unsigned array_id;
> +   } hw_atomic_range[UREG_MAX_HW_ATOMIC_RANGE];
> +   unsigned nr_hw_atomic_ranges;
> +};
> +
>  #define DOMAIN_DECL 0
>  #define DOMAIN_INSN 1
>
> @@ -182,6 +192,8 @@ struct ureg_program
>
> struct const_decl const_decls[PIPE_MAX_CONSTANT_BUFFERS];
>
> +   struct hw_atomic_decl hw_atomic_decls[PIPE_MAX_HW_ATOMIC_BUFFERS];
> +
> unsigned properties[TGSI_PROPERTY_COUNT];
>
> unsigned nr_addrs;
> @@ -583,6 +595,30 @@ out:
> return ureg_src_dimension(src, 0);
>  }
>
> +
> +/* Returns a new hw atomic register.  Keep track of which have been
> + * referred to so that we can emit decls later.
> + */
> +void
> +ureg_DECL_hw_atomic(struct ureg_program *ureg,
> +unsigned first,
> +unsigned last,
> +unsigned buffer_id,
> +unsigned array_id)
> +{
> +   struct hw_atomic_decl *decl = >hw_atomic_decls[buffer_id];
> +
> +   if (decl->nr_hw_atomic_ranges < UREG_MAX_HW_ATOMIC_RANGE) {
> +  uint i = decl->nr_hw_atomic_ranges++;
> +
> +  decl->hw_atomic_range[i].first = first;
> +  decl->hw_atomic_range[i].last = last;
> +  decl->hw_atomic_range[i].array_id = array_id;
> +   } else {
> +  set_bad(ureg);
> +   }
> +}
> +
>  static struct ureg_dst alloc_temporary( struct ureg_program *ureg,
>  boolean local )
>  {
> @@ -1501,6 +1537,35 @@ emit_decl_semantic(struct ureg_program *ureg,
> }
>  }
>
> +static void
> +emit_decl_atomic_2d(struct ureg_program *ureg,
> +unsigned first,
> +unsigned last,
> +unsigned index2D,
> +unsigned array_id)
> +{
> +   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, array_id ? 4 : 
> 3);
> +
> +   out[0].value = 0;
> +   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
> +   out[0].decl.NrTokens = 3;
> +   out[0].decl.File = TGSI_FILE_HW_ATOMIC;
> +   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
> +   out[0].decl.Dimension = 1;
> +   out[0].decl.Array = array_id != 0;
> +
> +   out[1].value = 0;
> +   out[1].decl_range.First = first;
> +   out[1].decl_range.Last = last;
> +
> +   out[2].value = 0;
> +   out[2].decl_dim.Index2D = index2D;
> +
> +   if (array_id) {
> +  out[3].value = 0;
> +  out[3].array.ArrayID = array_id;
> +   }
> +}
>
>  static void
>  emit_decl_fs(struct ureg_program *ureg,
> @@ -1908,6 +1973,22 @@ static void emit_decls( struct ureg_program *ureg )
>}
> }
>
> +   for (i = 0; i < PIPE_MAX_HW_ATOMIC_BUFFERS; i++) {
> +  struct hw_atomic_decl *decl = 

[Mesa-dev] [PATCH 5/9] st/mesa: start adding support for hw atomics atom. (v2)

2017-11-08 Thread Dave Airlie
From: Dave Airlie 

This adds a new atom that calls the new driver API to
bind buffers containing hw atomics.

v2: fixup bindings for sparse buffers. (mareko/nha)
don't bind buffer atomics when hw atomics are enabled.
use NewAtomicBuffer (mareko)

Signed-off-by: Dave Airlie 
---
 src/mesa/state_tracker/st_atom_atomicbuf.c   | 41 ++--
 src/mesa/state_tracker/st_atom_list.h|  2 ++
 src/mesa/state_tracker/st_cb_bufferobjects.c |  2 +-
 src/mesa/state_tracker/st_context.c  |  9 +-
 src/mesa/state_tracker/st_context.h  |  1 +
 5 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_atomicbuf.c 
b/src/mesa/state_tracker/st_atom_atomicbuf.c
index ee5944f..c502ba6 100644
--- a/src/mesa/state_tracker/st_atom_atomicbuf.c
+++ b/src/mesa/state_tracker/st_atom_atomicbuf.c
@@ -46,7 +46,7 @@ st_bind_atomics(struct st_context *st, struct gl_program 
*prog,
 {
unsigned i;
 
-   if (!prog || !st->pipe->set_shader_buffers)
+   if (!prog || !st->pipe->set_shader_buffers || st->has_hw_atomics)
   return;
 
for (i = 0; i < prog->sh.data->NumAtomicBuffers; i++) {
@@ -63,7 +63,7 @@ st_bind_atomics(struct st_context *st, struct gl_program 
*prog,
  sb.buffer_offset = binding->Offset;
  sb.buffer_size = st_obj->buffer->width0 - binding->Offset;
 
-/* AutomaticSize is FALSE if the buffer was set with BindBufferRange.
+ /* AutomaticSize is FALSE if the buffer was set with BindBufferRange.
   * Take the minimum just to be sure.
   */
  if (!binding->AutomaticSize)
@@ -128,3 +128,40 @@ st_bind_cs_atomics(struct st_context *st)
 
st_bind_atomics(st, prog, PIPE_SHADER_COMPUTE);
 }
+
+void
+st_bind_hw_atomic_buffers(struct st_context *st)
+{
+   struct pipe_shader_buffer buffers[PIPE_MAX_HW_ATOMIC_BUFFERS];
+   int i;
+   int num_buffers = 0;
+
+   if (!st->has_hw_atomics)
+  return;
+
+   for (i = 0; i < st->ctx->Const.MaxAtomicBufferBindings; i++) {
+  struct gl_buffer_binding *binding = >ctx->AtomicBufferBindings[i];
+  struct st_buffer_object *st_obj = 
st_buffer_object(binding->BufferObject);
+  struct pipe_shader_buffer *sb = [num_buffers];
+
+  if (st_obj && st_obj->buffer) {
+sb->buffer = st_obj->buffer;
+sb->buffer_offset = binding->Offset;
+sb->buffer_size = st_obj->buffer->width0 - binding->Offset;
+
+/* AutomaticSize is FALSE if the buffer was set with BindBufferRange.
+ * Take the minimum just to be sure.
+ */
+if (!binding->AutomaticSize)
+  sb->buffer_size = MIN2(sb->buffer_size, (unsigned) binding->Size);
+  } else {
+sb->buffer = NULL;
+sb->buffer_offset = 0;
+sb->buffer_size = 0;
+  }
+  num_buffers++;
+   }
+
+   st->pipe->set_hw_atomic_buffers(st->pipe, 0, num_buffers,
+   buffers);
+}
diff --git a/src/mesa/state_tracker/st_atom_list.h 
b/src/mesa/state_tracker/st_atom_list.h
index b76854e..8f50a72 100644
--- a/src/mesa/state_tracker/st_atom_list.h
+++ b/src/mesa/state_tracker/st_atom_list.h
@@ -66,6 +66,8 @@ ST_STATE(ST_NEW_GS_SSBOS, st_bind_gs_ssbos)
 ST_STATE(ST_NEW_PIXEL_TRANSFER, st_update_pixel_transfer)
 ST_STATE(ST_NEW_TESS_STATE, st_update_tess)
 
+ST_STATE(ST_NEW_HW_ATOMICS, st_bind_hw_atomic_buffers)
+
 /* this must be done after the vertex program update */
 ST_STATE(ST_NEW_VERTEX_ARRAYS, st_update_array)
 
diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c 
b/src/mesa/state_tracker/st_cb_bufferobjects.c
index 86ebfc6..a9104a9 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -348,7 +348,7 @@ bufferobj_data(struct gl_context *ctx,
if (st_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER)
   ctx->NewDriverState |= ST_NEW_SAMPLER_VIEWS | ST_NEW_IMAGE_UNITS;
if (st_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER)
-  ctx->NewDriverState |= ST_NEW_ATOMIC_BUFFER;
+  ctx->NewDriverState |= ctx->DriverFlags.NewAtomicBuffer;
 
return GL_TRUE;
 }
diff --git a/src/mesa/state_tracker/st_context.c 
b/src/mesa/state_tracker/st_context.c
index 5d8dd8b..e82090b 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -405,6 +405,10 @@ st_create_context_priv( struct gl_context *ctx, struct 
pipe_context *pipe,
st->has_multi_draw_indirect =
   screen->get_param(screen, PIPE_CAP_MULTI_DRAW_INDIRECT);
 
+   st->has_hw_atomics =
+  screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
+   PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS) ? true 
: false;
+
/* GL limits and extensions */
st_init_limits(pipe->screen, >Const, >Extensions);
st_init_extensions(pipe->screen, >Const,
@@ -497,7 +501,10 @@ static void st_init_driver_flags(struct st_context *st)
 
/* Shader resources */
f->NewTextureBuffer = 

[Mesa-dev] [PATCH 8/9] r600: add support for hw atomic counters. (v3)

2017-11-08 Thread Dave Airlie
From: Dave Airlie 

This adds support for the evergreen/cayman atomic counters.

These are implemented using GDS append/consume counters. The values
for each counter are loaded before drawing and saved after each draw
using special CP packets.

v2: move hw atomic assignment into driver.
v3: fix messing up caps (Gert Wollny), only store ranges in driver,
drop buffers.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/evergreen_state.c   | 159 ++
 src/gallium/drivers/r600/r600_pipe.c |  15 ++
 src/gallium/drivers/r600/r600_pipe.h |  22 +++
 src/gallium/drivers/r600/r600_shader.c   | 239 ---
 src/gallium/drivers/r600/r600_shader.h   |  19 +++
 src/gallium/drivers/r600/r600_state_common.c |  46 ++
 src/gallium/drivers/r600/r600d_common.h  |   2 +
 7 files changed, 480 insertions(+), 22 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 96eb35a..634cd96 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -3716,6 +3716,38 @@ static void evergreen_set_tess_state(struct pipe_context 
*ctx,
rctx->tess_state_dirty = true;
 }
 
+static void evergreen_set_hw_atomic_buffers(struct pipe_context *ctx,
+   unsigned start_slot,
+   unsigned count,
+   const struct pipe_shader_buffer 
*buffers)
+{
+   struct r600_context *rctx = (struct r600_context *)ctx;
+   struct r600_atomic_buffer_state *astate;
+   int i, idx;
+
+   astate = >atomic_buffer_state;
+
+   /* we'd probably like to expand this to 8 later so put the logic in */
+   for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) {
+   const struct pipe_shader_buffer *buf;
+   struct pipe_shader_buffer *abuf;
+
+   abuf = >buffer[i];
+
+   if (!buffers || !buffers[idx].buffer) {
+   pipe_resource_reference(>buffer, NULL);
+   astate->enabled_mask &= ~(1 << i);
+   continue;
+   }
+   buf = [idx];
+
+   pipe_resource_reference(>buffer, buf->buffer);
+   abuf->buffer_offset = buf->buffer_offset;
+   abuf->buffer_size = buf->buffer_size;
+   astate->enabled_mask |= (1 << i);
+   }
+}
+
 void evergreen_init_state_functions(struct r600_context *rctx)
 {
unsigned id = 1;
@@ -3801,6 +3833,7 @@ void evergreen_init_state_functions(struct r600_context 
*rctx)
rctx->b.b.set_polygon_stipple = evergreen_set_polygon_stipple;
rctx->b.b.set_min_samples = evergreen_set_min_samples;
rctx->b.b.set_tess_state = evergreen_set_tess_state;
+   rctx->b.b.set_hw_atomic_buffers = evergreen_set_hw_atomic_buffers;
if (rctx->b.chip_class == EVERGREEN)
 rctx->b.b.get_sample_position = evergreen_get_sample_position;
 else
@@ -4107,3 +4140,129 @@ void eg_trace_emit(struct r600_context *rctx)
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, AC_ENCODE_TRACE_POINT(rctx->trace_id));
 }
+
+bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
+   struct r600_shader_atomic 
*combined_atomics,
+   uint8_t *atomic_used_mask_p)
+{
+   struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
+   struct r600_atomic_buffer_state *astate = >atomic_buffer_state;
+   unsigned pkt_flags = 0;
+   uint8_t atomic_used_mask = 0;
+   int i, j, k;
+
+   for (i = 0; i < EG_NUM_HW_STAGES; i++) {
+   uint8_t num_atomic_stage;
+   struct r600_pipe_shader *pshader;
+
+   pshader = rctx->hw_shader_stages[i].shader;
+   if (!pshader)
+   continue;
+
+   num_atomic_stage = pshader->shader.nhwatomic_ranges;
+   if (!num_atomic_stage)
+   continue;
+
+   for (j = 0; j < num_atomic_stage; j++) {
+   struct r600_shader_atomic *atomic = 
>shader.atomics[j];
+   int natomics = atomic->end - atomic->start + 1;
+
+   for (k = 0; k < natomics; k++) {
+   /* seen this in a previous stage */
+   if (atomic_used_mask & (1u << (atomic->hw_idx + 
k)))
+   continue;
+
+   combined_atomics[atomic->hw_idx + k].hw_idx = 
atomic->hw_idx + k;
+   combined_atomics[atomic->hw_idx + k].buffer_id 
= atomic->buffer_id;
+   combined_atomics[atomic->hw_idx + k].start = 
atomic->start + k;
+   

[Mesa-dev] [PATCH 9/9] docs: update r600 atomic counter status.

2017-11-08 Thread Dave Airlie
From: Dave Airlie 

Signed-off-by: Dave Airlie 
---
 docs/features.txt | 6 +++---
 docs/relnotes/17.4.0.html | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/docs/features.txt b/docs/features.txt
index 10ccf9d..86d07ba 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -150,7 +150,7 @@ GL 4.2, GLSL 4.20 -- all DONE: i965/gen7+, nvc0, radeonsi
 
   GL_ARB_texture_compression_bptc   DONE (i965, r600)
   GL_ARB_compressed_texture_pixel_storage   DONE (all drivers)
-  GL_ARB_shader_atomic_counters DONE (i965, softpipe)
+  GL_ARB_shader_atomic_counters DONE (i965, r600, 
softpipe)
   GL_ARB_texture_storageDONE (all drivers)
   GL_ARB_transform_feedback_instanced   DONE (i965, nv50, 
r600, llvmpipe, softpipe, swr)
   GL_ARB_base_instance  DONE (i965, nv50, 
r600, llvmpipe, softpipe, swr)
@@ -227,7 +227,7 @@ GL 4.6, GLSL 4.60
   GL_ARB_indirect_parametersDONE (i965/gen7+, 
nvc0, radeonsi)
   GL_ARB_pipeline_statistics_query  DONE (i965, nvc0, 
radeonsi, llvmpipe, softpipe, swr)
   GL_ARB_polygon_offset_clamp   DONE (i965, nv50, 
nvc0, r600, radeonsi, llvmpipe, swr)
-  GL_ARB_shader_atomic_counter_ops  DONE (i965/gen7+, 
nvc0, radeonsi, softpipe)
+  GL_ARB_shader_atomic_counter_ops  DONE (i965/gen7+, 
nvc0, r600, radeonsi, softpipe)
   GL_ARB_shader_draw_parameters DONE (i965, nvc0, 
radeonsi)
   GL_ARB_shader_group_vote  DONE (i965, nvc0, 
radeonsi)
   GL_ARB_spirv_extensions   in progress (Nicolai 
Hähnle, Ian Romanick)
@@ -246,7 +246,7 @@ GLES3.1, GLSL ES 3.1 -- all DONE: i965/hsw+, nvc0, radeonsi
   GL_ARB_explicit_uniform_location  DONE (all drivers that 
support GLSL)
   GL_ARB_framebuffer_no_attachments DONE (i965/gen7+, 
r600, softpipe)
   GL_ARB_program_interface_queryDONE (all drivers)
-  GL_ARB_shader_atomic_counters DONE (i965/gen7+, 
softpipe)
+  GL_ARB_shader_atomic_counters DONE (i965/gen7+, 
r600, softpipe)
   GL_ARB_shader_image_load_storeDONE (i965/gen7+, 
softpipe)
   GL_ARB_shader_image_size  DONE (i965/gen7+, 
softpipe)
   GL_ARB_shader_storage_buffer_object   DONE (i965/gen7+, 
softpipe)
diff --git a/docs/relnotes/17.4.0.html b/docs/relnotes/17.4.0.html
index f81b5bd..19e0c80 100644
--- a/docs/relnotes/17.4.0.html
+++ b/docs/relnotes/17.4.0.html
@@ -45,6 +45,7 @@ Note: some of the new features are only available with 
certain drivers.
 
 
 Disk shader cache support for i965 when MESA_GLSL_CACHE_DISABLE 
environment variable is set to "0" or "false"
+GL_ARB_shader_atomic_counters and GL_ARB_shader_atomic_counter_ops on 
r600/evergreen+
 
 
 Bug fixes
-- 
2.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/9] st/mesa: setup hw atomic limits. (v1.1)

2017-11-08 Thread Dave Airlie
From: Dave Airlie 

HW atomics need to use caps to set some limits, and some
other limits may also need limiting.

This fixes things up to work for evergreen hw, it may need
more changes in the future if other hw wants to use this path.

v1.1: fix indent.

Reviewed-by: Nicolai Hähnle 
Reviewed-by: Marek Olšák 
Signed-off-by: Dave Airlie 
---
 src/mesa/state_tracker/st_extensions.c | 45 ++
 1 file changed, 35 insertions(+), 10 deletions(-)

diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index fa2d002..d4b8dc9 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -79,6 +79,7 @@ void st_init_limits(struct pipe_screen *screen,
unsigned sh;
boolean can_ubo = TRUE;
int temp;
+   bool ssbo_atomic = true;
 
c->MaxTextureLevels
   = _min(screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS),
@@ -243,11 +244,21 @@ void st_init_limits(struct pipe_screen *screen,
   c->MaxUniformBlockSize / 4 *
   pc->MaxUniformBlocks);
 
-  pc->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
-  pc->MaxAtomicBuffers = screen->get_shader_param(
-screen, sh, PIPE_SHADER_CAP_MAX_SHADER_BUFFERS) / 2;
-  pc->MaxShaderStorageBlocks = pc->MaxAtomicBuffers;
-
+  temp = screen->get_shader_param(screen, sh, 
PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS);
+  if (temp) {
+ /*
+  * for separate atomic counters get the actual hw limits
+  * per stage on atomic counters and buffers
+  */
+ ssbo_atomic = false;
+ pc->MaxAtomicCounters = temp;
+ pc->MaxAtomicBuffers = screen->get_shader_param(screen, sh, 
PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS);
+ pc->MaxShaderStorageBlocks = screen->get_shader_param(screen, sh, 
PIPE_SHADER_CAP_MAX_SHADER_BUFFERS);
+  } else {
+ pc->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
+ pc->MaxAtomicBuffers = screen->get_shader_param(screen, sh, 
PIPE_SHADER_CAP_MAX_SHADER_BUFFERS) / 2;
+ pc->MaxShaderStorageBlocks = pc->MaxAtomicBuffers;
+  }
   pc->MaxImageUniforms = screen->get_shader_param(
 screen, sh, PIPE_SHADER_CAP_MAX_SHADER_IMAGES);
 
@@ -407,14 +418,26 @@ void st_init_limits(struct pipe_screen *screen,
   screen->get_param(screen, PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL);
 
c->MaxAtomicBufferBindings =
- c->Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers;
-   c->MaxCombinedAtomicBuffers =
+  c->Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers;
+
+   if (!ssbo_atomic) {
+  /* for separate atomic buffers - there atomic buffer size will be
+ limited */
+  c->MaxAtomicBufferSize = 
c->Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters * ATOMIC_COUNTER_SIZE;
+  /* on all HW with separate atomic (evergreen) the following
+ lines are true. not sure it's worth adding CAPs for this at this
+ stage. */
+  c->MaxCombinedAtomicCounters = 
c->Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters;
+  c->MaxCombinedAtomicBuffers = 
c->Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers;
+   } else {
+  c->MaxCombinedAtomicBuffers =
  c->Program[MESA_SHADER_VERTEX].MaxAtomicBuffers +
  c->Program[MESA_SHADER_TESS_CTRL].MaxAtomicBuffers +
  c->Program[MESA_SHADER_TESS_EVAL].MaxAtomicBuffers +
  c->Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers +
  c->Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers;
-   assert(c->MaxCombinedAtomicBuffers <= MAX_COMBINED_ATOMIC_BUFFERS);
+  assert(c->MaxCombinedAtomicBuffers <= MAX_COMBINED_ATOMIC_BUFFERS);
+   }
 
if (c->MaxCombinedAtomicBuffers > 0) {
   extensions->ARB_shader_atomic_counters = GL_TRUE;
@@ -425,8 +448,10 @@ void st_init_limits(struct pipe_screen *screen,
c->ShaderStorageBufferOffsetAlignment =
   screen->get_param(screen, PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT);
if (c->ShaderStorageBufferOffsetAlignment) {
-  c->MaxCombinedShaderStorageBlocks = c->MaxShaderStorageBufferBindings =
- c->MaxCombinedAtomicBuffers;
+  /* for hw atomic counters leaves these at default for now */
+  if (ssbo_atomic)
+ c->MaxCombinedShaderStorageBlocks = c->MaxShaderStorageBufferBindings 
=
+c->MaxCombinedAtomicBuffers;
   c->MaxCombinedShaderOutputResources +=
  c->MaxCombinedShaderStorageBlocks;
   c->MaxShaderStorageBlockSize = 1 << 27;
-- 
2.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/9] gallium: add hw atomic buffer binding API.

2017-11-08 Thread Dave Airlie
From: Dave Airlie 

This API binds atomic buffers for all bound shaders (as per the
GL semantics).

This is needed to support cross shader hw atomic counters.

Reviewed-by: Nicolai Hähnle 
Reviewed-by: Marek Olšák 
Signed-off-by: Dave Airlie 
---
 src/gallium/docs/source/context.rst  |  8 
 src/gallium/include/pipe/p_context.h | 16 
 2 files changed, 24 insertions(+)

diff --git a/src/gallium/docs/source/context.rst 
b/src/gallium/docs/source/context.rst
index ba7fef8..5898157 100644
--- a/src/gallium/docs/source/context.rst
+++ b/src/gallium/docs/source/context.rst
@@ -145,6 +145,14 @@ to the array index which is used for sampling.
 * ``sampler_view_destroy`` destroys a sampler view and releases its reference
   to associated texture.
 
+Hardware Atomic buffers
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Buffers containing hw atomics are required to support the feature
+on some drivers.
+
+Drivers that require this need to fill the ``set_hw_atomic_buffers`` method.
+
 Shader Resources
 
 
diff --git a/src/gallium/include/pipe/p_context.h 
b/src/gallium/include/pipe/p_context.h
index 4609d4d..c2153f7 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -332,6 +332,22 @@ struct pipe_context {
   const struct pipe_shader_buffer *buffers);
 
/**
+* Bind an array of hw atomic buffers for use by all shaders.
+* Any buffers that were previously bound to the specified range
+* will be unbound.
+*
+* \param start_slot first buffer slot to bind.
+* \param count  number of consecutive buffers to bind.
+* \param buffersarray of pointers to the buffers to bind, it
+*   should contain at least \a count elements
+*   unless it's NULL, in which case no buffers will
+*   be bound.
+*/
+   void (*set_hw_atomic_buffers)(struct pipe_context *,
+ unsigned start_slot, unsigned count,
+ const struct pipe_shader_buffer *buffers);
+
+   /**
 * Bind an array of images that will be used by a shader.
 * Any images that were previously bound to the specified range
 * will be unbound.
-- 
2.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 7/9] st/mesa: add support for hw atomics to glsl->tgsi. (v4)

2017-11-08 Thread Dave Airlie
From: Dave Airlie 

This adds support for creating the hw atomic tgsi from
the glsl codepaths.

v2: drop the atomic index and move to backend.
v3: drop buffer decls. (Marek)
v4: fix off by one (Gert)

Reviewed-by: Nicolai Hähnle 
Reviewed-by: Marek Olšák 
Signed-off-by: Dave Airlie 
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 100 -
 1 file changed, 85 insertions(+), 15 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index fd9df61..472a6c7 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -152,6 +152,13 @@ find_array_type(struct inout_decl *decls, unsigned count, 
unsigned array_id)
return GLSL_TYPE_ERROR;
 }
 
+struct hwatomic_decl {
+   unsigned location;
+   unsigned binding;
+   unsigned size;
+   unsigned array_id;
+};
+
 struct glsl_to_tgsi_visitor : public ir_visitor {
 public:
glsl_to_tgsi_visitor();
@@ -176,6 +183,9 @@ public:
unsigned num_outputs;
unsigned num_output_arrays;
 
+   struct hwatomic_decl atomic_info[PIPE_MAX_HW_ATOMIC_BUFFERS];
+   unsigned num_atomics;
+   unsigned num_atomic_arrays;
int num_address_regs;
uint32_t samplers_used;
glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
@@ -3206,24 +3216,64 @@ 
glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
exec_node *param = ir->actual_parameters.get_head();
ir_dereference *deref = static_cast<ir_dereference *>(param);
ir_variable *location = deref->variable_referenced();
-
-   st_src_reg buffer(
- PROGRAM_BUFFER, location->data.binding, GLSL_TYPE_ATOMIC_UINT);
-
+   bool has_hw_atomics = st_context(ctx)->has_hw_atomics;
/* Calculate the surface offset */
st_src_reg offset;
unsigned array_size = 0, base = 0;
uint16_t index = 0;
+   st_src_reg resource;
 
   get_deref_offsets(deref, &array_size, &base, &index, &offset, false);
 
-   if (offset.file != PROGRAM_UNDEFINED) {
-  emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(offset),
-   offset, st_src_reg_for_int(ATOMIC_COUNTER_SIZE));
-  emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(offset),
-   offset, st_src_reg_for_int(location->data.offset + index * 
ATOMIC_COUNTER_SIZE));
+   if (has_hw_atomics) {
+  variable_storage *entry = find_variable_storage(location);
+  st_src_reg buffer(PROGRAM_HW_ATOMIC, 0, GLSL_TYPE_ATOMIC_UINT, 
location->data.binding);
+
+  if (!entry) {
+ entry = new(mem_ctx) variable_storage(location, PROGRAM_HW_ATOMIC,
+   num_atomics);
+ _mesa_hash_table_insert(this->variables, location, entry);
+
+ atomic_info[num_atomics].location = location->data.location;
+ atomic_info[num_atomics].binding = location->data.binding;
+ atomic_info[num_atomics].size = 
location->type->arrays_of_arrays_size();
+ atomic_info[num_atomics].array_id = 0;
+ num_atomics++;
+  }
+
+  if (offset.file != PROGRAM_UNDEFINED) {
+ if (atomic_info[entry->index].array_id == 0) {
+num_atomic_arrays++;
+atomic_info[entry->index].array_id = num_atomic_arrays;
+ }
+ buffer.array_id = atomic_info[entry->index].array_id;
+  }
+
+  buffer.index = index;
+  buffer.index += location->data.offset / ATOMIC_COUNTER_SIZE;
+  buffer.has_index2 = true;
+
+  if (offset.file != PROGRAM_UNDEFINED) {
+ buffer.reladdr = ralloc(mem_ctx, st_src_reg);
+ *buffer.reladdr = offset;
+ emit_arl(ir, sampler_reladdr, offset);
+  }
+  offset = st_src_reg_for_int(0);
+
+  resource = buffer;
} else {
-  offset = st_src_reg_for_int(location->data.offset + index * 
ATOMIC_COUNTER_SIZE);
+  st_src_reg buffer(PROGRAM_BUFFER, location->data.binding,
+GLSL_TYPE_ATOMIC_UINT);
+
+  if (offset.file != PROGRAM_UNDEFINED) {
+ emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(offset),
+  offset, st_src_reg_for_int(ATOMIC_COUNTER_SIZE));
+ emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(offset),
+  offset, st_src_reg_for_int(location->data.offset + index * 
ATOMIC_COUNTER_SIZE));
+  } else {
+ offset = st_src_reg_for_int(location->data.offset + index * 
ATOMIC_COUNTER_SIZE);
+  }
+  resource = buffer;
}
 
ir->return_deref->accept(this);
@@ -3286,7 +3336,7 @@ 
glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
   inst = emit_asm(ir, opcode, dst, offset, data, data2);
}
 
-   inst->resource = buffer;
+   inst->resource = resource;
 }
 
 void
@@ -4388,6 +4438,8 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
num_outputs = 0;
num_input_arrays = 0;
num_output_arrays = 0;
+   num_atomics = 0;
+   num_atomic_arrays = 0;
num_immediates = 0;
num_address_regs = 0;
samplers_used = 0;
@@ 

[Mesa-dev] [PATCH 2/9] gallium/tgsi: start adding hw atomics (v3.1)

2017-11-08 Thread Dave Airlie
From: Dave Airlie 

This adds support for hw atomic counters to TGSI.

A new register file for storing atomic counters is added,
along with a new atomic counter semantic, along with docs
for both.

v2: drop semantic, move hw counter to backend,
Ilia pointed out SSO would have busted my plan, and he
was right.
v3: drop BUFFER decls. (Marek)
v3.1: minor fixups for whitespace, set ureg error
if we overflow the hw atomic limits. (nha)

Reviewed-by: Marek Olšák 
Reviewed-by: Nicolai Hähnle 
Signed-off-by: Dave Airlie 
---
 src/gallium/auxiliary/tgsi/tgsi_strings.c  |  1 +
 src/gallium/auxiliary/tgsi/tgsi_ureg.c | 81 ++
 src/gallium/auxiliary/tgsi/tgsi_ureg.h |  7 +++
 src/gallium/docs/source/tgsi.rst   | 33 ++--
 src/gallium/include/pipe/p_shader_tokens.h |  1 +
 src/gallium/include/pipe/p_state.h |  1 +
 6 files changed, 121 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c 
b/src/gallium/auxiliary/tgsi/tgsi_strings.c
index 0872db9..4f28b49 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_strings.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c
@@ -58,6 +58,7 @@ static const char *tgsi_file_names[] =
"BUFFER",
"MEMORY",
"CONSTBUF",
+   "HWATOMIC",
 };
 
 const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] =
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c 
b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index b26434c..4f3ac97 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -80,6 +80,7 @@ struct ureg_tokens {
 #define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS
 #define UREG_MAX_OUTPUT (4 * PIPE_MAX_SHADER_OUTPUTS)
 #define UREG_MAX_CONSTANT_RANGE 32
+#define UREG_MAX_HW_ATOMIC_RANGE 32
 #define UREG_MAX_IMMEDIATE 4096
 #define UREG_MAX_ADDR 3
 #define UREG_MAX_ARRAY_TEMPS 256
@@ -92,6 +93,15 @@ struct const_decl {
unsigned nr_constant_ranges;
 };
 
+struct hw_atomic_decl {
+   struct {
+  unsigned first;
+  unsigned last;
+  unsigned array_id;
+   } hw_atomic_range[UREG_MAX_HW_ATOMIC_RANGE];
+   unsigned nr_hw_atomic_ranges;
+};
+
 #define DOMAIN_DECL 0
 #define DOMAIN_INSN 1
 
@@ -182,6 +192,8 @@ struct ureg_program
 
struct const_decl const_decls[PIPE_MAX_CONSTANT_BUFFERS];
 
+   struct hw_atomic_decl hw_atomic_decls[PIPE_MAX_HW_ATOMIC_BUFFERS];
+
unsigned properties[TGSI_PROPERTY_COUNT];
 
unsigned nr_addrs;
@@ -583,6 +595,30 @@ out:
return ureg_src_dimension(src, 0);
 }
 
+
+/* Returns a new hw atomic register.  Keep track of which have been
+ * referred to so that we can emit decls later.
+ */
+void
+ureg_DECL_hw_atomic(struct ureg_program *ureg,
+unsigned first,
+unsigned last,
+unsigned buffer_id,
+unsigned array_id)
+{
+   struct hw_atomic_decl *decl = &ureg->hw_atomic_decls[buffer_id];
+
+   if (decl->nr_hw_atomic_ranges < UREG_MAX_HW_ATOMIC_RANGE) {
+  uint i = decl->nr_hw_atomic_ranges++;
+
+  decl->hw_atomic_range[i].first = first;
+  decl->hw_atomic_range[i].last = last;
+  decl->hw_atomic_range[i].array_id = array_id;
+   } else {
+  set_bad(ureg);
+   }
+}
+
 static struct ureg_dst alloc_temporary( struct ureg_program *ureg,
 boolean local )
 {
@@ -1501,6 +1537,35 @@ emit_decl_semantic(struct ureg_program *ureg,
}
 }
 
+static void
+emit_decl_atomic_2d(struct ureg_program *ureg,
+unsigned first,
+unsigned last,
+unsigned index2D,
+unsigned array_id)
+{
+   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, array_id ? 4 : 3);
+
+   out[0].value = 0;
+   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
+   out[0].decl.NrTokens = 3;
+   out[0].decl.File = TGSI_FILE_HW_ATOMIC;
+   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
+   out[0].decl.Dimension = 1;
+   out[0].decl.Array = array_id != 0;
+
+   out[1].value = 0;
+   out[1].decl_range.First = first;
+   out[1].decl_range.Last = last;
+
+   out[2].value = 0;
+   out[2].decl_dim.Index2D = index2D;
+
+   if (array_id) {
+  out[3].value = 0;
+  out[3].array.ArrayID = array_id;
+   }
+}
 
 static void
 emit_decl_fs(struct ureg_program *ureg,
@@ -1908,6 +1973,22 @@ static void emit_decls( struct ureg_program *ureg )
   }
}
 
+   for (i = 0; i < PIPE_MAX_HW_ATOMIC_BUFFERS; i++) {
+  struct hw_atomic_decl *decl = &ureg->hw_atomic_decls[i];
+
+  if (decl->nr_hw_atomic_ranges) {
+ uint j;
+
+ for (j = 0; j < decl->nr_hw_atomic_ranges; j++) {
+emit_decl_atomic_2d(ureg,
+decl->hw_atomic_range[j].first,
+decl->hw_atomic_range[j].last,
+i,
+decl->hw_atomic_range[j].array_id);
+ }
+  }
+   }
+

[Mesa-dev] [PATCH 4/9] mesa/program: add hw atomic counter file

2017-11-08 Thread Dave Airlie
From: Dave Airlie 

This is needed for the GLSL->TGSI translation for hw atomic counters.

Reviewed-by: Nicolai Hähnle 
Reviewed-by: Marek Olšák 
Signed-off-by: Dave Airlie 
---
 src/mesa/main/mtypes.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index d092630..b905a26 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2060,6 +2060,7 @@ typedef enum
PROGRAM_BUFFER,  /**< for shader buffers, compile-time only */
PROGRAM_MEMORY,  /**< for shared, global and local memory */
PROGRAM_IMAGE,   /**< for shader images, compile-time only */
+   PROGRAM_HW_ATOMIC,   /**< for hw atomic counters, compile-time only */
PROGRAM_FILE_MAX
 } gl_register_file;
 
-- 
2.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] gallium/r600 atomic - v4

2017-11-08 Thread Dave Airlie
Hopefully the last pass; there are a few fixes in here. Patch 5 is the only
outstanding non-reviewed one. I think I've fixed the sparse
buffer binding in it well enough; there is also a fix for Gert's
off-by-one.

Dave.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/9] gallium: add CAPs to support HW atomic counters. (v3)

2017-11-08 Thread Dave Airlie
From: Dave Airlie 

This looks like an evergreen specific feature, but with atomic
counters AMD have hw specific counters they use instead of operating
on buffers directly. These are separate to the buffer atomics,
so require different limits and code paths.

I've left the CAP for atomic type extensible in case someone
else has a variant on this sort of thing (freedreno maybe?)
and needs to change it.

This adds all the CAPs required to add support for those atomic
counters, along with a related CAP for limiting the number of
output resources.

I'd like to land this and the st patch then I can start to
upstream the evergreen support for these and other GL4.x features.

v2: drop the ATOMIC_COUNTER_MODE cap, just use the return
from the HW counters. If 0 we use the current mode.
v3: fix some rebase errors (Gert Wollny)

Reviewed-by: Nicolai Hähnle 
Reviewed-by: Marek Olšák 
Signed-off-by: Dave Airlie 
---
 src/gallium/auxiliary/gallivm/lp_bld_limits.h| 2 ++
 src/gallium/auxiliary/tgsi/tgsi_exec.h   | 2 ++
 src/gallium/docs/source/screen.rst   | 5 -
 src/gallium/drivers/etnaviv/etnaviv_screen.c | 2 ++
 src/gallium/drivers/freedreno/freedreno_screen.c | 2 ++
 src/gallium/drivers/nouveau/nv30/nv30_screen.c   | 2 ++
 src/gallium/drivers/nouveau/nv50/nv50_screen.c   | 2 ++
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   | 2 ++
 src/gallium/drivers/r300/r300_screen.c   | 2 ++
 src/gallium/drivers/r600/r600_pipe.c | 2 ++
 src/gallium/drivers/radeonsi/si_pipe.c   | 2 ++
 src/gallium/drivers/svga/svga_screen.c   | 4 
 src/gallium/drivers/vc4/vc4_screen.c | 2 ++
 src/gallium/drivers/virgl/virgl_screen.c | 2 ++
 src/gallium/include/pipe/p_defines.h | 2 ++
 15 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h 
b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
index ea320bb..c7755bf 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
@@ -140,6 +140,8 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
+   case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
+   case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
   return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
   return 32;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h 
b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 514c69e..ad920dc 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -541,6 +541,8 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
+   case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
+   case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
   return 0;
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
   return PIPE_MAX_SHADER_BUFFERS;
diff --git a/src/gallium/docs/source/screen.rst 
b/src/gallium/docs/source/screen.rst
index 9f00059..519728f 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -519,7 +519,10 @@ MOV OUT[0], CONST[0][3]  # copy vector 3 of constbuf 0
 * ``PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS``: Whether the merge registers
   TGSI pass is skipped. This might reduce code size and register pressure if
   the underlying driver has a real backend compiler.
-
+* ``PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS``: If atomic counters are separate,
+  how many HW counters are available for this stage. (0 uses SSBO atomics).
+* ``PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS``: If atomic counters are
+  separate, how many atomic counter buffers are available for this stage.
 
 .. _pipe_compute_cap:
 
diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c 
b/src/gallium/drivers/etnaviv/etnaviv_screen.c
index b0c4b7b..e3de442 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
@@ -459,6 +459,8 @@ etna_screen_get_shader_param(struct pipe_screen *pscreen,
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
+   case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
+   case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
   return 0;
}
 
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c 
b/src/gallium/drivers/freedreno/freedreno_screen.c
index e5504b6..bc66dab 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -554,6 +554,8 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen,
return 32;
case 

Re: [Mesa-dev] [PATCH 2/4] st/mesa: use enum types instead of int/unsigned (v3)

2017-11-08 Thread Ian Romanick
On 11/08/2017 09:08 AM, Erik Faye-Lund wrote:
> On Wed, Nov 8, 2017 at 1:07 AM, Brian Paul  wrote:
>> Use the proper enum types for various variables.  Makes life in gdb
>> a little nicer.  Note that the size of enum bitfields must be one
>> larger so the high bit is always zero (for MSVC).
> 
> You *could* also do something like this on MSVC to get unsigned enum
> values, thus not needing the extra bit:
> 
> ---8<---
> #include <stdio.h>
> 
> #ifdef _MSC_VER
> #define FORCE_UNSIGNED : unsigned
> #else
> #define FORCE_UNSIGNED
> #endif
> 
> enum Foo FORCE_UNSIGNED {

Looking at
https://stackoverflow.com/questions/837319/packing-enums-using-the-msvc-compiler,
I think some variation of this may solve the attribute((packed)) problem
too.  See my other e-mail.  Basically an enum can be forced to be a type
of a particular size using this technique.  Maybe a ENUM_8BITS that is
": unsigned char" on MSVC and "__attribute__((__packed__))" on GCC?

>FOO_A = 1,
>FOO_B = 255
> };
> 
> struct Bar {
>Foo foo : 8;
> };
> 
> int main()
> {
>Bar foo;
>foo.foo = FOO_B;
>printf("%d\n", foo.foo);
>return 0;
> }
> ---8<---
> 
> This outputs 255 on MSVC.
> 
> It's not beautiful, though.
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/4] glsl: s/unsigned/glsl_base_type/ in glsl type code (v2)

2017-11-08 Thread Ian Romanick
Any thoughts about my data using __attribute__((__packed__))?

On 11/07/2017 04:07 PM, Brian Paul wrote:
> Declare glsl_type::sampled_type as glsl_base_type as we do for the
> base_type field.  And make base_type a bitfield to save a few bytes.
> 
> Update glsl_type constructor to take glsl_base_type instead of unsigned
> and pass GLSL_TYPE_VOID instead of zero.
> 
> No Piglit regressions with llvmpipe.
> 
> v2:
> - Declare both base_type and sampled_type as 8-bit fields
> - Use the new ASSERT_BITFIELD_SIZE() macro.
> ---
>  src/compiler/glsl_types.cpp | 30 +++---
>  src/compiler/glsl_types.h   | 28 +---
>  2 files changed, 36 insertions(+), 22 deletions(-)
> 
> diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp
> index 704b63c..107a81f 100644
> --- a/src/compiler/glsl_types.cpp
> +++ b/src/compiler/glsl_types.cpp
> @@ -50,9 +50,9 @@ glsl_type::glsl_type(GLenum gl_type,
>   glsl_base_type base_type, unsigned vector_elements,
>   unsigned matrix_columns, const char *name) :
> gl_type(gl_type),
> -   base_type(base_type),
> +   base_type(base_type), sampled_type(GLSL_TYPE_VOID),
> sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
> -   sampled_type(0), interface_packing(0), interface_row_major(0),
> +   interface_packing(0), interface_row_major(0),
> vector_elements(vector_elements), matrix_columns(matrix_columns),
> length(0)
>  {
> @@ -79,11 +79,11 @@ glsl_type::glsl_type(GLenum gl_type,
>  
>  glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type,
>   enum glsl_sampler_dim dim, bool shadow, bool array,
> - unsigned type, const char *name) :
> + glsl_base_type type, const char *name) :
> gl_type(gl_type),
> -   base_type(base_type),
> +   base_type(base_type), sampled_type(type),
> sampler_dimensionality(dim), sampler_shadow(shadow),
> -   sampler_array(array), sampled_type(type), interface_packing(0),
> +   sampler_array(array), interface_packing(0),
> interface_row_major(0), length(0)
>  {
> mtx_lock(&glsl_type::mem_mutex);
> @@ -102,9 +102,9 @@ glsl_type::glsl_type(GLenum gl_type, glsl_base_type 
> base_type,
>  glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
>   const char *name) :
> gl_type(0),
> -   base_type(GLSL_TYPE_STRUCT),
> +   base_type(GLSL_TYPE_STRUCT), sampled_type(GLSL_TYPE_VOID),
> sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
> -   sampled_type(0), interface_packing(0), interface_row_major(0),
> +   interface_packing(0), interface_row_major(0),
> vector_elements(0), matrix_columns(0),
> length(num_fields)
>  {
> @@ -131,9 +131,9 @@ glsl_type::glsl_type(const glsl_struct_field *fields, 
> unsigned num_fields,
>   enum glsl_interface_packing packing,
>   bool row_major, const char *name) :
> gl_type(0),
> -   base_type(GLSL_TYPE_INTERFACE),
> +   base_type(GLSL_TYPE_INTERFACE), sampled_type(GLSL_TYPE_VOID),
> sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
> -   sampled_type(0), interface_packing((unsigned) packing),
> +   interface_packing((unsigned) packing),
> interface_row_major((unsigned) row_major),
> vector_elements(0), matrix_columns(0),
> length(num_fields)
> @@ -159,9 +159,9 @@ glsl_type::glsl_type(const glsl_struct_field *fields, 
> unsigned num_fields,
>  glsl_type::glsl_type(const glsl_type *return_type,
>   const glsl_function_param *params, unsigned num_params) 
> :
> gl_type(0),
> -   base_type(GLSL_TYPE_FUNCTION),
> +   base_type(GLSL_TYPE_FUNCTION), sampled_type(GLSL_TYPE_VOID),
> sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
> -   sampled_type(0), interface_packing(0), interface_row_major(0),
> +   interface_packing(0), interface_row_major(0),
> vector_elements(0), matrix_columns(0),
> length(num_params)
>  {
> @@ -191,9 +191,9 @@ glsl_type::glsl_type(const glsl_type *return_type,
>  
>  glsl_type::glsl_type(const char *subroutine_name) :
> gl_type(0),
> -   base_type(GLSL_TYPE_SUBROUTINE),
> +   base_type(GLSL_TYPE_SUBROUTINE), sampled_type(GLSL_TYPE_VOID),
> sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
> -   sampled_type(0), interface_packing(0), interface_row_major(0),
> +   interface_packing(0), interface_row_major(0),
> vector_elements(1), matrix_columns(1),
> length(0)
>  {
> @@ -442,9 +442,9 @@ _mesa_glsl_release_types(void)
>  
>  
>  glsl_type::glsl_type(const glsl_type *array, unsigned length) :
> -   base_type(GLSL_TYPE_ARRAY),
> +   base_type(GLSL_TYPE_ARRAY), sampled_type(GLSL_TYPE_VOID),
> sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
> -   sampled_type(0), interface_packing(0), interface_row_major(0),
> +   interface_packing(0), interface_row_major(0),
> vector_elements(0), 

Re: [Mesa-dev] [PATCH] i965: Enable disk shader cache by default

2017-11-08 Thread Timothy Arceri

Reviewed-by: Timothy Arceri 

Mark may want to consider adding some once-a-day type CI runs for 
this. For example, running the test suite for two consecutive runs on the 
same build so that the second run uses the shader cache, and also a 
second run that uses MESA_GLSL=cache_fb to force testing of the cache 
fallback path.


On 09/11/17 11:58, Jordan Justen wrote:

f9d5a7add42af5a2e4410526d1480a08f41317ae along with
a16dc04ad51c32e5c7d136e4dd6273d983385d3f appears to have fixed the one
known regression with shader cache. (Deus Ex instability.)

We should enable the shader cache by default to stabilize it before
the next major Mesa release.

Signed-off-by: Jordan Justen 
---
  docs/relnotes/17.4.0.html  | 2 +-
  src/mesa/drivers/dri/i965/brw_disk_cache.c | 3 ---
  2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/docs/relnotes/17.4.0.html b/docs/relnotes/17.4.0.html
index f81b5bd62d3..48dcd5cce38 100644
--- a/docs/relnotes/17.4.0.html
+++ b/docs/relnotes/17.4.0.html
@@ -44,7 +44,7 @@ Note: some of the new features are only available with 
certain drivers.
  
  
  

-Disk shader cache support for i965 when MESA_GLSL_CACHE_DISABLE environment variable is set to 
"0" or "false"
+Disk shader cache support for i965
  
  
  Bug fixes

diff --git a/src/mesa/drivers/dri/i965/brw_disk_cache.c 
b/src/mesa/drivers/dri/i965/brw_disk_cache.c
index 853ea98af03..cd0524c5cbf 100644
--- a/src/mesa/drivers/dri/i965/brw_disk_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_disk_cache.c
@@ -420,9 +420,6 @@ void
  brw_disk_cache_init(struct brw_context *brw)
  {
  #ifdef ENABLE_SHADER_CACHE
-   if (env_var_as_boolean("MESA_GLSL_CACHE_DISABLE", true))
-  return;
-
 char renderer[10];
 MAYBE_UNUSED int len = snprintf(renderer, sizeof(renderer), "i965_%04x",
 brw->screen->deviceID);


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] gallium/r600 hw atomic support (v3)

2017-11-08 Thread Dave Airlie
On 7 November 2017 at 20:45, Gert Wollny  wrote:
> Am Dienstag, den 07.11.2017, 16:30 +1000 schrieb Dave Airlie:
>> This is the 3rd submission of the gallium/r600 hw atomic counter
>> support.
>>
>> This fixes some rebase artifacts, removes the BUFFER decls from
>> the TGSI, and fixes some indirect crashes in the r600 backend,
>
> Well, I still get some crashes, i.e. piglits
>
> spec@arb_arrays_of_arrays@execution@atomic_counters@vs-indirect-index
> spec@arb_arrays_of_arrays@execution@atomic_counters@fs-indirect-index
>
> abort with stack smashing reported. Backtrace for vs-indirect-index is

oops off-by one, fixed locally.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] ac/nir: add support for all intrinsics. (v2)

2017-11-08 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Thu, Nov 9, 2017 at 2:12 AM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> This is derived from tgsi/radeonsi code from the GLSL intrinsics.
>
> This should pre-fix radv for the upcoming spirv patches.
>
> v2: actually use wait_cnt, sleep deprived dad time! (Bas)
>
> Signed-off-by: Dave Airlie 
> ---
>  src/amd/common/ac_nir_to_llvm.c | 32 +++-
>  1 file changed, 31 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 2ae656693fc..f922b32bf7b 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -3654,6 +3654,31 @@ static void emit_waitcnt(struct nir_to_llvm_context 
> *ctx,
>ctx->ac.voidt, args, 1, 0);
>  }
>
> +static void emit_membar(struct nir_to_llvm_context *ctx,
> +   const nir_intrinsic_instr *instr)
> +{
> +   unsigned waitcnt = NOOP_WAITCNT;
> +
> +   switch (instr->intrinsic) {
> +   case nir_intrinsic_memory_barrier:
> +   case nir_intrinsic_group_memory_barrier:
> +   waitcnt &= VM_CNT & LGKM_CNT;
> +   break;
> +   case nir_intrinsic_memory_barrier_atomic_counter:
> +   case nir_intrinsic_memory_barrier_buffer:
> +   case nir_intrinsic_memory_barrier_image:
> +   waitcnt &= VM_CNT;
> +   break;
> +   case nir_intrinsic_memory_barrier_shared:
> +   waitcnt &= LGKM_CNT;
> +   break;
> +   default:
> +   break;
> +   }
> +   if (waitcnt != NOOP_WAITCNT)
> +   emit_waitcnt(ctx, waitcnt);
> +}
> +
>  static void emit_barrier(struct nir_to_llvm_context *ctx)
>  {
> /* SI only (thanks to a hw bug workaround):
> @@ -4144,7 +4169,12 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
> emit_discard_if(ctx, instr);
> break;
> case nir_intrinsic_memory_barrier:
> -   emit_waitcnt(ctx->nctx, VM_CNT);
> +   case nir_intrinsic_group_memory_barrier:
> +   case nir_intrinsic_memory_barrier_atomic_counter:
> +   case nir_intrinsic_memory_barrier_buffer:
> +   case nir_intrinsic_memory_barrier_image:
> +   case nir_intrinsic_memory_barrier_shared:
> +   emit_membar(ctx->nctx, instr);
> break;
> case nir_intrinsic_barrier:
> emit_barrier(ctx->nctx);
> --
> 2.14.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] ac/nir: add support for all intrinsics. (v2)

2017-11-08 Thread Dave Airlie
From: Dave Airlie 

This is derived from tgsi/radeonsi code from the GLSL intrinsics.

This should pre-fix radv for the upcoming spirv patches.

v2: actually use wait_cnt, sleep deprived dad time! (Bas)

Signed-off-by: Dave Airlie 
---
 src/amd/common/ac_nir_to_llvm.c | 32 +++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 2ae656693fc..f922b32bf7b 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3654,6 +3654,31 @@ static void emit_waitcnt(struct nir_to_llvm_context *ctx,
   ctx->ac.voidt, args, 1, 0);
 }
 
+static void emit_membar(struct nir_to_llvm_context *ctx,
+   const nir_intrinsic_instr *instr)
+{
+   unsigned waitcnt = NOOP_WAITCNT;
+
+   switch (instr->intrinsic) {
+   case nir_intrinsic_memory_barrier:
+   case nir_intrinsic_group_memory_barrier:
+   waitcnt &= VM_CNT & LGKM_CNT;
+   break;
+   case nir_intrinsic_memory_barrier_atomic_counter:
+   case nir_intrinsic_memory_barrier_buffer:
+   case nir_intrinsic_memory_barrier_image:
+   waitcnt &= VM_CNT;
+   break;
+   case nir_intrinsic_memory_barrier_shared:
+   waitcnt &= LGKM_CNT;
+   break;
+   default:
+   break;
+   }
+   if (waitcnt != NOOP_WAITCNT)
+   emit_waitcnt(ctx, waitcnt);
+}
+
 static void emit_barrier(struct nir_to_llvm_context *ctx)
 {
/* SI only (thanks to a hw bug workaround):
@@ -4144,7 +4169,12 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
emit_discard_if(ctx, instr);
break;
case nir_intrinsic_memory_barrier:
-   emit_waitcnt(ctx->nctx, VM_CNT);
+   case nir_intrinsic_group_memory_barrier:
+   case nir_intrinsic_memory_barrier_atomic_counter:
+   case nir_intrinsic_memory_barrier_buffer:
+   case nir_intrinsic_memory_barrier_image:
+   case nir_intrinsic_memory_barrier_shared:
+   emit_membar(ctx->nctx, instr);
break;
case nir_intrinsic_barrier:
emit_barrier(ctx->nctx);
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] ac/nir: add support for all intrinsics.

2017-11-08 Thread Bas Nieuwenhuizen
On Thu, Nov 9, 2017 at 2:04 AM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> This is derived from tgsi/radeonsi code from the GLSL intrinsics.
>
> This should pre-fix radv for the upcoming spirv patches.
>
> Signed-off-by: Dave Airlie 
> ---
>  src/amd/common/ac_nir_to_llvm.c | 32 +++-
>  1 file changed, 31 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 2ae656693fc..405581f61da 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -3654,6 +3654,31 @@ static void emit_waitcnt(struct nir_to_llvm_context 
> *ctx,
>ctx->ac.voidt, args, 1, 0);
>  }
>
> +static void emit_membar(struct nir_to_llvm_context *ctx,
> +   const nir_intrinsic_instr *instr)
> +{
> +   unsigned waitcnt = NOOP_WAITCNT;
> +
> +   switch (instr->intrinsic) {
> +   case nir_intrinsic_memory_barrier:
> +   case nir_intrinsic_group_memory_barrier:
> +   waitcnt &= VM_CNT & LGKM_CNT;
> +   break;
> +   case nir_intrinsic_memory_barrier_atomic_counter:
> +   case nir_intrinsic_memory_barrier_buffer:
> +   case nir_intrinsic_memory_barrier_image:
> +   waitcnt &= VM_CNT;
> +   break;
> +   case nir_intrinsic_memory_barrier_shared:
> +   waitcnt &= LGKM_CNT;
> +   break;
> +   default:
> +   break;
> +   }
> +   if (waitcnt != NOOP_WAITCNT)
> +   emit_waitcnt(ctx, VM_CNT);

Why VM_CNT instead of waitcnt?

> +}
> +
>  static void emit_barrier(struct nir_to_llvm_context *ctx)
>  {
> /* SI only (thanks to a hw bug workaround):
> @@ -4144,7 +4169,12 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
> emit_discard_if(ctx, instr);
> break;
> case nir_intrinsic_memory_barrier:
> -   emit_waitcnt(ctx->nctx, VM_CNT);
> +   case nir_intrinsic_group_memory_barrier:
> +   case nir_intrinsic_memory_barrier_atomic_counter:
> +   case nir_intrinsic_memory_barrier_buffer:
> +   case nir_intrinsic_memory_barrier_image:
> +   case nir_intrinsic_memory_barrier_shared:
> +   emit_membar(ctx->nctx, instr);
> break;
> case nir_intrinsic_barrier:
> emit_barrier(ctx->nctx);
> --
> 2.14.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] ac/nir: add support for all intrinsics.

2017-11-08 Thread Dave Airlie
From: Dave Airlie 

This is derived from tgsi/radeonsi code from the GLSL intrinsics.

This should pre-fix radv for the upcoming spirv patches.

Signed-off-by: Dave Airlie 
---
 src/amd/common/ac_nir_to_llvm.c | 32 +++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 2ae656693fc..405581f61da 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3654,6 +3654,31 @@ static void emit_waitcnt(struct nir_to_llvm_context *ctx,
   ctx->ac.voidt, args, 1, 0);
 }
 
+static void emit_membar(struct nir_to_llvm_context *ctx,
+   const nir_intrinsic_instr *instr)
+{
+   unsigned waitcnt = NOOP_WAITCNT;
+
+   switch (instr->intrinsic) {
+   case nir_intrinsic_memory_barrier:
+   case nir_intrinsic_group_memory_barrier:
+   waitcnt &= VM_CNT & LGKM_CNT;
+   break;
+   case nir_intrinsic_memory_barrier_atomic_counter:
+   case nir_intrinsic_memory_barrier_buffer:
+   case nir_intrinsic_memory_barrier_image:
+   waitcnt &= VM_CNT;
+   break;
+   case nir_intrinsic_memory_barrier_shared:
+   waitcnt &= LGKM_CNT;
+   break;
+   default:
+   break;
+   }
+   if (waitcnt != NOOP_WAITCNT)
+   emit_waitcnt(ctx, VM_CNT);
+}
+
 static void emit_barrier(struct nir_to_llvm_context *ctx)
 {
/* SI only (thanks to a hw bug workaround):
@@ -4144,7 +4169,12 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
emit_discard_if(ctx, instr);
break;
case nir_intrinsic_memory_barrier:
-   emit_waitcnt(ctx->nctx, VM_CNT);
+   case nir_intrinsic_group_memory_barrier:
+   case nir_intrinsic_memory_barrier_atomic_counter:
+   case nir_intrinsic_memory_barrier_buffer:
+   case nir_intrinsic_memory_barrier_image:
+   case nir_intrinsic_memory_barrier_shared:
+   emit_membar(ctx->nctx, instr);
break;
case nir_intrinsic_barrier:
emit_barrier(ctx->nctx);
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: Enable disk shader cache by default

2017-11-08 Thread Jordan Justen
f9d5a7add42af5a2e4410526d1480a08f41317ae along with
a16dc04ad51c32e5c7d136e4dd6273d983385d3f appears to have fixed the one
known regression with shader cache. (Deus Ex instability.)

We should enable the shader cache by default to stabilize it before
the next major Mesa release.

Signed-off-by: Jordan Justen 
---
 docs/relnotes/17.4.0.html  | 2 +-
 src/mesa/drivers/dri/i965/brw_disk_cache.c | 3 ---
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/docs/relnotes/17.4.0.html b/docs/relnotes/17.4.0.html
index f81b5bd62d3..48dcd5cce38 100644
--- a/docs/relnotes/17.4.0.html
+++ b/docs/relnotes/17.4.0.html
@@ -44,7 +44,7 @@ Note: some of the new features are only available with 
certain drivers.
 
 
 
-Disk shader cache support for i965 when MESA_GLSL_CACHE_DISABLE 
environment variable is set to "0" or "false"
+Disk shader cache support for i965
 
 
 Bug fixes
diff --git a/src/mesa/drivers/dri/i965/brw_disk_cache.c 
b/src/mesa/drivers/dri/i965/brw_disk_cache.c
index 853ea98af03..cd0524c5cbf 100644
--- a/src/mesa/drivers/dri/i965/brw_disk_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_disk_cache.c
@@ -420,9 +420,6 @@ void
 brw_disk_cache_init(struct brw_context *brw)
 {
 #ifdef ENABLE_SHADER_CACHE
-   if (env_var_as_boolean("MESA_GLSL_CACHE_DISABLE", true))
-  return;
-
char renderer[10];
MAYBE_UNUSED int len = snprintf(renderer, sizeof(renderer), "i965_%04x",
brw->screen->deviceID);
-- 
2.15.0.rc2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: Fixed an uncommon freed-memory access during state validation

2017-11-08 Thread Bruce Cherniak
State validation is performed during clear and draw calls.  Validation
during clear was still accessing vertex buffer state.  When the currently
set vertex buffers are client arrays, this could lead to accessing freed
memory.  Such is the case with the VMD application.

Previously, vertex buffer validation depended on a dirty bit or the
draw info indicating an indexed draw.  This required special handling for
clears.  But, vertex buffer validation still occurred which was unnecessary
and wrong.

Now, only minimal validation is performed during clear, deferring the
remainder to the next draw.  And, by setting the dirty bit in swr_draw_vbo
for indexed draws, vertex buffer validation is only dependent upon a
single dirty bit.

This fixes a bug exposed by the VMD application when changing models.
---
 src/gallium/drivers/swr/swr_draw.cpp  |  7 ++-
 src/gallium/drivers/swr/swr_state.cpp | 35 +++
 2 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_draw.cpp 
b/src/gallium/drivers/swr/swr_draw.cpp
index 57660c7464..a94cdd6da0 100644
--- a/src/gallium/drivers/swr/swr_draw.cpp
+++ b/src/gallium/drivers/swr/swr_draw.cpp
@@ -52,7 +52,12 @@ swr_draw_vbo(struct pipe_context *pipe, const struct 
pipe_draw_info *info)
   return;
}
 
-   /* Update derived state, pass draw info to update function */
+   /* If indexed draw, force vertex validation since index buffer comes
+* from draw info. */
+   if (info->index_size)
+  ctx->dirty |= SWR_NEW_VERTEX;
+
+   /* Update derived state, pass draw info to update function. */
swr_update_derived(pipe, info);
 
swr_update_draw_context(ctx);
diff --git a/src/gallium/drivers/swr/swr_state.cpp 
b/src/gallium/drivers/swr/swr_state.cpp
index c6da4fcb8e..4530d377ee 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -1204,11 +1204,6 @@ swr_update_derived(struct pipe_context *pipe,
   ctx->api.pfnSwrSetRastState(ctx->swrContext, rastState);
}
 
-   /* Scissor */
-   if (ctx->dirty & SWR_NEW_SCISSOR) {
-  ctx->api.pfnSwrSetScissorRects(ctx->swrContext, 1, >swr_scissor);
-   }
-
/* Viewport */
if (ctx->dirty & (SWR_NEW_VIEWPORT | SWR_NEW_FRAMEBUFFER
  | SWR_NEW_RASTERIZER)) {
@@ -1249,18 +1244,26 @@ swr_update_derived(struct pipe_context *pipe,
   ctx->api.pfnSwrSetViewports(ctx->swrContext, 1, vp, vpm);
}
 
-   /* Set vertex & index buffers
-* (using draw info if called by swr_draw_vbo)
-* If indexed draw, revalidate since index buffer comes from
-* pipe_draw_info.
-*/
-   if (ctx->dirty & SWR_NEW_VERTEX ||
-  (p_draw_info && p_draw_info->index_size)) {
+   /* When called from swr_clear (p_draw_info = null), render targets,
+* rasterState and viewports (dependent on render targets) are the only
+* necessary validation.  Defer remaining validation by setting
+* post_update_dirty_flags and clear all dirty flags.  BackendState is
+* still unconditionally validated below */
+   if (!p_draw_info) {
+  post_update_dirty_flags = ctx->dirty & ~(SWR_NEW_FRAMEBUFFER |
+   SWR_NEW_RASTERIZER |
+   SWR_NEW_VIEWPORT);
+  ctx->dirty = 0;
+   }
+
+   /* Scissor */
+   if (ctx->dirty & SWR_NEW_SCISSOR) {
+  ctx->api.pfnSwrSetScissorRects(ctx->swrContext, 1, >swr_scissor);
+   }
 
-  /* If being called by swr_draw_vbo, copy draw details */
-  struct pipe_draw_info info = {0};
-  if (p_draw_info)
- info = *p_draw_info;
+   /* Set vertex & index buffers */
+   if (ctx->dirty & SWR_NEW_VERTEX) {
+  const struct pipe_draw_info  = *p_draw_info;
 
   /* vertex buffers */
   SWR_VERTEX_BUFFER_STATE swrVertexBuffers[PIPE_MAX_ATTRIBS];
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 102891] [radv] glitches on rpcs3 emulator (green zones)

2017-11-08 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=102891

--- Comment #6 from Dave Airlie  ---
Did someone already try RADV_DEBUG=zerovram to see if it helps?

The trace replays badly on amdgpu-pro which suggests the bad stuff is in ram
before recording.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: implement (un)mapImage

2017-11-08 Thread Julien Isorce
Ah yes you are right, my mistake. I will update the patch after some more
testing. Thx.

On 8 November 2017 at 17:21, Chris Wilson  wrote:

> Quoting Julien Isorce (2017-11-08 16:55:05)
> > v2: add early return if (flag & MAP_INTERNAL_MASK)
> >
> > Already implemented for Gallium drivers.
> >
> > Useful for gbm_bo_(un)map.
> >
> > Tested by porting wayland/weston/clients/simple-dmabuf-drm.c to GBM.
> >
> > Signed-off-by: Julien Isorce 
> > ---
> >  src/mesa/drivers/dri/i965/intel_screen.c | 51
> ++--
> >  1 file changed, 49 insertions(+), 2 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/intel_screen.c
> b/src/mesa/drivers/dri/i965/intel_screen.c
> > index cdc36ad..b7b0e25 100644
> > --- a/src/mesa/drivers/dri/i965/intel_screen.c
> > +++ b/src/mesa/drivers/dri/i965/intel_screen.c
> > @@ -755,6 +755,53 @@ intel_create_image(__DRIscreen *dri_screen,
> > loaderPrivate);
> >  }
> >
> > +static void *
> > +intel_map_image(__DRIcontext *context, __DRIimage *image,
> > +int x0, int y0, int width, int height,
> > +unsigned int flags, int *stride, void **map_info)
> > +{
> > +  struct brw_context *brw = NULL;
> > +  struct brw_bo *bo = NULL;
> > +  void *raw_data = NULL;
> > +
> > +  if (!context || !image || !stride || !map_info || *map_info)
> > + return NULL;
> > +
> > +  if (flags & MAP_INTERNAL_MASK)
> > + return NULL;
> > +
> > +  brw = context->driverPrivate;
> > +  bo = image->bo;
> > +
> > +  assert(brw);
> > +  assert(bo);
> > +
> > +  /* DRI flags and GL_MAP.*_BIT flags are the same, so just pass them
> on. */
> > +  raw_data = brw_bo_map(brw, bo, flags);
> > +
> > +  if (raw_data) {
> > +*map_info = raw_data;
> > +*stride = image->pitch;
> > +  }
> > +
> > +  return raw_data;
>
> Did you not say the returned address is to pixel0 of the (x,y)x(w,h) rect
> within the image? So raw_data + y0*image->pitch + x0*image->cpp?
> Or something more like raw_data + 
> y0/util_format_get_blockheight(image->format)
> * image->pitch +
> x0/util_format_get_blockwidth(image->format) * util_format_get_blocksize(
> image->format);
> -Chris
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv/meson: Generate dev_icd.json

2017-11-08 Thread Jason Ekstrand
On Wed, Nov 8, 2017 at 3:42 PM, Chad Versace 
wrote:

> On Wed 08 Nov 2017, Jason Ekstrand wrote:
> > On Wed, Nov 8, 2017 at 1:40 PM, Chad Versace <[1]
> chadvers...@chromium.org>
> > wrote:
> >
> > On Tue 07 Nov 2017, Dylan Baker wrote:
> > > Quoting Eric Engestrom (2017-11-07 07:25:53)
> > > > On Wednesday, 2017-11-01 13:49:03 -0700, Chad Versace wrote:
> > > > > I tested this in a setup where the builddir was outside of the
> > srcdir.
> > > > > ---
> > > > >  src/intel/vulkan/meson.build | 12 
> > > > >  1 file changed, 12 insertions(+)
> > > > >
> > > > > diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/
> > meson.build
> > > > > index ff24e304ef5..e8b7f407507 100644
> > > > > --- a/src/intel/vulkan/meson.build
> > > > > +++ b/src/intel/vulkan/meson.build
> > > > > @@ -48,6 +48,18 @@ intel_icd = custom_target(
> > > > >install : true,
> > > > >  )
> > > > >
> > > > > +dev_icd = custom_target(
> > > > > +  'dev_icd',
> > > > > +  input : 'anv_icd.py',
> > > > > +  output : 'dev_icd.@0@.json'.format(target_machine.cpu()),
> > > >
> > > > Strictly speaking, shouldn't that be `host_machine` [1] ?
> > > > I don't see how one would do a canadian build of mesa though, so
> > > > host == target should always be true.
> > >
> > > That's my fault. There are (or were) a number of cases where I used
> > target
> > > instead of host, that can also be a follow up.
> > >
> > > In any case:
> > > Acked-by: Dylan Baker <[2]dy...@pnwbakers.com>
> >
> > I build Mesa (with autotools) where host == x86_64 but target ==
> armv7a.
> >
> >
> > You're using dev_icd with a cross-compile?  Yikes!  I mean, it can work,
> but
> > that's not what I would have expected.  Also, why are you building anv
> and
> > targeting armv7a?  Does that even work?
>
> Too many interacting topics!
>
> Eric said: "I don't see how one would do a canadian build of mesa
> though, so host == target should always be true". Mesa, not anvil.
> I replied that I cross-compile Mesa for ARM.
>

Ok, that makes more sense.


> On host-vs-target: My host machine and target machine are rarely the
> same. But, for Anvil, of course, the two machines do always have the
> same architecture.
>
> Regardless, the icd filename should always contain the target
> architecture, because you run the icd *on the target*.
>

Fair enough.  Pedantry in these areas is perfectly reasonable.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv/meson: Generate dev_icd.json

2017-11-08 Thread Chad Versace
On Wed 08 Nov 2017, Jason Ekstrand wrote:
> On Wed, Nov 8, 2017 at 1:40 PM, Chad Versace <[1]chadvers...@chromium.org>
> wrote:
> 
> On Tue 07 Nov 2017, Dylan Baker wrote:
> > Quoting Eric Engestrom (2017-11-07 07:25:53)
> > > On Wednesday, 2017-11-01 13:49:03 -0700, Chad Versace wrote:
> > > > I tested this in a setup where the builddir was outside of the
> srcdir.
> > > > ---
> > > >  src/intel/vulkan/meson.build | 12 
> > > >  1 file changed, 12 insertions(+)
> > > >
> > > > diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/
> meson.build
> > > > index ff24e304ef5..e8b7f407507 100644
> > > > --- a/src/intel/vulkan/meson.build
> > > > +++ b/src/intel/vulkan/meson.build
> > > > @@ -48,6 +48,18 @@ intel_icd = custom_target(
> > > >    install : true,
> > > >  )
> > > >
> > > > +dev_icd = custom_target(
> > > > +  'dev_icd',
> > > > +  input : 'anv_icd.py',
> > > > +  output : 'dev_icd.@0@.json'.format(target_machine.cpu()),
> > >
> > > Strictly speaking, shouldn't that be `host_machine` [1] ?
> > > I don't see how one would do a canadian build of mesa though, so
> > > host == target should always be true.
> >
> > That's my fault. There are (or were) a number of cases where I used
> target
> > instead of host, that can also be a follow up.
> >
> > In any case:
> > Acked-by: Dylan Baker <[2]dy...@pnwbakers.com>
> 
> I build Mesa (with autotools) where host == x86_64 but target == armv7a.
> 
> 
> You're using dev_icd with a cross-compile?  Yikes!  I mean, it can work, but
> that's not what I would have expected.  Also, why are you building anv and
> targeting armv7a?  Does that even work?

Too many interacting topics!

Eric said: "I don't see how one would do a canadian build of mesa
though, so host == target should always be true". Mesa, not anvil.
I replied that I cross-compile Mesa for ARM.

On host-vs-target: My host machine and target machine are rarely the
same. But, for Anvil, of course, the two machines do always have the
same architecture.

Regardless, the icd filename should always contain the target
architecture, because you run the icd *on the target*.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Mesa 17.2.5 release candidate

2017-11-08 Thread Andres Gomez
Hello list,

The candidate for the Mesa 17.2.5 is now available. Currently we have:
 - 30 queued
 - 16 nominated (outstanding)
 - and 3 rejected patches


In the current queue we have:

In Mesa Core a GL error related to the ARB_ES3_1_compatibility spec
noticed with the GFXBench 5 Aztec Ruins has been corrected. 

The GLSL compiler is not giving a linker error for mismatching uniform
precision with GLSL ES 1.00 any more. This enables, especially, several
Android applications which violate this rule, like Forge of Empires,
for example.

The SPIR-V compiler has corrected an assert triggered when support for
the simple memory model was claimed.

NIR has also received a correction related with
gl_SubGroupG{e,t}MaskARB.

The Intel drivers, especially i965, have received several fixes,
including a plug for a memory leak and another one in the compiler to
avoid GPU hangs on Broxton.

The gallium i915g driver for Intel has also seen an important
correction.

AMD drivers, mostly radv, have also received several fixes, including a
correction for a DCC corruption that was visible with Rust, breaking an
endless loop in r600, avoiding a GPU hang with vulkan dota2 in VR mode
and a plug for a memory leak.

Broadcom's vc4 has gotten a fix to compile out some non-debug code that
shouldn't be there for the release build.


Take a look at section "Mesa stable queue" for more information.


Testing reports/general approval


Any testing reports (or general approval of the state of the branch)
will be greatly appreciated.

The plan is to have 17.2.5 next Friday (10th of November), around or
shortly after 23:00 GMT.

If you have any questions or suggestions - be that about the current
patch queue or otherwise, please go ahead.


Trivial merge conflicts
---

commit e7c9892810b65bda232a00f798aa4a258fc8
Author: Jason Ekstrand 

i965/miptree: Take an isl_format in render_aux_usage

(cherry picked from commit 39c5c12f8fbee9eec26a627f247d1f3ef7d4bf39)

commit bd2037da82ec8abc1db8834f67496b4b3cd74504
Author: Bas Nieuwenhuizen 

radv: Don't expose heaps with 0 memory.

(cherry picked from commit 806721429afa090380bf39a4958fe4e21c63816c)

commit 23eaeeb88ad787b0e3253807fb6f7180a0cb0711
Author: Dave Airlie 

radv: free attachments on end command buffer.

(cherry picked from commit f0ae06a13c1a60f58de77401f705eaf620b5b822)

commit 9ba45e7d33bb91d7b0fc65ecae9cbc8a2ba68593
Author: Bas Nieuwenhuizen 

radv: Don't use vgpr indexing for outputs on GFX9.

(cherry picked from commit 6ce550453f1df64caeb956f215d32da96b89f2b1)

commit 6a73458510c124856ade7e5a7e805fb08ae13671
Author: Bas Nieuwenhuizen 

radv: Disallow indirect outputs for GS on GFX9 as well.

(cherry picked from commit c07d719e8b683e1bf78f187dd17fe4716f4e5e9c)


Cheers,
Andres


Mesa stable queue
-

Nominated (16)
=

Emil Velikov (1):
  targets/opencl: don't hardcode the icd file install to /etc/...

Jason Ekstrand (15):
  intel/fs: Use a pure vertical stride for large register strides
  intel/fs: Pass builders instead of blocks into emit_[un]zip
  intel/fs: Be more explicit about our placement of [un]zip
  intel/fs: Use ANY/ALL32 predicates in SIMD32
  intel/fs: Don't stomp f0.1 in SIMD16 ballot
  intel/fs: Use an explicit D type for vote any/all/eq intrinsics
  intel/fs: Use a pair of 1-wide MOVs instead of SEL for any/all
  intel/eu/reg: Add a subscript() helper
  intel/fs: Fix MOV_INDIRECT for 64-bit values on little-core
  intel/fs: Fix integer multiplication lowering for src/dst hazards
  intel/fs: Use the original destination region for int MUL lowering
  intel/fs: Mark 64-bit values as being contiguous
  intel/fs: Rework zero-length URB write handling
  intel/nir: Add a helper for getting the NoIndirect mask
  intel/nir: Break the linking code into a helper in brw_nir.c


Queued (30)
===

Andres Gomez (7):
  docs: add sha256 checksums for 17.2.4
  cherry-ignore: radv: copy indirect lowering settings from radeonsi
  cherry-ignore: i965: fix blorp stage_prog_data->param leak
  cherry-ignore: etnaviv: don't do resolve-in-place without valid TS
  cherry-ignore: intel/fs: Alloc pull constants off mem_ctx
  cherry-ignore: added 17.3 nominations.
  cherry-ignore: automake: include git_sha1.h.in in release tarball

Bas Nieuwenhuizen (3):
  radv: Don't expose heaps with 0 memory.
  radv: Don't use vgpr indexing for outputs on GFX9.
  radv: Disallow indirect outputs for GS on GFX9 as well.

Dave Airlie (3):
  i915g: make gears run again.
  radv: free attachments on end command buffer.
  radv: add initial copy descriptor support. (v2)

Eric Engestrom (1):
  vc4: fix release build

Gert Wollny 

[Mesa-dev] [PATCH 4/4] i965: Use prepare_external instead of make_shareable in setTexBuffer2

2017-11-08 Thread Jason Ekstrand
The setTexBuffer2 hook from GLX is used to implement glxBindTexImageEXT
which has tighter restrictions than just "it's shared".  In particular,
it says that any rendering to the image while it is bound causes the
contents to become undefined.

The GLX_EXT_texture_from_pixmap extension provides us with an acquire
and release in the form of glXBindTexImageEXT and glXReleaseTexImageEXT.
The extension spec says,

"Rendering to the drawable while it is bound to a texture will leave
the contents of the texture in an undefined state.  However, no
synchronization between rendering and texturing is done by GLX.  It
is the application's responsibility to implement any synchronization
required."

From the EGL 1.4 spec for eglBindTexImage:

"After eglBindTexImage is called, the specified surface is no longer
available for reading or writing.  Any read operation, such as
glReadPixels or eglCopyBuffers, which reads values from any of the
surface’s color buffers or ancillary buffers will produce
indeterminate results.  In addition, draw operations that are done
to the surface before its color buffer is released from the texture
produce indeterminate results."

In other words, between the bind and release calls, we effectively own
those pixels and can assume, so long as we don't crash, that no one else
is reading from/writing to the surface.  The GLX and EGL implementations
call the setTexBuffer2 and releaseTexBuffer function pointers that the
driver can hook.

In theory, this means that, between BindTexImage and ReleaseTexImage, we
own the pixels and it should be safe to track aux usage so we
can avoid redundant resolves so long as we start off with the right
assumption at the start of the bind/release pair.

In practice, however, X11 has slightly different expectations.  It's
expected that the server may be drawing to the image at the same time as
the compositor is texturing from it.  In that case, the worst expected
outcome should be tearing or partial rendering and not random corruption
like we see when rendering races with scanout with CCS.  Fortunately,
the GEM rules about texture/render dependencies save us here.  If X11
submits work to write to a pixmap after the compositor has submitted
work to texture from it, GEM inserts a dependency between the compositor
and X11.  If X11 is using a high-priority context, this will cause the
compositor to get a temporarily boosted priority while the batch from
X11 is waiting on it.  This means that we will never have an actual race
between X11 and the compositor so no corruption can happen.

Unfortunately, however, this means that X11 will likely be rendering to it
between the compositor's BindTexImage and ReleaseTexImage calls.  If we
want to avoid strange issues, we need to be a bit careful about
resolves because we can't really transition it away from the "default"
aux usage.  The only case where this would practically be a problem is
with image_load_store where we have to do a full resolve in order to use
the image via the data port.  Even there it would only be a problem if
batches were split such that X11's rendering happens between the resolve
and the use of it as a storage image.  However, the chances of this
happening are very slim so we just emit a warning and hope for the best.

This commit adds a new helper intel_miptree_finish_external which resets
all aux state to whatever ISL says is the right worst-case "default" for
the given modifier.  It feels a little awkward to call it "finish"
because it's actually an acquire from the perspective of the driver, but
it matches the semantics of the other prepare/finish functions.  This
new helper gets called in intelSetTexBuffer2 instead of make_shareable.
We also add an intelReleaseTexBuffer (we passed NULL to releaseTexBuffer
before) and call intel_miptree_prepare_external in it.  This probably
does nothing most of the time but it means that the prepare/finish calls
are properly matched.

Cc: "17.3" 
Cc: Chad Versace 
Cc: Daniel Stone 
Cc: Louis-Francis Ratté-Boulianne 
Cc: Adam Jackson 
Cc: Chris Wilson 
Cc: Keith Packard 
Cc: Eric Anholt 
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 18 
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  3 ++
 src/mesa/drivers/dri/i965/intel_screen.c  |  2 +-
 src/mesa/drivers/dri/i965/intel_tex.h |  2 +
 src/mesa/drivers/dri/i965/intel_tex_image.c   | 61 ++-
 5 files changed, 84 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 47cfccc..a95b67c 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -2791,6 +2791,24 @@ intel_miptree_prepare_external(struct 

[Mesa-dev] [PATCH 2/4] i965/tex_image: Reference the renderbuffer miptree in setTexBuffer2

2017-11-08 Thread Jason Ekstrand
The old code made a new miptree that referenced the same BO as the
renderbuffer and just trusted in the memory aliasing to work.  There are
only two ways in which the new miptree is liable to differ from the one
in the renderbuffer and neither of them matter:

 1) It may have a different target.  The only targets that we can ever
see in intelSetTexBuffer2 are GL_TEXTURE_2D and GL_TEXTURE_RECTANGLE
and the difference between the two doesn't matter as far as the
miptree is concerned; genX(update_sampler_state) only looks at the
gl_texture_object and not the miptree when determining whether or
not to use normalized coordinates.

 2) It may have a very slightly different format.  Again, this doesn't
matter because we've supported texture views for quite some time so
we always look at the gl_texture_object format instead of the
miptree format for hardware setup anyway.

On the other hand, because we were recreating the miptree, we were using
intel_miptree_create_for_bo which doesn't understand modifiers.  We
really want this function to work without doing a resolve so long as you
have modifiers so we need to fix that.

Cc: "17.3" 
---
 src/mesa/drivers/dri/i965/intel_tex_image.c | 21 +++--
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c 
b/src/mesa/drivers/dri/i965/intel_tex_image.c
index 37c8e24..c52992a 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -405,6 +405,7 @@ static void
 intel_set_texture_image_mt(struct brw_context *brw,
struct gl_texture_image *image,
GLenum internal_format,
+   mesa_format format,
struct intel_mipmap_tree *mt)
 
 {
@@ -415,7 +416,7 @@ intel_set_texture_image_mt(struct brw_context *brw,
_mesa_init_teximage_fields(>ctx, image,
   mt->surf.logical_level0_px.width,
   mt->surf.logical_level0_px.height, 1,
-  0, internal_format, mt->format);
+  0, internal_format, format);
 
brw->ctx.Driver.FreeTextureImageBuffer(>ctx, image);
 
@@ -442,7 +443,6 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
struct gl_texture_object *texObj;
struct gl_texture_image *texImage;
mesa_format texFormat = MESA_FORMAT_NONE;
-   struct intel_mipmap_tree *mt;
GLenum internal_format = 0;
 
texObj = _mesa_get_current_tex_object(ctx, target);
@@ -464,31 +464,24 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
if (rb->mt->cpp == 4) {
   if (texture_format == __DRI_TEXTURE_FORMAT_RGB) {
  internal_format = GL_RGB;
- texFormat = MESA_FORMAT_B8G8R8X8_UNORM;
+ texFormat = MESA_FORMAT_B8G8R8A8_UNORM;
   }
   else {
  internal_format = GL_RGBA;
  texFormat = MESA_FORMAT_B8G8R8A8_UNORM;
   }
} else if (rb->mt->cpp == 2) {
+  /* This is 565 */
   internal_format = GL_RGB;
   texFormat = MESA_FORMAT_B5G6R5_UNORM;
}
 
intel_miptree_make_shareable(brw, rb->mt);
-   mt = intel_miptree_create_for_bo(brw, rb->mt->bo, texFormat, 0,
-rb->Base.Base.Width,
-rb->Base.Base.Height,
-1, rb->mt->surf.row_pitch,
-MIPTREE_CREATE_DEFAULT);
-   if (mt == NULL)
-   return;
-   mt->target = target;
 
_mesa_lock_texture(>ctx, texObj);
texImage = _mesa_get_tex_image(ctx, texObj, target, 0);
-   intel_set_texture_image_mt(brw, texImage, internal_format, mt);
-   intel_miptree_release();
+   intel_set_texture_image_mt(brw, texImage, internal_format,
+  texFormat, rb->mt);
_mesa_unlock_texture(>ctx, texObj);
 }
 
@@ -581,7 +574,7 @@ intel_image_target_texture_2d(struct gl_context *ctx, 
GLenum target,
const GLenum internal_format =
   image->internal_format != 0 ?
   image->internal_format : _mesa_get_format_base_format(mt->format);
-   intel_set_texture_image_mt(brw, texImage, internal_format, mt);
+   intel_set_texture_image_mt(brw, texImage, internal_format, mt->format, mt);
intel_miptree_release();
 }
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] i965/tex_image: Pull the tex format from the renderbuffer in intelSetTexBuffer2

2017-11-08 Thread Jason Ekstrand
Cc: "17.3" 
---
 src/mesa/drivers/dri/i965/intel_tex_image.c | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c 
b/src/mesa/drivers/dri/i965/intel_tex_image.c
index c52992a..28800f6 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -462,20 +462,23 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
   return;
 
if (rb->mt->cpp == 4) {
-  if (texture_format == __DRI_TEXTURE_FORMAT_RGB) {
+  if (texture_format == __DRI_TEXTURE_FORMAT_RGB)
  internal_format = GL_RGB;
- texFormat = MESA_FORMAT_B8G8R8A8_UNORM;
-  }
-  else {
+  else
  internal_format = GL_RGBA;
- texFormat = MESA_FORMAT_B8G8R8A8_UNORM;
-  }
} else if (rb->mt->cpp == 2) {
   /* This is 565 */
   internal_format = GL_RGB;
-  texFormat = MESA_FORMAT_B5G6R5_UNORM;
}
 
+   /* The interactions between GLX_EXT_texture_from_pixmap and sRGB are not
+* defined at all.  However, since X has classically assumed that your data
+* is just bits and sRGB rendering was added on, the assumption is that the
+* the result of glXBindTexImageEXT will be a texture with a linear format
+* even if it was rendered with sRGB encoding enabled.
+*/
+   texFormat = _mesa_get_srgb_format_linear(intel_rb_format(rb));
+
intel_miptree_make_shareable(brw, rb->mt);
 
_mesa_lock_texture(>ctx, texObj);
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] i965/miptree: Loosen the format check in miptree_match_image

2017-11-08 Thread Jason Ekstrand
This function is used to determine when we need to re-allocate a
miptree.  Since we do nothing different in miptree allocation for
sRGB vs. linear, loosening this should be safe and may lead to less
copying and reallocating in some odd cases.

Cc: "17.3" 
Cc: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c  | 6 --
 src/mesa/drivers/dri/i965/intel_tex.c  | 2 +-
 src/mesa/drivers/dri/i965/intel_tex_obj.h  | 4 ++--
 src/mesa/drivers/dri/i965/intel_tex_validate.c | 2 +-
 4 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 82f5a81..47cfccc 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1298,7 +1298,8 @@ intel_miptree_match_image(struct intel_mipmap_tree *mt,
if (mt->etc_format != MESA_FORMAT_NONE)
   mt_format = mt->etc_format;
 
-   if (image->TexFormat != mt_format)
+   if (_mesa_get_srgb_format_linear(image->TexFormat) !=
+   _mesa_get_srgb_format_linear(mt_format))
   return false;
 
intel_get_image_dims(image, , , );
@@ -1537,7 +1538,8 @@ intel_miptree_copy_slice(struct brw_context *brw,
assert(src_layer < get_num_phys_layers(_mt->surf,
   src_level - src_mt->first_level));
 
-   assert(src_mt->format == dst_mt->format);
+   assert(_mesa_get_srgb_format_linear(src_mt->format) ==
+  _mesa_get_srgb_format_linear(dst_mt->format));
 
if (dst_mt->compressed) {
   unsigned int i, j;
diff --git a/src/mesa/drivers/dri/i965/intel_tex.c 
b/src/mesa/drivers/dri/i965/intel_tex.c
index 65a1cb3..0650b6e 100644
--- a/src/mesa/drivers/dri/i965/intel_tex.c
+++ b/src/mesa/drivers/dri/i965/intel_tex.c
@@ -176,7 +176,7 @@ intel_alloc_texture_storage(struct gl_context *ctx,
intel_texobj->needs_validate = false;
intel_texobj->validated_first_level = 0;
intel_texobj->validated_last_level = levels - 1;
-   intel_texobj->_Format = intel_texobj->mt->format;
+   intel_texobj->_Format = first_image->TexFormat;
 
return true;
 }
diff --git a/src/mesa/drivers/dri/i965/intel_tex_obj.h 
b/src/mesa/drivers/dri/i965/intel_tex_obj.h
index 27c18b7..526f5ce 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_obj.h
+++ b/src/mesa/drivers/dri/i965/intel_tex_obj.h
@@ -57,8 +57,8 @@ struct intel_texture_object
bool needs_validate;
 
/* Mesa format for the validated texture object. For non-views this
-* will always be the same as mt->format. For views, it may differ
-* since the mt is shared across views with differing formats.
+* will always be the same as texObj->Image[0][0].TexFormat. For views, it
+* may differ since the mt is shared across views with differing formats.
 */
mesa_format _Format;
 
diff --git a/src/mesa/drivers/dri/i965/intel_tex_validate.c 
b/src/mesa/drivers/dri/i965/intel_tex_validate.c
index 2b7798c..ef7f907 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_validate.c
@@ -174,7 +174,7 @@ intel_finalize_mipmap_tree(struct brw_context *brw, GLuint 
unit)
 
intelObj->validated_first_level = validate_first_level;
intelObj->validated_last_level = validate_last_level;
-   intelObj->_Format = intelObj->mt->format;
+   intelObj->_Format = firstImage->base.Base.TexFormat,
intelObj->needs_validate = false;
 }
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] meson: Enable VC4's NEON assembly support.

2017-11-08 Thread Eric Anholt
Dylan Baker  writes:

> [ Unknown signature status ]
> Quoting Eric Anholt (2017-11-08 14:14:57)
>> ---
>>  meson.build |  5 +++--
>>  src/gallium/drivers/vc4/meson.build | 13 +
>>  2 files changed, 16 insertions(+), 2 deletions(-)
>> 
>> diff --git a/meson.build b/meson.build
>> index 0118c9a7c5ef..189c9be5b59c 100644
>> --- a/meson.build
>> +++ b/meson.build
>> @@ -485,8 +485,9 @@ endif
>>  
>>  # FIXME: enable asm when cross compiler
>>  # This is doable (autotools does it), but it's not of immediate concern
>> -if meson.is_cross_build()
>> -  message('Cross compiling, disabling asm')
>> +if meson.is_cross_build() and (host_machine.cpu_family() == 'x86' or
>> +   host_machine.cpu_family() == 'x86_64')
>
> How about:
> if meson.is_cross_build() and host_machine.cpu_family().startswith('x86')
>
> Other than that, for the series:
> Reviewed-by: Dylan Baker 

I like it.  Thanks!


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/4] meson: vc4 ARMHF cross-build support

2017-11-08 Thread Timothy Arceri

On 09/11/17 09:14, Eric Anholt wrote:

Timothy Arceri noted that vc4 didn't seem to have the NEON stuff
hooked up, so I worked on getting vc4 cross builds working for me
finally.  I haven't tested the result on HW quite yet.


I can now build vc4 with asm enabled with this series, so:

Tested-by: Timothy Arceri 



Eric Anholt (4):
   meson: Leave dep_llvm empty if !with_llvm
   meson: Drop stale comment about making valgrind conditional.
   meson: Always link libgallium_dri.so against dep_thread.
   meson: Enable VC4's NEON assembly support.

  meson.build | 13 +++--
  src/gallium/drivers/vc4/meson.build | 13 +
  src/gallium/targets/dri/meson.build |  1 +
  3 files changed, 21 insertions(+), 6 deletions(-)


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 103586] OpenCL/Clover: AMD Turks: corrupt output buffer (depending on dimension order?)

2017-11-08 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=103586

--- Comment #9 from Dave Gilbert  ---
(In reply to Jan Vesely from comment #8)
> (In reply to Dave Gilbert from comment #6)
> > (In reply to Jan Vesely from comment #5)
> > > (In reply to Dave Gilbert from comment #4)
> > > > Created attachment 135313 [details]
> > > > foo.link-0.ll
> > > > 
> > > > That's all 3 of the debug files it produced.
> > > > (I wasn't sure which were the llvm and which the isa dumps; I guess the 
> > > > asm
> > > > is the isa? and the ll's are both llvm dumps?)
> > > 
> > > yes. the first .ll is from compilation step, the other one is from linking
> > > step.
> > > 
> > > .ll dump looks correct.
> > > .asm also looks correct.
> > > 
> > > you can try producing multiple asm dumps for working and non-working runs.
> > > But I don't think that the llvm is the culprit here.
> > > 
> > > Can you try waiting for the kernel execution to complete explicitly before
> > > mapping the buffer?
> > > Ideally call clFinish() on line 63.
> > 
> > Since I'm on the C++ binding (probably a mistake) I used:
> >   queue.finish();
> > 
> > and it seems to be working.
> > 
> > (This also corresponds possibly to what I'm seeing on a more complex kernel;
> > with a more complex kernel I'm seeing a whole pile of data on the last
> > few Z slices being bogus, suggesting it's not finished).
> > 
> > Dave
> 
> thanks for testing. I see you are using mesa 17.2.
> 
> there were a few changes to blocking call synchronization that went to mesa
> 17.3:
> 02f8ac6b70033a1b240d497c4664c359d2398cc3 (clover: Wrap event::wait_count in
> a method taking care of the required locking.)
> bc4000ee40c78efe1e5e8a6244d4bb55389d8418 (clover: Run the associated action
> before an event is signalled.)
> 3a5b69c09ba355c616c274b0c7f5aba3bd21fd54 (clover: Wait for requested
> operation if blocking flag is set)
> 
> which might help address the issue. Can you test mesa 17.3?

Yeh, I'll figure out how to get 17.3 built on this box.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] glx: Prepare the DRI backends for GLX_EXT_no_config_context

2017-11-08 Thread Eric Anholt
Adam Jackson  writes:

> This should be safe as these backends already support the EGL version of
> this extension. DRI1 is not affected because it does not support
> GLX_ARB_create_context anyway. DRI-Windows is not prepared to implement
> this as there's no equivalent WGL extension, and wglCreateContextAttribs
> seems to really want the HDC's pixel format to be set.

Patch 1-2 are:

Reviewed-by: Eric Anholt 

Re: [Mesa-dev] [PATCH] amd/addrlib: update to latest version

2017-11-08 Thread Ilia Mirkin
On Wed, Nov 8, 2017 at 4:13 AM, Nicolai Hähnle  wrote:
> On 08.11.2017 09:53, Michel Dänzer wrote:
>>
>> On 07/11/17 10:58 PM, Marek Olšák wrote:
>>>
>>> On Tue, Nov 7, 2017 at 9:01 PM, Nicolai Hähnle 
>>> wrote:

 On 07.11.2017 18:35, Michel Dänzer wrote:
>
>
> On 07/11/17 06:28 PM, Marek Olšák wrote:
>>
>>
>> Hi,
>>
>> This patch is too large for the mailing list:
>>
>>
>>
>> https://cgit.freedesktop.org/~mareko/mesa/commit/?h=addrlib=0e0f044268d3c1af2e78f161aaa2d92c30167cc1
>
>
>
>   From the commit log:
>
>> I just overwrote all Mesa files with internal addrlib and discarded
>> hunks that we should probably keep, but I might have missed something.
>
>
>
> FWIW, if a separate branch was used for importing addrlib changes, Git
> could keep track of our changes to it in the Mesa tree.



 I concur in principle. In practice, I explored doing that, but the
 commit
 discipline on the internal addrlib repository is pretty crappy, so we'd
 end
 up having to massage commits anyway. Maybe we can find a sweet spot
 somewhere by updating slightly more regularly, perhaps once a month.
>>>
>>>
>>> That's too much time-consuming work with no benefit. I used to do
>>> that, but it sucked. I prefer 1 commit with everything - easy conflict
>>> resolution, not having to rebase 60 commits that don't make sense.
>>
>>
>> FWIW, I didn't mean importing individual commits of the addrlib
>> repository into Mesa. Just having a separate branch[0] where addrlib
>> snapshots are imported and which is then merged to master. That way Git
>> will keep track of changes in both repositories and automatically merge
>> them as much as possible. Just using Git for what it was made for. :)
>
>
> What do you mean precisely? I did some experiments with a structure like
> this:
>
>  Mesa master  o--o--o--o--o--o--o
>   //
>  addrlibo--o--o--oo
>
> where addrlib is a branch that *only* contains addrlib and has a completely
> separate initial commit. This works somewhat reasonably, except I was
> worried that it might break bisecting Mesa by trying some of the commits
> that only exist in the addrlib branch.
>
> Though now that I think about it again, maybe bisecting is fine because none
> of the addrlib commits are ever in the "future cone" of any Mesa master
> commit.

If you want to avoid some of the merge pain without creating a totally
separate universe, why not do something like

  addrlib ooo
 /  \\
  Mesa master o--o--o--o--o--o--o--o--o

Just a thought.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] amd/addrlib: update to latest version

2017-11-08 Thread Eric Anholt
Nicolai Hähnle  writes:

> On 08.11.2017 09:53, Michel Dänzer wrote:
>> On 07/11/17 10:58 PM, Marek Olšák wrote:
>>> On Tue, Nov 7, 2017 at 9:01 PM, Nicolai Hähnle  wrote:
 On 07.11.2017 18:35, Michel Dänzer wrote:
>
> On 07/11/17 06:28 PM, Marek Olšák wrote:
>>
>> Hi,
>>
>> This patch is too large for the mailing list:
>>
>>
>> https://cgit.freedesktop.org/~mareko/mesa/commit/?h=addrlib=0e0f044268d3c1af2e78f161aaa2d92c30167cc1
>
>
>   From the commit log:
>
>> I just overwrote all Mesa files with internal addrlib and discarded
>> hunks that we should probably keep, but I might have missed something.
>
>
> FWIW, if a separate branch was used for importing addrlib changes, Git
> could keep track of our changes to it in the Mesa tree.


 I concur in principle. In practice, I explored doing that, but the commit
 discipline on the internal addrlib repository is pretty crappy, so we'd end
 up having to massage commits anyway. Maybe we can find a sweet spot
 somewhere by updating slightly more regularly, perhaps once a month.
>>>
>>> That's too much time-consuming work with no benefit. I used to do
>>> that, but it sucked. I prefer 1 commit with everything - easy conflict
>>> resolution, not having to rebase 60 commits that don't make sense.
>> 
>> FWIW, I didn't mean importing individual commits of the addrlib
>> repository into Mesa. Just having a separate branch[0] where addrlib
>> snapshots are imported and which is then merged to master. That way Git
>> will keep track of changes in both repositories and automatically merge
>> them as much as possible. Just using Git for what it was made for. :)
>
> What do you mean precisely? I did some experiments with a structure like 
> this:
>
>   Mesa master  o--o--o--o--o--o--o
>//
>   addrlibo--o--o--oo
>
> where addrlib is a branch that *only* contains addrlib and has a 
> completely separate initial commit. This works somewhat reasonably, 
> except I was  worried that it might break bisecting Mesa by trying some 
> of the commits that only exist in the addrlib branch.
>
> Though now that I think about it again, maybe bisecting is fine because 
> none of the addrlib commits are ever in the "future cone" of any Mesa 
> master commit.

I don't believe bisect will pick commits on addrlib if good and bad were
both on master.

I've used this git structure for maintaining GL xml files in libepoxy,
and it's really nice.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 103586] OpenCL/Clover: AMD Turks: corrupt output buffer (depending on dimension order?)

2017-11-08 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=103586

--- Comment #8 from Jan Vesely  ---
(In reply to Dave Gilbert from comment #6)
> (In reply to Jan Vesely from comment #5)
> > (In reply to Dave Gilbert from comment #4)
> > > Created attachment 135313 [details]
> > > foo.link-0.ll
> > > 
> > > That's all 3 of the debug files it produced.
> > > (I wasn't sure which were the llvm and which the isa dumps; I guess the 
> > > asm
> > > is the isa? and the ll's are both llvm dumps?)
> > 
> > yes. the first .ll is from compilation step, the other one is from linking
> > step.
> > 
> > .ll dump looks correct.
> > .asm also looks correct.
> > 
> > you can try producing multiple asm dumps for working and non-working runs.
> > But I don't think that the llvm is the culprit here.
> > 
> > Can you try waiting for the kernel execution to complete explicitly before
> > mapping the buffer?
> > Ideally call clFinish() on line 63.
> 
> Since I'm on the C++ binding (probably a mistake) I used:
>   queue.finish();
> 
> and it seems to be working.
> 
> (This also corresponds possibly to what I'm seeing on a more complex kernel;
> with a more complex kernel I'm seeing a whole pile of data on the last
> few Z slices being bogus, suggesting it's not finished).
> 
> Dave

thanks for testing. I see you are using mesa 17.2.

there were a few changes to blocking call synchronization that went to mesa 17.3:
02f8ac6b70033a1b240d497c4664c359d2398cc3 (clover: Wrap event::wait_count in a
method taking care of the required locking.)
bc4000ee40c78efe1e5e8a6244d4bb55389d8418 (clover: Run the associated action
before an event is signalled.)
3a5b69c09ba355c616c274b0c7f5aba3bd21fd54 (clover: Wait for requested operation
if blocking flag is set)

which might help address the issue. Can you test mesa 17.3?

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] meson: Enable VC4's NEON assembly support.

2017-11-08 Thread Dylan Baker
Quoting Eric Anholt (2017-11-08 14:14:57)
> ---
>  meson.build |  5 +++--
>  src/gallium/drivers/vc4/meson.build | 13 +
>  2 files changed, 16 insertions(+), 2 deletions(-)
> 
> diff --git a/meson.build b/meson.build
> index 0118c9a7c5ef..189c9be5b59c 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -485,8 +485,9 @@ endif
>  
>  # FIXME: enable asm when cross compiler
>  # This is doable (autotools does it), but it's not of immediate concern
> -if meson.is_cross_build()
> -  message('Cross compiling, disabling asm')
> +if meson.is_cross_build() and (host_machine.cpu_family() == 'x86' or
> +   host_machine.cpu_family() == 'x86_64')

How about:
if meson.is_cross_build() and host_machine.cpu_family().startswith('x86')

Other than that, for the series:
Reviewed-by: Dylan Baker 

> +  message('Cross compiling, disabling x86/x86_64 asm')
>with_asm = false
>  endif
>  
> diff --git a/src/gallium/drivers/vc4/meson.build 
> b/src/gallium/drivers/vc4/meson.build
> index 38b47fbdd977..572d4b4fa9aa 100644
> --- a/src/gallium/drivers/vc4/meson.build
> +++ b/src/gallium/drivers/vc4/meson.build
> @@ -81,6 +81,18 @@ files_libvc4 = files(
>'vc4_uniforms.c',
>  )
>  
> +libvc4_neon = []
> +if with_asm_arch == 'arm'
> +  libvc4_neon = static_library(
> +'vc4_neon',
> +'vc4_tiling_lt_neon.c',
> +include_directories : [
> +  inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_broadcom
> +],
> +c_args : '-mfpu=neon',
> +  )
> +endif
> +
>  simpenrose_c_args = []
>  dep_simpenrose = dependency('simpenrose', required : false)
>  if dep_simpenrose.found()
> @@ -94,6 +106,7 @@ libvc4 = static_library(
>  inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_broadcom,
>  inc_gallium_drivers, inc_drm_uapi,
>],
> +  link_with: libvc4_neon,
>c_args : [c_vis_args, simpenrose_c_args],
>cpp_args : [cpp_vis_args],
>dependencies : [dep_simpenrose, dep_libdrm, dep_valgrind],
> -- 
> 2.15.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] meson: Drop stale comment about making valgrind conditional.

2017-11-08 Thread Eric Anholt
It was fixed in 5c2ff5773a707519f6a773126f201c4e1e8a42d7.
---
 meson.build | 1 -
 1 file changed, 1 deletion(-)

diff --git a/meson.build b/meson.build
index 117ed7c087f4..0118c9a7c5ef 100644
--- a/meson.build
+++ b/meson.build
@@ -691,7 +691,6 @@ if with_glvnd
   pre_args += '-DUSE_LIBGLVND=1'
 endif
 
-# TODO: make this conditional
 if with_valgrind != 'false'
   dep_valgrind = dependency('valgrind', required : with_valgrind == 'true')
   if dep_valgrind.found()
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/4] meson: vc4 ARMHF cross-build support

2017-11-08 Thread Eric Anholt
Timothy Arceri noted that vc4 didn't seem to have the NEON stuff
hooked up, so I worked on getting vc4 cross builds working for me
finally.  I haven't tested the result on HW quite yet.

Eric Anholt (4):
  meson: Leave dep_llvm empty if !with_llvm
  meson: Drop stale comment about making valgrind conditional.
  meson: Always link libgallium_dri.so against dep_thread.
  meson: Enable VC4's NEON assembly support.

 meson.build | 13 +++--
 src/gallium/drivers/vc4/meson.build | 13 +
 src/gallium/targets/dri/meson.build |  1 +
 3 files changed, 21 insertions(+), 6 deletions(-)

-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] meson: Leave dep_llvm empty if !with_llvm

2017-11-08 Thread Eric Anholt
The gallium auxiliary build would link against llvm, for the gallivm code
that it didn't build.  This broke the build on my armhf cross, where
libLLVM-3.9.so is not multiarch and thus points to x86-64 libs.
---
 meson.build | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/meson.build b/meson.build
index 44e062e01e9f..117ed7c087f4 100644
--- a/meson.build
+++ b/meson.build
@@ -657,10 +657,11 @@ llvm_modules = ['bitwriter', 'engine', 'mcdisassembler', 
'mcjit']
 if with_amd_vk
   llvm_modules += ['amdgpu', 'bitreader', 'ipo']
 endif
-dep_llvm = dependency(
-  'llvm', version : '>= 3.9.0', required : with_amd_vk, modules : llvm_modules,
-)
+dep_llvm = []
 if with_llvm
+  dep_llvm = dependency(
+'llvm', version : '>= 3.9.0', required : with_amd_vk, modules : 
llvm_modules,
+  )
   if dep_llvm.found()
 _llvm_version = dep_llvm.version().split('.')
 # Development versions of LLVM have an 'svn' suffix, we don't want that for
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] meson: Enable VC4's NEON assembly support.

2017-11-08 Thread Eric Anholt
---
 meson.build |  5 +++--
 src/gallium/drivers/vc4/meson.build | 13 +
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/meson.build b/meson.build
index 0118c9a7c5ef..189c9be5b59c 100644
--- a/meson.build
+++ b/meson.build
@@ -485,8 +485,9 @@ endif
 
 # FIXME: enable asm when cross compiler
 # This is doable (autotools does it), but it's not of immediate concern
-if meson.is_cross_build()
-  message('Cross compiling, disabling asm')
+if meson.is_cross_build() and (host_machine.cpu_family() == 'x86' or
+   host_machine.cpu_family() == 'x86_64')
+  message('Cross compiling, disabling x86/x86_64 asm')
   with_asm = false
 endif
 
diff --git a/src/gallium/drivers/vc4/meson.build 
b/src/gallium/drivers/vc4/meson.build
index 38b47fbdd977..572d4b4fa9aa 100644
--- a/src/gallium/drivers/vc4/meson.build
+++ b/src/gallium/drivers/vc4/meson.build
@@ -81,6 +81,18 @@ files_libvc4 = files(
   'vc4_uniforms.c',
 )
 
+libvc4_neon = []
+if with_asm_arch == 'arm'
+  libvc4_neon = static_library(
+'vc4_neon',
+'vc4_tiling_lt_neon.c',
+include_directories : [
+  inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_broadcom
+],
+c_args : '-mfpu=neon',
+  )
+endif
+
 simpenrose_c_args = []
 dep_simpenrose = dependency('simpenrose', required : false)
 if dep_simpenrose.found()
@@ -94,6 +106,7 @@ libvc4 = static_library(
 inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_broadcom,
 inc_gallium_drivers, inc_drm_uapi,
   ],
+  link_with: libvc4_neon,
   c_args : [c_vis_args, simpenrose_c_args],
   cpp_args : [cpp_vis_args],
   dependencies : [dep_simpenrose, dep_libdrm, dep_valgrind],
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] meson: Always link libgallium_dri.so against dep_thread.

2017-11-08 Thread Eric Anholt
Somehow on my cross build the -pthread is getting lost.  All the other
deps seem to work out fine.
---
 src/gallium/targets/dri/meson.build | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/targets/dri/meson.build 
b/src/gallium/targets/dri/meson.build
index 0ce088e1aca6..c591b75d0379 100644
--- a/src/gallium/targets/dri/meson.build
+++ b/src/gallium/targets/dri/meson.build
@@ -134,6 +134,7 @@ libgallium_dri = shared_library(
   ],
   dependencies : [
 gallium_dri_depends, dep_selinux, dep_expat, dep_libdrm, dep_llvm,
+dep_thread,
   ],
 )
 
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3] egl/wayland: Support for KHR_partial_update

2017-11-08 Thread Harish Krupo
Hi Emil,

Emil Velikov  writes:

> On 27 October 2017 at 05:54, Harish Krupo  wrote:
>> Hi Eric,
>>
>> Eric Engestrom  writes:
>>
>>> On Monday, 2017-10-23 16:20:54 +0530, Harish Krupo wrote:
 This passes 33/37 deqp tests related to partial_update, 4 are not
 supported. Tests not supported:
 dEQP-EGL.functional.negative_partial_update.not_postable_surface
 dEQP-EGL.functional.negative_partial_update.not_current_surface
 dEQP-EGL.functional.negative_partial_update.buffer_preserved
 dEQP-EGL.functional.negative_partial_update.not_current_surface2
 Reason: No matching egl config found.

 v2: Remove unnecessary return statement. Keep function names
 consistent.  (Emil Velikov)
 Add not supported list to commit message. (Eric Engestrom)

 v3: Remove explicit with_damage variable. (Eric Engestrom)

 Signed-off-by: Harish Krupo 
 ---
  src/egl/drivers/dri2/platform_wayland.c | 54 
 ++---
  1 file changed, 36 insertions(+), 18 deletions(-)

 diff --git a/src/egl/drivers/dri2/platform_wayland.c 
 b/src/egl/drivers/dri2/platform_wayland.c
 index b38eb1c335..8846099d57 100644
 --- a/src/egl/drivers/dri2/platform_wayland.c
 +++ b/src/egl/drivers/dri2/platform_wayland.c
 @@ -790,27 +790,44 @@ create_wl_buffer(struct dri2_egl_display *dri2_dpy,
 return ret;
  }

 +/**
 + * Called via eglSetDamageRegionKHR(), drv->API.SetDamageRegion().
 + */
  static EGLBoolean
 -try_damage_buffer(struct dri2_egl_surface *dri2_surf,
 -  const EGLint *rects,
 -  EGLint n_rects)
 +dri2_wl_set_damage_region(_EGLDriver *drv,
 + _EGLDisplay *dpy,
 + _EGLSurface *surf,
 + const EGLint *rects,
 + EGLint n_rects)
  {
 -   if (wl_proxy_get_version((struct wl_proxy *) 
 dri2_surf->wl_surface_wrapper)
 -   < WL_SURFACE_DAMAGE_BUFFER_SINCE_VERSION)
 -  return EGL_FALSE;
 +   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);

 -   for (int i = 0; i < n_rects; i++) {
 -  const int *rect = [i * 4];
 +   /* The spec doesn't mention what should be returned in case of
 +* failure in setting the damage buffer with the window system, so
 +* setting the damage to maximum surface area
 +*/
 +   if (!n_rects ||
 +   wl_proxy_get_version((struct wl_proxy *) 
 dri2_surf->wl_surface_wrapper)
 +   < WL_SURFACE_DAMAGE_BUFFER_SINCE_VERSION) {
 +  wl_surface_damage(dri2_surf->wl_surface_wrapper,
 +0, 0, INT32_MAX, INT32_MAX);
 +   } else {
>>>
>>> I know Emil suggested you remove the `return` in an earlier version, but
>>> if you add it back here you can drop the else, and the diff will look
>>> much cleaner, keeping only the version check getting an `|| !n_rects`
>>> and `return false` becoming `damage(everything)`.
>>>
>>> Other than that, it looks good to me. Thanks :)
>>>
>>
>> Ok, will do that change.
>> It would be something like this:
>>   if (!n_rects ||
>>wl_proxy_get_version((struct wl_proxy *) 
>> dri2_surf->wl_surface_wrapper)
>>< WL_SURFACE_DAMAGE_BUFFER_SINCE_VERSION) {
>>  wl_surface_damage(dri2_surf->wl_surface_wrapper,
>>   0, 0, INT32_MAX, INT32_MAX);
>>  if (!n_rects)
>> return EGL_TRUE;
>>
>>  return EGL_FALSE;
>>   }
>>
>> I have a small confusion though:
>> As per spec [1]:
>>   * If eglSetDamageRegionKHR has already been called on <surface> since the
>> most recent frame boundary, an EGL_BAD_ACCESS error is generated
>>
>> The "already been called" part is confusing. Should it be interpreted
>> as already been called and the previous call returned a true value or it
>> has already been called irrespective of the previous return value?
>>
>> AFAICT from deqp [2]: it expects true on the first call, false on the
>> second and expects EGL_BAD_ACCESS (it follows the 2nd approach where
>> irrespective of the return value, calling eglSetDamageRegionKHR twice is
>> an error). But in the current implementation the SetDamageRegionCalled
>> variable will be set only when we are successful in setting the damage
>> with the window system. In my case I always get a false return value (I
>> am testing on gnome wayland). Thus it ends up not returning
>> EGL_BAD_ACCESS and the test fails.
>>
>> To avoid this problem in the previous patch I set the return value to
>> true and set the damage region to full when version doesn't match. :)
>>
>> One way to fix this would be to set SetDamageRegionCalled to true
>> irrespective of the return value.
>>
>> Is this okay? I am still trying to see if this would cause

Re: [Mesa-dev] [PATCH] meson: Add script to use VERSION file for getting version

2017-11-08 Thread Dylan Baker
Quoting Eric Engestrom (2017-11-08 12:38:26)
> 
> 
> On 8 November 2017 19:32:22 GMT, Dylan Baker  wrote:
> > Quoting Eric Engestrom (2017-11-08 04:21:41)
> > > On Wednesday, 2017-11-01 11:58:16 -0700, Dylan Baker wrote:
> > > > Meson has up until this point set its version in the root
> > meson.build
> > > > script. While there are other build systems this creates "one more
> > > > thing" to duplicate between meson and every other build system,
> > this
> > > > script is a simple "read, strip, print" sort of deal to allow
> > meson to
> > > > read the VERSION file.
> > > > 
> > > > I chose to implement this in python since python is portable, and
> > to
> > > > keep the meson.build script clean. This is also complicated by the
> > fact
> > > > that the project() call *must* be the first non-comment,non-blank
> > in the
> > > > toplevel meson.build script.
> > > > 
> > > > Signed-off-by: Dylan Baker 
> > > > ---
> > > >  meson.build  |  2 +-
> > > >  scripts/meson_get_version.py | 35
> > +++
> > > >  2 files changed, 36 insertions(+), 1 deletion(-)
> > > >  create mode 100644 scripts/meson_get_version.py
> > > > 
> > > > diff --git a/meson.build b/meson.build
> > > > index 6ad8c8bbf4b..3f77380f7df 100644
> > > > --- a/meson.build
> > > > +++ b/meson.build
> > > > @@ -21,7 +21,7 @@
> > > >  project(
> > > >'mesa',
> > > >['c', 'cpp'],
> > > > -  version : '17.3.0-devel',
> > > > +  version :
> > run_command(find_program('scripts/meson_get_version.py')).stdout(),
> > 
> > And actually, thinking about windows, this should be:
> > run_command(
> > [find_program('python', 'python2', 'python3'),
> > 'bin/meson_get_version.py']
> > ).stdout(),
> > 
> > Since Windows doesn't support shebangs like *nix does.
> 
> Indeed; r-b still stands with this change :)
> 
> Speaking of, did you manage to get mesa building with meson on windows?

Not yet, it's on my list of things to do soon, but I'd like to get as much of
the Linux/unix-like stuff building first as possible. macOS is my first
adventure outside of the comfortable Linux/BSD world.


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv/meson: Generate dev_icd.json

2017-11-08 Thread Jason Ekstrand
On Wed, Nov 8, 2017 at 1:40 PM, Chad Versace 
wrote:

> On Tue 07 Nov 2017, Dylan Baker wrote:
> > Quoting Eric Engestrom (2017-11-07 07:25:53)
> > > On Wednesday, 2017-11-01 13:49:03 -0700, Chad Versace wrote:
> > > > I tested this in a setup where the builddir was outside of the
> srcdir.
> > > > ---
> > > >  src/intel/vulkan/meson.build | 12 
> > > >  1 file changed, 12 insertions(+)
> > > >
> > > > diff --git a/src/intel/vulkan/meson.build
> b/src/intel/vulkan/meson.build
> > > > index ff24e304ef5..e8b7f407507 100644
> > > > --- a/src/intel/vulkan/meson.build
> > > > +++ b/src/intel/vulkan/meson.build
> > > > @@ -48,6 +48,18 @@ intel_icd = custom_target(
> > > >install : true,
> > > >  )
> > > >
> > > > +dev_icd = custom_target(
> > > > +  'dev_icd',
> > > > +  input : 'anv_icd.py',
> > > > +  output : 'dev_icd.@0@.json'.format(target_machine.cpu()),
> > >
> > > Strictly speaking, shouldn't that be `host_machine` [1] ?
> > > I don't see how one would do a canadian build of mesa though, so
> > > host == target should always be true.
> >
> > That's my fault. There are (or were) a number of cases where I used
> target
> > instead of host, that can also be a follow up.
> >
> > In any case:
> > Acked-by: Dylan Baker 
>
> I build Mesa (with autotools) where host == x86_64 but target == armv7a.
>

You're using dev_icd with a cross-compile?  Yikes!  I mean, it can work,
but that's not what I would have expected.  Also, why are you building anv
and targeting armv7a?  Does that even work?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH v1 00/30] anv: dma-buf and DRM format modifiers

2017-11-08 Thread Chad Versace
On Tue 07 Nov 2017, Jason Ekstrand wrote:
> All of the pre-work patches have been reviewed by myself and Lionel.  I've 
> also
> read through the rest of the series and things look pretty good to me.  I did
> make some scattered comments but they shouldn't be a big deal.
> 
> My primary concern with the series is the lack of CCS support.  Getting that
> working correctly is clearly the biggest question mark in all of modifiers so
> I'm hesitant to pass judgment on this as a patch series (I think the spec is
> ok) with that piece still missing.

That's reasonable. I began implementing CCS support, but removed it from
the series due to lack of tests. Any CCS code I write, I need to test
it by sharing the VkImage with KMS before I feel confident in it. I'll
do that and resend.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Adding support for EXT_sRGB for Opengl ES

2017-11-08 Thread Harish Krupo
Hi Emil,
Emil Velikov  writes:

> Hi Harish,
>
> This seems to have fallen through the cracks, right?

Thanks for bringing this up again :)

> Keep in mind that I've not checked all the existing code paths - just
> skimming through the patch itself.
>
> s/Adding support for EXT_sRGB for Opengl ES/mesa: add support for GL_EXT_sRGB/
>

This was my first patch in mesa, so the rookie mistakes. :)

> On 7 April 2017 at 05:20, Harish Krupo  wrote:
>> This adds support for the GL_EXT_sRGB extension for OpenGL ES 1.0 and 
>> above.
>> With this patch this test passes in dEQP:
>> dEQP-GLES2.capability.extensions.uncompressed_texture_formats.GL_EXT_sRGB
>>
> Having a more comprehensive list [as mentioned by Tapani] would be great here.
> X tests, A pass, B fail, C not supported.
>
>> Signed-off-by: Harish Krupo 
>> ---
>>  src/mapi/glapi/gen/es_EXT.xml| 8 
>>  src/mesa/drivers/dri/i965/intel_extensions.c | 1 +
>>  src/mesa/main/extensions_table.h | 1 +
>>  src/mesa/main/fbobject.c | 2 +-
>>  src/mesa/main/genmipmap.c| 3 +++
>>  src/mesa/main/mtypes.h   | 1 +
>>  6 files changed, 15 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/mapi/glapi/gen/es_EXT.xml b/src/mapi/glapi/gen/es_EXT.xml
>> index 3e705eb409..a6fd7c755a 100644
>> --- a/src/mapi/glapi/gen/es_EXT.xml
>> +++ b/src/mapi/glapi/gen/es_EXT.xml
>> @@ -795,6 +795,14 @@
>>  
>>  
>>
>> +
>> +
> Extension number seems to be 105
>
>
>> --- a/src/mesa/main/genmipmap.c
>> +++ b/src/mesa/main/genmipmap.c
>> @@ -96,6 +96,9 @@ 
>> _mesa_is_valid_generate_texture_mipmap_internalformat(struct gl_context *ctx,
>>   (_mesa_is_es3_color_renderable(internalformat) &&
>>_mesa_is_es3_texture_filterable(ctx, internalformat));
>> }
>> +   else if (!_mesa_is_desktop_gl(ctx) && ctx->Extensions.EXT_sRGB && 
>> (internalformat == GL_SRGB)) {
>> +  return GL_INVALID_OPERATION;
>> +   }
> I'm not sure how this would trigger - the function returns bool, thus
> the GL_INVALID_OPERATION will be promoted to true.
>

Yes, this is wrong, will look into it.
IIRC, there was some other bigger issue with this extension due to which
I was not able to proceed. I will gather all the related information and
will ask here. I am sure somebody will be able to help.

Thank you

Regards
Harish Krupo

> Hence the caller (generate_texture_mipmap) will continue instead of
> flagging an error.
>
> HTH
> Emil

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv/meson: Generate dev_icd.json

2017-11-08 Thread Chad Versace
On Tue 07 Nov 2017, Dylan Baker wrote:
> Quoting Eric Engestrom (2017-11-07 07:25:53)
> > On Wednesday, 2017-11-01 13:49:03 -0700, Chad Versace wrote:
> > > I tested this in a setup where the builddir was outside of the srcdir.
> > > ---
> > >  src/intel/vulkan/meson.build | 12 
> > >  1 file changed, 12 insertions(+)
> > > 
> > > diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build
> > > index ff24e304ef5..e8b7f407507 100644
> > > --- a/src/intel/vulkan/meson.build
> > > +++ b/src/intel/vulkan/meson.build
> > > @@ -48,6 +48,18 @@ intel_icd = custom_target(
> > >install : true,
> > >  )
> > >  
> > > +dev_icd = custom_target(
> > > +  'dev_icd',
> > > +  input : 'anv_icd.py',
> > > +  output : 'dev_icd.@0@.json'.format(target_machine.cpu()),
> > 
> > Strictly speaking, shouldn't that be `host_machine` [1] ?
> > I don't see how one would do a canadian build of mesa though, so
> > host == target should always be true.
> 
> That's my fault. There are (or were) a number of cases where I used target
> instead of host, that can also be a follow up.
> 
> In any case:
> Acked-by: Dylan Baker 

I build Mesa (with autotools) where host == x86_64 but target == armv7a.

The icd filename should have the same architecture as the driver it
loads, and that's the target_machine. You never need to access the
dev_icd.*.json on the host machine (that is, unless your target machine
and host machine are the same machine).
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/5] meson: build gallium-xlib based glx

2017-11-08 Thread Eric Anholt
Dylan Baker  writes:

> Signed-off-by: Dylan Baker 
> ---
>  meson.build | 16 +++---
>  src/gallium/meson.build | 11 +++-
>  src/gallium/state_trackers/glx/xlib/meson.build | 27 ++
>  src/gallium/targets/libgl-xlib/meson.build  | 68 
> +
>  src/gallium/winsys/sw/xlib/meson.build  | 27 ++
>  5 files changed, 139 insertions(+), 10 deletions(-)
>  create mode 100644 src/gallium/state_trackers/glx/xlib/meson.build
>  create mode 100644 src/gallium/targets/libgl-xlib/meson.build
>  create mode 100644 src/gallium/winsys/sw/xlib/meson.build
>

> diff --git a/src/gallium/targets/libgl-xlib/meson.build 
> b/src/gallium/targets/libgl-xlib/meson.build
> new file mode 100644
> index 000..825b7b4a0f1
> --- /dev/null
> +++ b/src/gallium/targets/libgl-xlib/meson.build
> @@ -0,0 +1,68 @@
> +# Copyright © 2017 Intel Corporation
> +
> +# Permission is hereby granted, free of charge, to any person obtaining a 
> copy
> +# of this software and associated documentation files (the "Software"), to 
> deal
> +# in the Software without restriction, including without limitation the 
> rights
> +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> +# copies of the Software, and to permit persons to whom the Software is
> +# furnished to do so, subject to the following conditions:
> +
> +# The above copyright notice and this permission notice shall be included in
> +# all copies or substantial portions of the Software.
> +
> +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
> FROM,
> +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 
> THE
> +# SOFTWARE.
> +
> +# TODO: support non-static targets
> +# Static targets are always enabled in autotools (unless you modify
> +# configure.ac)
> +
> +gallium_xlib_c_args = [
> +  '-DGALLIUM_SOFTPIPE',
> +  '-DGALLIUM_RBUG',
> +  '-DGALLIUME_TRACE',
> +]
> +gallium_xlib_ld_args = []
> +gallium_xlib_link_with = []
> +gallium_xlib_depends = []
> +gallium_xlib_link_depends = []
> +gallium_xlib_xlibvers = []
> +gallium_xlib_sources = []
> +
> +if with_ld_version_script
> +  gallium_xlib_ld_args += ['-Wl,--version-script', 
> join_paths(meson.current_source_dir(), 'libgl-xlib.sym')]
> +  gallium_xlib_link_depends += files('libgl-xlib.sym')
> +endif
> +if with_shared_glapi
> +  gallium_xlib_link_with += libglapi
> +endif
> +if with_llvm
> +  gallium_xlib_c_args += '-DGALLIUM_LLVMPIPE'
> +  gallium_xlib_depends += dep_llvm
> +  gallium_xlib_link_with += libllvmpipe
> +endif
> +# TODO: SWR
> +
> +libgl = shared_library(
> +  'GL',
> +  files('xlib.c'),
> +  include_directories : [
> +inc_common, inc_gallium_winsys, inc_gallium_drivers,
> +include_directories('../../state_trackers/glx/xlib'),
> +  ],
> +  c_args : [c_vis_args, gallium_xlib_c_args],
> +  link_args : [ld_args_bsymbolic, ld_args_gc_sections, gallium_xlib_ld_args],
> +  link_depends : gallium_xlib_link_depends,
> +  link_with : [
> +libxlib, libws_xlib, libsoftpipe, libtrace, librbug, libglapi_static,
> +libgallium, libmesa_util, libmesa_gallium, gallium_xlib_link_with,
> +  ],
> +  dependencies : [dep_clock, dep_unwind, gallium_xlib_depends],

We shouldn't have to manually specify most of these deps, I think, since
they should be transitively pulled in by the static libraries using
them, right?  It's fine either way, though.

> +  install : true,
> +  version : '1.5.0',

Looks like this drops the MESA_MAJOR/MINOR/TINY version handling of the
automake version.  Other than this, and needing the build fix in patch
3, the series is:

Reviewed-by: Eric Anholt 


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 103586] OpenCL/Clover: AMD Turks: corrupt output buffer (depending on dimension order?)

2017-11-08 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=103586

--- Comment #7 from Jan Vesely  ---
Created attachment 135318
  --> https://bugs.freedesktop.org/attachment.cgi?id=135318&action=edit
annotated asm dump

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 103586] OpenCL/Clover: AMD Turks: corrupt output buffer (depending on dimension order?)

2017-11-08 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=103586

--- Comment #6 from Dave Gilbert  ---
(In reply to Jan Vesely from comment #5)
> (In reply to Dave Gilbert from comment #4)
> > Created attachment 135313 [details]
> > foo.link-0.ll
> > 
> > That's all 3 of the debug files it produced.
> > (I wasn't sure which were the llvm and which the isa dumps; I guess the asm
> > is the isa? and the ll's are both llvm dumps?)
> 
> yes. the first .ll is from compilation step, the other one is from linking
> step.
> 
> .ll dump looks correct.
> .asm also looks correct.
> 
> you can try producing multiple asm dumps for working and non-working runs.
> But I don't think that the llvm is the culprit here.
> 
> Can you try waiting for the kernel execution to complete explicitly before
> mapping the buffer?
> Ideally call clFinish() on line 63.

Since I'm on the C++ binding (probably a mistake) I used:
  queue.finish();

and it seems to be working.

(This possibly also corresponds to what I'm seeing with a more complex kernel:
there, a whole pile of data on the last few Z slices is bogus, suggesting the
kernel has not finished).

Dave

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 103586] OpenCL/Clover: AMD Turks: corrupt output buffer (depending on dimension order?)

2017-11-08 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=103586

--- Comment #5 from Jan Vesely  ---
(In reply to Dave Gilbert from comment #4)
> Created attachment 135313 [details]
> foo.link-0.ll
> 
> That's all 3 of the debug files it produced.
> (I wasn't sure which were the llvm and which the isa dumps; I guess the asm
> is the isa? and the ll's are both llvm dumps?)

yes. the first .ll is from compilation step, the other one is from linking
step.

.ll dump looks correct.
.asm also looks correct.

you can try producing multiple asm dumps for working and non-working runs. But
I don't think that the llvm is the culprit here.

Can you try waiting for the kernel execution to complete explicitly before
mapping the buffer?
Ideally call clFinish() on line 63.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] meson: Add script to use VERSION file for getting version

2017-11-08 Thread Eric Engestrom


On 8 November 2017 19:32:22 GMT, Dylan Baker  wrote:
> Quoting Eric Engestrom (2017-11-08 04:21:41)
> > On Wednesday, 2017-11-01 11:58:16 -0700, Dylan Baker wrote:
> > > > Meson has up until this point set its version in the root
> meson.build
> > > > script. While there are other build systems, this creates "one more
> > > thing" to duplicate between meson and every other build system,
> this
> > > script is a simple "read, strip, print" sort of deal to allow
> meson to
> > > read the VERSION file.
> > > 
> > > I chose to implement this in python since python is portable, and
> to
> > > keep the meson.build script clean. This is also complicated by the
> fact
> > > that the project() call *must* be the first non-comment,non-blank
> in the
> > > toplevel meson.build script.
> > > 
> > > Signed-off-by: Dylan Baker 
> > > ---
> > >  meson.build  |  2 +-
> > >  scripts/meson_get_version.py | 35
> +++
> > >  2 files changed, 36 insertions(+), 1 deletion(-)
> > >  create mode 100644 scripts/meson_get_version.py
> > > 
> > > diff --git a/meson.build b/meson.build
> > > index 6ad8c8bbf4b..3f77380f7df 100644
> > > --- a/meson.build
> > > +++ b/meson.build
> > > @@ -21,7 +21,7 @@
> > >  project(
> > >'mesa',
> > >['c', 'cpp'],
> > > -  version : '17.3.0-devel',
> > > +  version :
> run_command(find_program('scripts/meson_get_version.py')).stdout(),
> 
> And actually, thinking about windows, this should be:
> run_command(
> [find_program('python', 'python2', 'python3'),
> 'bin/meson_get_version.py']
> ).stdout(),
> 
> Since Windows doesn't support shebangs like *nix does.

Indeed; r-b still stands with this change :)

Speaking of, did you manage to get mesa building with meson on windows?

> 
> > >license : 'MIT',
> > >meson_version : '>= 0.42',
> > >default_options : ['c_std=c99', 'cpp_std=c++11']
> > > diff --git a/scripts/meson_get_version.py
> b/scripts/meson_get_version.py
> > > new file mode 100644
> > > index 000..a221e26f250
> > > --- /dev/null
> > > +++ b/scripts/meson_get_version.py
> > 
> > get_reviewers.pl is the only script in `scripts/`, everything else
> is in
> > `bin/`. I would suggest putting your script in `bin/` for now, we
> might
> > merge `bin/` into `scripts/` later.
> > 
> > > @@ -0,0 +1,35 @@
> > > +#!/usr/bin/env python
> > > +# encoding=utf-8
> > > +# Copyright © 2017 Intel Corporation
> > > +
> > > +# Permission is hereby granted, free of charge, to any person
> obtaining a copy
> > > +# of this software and associated documentation files (the
> "Software"), to deal
> > > +# in the Software without restriction, including without
> limitation the rights
> > > +# to use, copy, modify, merge, publish, distribute, sublicense,
> and/or sell
> > > +# copies of the Software, and to permit persons to whom the
> Software is
> > > +# furnished to do so, subject to the following conditions:
> > > +
> > > +# The above copyright notice and this permission notice shall be
> included in
> > > +# all copies or substantial portions of the Software.
> > > +
> > > +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> > > +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> > > +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
> EVENT SHALL THE
> > > +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
> OR OTHER
> > > +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> ARISING FROM,
> > > +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> DEALINGS IN THE
> > > +# SOFTWARE.
> > > +
> > > +from __future__ import print_function
> > > +import os
> > > +
> > > +
> > > +def main():
> > > +filename = os.path.join(os.environ['MESON_SOURCE_ROOT'],
> 'VERSION')
> > > +with open(filename) as f:
> > > +version = f.read().strip()
> > > +print(version, end='')
> > > +
> > > +
> > > +if __name__ == '__main__':
> > > +main()
> > 
> > Seems like overkill, but why not. Change `main()` to
> `print_mesa_version()`
> > or something though, if the idea is to have it available as a
> module?
> > 
> > Reviewed-by: Eric Engestrom 
> > 
> > > -- 
> > > 2.14.3
> > > 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/18] radeon/vcn: add vcn encode interface

2017-11-08 Thread Christian König

Am 08.11.2017 um 19:08 schrieb boyuan.zh...@amd.com:

From: Boyuan Zhang 

Add a new header file for vcn encode interface

Signed-off-by: Boyuan Zhang 


Only briefly skimmed over it, but what I saw looks mostly sane.

It might be nice to have the code for encoding the SPS/PPS and 
slice header not in the driver, but in some helpers in /src/gallium/vl. 
But that is only a nice-to-have.


Apart from that the series is Acked-by: Christian König 
.


Regards,
Christian.


---
  src/gallium/drivers/radeon/radeon_vcn_enc.h | 325 
  1 file changed, 325 insertions(+)
  create mode 100644 src/gallium/drivers/radeon/radeon_vcn_enc.h

diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.h 
b/src/gallium/drivers/radeon/radeon_vcn_enc.h
new file mode 100644
index 000..f9fa168
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc.h
@@ -0,0 +1,325 @@
+/**
+ *
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **/
+
+#ifndef _RADEON_VCN_ENC_H
+#define _RADEON_VCN_ENC_H
+
+#define RENCODE_FW_INTERFACE_MAJOR_VERSION 1
+#define RENCODE_FW_INTERFACE_MINOR_VERSION 2
+
+#define RENCODE_IB_PARAM_SESSION_INFO  0x0001
+#define RENCODE_IB_PARAM_TASK_INFO 0x0002
+#define RENCODE_IB_PARAM_SESSION_INIT  0x0003
+#define RENCODE_IB_PARAM_LAYER_CONTROL 0x0004
+#define RENCODE_IB_PARAM_LAYER_SELECT  0x0005
+#define RENCODE_IB_PARAM_RATE_CONTROL_SESSION_INIT 0x0006
+#define RENCODE_IB_PARAM_RATE_CONTROL_LAYER_INIT   0x0007
+#define RENCODE_IB_PARAM_RATE_CONTROL_PER_PICTURE  0x0008
+#define RENCODE_IB_PARAM_QUALITY_PARAMS0x0009
+#define RENCODE_IB_PARAM_SLICE_HEADER  0x000a
+#define RENCODE_IB_PARAM_ENCODE_PARAMS 0x000b
+#define RENCODE_IB_PARAM_INTRA_REFRESH 0x000c
+#define RENCODE_IB_PARAM_ENCODE_CONTEXT_BUFFER 0x000d
+#define RENCODE_IB_PARAM_VIDEO_BITSTREAM_BUFFER0x000e
+#define RENCODE_IB_PARAM_FEEDBACK_BUFFER   0x0010
+#define RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU0x0020
+
+#define RENCODE_H264_IB_PARAM_SLICE_CONTROL0x0021
+#define RENCODE_H264_IB_PARAM_SPEC_MISC0x0022
+#define RENCODE_H264_IB_PARAM_ENCODE_PARAMS0x0023
+#define RENCODE_H264_IB_PARAM_DEBLOCKING_FILTER0x0024
+
+#define RENCODE_IB_OP_INITIALIZE   0x0101
+#define RENCODE_IB_OP_CLOSE_SESSION0x0102
+#define RENCODE_IB_OP_ENCODE   0x0103
+#define RENCODE_IB_OP_INIT_RC  0x0104
+#define RENCODE_IB_OP_INIT_RC_VBV_BUFFER_LEVEL 0x0105
+#define RENCODE_IB_OP_SET_SPEED_ENCODING_MODE  0x0106
+#define RENCODE_IB_OP_SET_BALANCE_ENCODING_MODE0x0107
+#define RENCODE_IB_OP_SET_QUALITY_ENCODING_MODE0x0108
+
+#define RENCODE_IF_MAJOR_VERSION_MASK  0x
+#define RENCODE_IF_MAJOR_VERSION_SHIFT 16
+#define RENCODE_IF_MINOR_VERSION_MASK  0x
+#define RENCODE_IF_MINOR_VERSION_SHIFT 0
+
+#define RENCODE_ENCODE_STANDARD_H264   1
+
+#define 

Re: [Mesa-dev] [PATCH] docs: add documentation for building with meson

2017-11-08 Thread Dylan Baker
Quoting Ian Romanick (2017-11-08 11:05:24)
> On 11/08/2017 10:59 AM, Ian Romanick wrote:
> > Is there a way to get a list of options before having any success?  I
> > want to disable using LLVM, but I can't get the list of options to do so
> > because I don't have libelf (required for LLVM... which I don't want):
> > 
> > Dependency libelf found: NO
> > 
> > Meson encountered an error in file meson.build, line 628, column 2:
> > C library 'elf' not found
> 
> I guess the answer is 'less meson-options.txt'. :)  Should probably
> document that for us n00bs.

That's in the v5 :)

> 
> > On 11/07/2017 09:28 AM, Dylan Baker wrote:
> >> v2: - Add information about CC, CXX, CFLAGS, and CXXFLAGS (Nicolai)
> >> - Add message at top that meson for mesa is still a work in progress
> >> - Add trailing "/" to directories (Eric E.)
> >> - Fix a number of spelling/grammar/style suggestions from Eric E.
> >> - Make a number of changes as suggested by Emil.
> >> v3: - Fix order of commands in example (Eric E.)
> >> - Add documentation for overriding LLVM version (Eric E.)
> >> v4: - Rebase on master
> >> - update default buildtype
> >> - add note about b_ndebug
> >> - Clarify meson configure a bit
> >>
> >> Signed-off-by: Dylan Baker 
> >> Reviewed-by: Eric Engestrom  (v3)
> >> ---
> >>  docs/contents.html |   1 +
> >>  docs/meson.html| 151 
> >> +
> >>  2 files changed, 152 insertions(+)
> >>  create mode 100644 docs/meson.html
> >>
> >> diff --git a/docs/contents.html b/docs/contents.html
> >> index d5455421091..9a86019e2f6 100644
> >> --- a/docs/contents.html
> >> +++ b/docs/contents.html
> >> @@ -43,6 +43,7 @@
> >>  Compiling / Installing
> >>
> >>  Autoconf
> >> +Meson
> >>
> >>  
> >>  Precompiled Libraries
> >> diff --git a/docs/meson.html b/docs/meson.html
> >> new file mode 100644
> >> index 000..ee505b1d5ee
> >> --- /dev/null
> >> +++ b/docs/meson.html
> >> @@ -0,0 +1,151 @@
> >> + >> "http://www.w3.org/TR/html4/loose.dtd;>
> >> +
> >> +
> >> +  
> >> +  Compilation and Installation using Meson
> >> +  
> >> +
> >> +
> >> +
> >> +
> >> +  The Mesa 3D Graphics Library
> >> +
> >> +
> >> +
> >> +
> >> +
> >> +Compilation and Installation using Meson
> >> +
> >> +1. Basic Usage
> >> +
> >> +The Meson build system for Mesa is still under active 
> >> development,
> >> +and should not be used in production environments.
> >> +
> >> +The meson build is currently only tested on linux, and is known to not 
> >> work
> >> +on macOS, Windows, and haiku. This will be fixed.
> >> +
> >> +
> >> +The meson program is used to configure the source directory and generates
> >> +either a ninja build file, or Visual Studio® build files. The latter 
> >> must
> >> +be enabled via the --backend switch, as ninja is always the default. 
> >> Meson only
> >> +supports out-of-tree builds, and must be passed a directory to put built 
> >> and
> >> +generated sources into. We'll call that directory "build" for examples.
> >> +
> >> +
> >> +
> >> +meson build/
> >> +
> >> +
> >> +
> >> +To see a description of your options you can run "meson configure" along 
> >> with a
> >> +build directory to view the selected options for. This will show your 
> >> meson
> >> +global arguments and project arguments, along with their defaults and your
> >> +local settings.
> >> +
> >> +
> >> +
> >> +meson configure build/
> >> +
> >> +
> >> +
> >> +With additional arguments "meson configure" is used to change options on an
> >> +already configured build directory. All options passed to this command 
> >> are in
> >> +the form -D"command"="value".
> >> +
> >> +
> >> +
> >> +meson configure build/ -Dprefix=/tmp/install -Dglx=true
> >> +
> >> +
> >> +
> >> +Once you've run meson successfully you can use your configured backend to 
> >> build
> >> +the project. With ninja, the -C option can be used to point at a 
> >> directory
> >> +to build.
> >> +
> >> +
> >> +
> >> +ninja -C build/
> >> +
> >> +
> >> +
> >> +Without arguments, it will produce libGL.so and/or several other libraries
> >> +depending on the options you have chosen. Later, if you want to rebuild 
> >> for a
> >> +different configuration, you should run ninja clean before
> >> +rebuilding, or create a new out of tree build directory (meson supports an
> >> +unlimited number of them) for each configuration you want to build.
> >> +
> >> +
> >> +CC, CFLAGS, CXX, CXXFLAGS
> >> +These environment variables
> >> +control the C and C++ compilers used during the build. The default 
> >> compilers
> >> +depend on your operating system. Meson supports most of the popular 
> >> compilers,
> >> +a complete list is available
> >> + >> href="http://mesonbuild.com/Reference-tables.html#compiler-ids;>here.
> >> +
> >> +These arguments are consumed and stored by meson when it is initialized or
> >> +re-initialized. Therefore passing 

  1   2   >