date:20170823

Re: [Mesa-dev] [PATCH 4/7] i965: Pass fb into emit_null_surface instead of dimensions.

2017-08-23 Thread Pohjolainen, Topi

On Tue, Aug 22, 2017 at 01:57:58PM -0700, Kenneth Graunke wrote:
> We either want the framebuffer dimensions or 1x1x1.  Passing fb and
> falling back to 1x1x1 lets us shorten some calls.
> ---
>  src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 28 
> ++--
>  1 file changed, 12 insertions(+), 16 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
> b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index 2d7de54dcdb..a0cb566e719 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> @@ -824,9 +824,7 @@ const struct brw_tracked_state brw_wm_pull_constants = {
>   */
>  static void
>  emit_null_surface_state(struct brw_context *brw,
> -unsigned width,
> -unsigned height,
> -unsigned samples,
> +const struct gl_framebuffer *fb,
>  uint32_t *out_offset)
>  {
> uint32_t *surf = brw_state_batch(brw,
> @@ -834,6 +832,11 @@ emit_null_surface_state(struct brw_context *brw,
>  brw->isl_dev.ss.align,
>  out_offset);
>  
> +   /* Use the fb dimensions or 1x1x1 */
> +   unsigned width   = fb ? _mesa_geometric_width(fb)   : 1;
> +   unsigned height  = fb ? _mesa_geometric_height(fb)  : 1;
> +   unsigned samples = fb ? _mesa_geometric_samples(fb) : 1;

These could be const.

> +
> if (brw->gen != 6 || samples <= 1) {
>isl_null_fill_state(&brw->isl_dev, surf,
>isl_extent3d(width, height, 1));
> @@ -992,9 +995,6 @@ brw_update_renderbuffer_surfaces(struct brw_context *brw,
>   uint32_t *surf_offset)
>  {
> GLuint i;
> -   const unsigned int w = _mesa_geometric_width(fb);
> -   const unsigned int h = _mesa_geometric_height(fb);
> -   const unsigned int s = _mesa_geometric_samples(fb);
>  
> /* Update surfaces for drawing buffers */
> if (fb->_NumColorDrawBuffers >= 1) {
> @@ -1007,12 +1007,12 @@ brw_update_renderbuffer_surfaces(struct brw_context 
> *brw,
> gen6_update_renderbuffer_surface(brw, rb, i, surf_index) :
> gen4_update_renderbuffer_surface(brw, rb, i, surf_index);
>} else {
> -emit_null_surface_state(brw, w, h, s, &surf_offset[surf_index]);
> +emit_null_surface_state(brw, fb, &surf_offset[surf_index]);
>}
>}
> } else {
>const uint32_t surf_index = render_target_start;
> -  emit_null_surface_state(brw, w, h, s, &surf_offset[surf_index]);
> +  emit_null_surface_state(brw, fb, &surf_offset[surf_index]);
> }
>  }
>  
> @@ -1117,11 +1117,7 @@ update_renderbuffer_read_surfaces(struct brw_context 
> *brw)
> 0);
>  
>   } else {
> -emit_null_surface_state(brw,
> -_mesa_geometric_width(fb),
> -_mesa_geometric_height(fb),
> -_mesa_geometric_samples(fb),
> -surf_offset);
> +emit_null_surface_state(brw, fb, surf_offset);
>   }
>}
>  
> @@ -1294,7 +1290,7 @@ brw_upload_ubo_surfaces(struct brw_context *brw, struct 
> gl_program *prog,
>   &ctx->UniformBufferBindings[prog->sh.UniformBlocks[i]->Binding];
>  
>if (binding->BufferObject == ctx->Shared->NullBufferObj) {
> - emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
> + emit_null_surface_state(brw, NULL, &ubo_surf_offsets[i]);
>} else {
>   struct intel_buffer_object *intel_bo =
>  intel_buffer_object(binding->BufferObject);
> @@ -1319,7 +1315,7 @@ brw_upload_ubo_surfaces(struct brw_context *brw, struct 
> gl_program *prog,
>   
> &ctx->ShaderStorageBufferBindings[prog->sh.ShaderStorageBlocks[i]->Binding];
>  
>if (binding->BufferObject == ctx->Shared->NullBufferObj) {
> - emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
> + emit_null_surface_state(brw, NULL, &ssbo_surf_offsets[i]);
>} else {
>   struct intel_buffer_object *intel_bo =
>  intel_buffer_object(binding->BufferObject);
> @@ -1611,7 +1607,7 @@ update_image_surface(struct brw_context *brw,
>}
>  
> } else {
> -  emit_null_surface_state(brw, 1, 1, 1, surf_offset);
> +  emit_null_surface_state(brw, NULL, surf_offset);
>update_default_image_param(brw, u, surface_idx, param);
> }
>  }
> -- 
> 2.14.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 7/7] i965: Stop using wm_prog_data->binding_table.render_target_start.

2017-08-23 Thread Pohjolainen, Topi


Just for future reference, we are addressing common code in src/intel/compiler.
Do we still want to use the i965 prefix?

On Tue, Aug 22, 2017 at 01:58:01PM -0700, Kenneth Graunke wrote:
> Render target surfaces always start at binding table index 0.
> This is required for us to use headerless FB writes, which we
> really want to do.  So, we'll never change that.
> 
> Given that, it's not necessary to look up a wm_prog_data field
> which we already know contains 0.  We can drop the dependency in
> brw_renderbuffer_surfaces (Gen4-5)...which was already confusingly
> missing from gen6_renderbuffer_surfaces.
> ---
>  src/intel/compiler/brw_fs_generator.cpp  |  9 +++--
>  src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 10 +++---
>  2 files changed, 10 insertions(+), 9 deletions(-)
> 
> diff --git a/src/intel/compiler/brw_fs_generator.cpp 
> b/src/intel/compiler/brw_fs_generator.cpp
> index 2ade486705b..c101c4696ef 100644
> --- a/src/intel/compiler/brw_fs_generator.cpp
> +++ b/src/intel/compiler/brw_fs_generator.cpp
> @@ -277,8 +277,13 @@ fs_generator::fire_fb_write(fs_inst *inst,
> else
>msg_control = 
> BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
>  
> -   uint32_t surf_index =
> -  prog_data->binding_table.render_target_start + inst->target;
> +   /* We assume render targets start at 0, because headerless FB write
> +* messages set "Render Target Index" to 0.  Using a different binding
> +* table index would make it impossible to use headerless messages.
> +*/
> +   assert(prog_data->binding_table.render_target_start == 0);
> +
> +   uint32_t surf_index = inst->target;

Could be const.

There was a similar nit in another patch; either way, really nice clean-up and
series:

Reviewed-by: Topi Pohjolainen 

>  
> bool last_render_target = inst->eot ||
>   (prog_data->dual_src_blend && dispatch_width == 
> 16);
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
> b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index 5cfdbe58102..8c901df8e97 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> @@ -990,14 +990,11 @@ update_renderbuffer_surfaces(struct brw_context *brw)
>  {
> const struct gl_context *ctx = &brw->ctx;
>  
> -   /* BRW_NEW_FS_PROG_DATA */
> -   const struct brw_wm_prog_data *wm_prog_data =
> -  brw_wm_prog_data(brw->wm.base.prog_data);
> -
> /* _NEW_BUFFERS | _NEW_COLOR */
> const struct gl_framebuffer *fb = ctx->DrawBuffer;
>  
> -   const unsigned rt_start = wm_prog_data->binding_table.render_target_start;
> +   /* Render targets always start at binding table index 0. */
> +   const unsigned rt_start = 0;
>  
> uint32_t *surf_offsets = brw->wm.base.surf_offset;
>  
> @@ -1025,8 +1022,7 @@ const struct brw_tracked_state 
> brw_renderbuffer_surfaces = {
> .dirty = {
>.mesa = _NEW_BUFFERS |
>_NEW_COLOR,
> -  .brw = BRW_NEW_BATCH |
> - BRW_NEW_FS_PROG_DATA,
> +  .brw = BRW_NEW_BATCH,
> },
> .emit = update_renderbuffer_surfaces,
>  };
> -- 
> 2.14.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v4 6/6] radeonsi: try to re-use previously deleted bindless descriptor slots

2017-08-23 Thread Samuel Pitoiset




On 08/23/2017 07:30 AM, Timothy Arceri wrote:

This is causing piglit regressions for me. For example:

./bin/shader_runner 
tests/spec/arb_bindless_texture/execution/images/multiple-resident-images-reading.shader_test 
-auto -fb


Unexpected GL error: GL_OUT_OF_MEMORY 0x505
(Error at 
/home/tarceri/git/Mesa_arrays_of_arrays_piglit/tests/shaders/shader_runner.c:3560) 


glMakeImageHandleResidentARB error
PIGLIT: {"result": "fail" }


Mmh, you are right, but I can only reproduce with a non-debug build, weird.




On 22/08/17 07:22, Marek Olšák wrote:

Reviewed-by: Marek Olšák 

So let's commit this!

Marek

On Mon, Aug 21, 2017 at 4:50 PM, Samuel Pitoiset
 wrote:

Currently, when the array is full it is resized but it can grow
over and over because we don't try to re-use descriptor slots.

v4: - rebase on top of idalloc changes
v3: - use new idalloc gallium module

Signed-off-by: Samuel Pitoiset 
---
  src/gallium/drivers/radeonsi/si_descriptors.c | 36 
+--

  src/gallium/drivers/radeonsi/si_pipe.h    |  2 ++
  2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c

index f14fce103f..c869dac9bb 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -61,6 +61,7 @@
  #include "gfx9d.h"

  #include "util/hash_table.h"
+#include "util/u_idalloc.h"
  #include "util/u_format.h"
  #include "util/u_memory.h"
  #include "util/u_upload_mgr.h"
@@ -2335,23 +2336,27 @@ static void 
si_init_bindless_descriptors(struct si_context *sctx,

  * considered to be a valid handle.
  */
 sctx->num_bindless_descriptors = 1;
+
+   /* Track which bindless slots are used (or not). */
+   util_idalloc_init(&sctx->bindless_used_slots);
+   util_idalloc_resize(&sctx->bindless_used_slots, num_elements);
+
+   /* Reserve slot 0 because it's an invalid handle for 
bindless. */

+   assert(!util_idalloc_alloc(&sctx->bindless_used_slots));
  }

  static void si_release_bindless_descriptors(struct si_context *sctx)
  {
 si_release_descriptors(&sctx->bindless_descriptors);
+   util_idalloc_fini(&sctx->bindless_used_slots);
  }

-static unsigned
-si_create_bindless_descriptor(struct si_context *sctx, uint32_t 
*desc_list,

- unsigned size)
+static unsigned si_get_first_free_bindless_slot(struct si_context 
*sctx)

  {
 struct si_descriptors *desc = &sctx->bindless_descriptors;
-   unsigned desc_slot, desc_slot_offset;
-
-   /* Reserve a new slot for this bindless descriptor. */
-   desc_slot = sctx->num_bindless_descriptors++;
+   unsigned desc_slot;

+   desc_slot = util_idalloc_alloc(&sctx->bindless_used_slots);
 if (desc_slot >= desc->num_elements) {
 /* The array of bindless descriptors is full, resize 
it. */

 unsigned slot_size = desc->element_dw_size * 4;
@@ -2363,6 +2368,20 @@ si_create_bindless_descriptor(struct 
si_context *sctx, uint32_t *desc_list,

 desc->num_active_slots = new_num_elements;
 }

+   assert(desc_slot);
+   return desc_slot;
+}
+
+static unsigned
+si_create_bindless_descriptor(struct si_context *sctx, uint32_t 
*desc_list,

+ unsigned size)
+{
+   struct si_descriptors *desc = &sctx->bindless_descriptors;
+   unsigned desc_slot, desc_slot_offset;
+
+   /* Find a free slot. */
+   desc_slot = si_get_first_free_bindless_slot(sctx);
+
 /* For simplicity, sampler and image bindless descriptors 
use fixed
  * 16-dword slots for now. Image descriptors only need 
8-dword but this
  * doesn't really matter because no real apps use image 
handles.
@@ -2475,6 +2494,9 @@ static void si_delete_texture_handle(struct 
pipe_context *ctx, uint64_t handle)


 tex_handle = (struct si_texture_handle *)entry->data;

+   /* Allow this descriptor slot to be re-used. */
+   util_idalloc_free(&sctx->bindless_used_slots, 
tex_handle->desc_slot);

+
 pipe_sampler_view_reference(&tex_handle->view, NULL);
 _mesa_hash_table_remove(sctx->tex_handles, entry);
 FREE(tex_handle);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h

index 56c3b08188..8a475d3b05 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -29,6 +29,7 @@
  #include "si_shader.h"

  #include "util/u_dynarray.h"
+#include "util/u_idalloc.h"

  #ifdef PIPE_ARCH_BIG_ENDIAN
  #define SI_BIG_ENDIAN 1
@@ -430,6 +431,7 @@ struct si_context {

 /* Bindless descriptors. */
 struct si_descriptors   bindless_descriptors;
+   struct util_idalloc bindless_used_slots;
 unsigned    num_bindless_descriptors;
 bool    bindless_descriptors_dirty;
 bool

Re: [Mesa-dev] [PATCH v4 6/6] radeonsi: try to re-use previously deleted bindless descriptor slots

2017-08-23 Thread Samuel Pitoiset




On 08/23/2017 09:34 AM, Samuel Pitoiset wrote:



On 08/23/2017 07:30 AM, Timothy Arceri wrote:

This is causing piglit regressions for me. For example:

./bin/shader_runner 
tests/spec/arb_bindless_texture/execution/images/multiple-resident-images-reading.shader_test 
-auto -fb


Unexpected GL error: GL_OUT_OF_MEMORY 0x505
(Error at 
/home/tarceri/git/Mesa_arrays_of_arrays_piglit/tests/shaders/shader_runner.c:3560) 


glMakeImageHandleResidentARB error
PIGLIT: {"result": "fail" }


Mmh, you are right, but I can only reproduce with a non-debug build, weird.


Figured, funny. :-) I will send a patch.






On 22/08/17 07:22, Marek Olšák wrote:

Reviewed-by: Marek Olšák 

So let's commit this!

Marek

On Mon, Aug 21, 2017 at 4:50 PM, Samuel Pitoiset
 wrote:

Currently, when the array is full it is resized but it can grow
over and over because we don't try to re-use descriptor slots.

v4: - rebase on top of idalloc changes
v3: - use new idalloc gallium module

Signed-off-by: Samuel Pitoiset 
---
  src/gallium/drivers/radeonsi/si_descriptors.c | 36 
+--

  src/gallium/drivers/radeonsi/si_pipe.h    |  2 ++
  2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c

index f14fce103f..c869dac9bb 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -61,6 +61,7 @@
  #include "gfx9d.h"

  #include "util/hash_table.h"
+#include "util/u_idalloc.h"
  #include "util/u_format.h"
  #include "util/u_memory.h"
  #include "util/u_upload_mgr.h"
@@ -2335,23 +2336,27 @@ static void 
si_init_bindless_descriptors(struct si_context *sctx,

  * considered to be a valid handle.
  */
 sctx->num_bindless_descriptors = 1;
+
+   /* Track which bindless slots are used (or not). */
+   util_idalloc_init(&sctx->bindless_used_slots);
+   util_idalloc_resize(&sctx->bindless_used_slots, num_elements);
+
+   /* Reserve slot 0 because it's an invalid handle for 
bindless. */

+   assert(!util_idalloc_alloc(&sctx->bindless_used_slots));
  }

  static void si_release_bindless_descriptors(struct si_context *sctx)
  {
 si_release_descriptors(&sctx->bindless_descriptors);
+   util_idalloc_fini(&sctx->bindless_used_slots);
  }

-static unsigned
-si_create_bindless_descriptor(struct si_context *sctx, uint32_t 
*desc_list,

- unsigned size)
+static unsigned si_get_first_free_bindless_slot(struct si_context 
*sctx)

  {
 struct si_descriptors *desc = &sctx->bindless_descriptors;
-   unsigned desc_slot, desc_slot_offset;
-
-   /* Reserve a new slot for this bindless descriptor. */
-   desc_slot = sctx->num_bindless_descriptors++;
+   unsigned desc_slot;

+   desc_slot = util_idalloc_alloc(&sctx->bindless_used_slots);
 if (desc_slot >= desc->num_elements) {
 /* The array of bindless descriptors is full, 
resize it. */

 unsigned slot_size = desc->element_dw_size * 4;
@@ -2363,6 +2368,20 @@ si_create_bindless_descriptor(struct 
si_context *sctx, uint32_t *desc_list,

 desc->num_active_slots = new_num_elements;
 }

+   assert(desc_slot);
+   return desc_slot;
+}
+
+static unsigned
+si_create_bindless_descriptor(struct si_context *sctx, uint32_t 
*desc_list,

+ unsigned size)
+{
+   struct si_descriptors *desc = &sctx->bindless_descriptors;
+   unsigned desc_slot, desc_slot_offset;
+
+   /* Find a free slot. */
+   desc_slot = si_get_first_free_bindless_slot(sctx);
+
 /* For simplicity, sampler and image bindless descriptors 
use fixed
  * 16-dword slots for now. Image descriptors only need 
8-dword but this
  * doesn't really matter because no real apps use image 
handles.
@@ -2475,6 +2494,9 @@ static void si_delete_texture_handle(struct 
pipe_context *ctx, uint64_t handle)


 tex_handle = (struct si_texture_handle *)entry->data;

+   /* Allow this descriptor slot to be re-used. */
+   util_idalloc_free(&sctx->bindless_used_slots, 
tex_handle->desc_slot);

+
 pipe_sampler_view_reference(&tex_handle->view, NULL);
 _mesa_hash_table_remove(sctx->tex_handles, entry);
 FREE(tex_handle);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h

index 56c3b08188..8a475d3b05 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -29,6 +29,7 @@
  #include "si_shader.h"

  #include "util/u_dynarray.h"
+#include "util/u_idalloc.h"

  #ifdef PIPE_ARCH_BIG_ENDIAN
  #define SI_BIG_ENDIAN 1
@@ -430,6 +431,7 @@ struct si_context {

 /* Bindless descriptors. */
 struct si_descriptors   bindless_descriptors;
+   struct util_idalloc bindless_used_slots;
 unsigned    num_bindl

[Mesa-dev] [PATCH] radeonsi: do not assert when reserving bindless slot 0

2017-08-23 Thread Samuel Pitoiset

When assertions were disabled, the compiler removed
the call to util_idalloc_alloc() and the first allocated
bindless slot was 0 which is invalid per the spec.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index f66ecc3e68..c53253ac8d 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -2192,7 +2192,7 @@ static void si_init_bindless_descriptors(struct 
si_context *sctx,
util_idalloc_resize(&sctx->bindless_used_slots, num_elements);
 
/* Reserve slot 0 because it's an invalid handle for bindless. */
-   assert(!util_idalloc_alloc(&sctx->bindless_used_slots));
+   util_idalloc_alloc(&sctx->bindless_used_slots);
 }
 
 static void si_release_bindless_descriptors(struct si_context *sctx)
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 0/5] intel/isl: Set MOCS based on view usage

2017-08-23 Thread Pohjolainen, Topi

On Tue, Aug 01, 2017 at 03:48:29PM -0700, Jason Ekstrand wrote:
> This little series changes things around so that, instead of passing MOCS
> values into ISL, ISL knows how to set them itself.  This allows us to
> centralize some of the decisions about how MOCS gets set for surfaces and
> hopefully, if we ever do anything crazy in the future, we can share it
> between GL and Vulkan.  Unfortunately, surfaces are not the only places
> where MOCS is used.  It also shows up in vertex buffers, index buffers, and
> streamout buffers.  However those are always set to the platform equivalent
> of I915_MOCS_CACHED (and that's not all that liable to change) so they're
> not particularly interesting.
> 
> If people like this approach, I'd like to Cc it to stable for 17.2 because
> it has the side-effect of making Vulkan MOCS a bit more sane.

Looks like there hasn't been any input yet. I think this is clearer and less error
prone. So +1 and series:

Reviewed-by: Topi Pohjolainen 

> 
> Jason Ekstrand (5):
>   intel/isl: Set MOCS based on usage for surface states
>   intel/blorp: Delete the MOCS plumbing
>   i965: Stop passing MOCS information into ISL
>   anv: Stop passing MOCS information into ISL
>   intel/isl: Get rid of the mocs fields in fill/emit_info
> 
>  src/intel/blorp/blorp.h  |  6 ---
>  src/intel/blorp/blorp_genX_exec.h| 37 +++--
>  src/intel/isl/isl.h  | 22 --
>  src/intel/isl/isl_emit_depth_stencil.c   | 12 +++---
>  src/intel/isl/isl_genX_mocs.h| 53 
> 
>  src/intel/isl/isl_surface_state.c|  9 ++--
>  src/intel/vulkan/anv_blorp.c |  3 --
>  src/intel/vulkan/anv_device.c|  1 -
>  src/intel/vulkan/anv_image.c | 12 ++
>  src/intel/vulkan/anv_private.h   |  2 -
>  src/intel/vulkan/genX_cmd_buffer.c   | 13 ++
>  src/intel/vulkan/genX_state.c|  3 --
>  src/mesa/drivers/dri/i965/brw_blorp.c| 15 ---
>  src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 26 ++--
>  14 files changed, 101 insertions(+), 113 deletions(-)
>  create mode 100644 src/intel/isl/isl_genX_mocs.h
> 
> -- 
> 2.5.0.400.gff86faf
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radeonsi: do not assert when reserving bindless slot 0

2017-08-23 Thread Samuel Pitoiset


Both solutions look good to me.

On 08/23/2017 10:06 AM, Michael Schellenberger Costa wrote:

Hi Samuel,

do you want to fully remove the assert, or should this be something like

MAYBE_UNUSED unsigned res = util_idalloc_alloc(&sctx->bindless_used_slots);
assert(res != 0);

--Michael

-Ursprüngliche Nachricht-
Von: mesa-dev [mailto:mesa-dev-boun...@lists.freedesktop.org] Im Auftrag von 
Samuel Pitoiset
Gesendet: Mittwoch, 23. August 2017 09:43
An: mesa-dev@lists.freedesktop.org
Betreff: [Mesa-dev] [PATCH] radeonsi: do not assert when reserving bindless 
slot 0

When assertions were disabled, the compiler removed
the call to util_idalloc_alloc() and the first allocated
bindless slot was 0 which is invalid per the spec.

Signed-off-by: Samuel Pitoiset 
---
  src/gallium/drivers/radeonsi/si_descriptors.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index f66ecc3e68..c53253ac8d 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -2192,7 +2192,7 @@ static void si_init_bindless_descriptors(struct 
si_context *sctx,
util_idalloc_resize(&sctx->bindless_used_slots, num_elements);
  
  	/* Reserve slot 0 because it's an invalid handle for bindless. */

-   assert(!util_idalloc_alloc(&sctx->bindless_used_slots));
+   util_idalloc_alloc(&sctx->bindless_used_slots);
  }
  
  static void si_release_bindless_descriptors(struct si_context *sctx)



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] i965: Issue performance warnings when growing the program cache

2017-08-23 Thread Chris Wilson

Quoting Kenneth Graunke (2017-08-22 21:47:54)
> This involves a bunch of unnecessary copying, a batch flush, and
> state re-emission.

> ---
>  src/mesa/drivers/dri/i965/brw_program_cache.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_program_cache.c 
> b/src/mesa/drivers/dri/i965/brw_program_cache.c
> index 4dcfd5234df..e9706be8961 100644
> --- a/src/mesa/drivers/dri/i965/brw_program_cache.c
> +++ b/src/mesa/drivers/dri/i965/brw_program_cache.c
> @@ -217,6 +217,9 @@ brw_cache_new_bo(struct brw_cache *cache, uint32_t 
> new_size)
> struct brw_context *brw = cache->brw;
> struct brw_bo *new_bo;
>  
> +   perf_debug("Copying to larger program cache: %zu kB -> %u kB\n",
> +  cache->bo->size / 1024, new_size / 1024);

Hmm, z -> size_t but bo->size is uint64_t, so sadly we need "%"PRIu64
-Chris
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] radeonsi: minor cleanups in si_make_{texture, image}_handle_resident()

2017-08-23 Thread Samuel Pitoiset

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 446c52e747..4a7e0dc53a 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -2278,7 +2278,6 @@ static void si_invalidate_bindless_buf_desc(struct 
si_context *sctx,
si_set_buf_desc_address(buf, offset, &desc_list[0]);
 
*desc_dirty = true;
-   sctx->bindless_descriptors_dirty = true;
}
 }
 
@@ -2390,12 +2389,7 @@ static void si_make_texture_handle_resident(struct 
pipe_context *ctx,
p_atomic_read(&rtex->framebuffers_bound))
sctx->need_check_render_feedback = true;
 
-   /* Re-upload the descriptor if it has been updated
-* while it wasn't resident.
-*/
si_update_resident_texture_descriptor(sctx, tex_handle);
-   if (tex_handle->desc_dirty)
-   sctx->bindless_descriptors_dirty = true;
} else {
si_invalidate_bindless_buf_desc(sctx,
tex_handle->desc_slot,
@@ -2404,6 +2398,12 @@ static void si_make_texture_handle_resident(struct 
pipe_context *ctx,

&tex_handle->desc_dirty);
}
 
+   /* Re-upload the descriptor if it has been updated while it
+* wasn't resident.
+*/
+   if (tex_handle->desc_dirty)
+   sctx->bindless_descriptors_dirty = true;
+
/* Add the texture handle to the per-context list. */
util_dynarray_append(&sctx->resident_tex_handles,
 struct si_texture_handle *, tex_handle);
@@ -2525,13 +2525,7 @@ static void si_make_image_handle_resident(struct 
pipe_context *ctx,
p_atomic_read(&rtex->framebuffers_bound))
sctx->need_check_render_feedback = true;
 
-   /* Re-upload the descriptor if it has been updated
-* while it wasn't resident.
-*/
si_update_resident_image_descriptor(sctx, img_handle);
-   if (img_handle->desc_dirty)
-   sctx->bindless_descriptors_dirty = true;
-
} else {
si_invalidate_bindless_buf_desc(sctx,
img_handle->desc_slot,
@@ -2540,6 +2534,12 @@ static void si_make_image_handle_resident(struct 
pipe_context *ctx,

&img_handle->desc_dirty);
}
 
+   /* Re-upload the descriptor if it has been updated while it
+* wasn't resident.
+*/
+   if (img_handle->desc_dirty)
+   sctx->bindless_descriptors_dirty = true;
+
/* Add the image handle to the per-context list. */
util_dynarray_append(&sctx->resident_img_handles,
 struct si_image_handle *, img_handle);
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] radeonsi: rename some bindless-related helper functions

2017-08-23 Thread Samuel Pitoiset

I think it makes more sense.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 42 +--
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 4a7e0dc53a..3fdfdbbec7 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -1866,7 +1866,7 @@ static void si_upload_bindless_descriptors(struct 
si_context *sctx)
 }
 
 /* Update mutable image descriptor fields of all resident textures. */
-static void si_update_resident_texture_descriptor(struct si_context *sctx,
+static void si_update_bindless_texture_descriptor(struct si_context *sctx,
  struct si_texture_handle 
*tex_handle)
 {
struct si_sampler_view *sview = (struct si_sampler_view 
*)tex_handle->view;
@@ -1888,7 +1888,7 @@ static void si_update_resident_texture_descriptor(struct 
si_context *sctx,
}
 }
 
-static void si_update_resident_image_descriptor(struct si_context *sctx,
+static void si_update_bindless_image_descriptor(struct si_context *sctx,
struct si_image_handle 
*img_handle)
 {
struct si_descriptors *desc = &sctx->bindless_descriptors;
@@ -1915,12 +1915,12 @@ static void 
si_update_all_resident_texture_descriptors(struct si_context *sctx)
 {
util_dynarray_foreach(&sctx->resident_tex_handles,
  struct si_texture_handle *, tex_handle) {
-   si_update_resident_texture_descriptor(sctx, *tex_handle);
+   si_update_bindless_texture_descriptor(sctx, *tex_handle);
}
 
util_dynarray_foreach(&sctx->resident_img_handles,
  struct si_image_handle *, img_handle) {
-   si_update_resident_image_descriptor(sctx, *img_handle);
+   si_update_bindless_image_descriptor(sctx, *img_handle);
}
 
si_upload_bindless_descriptors(sctx);
@@ -2253,11 +2253,11 @@ si_create_bindless_descriptor(struct si_context *sctx, 
uint32_t *desc_list,
return desc_slot;
 }
 
-static void si_invalidate_bindless_buf_desc(struct si_context *sctx,
-   unsigned desc_slot,
-   struct pipe_resource *resource,
-   uint64_t offset,
-   bool *desc_dirty)
+static void si_update_bindless_buffer_descriptor(struct si_context *sctx,
+unsigned desc_slot,
+struct pipe_resource *resource,
+uint64_t offset,
+bool *desc_dirty)
 {
struct si_descriptors *desc = &sctx->bindless_descriptors;
struct r600_resource *buf = r600_resource(resource);
@@ -2389,13 +2389,13 @@ static void si_make_texture_handle_resident(struct 
pipe_context *ctx,
p_atomic_read(&rtex->framebuffers_bound))
sctx->need_check_render_feedback = true;
 
-   si_update_resident_texture_descriptor(sctx, tex_handle);
+   si_update_bindless_texture_descriptor(sctx, tex_handle);
} else {
-   si_invalidate_bindless_buf_desc(sctx,
-   tex_handle->desc_slot,
-   sview->base.texture,
-   
sview->base.u.buf.offset,
-   
&tex_handle->desc_dirty);
+   si_update_bindless_buffer_descriptor(sctx,
+
tex_handle->desc_slot,
+
sview->base.texture,
+
sview->base.u.buf.offset,
+
&tex_handle->desc_dirty);
}
 
/* Re-upload the descriptor if it has been updated while it
@@ -2525,13 +2525,13 @@ static void si_make_image_handle_resident(struct 
pipe_context *ctx,
p_atomic_read(&rtex->framebuffers_bound))
sctx->need_check_render_feedback = true;
 
-   si_update_resident_image_descriptor(sctx, img_handle);
+   si_update_bindless_image_descriptor(sctx, img_handle);
} else {
-   si_invalidate_bindless_buf_desc(sctx,
-   img_handle->desc_slot,
-   view->resource,
-

Re: [Mesa-dev] [PATCH 8/8] glsl: stop adding pointers from bindless structs to the cache

2017-08-23 Thread Samuel Pitoiset


Assuming this doesn't introduce regressions, series is:

Reviewed-by: Samuel Pitoiset 

On 08/14/2017 01:49 AM, Timothy Arceri wrote:

This is so we always create reproducible cache entries. Consistency
is required for verification of any third party distributed shaders.
---
  src/compiler/glsl/shader_cache.cpp | 12 
  1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/compiler/glsl/shader_cache.cpp 
b/src/compiler/glsl/shader_cache.cpp
index aa63bdcf01..aa6c067d04 100644
--- a/src/compiler/glsl/shader_cache.cpp
+++ b/src/compiler/glsl/shader_cache.cpp
@@ -1212,32 +1212,34 @@ write_shader_metadata(struct blob *metadata, 
gl_linked_shader *shader)
  sizeof(glprog->SamplerUnits));
 blob_write_bytes(metadata, glprog->sh.SamplerTargets,
  sizeof(glprog->sh.SamplerTargets));
 blob_write_uint32(metadata, glprog->ShadowSamplers);
  
 blob_write_bytes(metadata, glprog->sh.ImageAccess,

  sizeof(glprog->sh.ImageAccess));
 blob_write_bytes(metadata, glprog->sh.ImageUnits,
  sizeof(glprog->sh.ImageUnits));
  
+   size_t ptr_size = sizeof(GLvoid *);

+
 blob_write_uint32(metadata, glprog->sh.NumBindlessSamplers);
 blob_write_uint32(metadata, glprog->sh.HasBoundBindlessSampler);
 for (i = 0; i < glprog->sh.NumBindlessSamplers; i++) {
blob_write_bytes(metadata, &glprog->sh.BindlessSamplers[i],
-   sizeof(struct gl_bindless_sampler));
+   sizeof(struct gl_bindless_sampler) - ptr_size);
 }
  
 blob_write_uint32(metadata, glprog->sh.NumBindlessImages);

 blob_write_uint32(metadata, glprog->sh.HasBoundBindlessImage);
 for (i = 0; i < glprog->sh.NumBindlessImages; i++) {
blob_write_bytes(metadata, &glprog->sh.BindlessImages[i],
-   sizeof(struct gl_bindless_image));
+   sizeof(struct gl_bindless_image) - ptr_size);
 }
  
 write_shader_parameters(metadata, glprog->Parameters);

  }
  
  static void

  read_shader_metadata(struct blob_reader *metadata,
   struct gl_program *glprog,
   gl_linked_shader *linked)
  {
@@ -1251,43 +1253,45 @@ read_shader_metadata(struct blob_reader *metadata,
 sizeof(glprog->SamplerUnits));
 blob_copy_bytes(metadata, (uint8_t *) glprog->sh.SamplerTargets,
 sizeof(glprog->sh.SamplerTargets));
 glprog->ShadowSamplers = blob_read_uint32(metadata);
  
 blob_copy_bytes(metadata, (uint8_t *) glprog->sh.ImageAccess,

 sizeof(glprog->sh.ImageAccess));
 blob_copy_bytes(metadata, (uint8_t *) glprog->sh.ImageUnits,
 sizeof(glprog->sh.ImageUnits));
  
+   size_t ptr_size = sizeof(GLvoid *);

+
 glprog->sh.NumBindlessSamplers = blob_read_uint32(metadata);
 glprog->sh.HasBoundBindlessSampler = blob_read_uint32(metadata);
 if (glprog->sh.NumBindlessSamplers > 0) {
glprog->sh.BindlessSamplers =
   rzalloc_array(glprog, gl_bindless_sampler,
 glprog->sh.NumBindlessSamplers);
  
for (i = 0; i < glprog->sh.NumBindlessSamplers; i++) {

   blob_copy_bytes(metadata, (uint8_t *) 
&glprog->sh.BindlessSamplers[i],
- sizeof(struct gl_bindless_sampler));
+ sizeof(struct gl_bindless_sampler) - ptr_size);
}
 }
  
 glprog->sh.NumBindlessImages = blob_read_uint32(metadata);

 glprog->sh.HasBoundBindlessImage = blob_read_uint32(metadata);
 if (glprog->sh.NumBindlessImages > 0) {
glprog->sh.BindlessImages =
   rzalloc_array(glprog, gl_bindless_image,
 glprog->sh.NumBindlessImages);
  
for (i = 0; i < glprog->sh.NumBindlessImages; i++) {

   blob_copy_bytes(metadata, (uint8_t *) &glprog->sh.BindlessImages[i],
-sizeof(struct gl_bindless_image));
+sizeof(struct gl_bindless_image) - ptr_size);
}
 }
  
 glprog->Parameters = _mesa_new_parameter_list();

 read_shader_parameters(metadata, glprog->Parameters);
  }
  
  static void

  create_binding_str(const char *key, unsigned value, void *closure)
  {


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radeonsi: do not assert when reserving bindless slot 0

2017-08-23 Thread Timothy Arceri


On 23/08/17 18:10, Samuel Pitoiset wrote:

Both solutions look good to me.

On 08/23/2017 10:06 AM, Michael Schellenberger Costa wrote:

Hi Samuel,

do you want to fully remove the assert, or should this be something of the 
kind:


MAYBE_UNUSED unsigned res = 
util_idalloc_alloc(&sctx->bindless_used_slots);

assert(res != 0);



I was going to suggest this also. With that:

Reviewed-by: Timothy Arceri 



--Michael

-Ursprüngliche Nachricht-
Von: mesa-dev [mailto:mesa-dev-boun...@lists.freedesktop.org] Im 
Auftrag von Samuel Pitoiset

Gesendet: Mittwoch, 23. August 2017 09:43
An: mesa-dev@lists.freedesktop.org
Betreff: [Mesa-dev] [PATCH] radeonsi: do not assert when reserving 
bindless slot 0


When assertions were disabled, the compiler removed
the call to util_idalloc_alloc() and the first allocated
bindless slot was 0 which is invalid per the spec.

Signed-off-by: Samuel Pitoiset 
---
  src/gallium/drivers/radeonsi/si_descriptors.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c

index f66ecc3e68..c53253ac8d 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -2192,7 +2192,7 @@ static void si_init_bindless_descriptors(struct 
si_context *sctx,

  util_idalloc_resize(&sctx->bindless_used_slots, num_elements);
  /* Reserve slot 0 because it's an invalid handle for bindless. */
-assert(!util_idalloc_alloc(&sctx->bindless_used_slots));
+util_idalloc_alloc(&sctx->bindless_used_slots);
  }
  static void si_release_bindless_descriptors(struct si_context *sctx)


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] radeonsi: rename some bindless-related helper functions

2017-08-23 Thread Marek Olšák

For the series:

Reviewed-by: Marek Olšák 

Marek

On Wed, Aug 23, 2017 at 10:59 AM, Samuel Pitoiset
 wrote:
> I think it makes more sense.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/drivers/radeonsi/si_descriptors.c | 42 
> +--
>  1 file changed, 21 insertions(+), 21 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
> b/src/gallium/drivers/radeonsi/si_descriptors.c
> index 4a7e0dc53a..3fdfdbbec7 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -1866,7 +1866,7 @@ static void si_upload_bindless_descriptors(struct 
> si_context *sctx)
>  }
>
>  /* Update mutable image descriptor fields of all resident textures. */
> -static void si_update_resident_texture_descriptor(struct si_context *sctx,
> +static void si_update_bindless_texture_descriptor(struct si_context *sctx,
>   struct si_texture_handle 
> *tex_handle)
>  {
> struct si_sampler_view *sview = (struct si_sampler_view 
> *)tex_handle->view;
> @@ -1888,7 +1888,7 @@ static void 
> si_update_resident_texture_descriptor(struct si_context *sctx,
> }
>  }
>
> -static void si_update_resident_image_descriptor(struct si_context *sctx,
> +static void si_update_bindless_image_descriptor(struct si_context *sctx,
> struct si_image_handle 
> *img_handle)
>  {
> struct si_descriptors *desc = &sctx->bindless_descriptors;
> @@ -1915,12 +1915,12 @@ static void 
> si_update_all_resident_texture_descriptors(struct si_context *sctx)
>  {
> util_dynarray_foreach(&sctx->resident_tex_handles,
>   struct si_texture_handle *, tex_handle) {
> -   si_update_resident_texture_descriptor(sctx, *tex_handle);
> +   si_update_bindless_texture_descriptor(sctx, *tex_handle);
> }
>
> util_dynarray_foreach(&sctx->resident_img_handles,
>   struct si_image_handle *, img_handle) {
> -   si_update_resident_image_descriptor(sctx, *img_handle);
> +   si_update_bindless_image_descriptor(sctx, *img_handle);
> }
>
> si_upload_bindless_descriptors(sctx);
> @@ -2253,11 +2253,11 @@ si_create_bindless_descriptor(struct si_context 
> *sctx, uint32_t *desc_list,
> return desc_slot;
>  }
>
> -static void si_invalidate_bindless_buf_desc(struct si_context *sctx,
> -   unsigned desc_slot,
> -   struct pipe_resource *resource,
> -   uint64_t offset,
> -   bool *desc_dirty)
> +static void si_update_bindless_buffer_descriptor(struct si_context *sctx,
> +unsigned desc_slot,
> +struct pipe_resource 
> *resource,
> +uint64_t offset,
> +bool *desc_dirty)
>  {
> struct si_descriptors *desc = &sctx->bindless_descriptors;
> struct r600_resource *buf = r600_resource(resource);
> @@ -2389,13 +2389,13 @@ static void si_make_texture_handle_resident(struct 
> pipe_context *ctx,
> p_atomic_read(&rtex->framebuffers_bound))
> sctx->need_check_render_feedback = true;
>
> -   si_update_resident_texture_descriptor(sctx, 
> tex_handle);
> +   si_update_bindless_texture_descriptor(sctx, 
> tex_handle);
> } else {
> -   si_invalidate_bindless_buf_desc(sctx,
> -   tex_handle->desc_slot,
> -   sview->base.texture,
> -   
> sview->base.u.buf.offset,
> -   
> &tex_handle->desc_dirty);
> +   si_update_bindless_buffer_descriptor(sctx,
> +
> tex_handle->desc_slot,
> +
> sview->base.texture,
> +
> sview->base.u.buf.offset,
> +
> &tex_handle->desc_dirty);
> }
>
> /* Re-upload the descriptor if it has been updated while it
> @@ -2525,13 +2525,13 @@ static void si_make_image_handle_resident(struct 
> pipe_context *ctx,
> p_atomic_read(&rtex->framebuffers_bound))
> sctx->need_check_render_feedback = true;
>
> -   si_update_resident_image_descriptor(sctx, img_handle);
> +

Re: [Mesa-dev] [PATCH] radeonsi: do not assert when reserving bindless slot 0

2017-08-23 Thread Marek Olšák

Sounds good to me.

Marek

On Wed, Aug 23, 2017 at 11:39 AM, Timothy Arceri  wrote:
> On 23/08/17 18:10, Samuel Pitoiset wrote:
>>
>> Both solutions look good to me.
>>
>> On 08/23/2017 10:06 AM, Michael Schellenberger Costa wrote:
>>>
>>> Hi Samuel,
>>>
>>> do you want to fully remove the assert, or should this be something of the
>>> kind:
>>>
>>> MAYBE_UNUSED unsigned res =
>>> util_idalloc_alloc(&sctx->bindless_used_slots);
>>> assert(res != 0);
>>>
>
> I was going to suggest this also. With that:
>
> Reviewed-by: Timothy Arceri 
>
>
>
>>> --Michael
>>>
>>> -Ursprüngliche Nachricht-
>>> Von: mesa-dev [mailto:mesa-dev-boun...@lists.freedesktop.org] Im Auftrag
>>> von Samuel Pitoiset
>>> Gesendet: Mittwoch, 23. August 2017 09:43
>>> An: mesa-dev@lists.freedesktop.org
>>> Betreff: [Mesa-dev] [PATCH] radeonsi: do not assert when reserving
>>> bindless slot 0
>>>
>>> When assertions were disabled, the compiler removed
>>> the call to util_idalloc_alloc() and the first allocated
>>> bindless slot was 0 which is invalid per the spec.
>>>
>>> Signed-off-by: Samuel Pitoiset 
>>> ---
>>>   src/gallium/drivers/radeonsi/si_descriptors.c | 2 +-
>>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>>
>>> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c
>>> b/src/gallium/drivers/radeonsi/si_descriptors.c
>>> index f66ecc3e68..c53253ac8d 100644
>>> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
>>> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
>>> @@ -2192,7 +2192,7 @@ static void si_init_bindless_descriptors(struct
>>> si_context *sctx,
>>>   util_idalloc_resize(&sctx->bindless_used_slots, num_elements);
>>>   /* Reserve slot 0 because it's an invalid handle for bindless. */
>>> -assert(!util_idalloc_alloc(&sctx->bindless_used_slots));
>>> +util_idalloc_alloc(&sctx->bindless_used_slots);
>>>   }
>>>   static void si_release_bindless_descriptors(struct si_context *sctx)
>>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] gallium/docs: improve docs for SAMPLE_POS, SAMPLE_INFO, TXQS, MSAA semantics

2017-08-23 Thread Marek Olšák

On Wed, Aug 23, 2017 at 2:49 AM, Roland Scheidegger  wrote:
> Am 23.08.2017 um 01:59 schrieb Marek Olšák:
>> On Wed, Aug 23, 2017 at 12:30 AM, Roland Scheidegger  
>> wrote:
>>> Am 22.08.2017 um 17:15 schrieb Marek Olšák:
 On Sun, Aug 20, 2017 at 12:32 AM, Roland Scheidegger  
 wrote:
> Am 19.08.2017 um 21:32 schrieb Marek Olšák:
>> How about we remove all opcodes that are unused? Like:
>>
>> SAMPLE_POS
>> SAMPLE_INFO
>> SAMPLE
>> SAMPLE_I
>> SAMPLE_I_MS
>> SAMPLE_B
>> SAMPLE_C
>> SAMPLE_C_LZ
>> SAMPLE_D
>> SAMPLE_L
>> GATHER4
>> SVIEWINFO
> These are all d3d10 opcodes, and we need them (llvmpipe supports all of
> them with the exception of sample_pos and sample_info, right now). (It's

 SAMPLE_INFO is almost the same as TXQS and given the current state of
 driver support, it would be better to remove SAMPLE_INFO and keep
 TXQS.

 SAMPLE_INFO returns (samples, 0, 0, 0), while TXQS returns (samples,
 undef, undef, undef).

 There is also RESQ, which returns (w, h, d|layers, samples).

>>>
>>> They take different register types, however.
>>
>> Most instructions support multiple register types. MOV supports TEMP,
>> CONST, IN, OUT. LOAD supports IMAGE, BUFFER, and in the future maybe
>> also CONSTBUF and SAMP.
>>
> That's true, but there aren't really any opcodes which could take either
> sampler view reg file or sampler. Albeit I suppose it would be doable.
> Though it looks to me like you could easily ditch TXQS in favor of RESQ
> too then...

and TXF could be ditched in favor of LOAD.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 04/47] glsl: Add "built-in" functions to do eq(fp64, fp64)

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/builtin_float64.h | 128 
 src/compiler/glsl/builtin_functions.cpp |   4 +
 src/compiler/glsl/builtin_functions.h   |   3 +
 src/compiler/glsl/float64.glsl  |  46 
 src/compiler/glsl/glcpp/glcpp-parse.y   |   1 +
 5 files changed, 182 insertions(+)

diff --git a/src/compiler/glsl/builtin_float64.h 
b/src/compiler/glsl/builtin_float64.h
index d41d114b84..9fc716871d 100644
--- a/src/compiler/glsl/builtin_float64.h
+++ b/src/compiler/glsl/builtin_float64.h
@@ -138,3 +138,131 @@ fsign64(void *mem_ctx, builtin_available_predicate avail)
sig->replace_parameters(&sig_parameters);
return sig;
 }
+ir_function_signature *
+extractFloat64FracLo(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::uint_type, avail);
+   ir_factory body(&sig->body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r0026 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r0026);
+   ir_swizzle *const r0027 = swizzle_x(r0026);
+   body.emit(ret(r0027));
+
+   sig->replace_parameters(&sig_parameters);
+   return sig;
+}
+ir_function_signature *
+extractFloat64FracHi(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::uint_type, avail);
+   ir_factory body(&sig->body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r0028 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r0028);
+   ir_expression *const r0029 = bit_and(swizzle_y(r0028), 
body.constant(1048575u));
+   body.emit(ret(r0029));
+
+   sig->replace_parameters(&sig_parameters);
+   return sig;
+}
+ir_function_signature *
+extractFloat64Exp(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::int_type, avail);
+   ir_factory body(&sig->body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r002A = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r002A);
+   ir_expression *const r002B = rshift(swizzle_y(r002A), 
body.constant(int(20)));
+   ir_expression *const r002C = bit_and(r002B, body.constant(2047u));
+   ir_expression *const r002D = expr(ir_unop_u2i, r002C);
+   body.emit(ret(r002D));
+
+   sig->replace_parameters(&sig_parameters);
+   return sig;
+}
+ir_function_signature *
+feq64(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::bool_type, avail);
+   ir_factory body(&sig->body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r002E = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r002E);
+   ir_variable *const r002F = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"b", ir_var_function_in);
+   sig_parameters.push_tail(r002F);
+   ir_variable *const r0030 = body.make_temp(glsl_type::bool_type, 
"return_value");
+   ir_variable *const r0031 = new(mem_ctx) ir_variable(glsl_type::bool_type, 
"isbNaN", ir_var_auto);
+   body.emit(r0031);
+   ir_variable *const r0032 = new(mem_ctx) ir_variable(glsl_type::bool_type, 
"isaNaN", ir_var_auto);
+   body.emit(r0032);
+   ir_expression *const r0033 = rshift(swizzle_y(r002E), 
body.constant(int(20)));
+   ir_expression *const r0034 = bit_and(r0033, body.constant(2047u));
+   ir_expression *const r0035 = expr(ir_unop_u2i, r0034);
+   ir_expression *const r0036 = equal(r0035, body.constant(int(2047)));
+   ir_expression *const r0037 = bit_and(swizzle_y(r002E), 
body.constant(1048575u));
+   ir_expression *const r0038 = bit_or(r0037, swizzle_x(r002E));
+   ir_expression *const r0039 = nequal(r0038, body.constant(0u));
+   body.emit(assign(r0032, logic_and(r0036, r0039), 0x01));
+
+   ir_expression *const r003A = rshift(swizzle_y(r002F), 
body.constant(int(20)));
+   ir_expression *const r003B = bit_and(r003A, body.constant(2047u));
+   ir_expression *const r003C = expr(ir_unop_u2i, r003B);
+   ir_expression *const r003D = equal(r003C, body.constant(int(2047)));
+   ir_expression *const r003E = bit_and(swizzle_y(r002F), 
body.constant(1048575u));
+   ir_expression *const r003F = bit_or(r003E, swizzle_x(r002F));
+   ir_expression *const r0040 = nequal(r003F, body.constant(0u));
+   body.emit(assign(r0031, logic_and(r003D, r0040), 0x01));
+
+   /* IF CONDITION */
+   ir_expression *const r0042 = logic_or(r0032, r0031);
+   ir_if *f0041 = new(mem_ctx) ir_if(operand(r0042).val);
+   exec_list *const f0041_parent_instructions = body.instructions;
+
+  /* THEN INST

[Mesa-dev] [PATCH 02/47] glsl: Add "built-in" functions to do neg(fp64)

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/builtin_float64.h | 72 +
 src/compiler/glsl/builtin_functions.cpp |  4 ++
 src/compiler/glsl/builtin_functions.h   |  3 ++
 src/compiler/glsl/float64.glsl  | 23 +++
 src/compiler/glsl/glcpp/glcpp-parse.y   |  1 +
 5 files changed, 103 insertions(+)

diff --git a/src/compiler/glsl/builtin_float64.h 
b/src/compiler/glsl/builtin_float64.h
index 7b572314fa..d8c25da825 100644
--- a/src/compiler/glsl/builtin_float64.h
+++ b/src/compiler/glsl/builtin_float64.h
@@ -17,3 +17,75 @@ fabs64(void *mem_ctx, builtin_available_predicate avail)
sig->replace_parameters(&sig_parameters);
return sig;
 }
+ir_function_signature *
+is_nan(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::bool_type, avail);
+   ir_factory body(&sig->body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r000C = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r000C);
+   ir_expression *const r000D = lshift(swizzle_y(r000C), 
body.constant(int(1)));
+   ir_expression *const r000E = lequal(body.constant(4292870144u), r000D);
+   ir_expression *const r000F = nequal(swizzle_x(r000C), body.constant(0u));
+   ir_expression *const r0010 = bit_and(swizzle_y(r000C), 
body.constant(1048575u));
+   ir_expression *const r0011 = nequal(r0010, body.constant(0u));
+   ir_expression *const r0012 = logic_or(r000F, r0011);
+   ir_expression *const r0013 = logic_and(r000E, r0012);
+   body.emit(ret(r0013));
+
+   sig->replace_parameters(&sig_parameters);
+   return sig;
+}
+ir_function_signature *
+fneg64(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail);
+   ir_factory body(&sig->body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r0014 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r0014);
+   ir_variable *const r0015 = body.make_temp(glsl_type::uvec2_type, 
"return_value");
+   /* IF CONDITION */
+   ir_expression *const r0017 = lshift(swizzle_y(r0014), 
body.constant(int(1)));
+   ir_expression *const r0018 = lequal(body.constant(4292870144u), r0017);
+   ir_expression *const r0019 = nequal(swizzle_x(r0014), body.constant(0u));
+   ir_expression *const r001A = bit_and(swizzle_y(r0014), 
body.constant(1048575u));
+   ir_expression *const r001B = nequal(r001A, body.constant(0u));
+   ir_expression *const r001C = logic_or(r0019, r001B);
+   ir_expression *const r001D = logic_and(r0018, r001C);
+   ir_if *f0016 = new(mem_ctx) ir_if(operand(r001D).val);
+   exec_list *const f0016_parent_instructions = body.instructions;
+
+  /* THEN INSTRUCTIONS */
+  body.instructions = &f0016->then_instructions;
+
+  body.emit(assign(r0015, r0014, 0x03));
+
+
+  /* ELSE INSTRUCTIONS */
+  body.instructions = &f0016->else_instructions;
+
+  body.emit(assign(r0014, bit_xor(swizzle_y(r0014), 
body.constant(2147483648u)), 0x02));
+
+  body.emit(assign(r0015, r0014, 0x03));
+
+
+   body.instructions = f0016_parent_instructions;
+   body.emit(f0016);
+
+   /* END IF */
+
+   body.emit(ret(r0015));
+
+   sig->replace_parameters(&sig_parameters);
+   return sig;
+}
diff --git a/src/compiler/glsl/builtin_functions.cpp 
b/src/compiler/glsl/builtin_functions.cpp
index 37cc164a73..0445f733ab 100644
--- a/src/compiler/glsl/builtin_functions.cpp
+++ b/src/compiler/glsl/builtin_functions.cpp
@@ -3328,6 +3328,10 @@ builtin_builder::create_builtins()
 generate_ir::fabs64(mem_ctx, integer_functions_supported),
 NULL);
 
+   add_function("__builtin_fneg64",
+generate_ir::fneg64(mem_ctx, integer_functions_supported),
+NULL);
+
 #undef F
 #undef FI
 #undef FIUD_VEC
diff --git a/src/compiler/glsl/builtin_functions.h 
b/src/compiler/glsl/builtin_functions.h
index deaf640447..adec424292 100644
--- a/src/compiler/glsl/builtin_functions.h
+++ b/src/compiler/glsl/builtin_functions.h
@@ -70,6 +70,9 @@ udivmod64(void *mem_ctx, builtin_available_predicate avail);
 ir_function_signature *
 fabs64(void *mem_ctx, builtin_available_predicate avail);
 
+ir_function_signature *
+fneg64(void *mem_ctx, builtin_available_predicate avail);
+
 }
 
 #endif /* BULITIN_FUNCTIONS_H */
diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl
index d798d7e7ac..cef939323b 100644
--- a/src/compiler/glsl/float64.glsl
+++ b/src/compiler/glsl/float64.glsl
@@ -27,3 +27,26 @@ fabs64(uvec2 a)
a.y &= 0x7FFFFFFFu;
return a;
 }
+
+/* Returns 1 if the double-precision floating-point value `a' is a NaN;
+ * otherwise returns 0.
+ */
+bool
+is_nan(uvec2 a)
+{
+   return (0xFFE00000u <= (a.y<<1)) &&
+

[Mesa-dev] [PATCH 03/47] glsl: Add "built-in" function to do sign(fp64)

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/builtin_float64.h | 49 +
 src/compiler/glsl/builtin_functions.cpp |  4 +++
 src/compiler/glsl/builtin_functions.h   |  3 ++
 src/compiler/glsl/float64.glsl  | 12 
 src/compiler/glsl/glcpp/glcpp-parse.y   |  1 +
 5 files changed, 69 insertions(+)

diff --git a/src/compiler/glsl/builtin_float64.h 
b/src/compiler/glsl/builtin_float64.h
index d8c25da825..d41d114b84 100644
--- a/src/compiler/glsl/builtin_float64.h
+++ b/src/compiler/glsl/builtin_float64.h
@@ -89,3 +89,52 @@ fneg64(void *mem_ctx, builtin_available_predicate avail)
sig->replace_parameters(&sig_parameters);
return sig;
 }
+ir_function_signature *
+fsign64(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail);
+   ir_factory body(&sig->body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r001E = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r001E);
+   ir_variable *const r001F = body.make_temp(glsl_type::uvec2_type, 
"return_value");
+   ir_variable *const r0020 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"retval", ir_var_auto);
+   body.emit(r0020);
+   /* IF CONDITION */
+   ir_expression *const r0022 = lshift(swizzle_y(r001E), 
body.constant(int(1)));
+   ir_expression *const r0023 = bit_or(r0022, swizzle_x(r001E));
+   ir_expression *const r0024 = equal(r0023, body.constant(0u));
+   ir_if *f0021 = new(mem_ctx) ir_if(operand(r0024).val);
+   exec_list *const f0021_parent_instructions = body.instructions;
+
+  /* THEN INSTRUCTIONS */
+  body.instructions = &f0021->then_instructions;
+
+  body.emit(assign(r001F, ir_constant::zero(mem_ctx, 
glsl_type::uvec2_type), 0x03));
+
+
+  /* ELSE INSTRUCTIONS */
+  body.instructions = &f0021->else_instructions;
+
+  body.emit(assign(r0020, body.constant(0u), 0x01));
+
+  ir_expression *const r0025 = bit_and(swizzle_y(r001E), 
body.constant(2147483648u));
+  body.emit(assign(r0020, bit_or(r0025, body.constant(1072693248u)), 
0x02));
+
+  body.emit(assign(r001F, r0020, 0x03));
+
+
+   body.instructions = f0021_parent_instructions;
+   body.emit(f0021);
+
+   /* END IF */
+
+   body.emit(ret(r001F));
+
+   sig->replace_parameters(&sig_parameters);
+   return sig;
+}
diff --git a/src/compiler/glsl/builtin_functions.cpp 
b/src/compiler/glsl/builtin_functions.cpp
index 0445f733ab..1e3101bac3 100644
--- a/src/compiler/glsl/builtin_functions.cpp
+++ b/src/compiler/glsl/builtin_functions.cpp
@@ -3332,6 +3332,10 @@ builtin_builder::create_builtins()
 generate_ir::fneg64(mem_ctx, integer_functions_supported),
 NULL);
 
+   add_function("__builtin_fsign64",
+generate_ir::fsign64(mem_ctx, integer_functions_supported),
+NULL);
+
 #undef F
 #undef FI
 #undef FIUD_VEC
diff --git a/src/compiler/glsl/builtin_functions.h 
b/src/compiler/glsl/builtin_functions.h
index adec424292..7954373154 100644
--- a/src/compiler/glsl/builtin_functions.h
+++ b/src/compiler/glsl/builtin_functions.h
@@ -73,6 +73,9 @@ fabs64(void *mem_ctx, builtin_available_predicate avail);
 ir_function_signature *
 fneg64(void *mem_ctx, builtin_available_predicate avail);
 
+ir_function_signature *
+fsign64(void *mem_ctx, builtin_available_predicate avail);
+
 }
 
 #endif /* BULITIN_FUNCTIONS_H */
diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl
index cef939323b..6561c0e7e6 100644
--- a/src/compiler/glsl/float64.glsl
+++ b/src/compiler/glsl/float64.glsl
@@ -50,3 +50,15 @@ fneg64(uvec2 a)
a.y ^= (1u<<31);
return a;
 }
+
+uvec2
+fsign64(uvec2 a)
+{
+   if ((a.y << 1 | a.x) == 0u)
+  return uvec2(0u, 0u);
+
+   uvec2 retval;
+   retval.x = 0u;
+   retval.y = (a.y & 0x80000000u) | 0x3FF00000u;
+   return retval;
+}
diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y 
b/src/compiler/glsl/glcpp/glcpp-parse.y
index 4486326d86..72011dec68 100644
--- a/src/compiler/glsl/glcpp/glcpp-parse.y
+++ b/src/compiler/glsl/glcpp/glcpp-parse.y
@@ -2451,6 +2451,7 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t 
*parser, intmax_t versio
  add_builtin_define(parser, "__have_builtin_builtin_imod64", 1);
  add_builtin_define(parser, "__have_builtin_builtin_fabs64", 1);
  add_builtin_define(parser, "__have_builtin_builtin_fneg64", 1);
+ add_builtin_define(parser, "__have_builtin_builtin_fsign64", 1);
   }
}
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 00/47] WIP: fp64 support for r600g

2017-08-23 Thread Elie Tournier

From: Elie Tournier 

TL;DR
This series is a "status update" of my work done for adding fp64 support on 
r600g.
One of the biggest issues is a lack of accuracy in the rcp implementation.
Divide relies on rcp.

A branch is available on 
https://github.com/Hopetech/mesa/tree/glsl_arb_gpu_shader_fp64_v3
Comments and reviews are welcome.

Patches 1-18:
These few patches implement the basic fp64 operations.

Patches 19-47:
Lower operations using the builtin functions previously implemented.

Known issues:
- operations on matrix crash the system.
- sqrt and d2f are not accurate enough, so the piglit tests are failing.
  But sqrt and d2f are working correctly using softpipe.
  However, implementing sqrt64 as f2d(sqrt32(d2f(x))) seems to be good enough 
for Piglit.
- rcp is defined as pow(pow(x, -0.5), 2)
  NIR and NV convert the input to fp32, perform an rcp, convert back to fp64 
and perform some Newton-Raphson steps.
  This is not possible with GLSL IR because using fma would generate a massive 
builtin_float64.h file.
- dot is failing
- piglit reports crashes on mod. This is a regression; I'm currently working on 
it.

Piglit result:
Pass: 917 Fail: 211 Crash: 241

Dave Airlie (2):
  glsl/lower_64bit: handle any/all operations
  glsl/lower_64bit: lower d2b using comparison

Elie Tournier (45):
  glsl: Add "built-in" function to do abs(fp64)
  glsl: Add "built-in" functions to do neg(fp64)
  glsl: Add "built-in" function to do sign(fp64)
  glsl: Add "built-in" functions to do eq(fp64, fp64)
  glsl: Add "built-in" functions to do le(fp64, fp64)
  glsl: Add "built-in" functions to do lt(fp64, fp64)
  glsl: Add "built-in" functions to do add(fp64, fp64)
  glsl: Add "built-in" functions to do mul(fp64, fp64)
  glsl: Add "built-in" functions to do fp64_to_uint(fp64)
  glsl: Add "built-in" functions to do uint_to_fp64(uint)
  glsl: Add "built-in" functions to do fp64_to_int(fp64)
  glsl: Add "built-in" functions to do int_to_fp64(int)
  glsl: Add "built-in" functions to do fp64_to_fp32(fp64)
  glsl: Add "built-in" functions to do fp32_to_fp64(fp32)
  glsl: Add "built-in" functions to do sqrt(fp64)
  glsl: Add "built-in" functions to do trunc(fp64)
  glsl: Add "built-in" functions to do round(fp64)
  glsl: Add "built-in" functions to do rcp(fp64)
  glsl: Add a lowering pass for 64-bit float abs()
  glsl: Add a lowering pass for 64-bit float neg()
  glsl: Add a lowering pass for 64-bit float sign()
  glsl: Add a lowering pass for 64-bit float equal()
  glsl: Add a lowering pass for 64-bit float lequal()
  glsl: Add a lowering pass for 64-bit float less()
  glsl: Add a lowering pass for 64-bit float add()
  glsl: Add a lowering pass for 64-bit float mul()
  glsl: Add a lowering pass for 64-bit float d2u()
  glsl: Add a lowering pass for 64-bit float u2d()
  glsl: Add a lowering pass for 64-bit float d2i()
  glsl: Add a lowering pass for 64-bit float i2d()
  glsl: Add a lowering pass for 64-bit float d2f()
  glsl: Add a lowering pass for 64-bit float f2d()
  glsl: Add a lowering pass for 64-bit float sqrt()
  glsl: Add a lowering pass for 64-bit float trunc()
  glsl: Add a lowering pass for 64-bit float round()
  glsl: Add a lowering pass for 64-bit float rcp()
  glsl: Add a lowering pass for 64-bit float gequal()
  glsl: Add a lowering pass for 64-bit float greater()
  glsl: Add a lowering pass for 64-bit float nequal()
  glsl: Add a lowering pass for 64-bit float min()
  glsl: Add a lowering pass for 64-bit float max()
  glsl: Add a lowering pass for 64-bit float floor()
  glsl: Add a lowering pass for 64-bit float ceil()
  glsl: Add a lowering pass for 64-bit float frac()
  glsl: Add a lowering pass for 64-bit float div()

 src/compiler/Makefile.sources  | 3 +-
 src/compiler/glsl/builtin_float64.h| 20310 +++
 src/compiler/glsl/builtin_functions.cpp|72 +
 src/compiler/glsl/builtin_functions.h  |54 +
 src/compiler/glsl/float64.glsl |  1494 ++
 src/compiler/glsl/generate_ir.cpp  | 1 +
 src/compiler/glsl/glcpp/glcpp-parse.y  |18 +
 src/compiler/glsl/glsl_parser_extras.cpp   | 1 +
 src/compiler/glsl/ir_optimization.h|25 +-
 .../glsl/{lower_int64.cpp => lower_64bit.cpp}  |   325 +-
 src/compiler/glsl/lower_instructions.cpp   |   139 +-
 src/mesa/state_tracker/st_extensions.c | 3 +-
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |27 +
 13 files changed, 22442 insertions(+), 30 deletions(-)
 create mode 100644 src/compiler/glsl/builtin_float64.h
 create mode 100644 src/compiler/glsl/float64.glsl
 rename src/compiler/glsl/{lower_int64.cpp => lower_64bit.cpp} (51%)

-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 06/47] glsl: Add "built-in" functions to do lt(fp64, fp64)

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/builtin_float64.h | 155 
 src/compiler/glsl/builtin_functions.cpp |   4 +
 src/compiler/glsl/builtin_functions.h   |   3 +
 src/compiler/glsl/float64.glsl  |  43 +
 src/compiler/glsl/glcpp/glcpp-parse.y   |   1 +
 5 files changed, 206 insertions(+)

diff --git a/src/compiler/glsl/builtin_float64.h 
b/src/compiler/glsl/builtin_float64.h
index aa769d9672..c52e50a6f9 100644
--- a/src/compiler/glsl/builtin_float64.h
+++ b/src/compiler/glsl/builtin_float64.h
@@ -439,3 +439,158 @@ fle64(void *mem_ctx, builtin_available_predicate avail)
sig->replace_parameters(&sig_parameters);
return sig;
 }
+ir_function_signature *
+lt64(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::bool_type, avail);
+   ir_factory body(&sig->body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r0080 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"a0", ir_var_function_in);
+   sig_parameters.push_tail(r0080);
+   ir_variable *const r0081 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"a1", ir_var_function_in);
+   sig_parameters.push_tail(r0081);
+   ir_variable *const r0082 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"b0", ir_var_function_in);
+   sig_parameters.push_tail(r0082);
+   ir_variable *const r0083 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"b1", ir_var_function_in);
+   sig_parameters.push_tail(r0083);
+   ir_expression *const r0084 = less(r0080, r0082);
+   ir_expression *const r0085 = equal(r0080, r0082);
+   ir_expression *const r0086 = less(r0081, r0083);
+   ir_expression *const r0087 = logic_and(r0085, r0086);
+   ir_expression *const r0088 = logic_or(r0084, r0087);
+   body.emit(ret(r0088));
+
+   sig->replace_parameters(&sig_parameters);
+   return sig;
+}
+ir_function_signature *
+flt64(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::bool_type, avail);
+   ir_factory body(&sig->body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r0089 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r0089);
+   ir_variable *const r008A = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"b", ir_var_function_in);
+   sig_parameters.push_tail(r008A);
+   ir_variable *const r008B = body.make_temp(glsl_type::bool_type, 
"return_value");
+   ir_variable *const r008C = new(mem_ctx) ir_variable(glsl_type::bool_type, 
"isbNaN", ir_var_auto);
+   body.emit(r008C);
+   ir_variable *const r008D = new(mem_ctx) ir_variable(glsl_type::bool_type, 
"isaNaN", ir_var_auto);
+   body.emit(r008D);
+   ir_expression *const r008E = rshift(swizzle_y(r0089), 
body.constant(int(20)));
+   ir_expression *const r008F = bit_and(r008E, body.constant(2047u));
+   ir_expression *const r0090 = expr(ir_unop_u2i, r008F);
+   ir_expression *const r0091 = equal(r0090, body.constant(int(2047)));
+   ir_expression *const r0092 = bit_and(swizzle_y(r0089), 
body.constant(1048575u));
+   ir_expression *const r0093 = bit_or(r0092, swizzle_x(r0089));
+   ir_expression *const r0094 = nequal(r0093, body.constant(0u));
+   body.emit(assign(r008D, logic_and(r0091, r0094), 0x01));
+
+   ir_expression *const r0095 = rshift(swizzle_y(r008A), 
body.constant(int(20)));
+   ir_expression *const r0096 = bit_and(r0095, body.constant(2047u));
+   ir_expression *const r0097 = expr(ir_unop_u2i, r0096);
+   ir_expression *const r0098 = equal(r0097, body.constant(int(2047)));
+   ir_expression *const r0099 = bit_and(swizzle_y(r008A), 
body.constant(1048575u));
+   ir_expression *const r009A = bit_or(r0099, swizzle_x(r008A));
+   ir_expression *const r009B = nequal(r009A, body.constant(0u));
+   body.emit(assign(r008C, logic_and(r0098, r009B), 0x01));
+
+   /* IF CONDITION */
+   ir_expression *const r009D = logic_or(r008D, r008C);
+   ir_if *f009C = new(mem_ctx) ir_if(operand(r009D).val);
+   exec_list *const f009C_parent_instructions = body.instructions;
+
+  /* THEN INSTRUCTIONS */
+  body.instructions = &f009C->then_instructions;
+
+  body.emit(assign(r008B, body.constant(false), 0x01));
+
+
+  /* ELSE INSTRUCTIONS */
+  body.instructions = &f009C->else_instructions;
+
+  ir_variable *const r009E = body.make_temp(glsl_type::uint_type, 
"extractFloat64Sign_retval");
+  body.emit(assign(r009E, rshift(swizzle_y(r0089), 
body.constant(int(31))), 0x01));
+
+  ir_variable *const r009F = body.make_temp(glsl_type::uint_type, 
"extractFloat64Sign_retval");
+  body.emit(assign(r009F, rshift(swizzle_y(r008A), 
body.constant(int(31))), 0x01));
+
+  /* IF CONDITION */
+  ir_expression *const r00A1 = nequal(r009E, r009F);
+  ir_if *f00A0 = new(mem_ctx) ir_if(operand(r00A1).val);
+  exec

[Mesa-dev] [PATCH 01/47] glsl: Add "built-in" function to do abs(fp64)

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/Makefile.sources   |  1 +
 src/compiler/glsl/builtin_float64.h | 19 +++
 src/compiler/glsl/builtin_functions.cpp |  4 
 src/compiler/glsl/builtin_functions.h   |  3 +++
 src/compiler/glsl/float64.glsl  | 29 +
 src/compiler/glsl/generate_ir.cpp   |  1 +
 src/compiler/glsl/glcpp/glcpp-parse.y   |  1 +
 7 files changed, 58 insertions(+)
 create mode 100644 src/compiler/glsl/builtin_float64.h
 create mode 100644 src/compiler/glsl/float64.glsl

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index a56a710351..24fa7716de 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -22,6 +22,7 @@ LIBGLSL_FILES = \
glsl/builtin_functions.cpp \
glsl/builtin_functions.h \
glsl/builtin_int64.h \
+   glsl/builtin_float64.h \
glsl/builtin_types.cpp \
glsl/builtin_variables.cpp \
glsl/generate_ir.cpp \
diff --git a/src/compiler/glsl/builtin_float64.h 
b/src/compiler/glsl/builtin_float64.h
new file mode 100644
index 0000000000..7b572314fa
--- /dev/null
+++ b/src/compiler/glsl/builtin_float64.h
@@ -0,0 +1,19 @@
+ir_function_signature *
+fabs64(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail);
+   ir_factory body(&sig->body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r000B = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r000B);
+   body.emit(assign(r000B, bit_and(swizzle_y(r000B), 
body.constant(2147483647u)), 0x02));
+
+   body.emit(ret(r000B));
+
+   sig->replace_parameters(&sig_parameters);
+   return sig;
+}
diff --git a/src/compiler/glsl/builtin_functions.cpp 
b/src/compiler/glsl/builtin_functions.cpp
index 9df9671f13..37cc164a73 100644
--- a/src/compiler/glsl/builtin_functions.cpp
+++ b/src/compiler/glsl/builtin_functions.cpp
@@ -3324,6 +3324,10 @@ builtin_builder::create_builtins()
 generate_ir::umul64(mem_ctx, integer_functions_supported),
 NULL);
 
+   add_function("__builtin_fabs64",
+generate_ir::fabs64(mem_ctx, integer_functions_supported),
+NULL);
+
 #undef F
 #undef FI
 #undef FIUD_VEC
diff --git a/src/compiler/glsl/builtin_functions.h 
b/src/compiler/glsl/builtin_functions.h
index 89ec9b7d5d..deaf640447 100644
--- a/src/compiler/glsl/builtin_functions.h
+++ b/src/compiler/glsl/builtin_functions.h
@@ -67,6 +67,9 @@ sign64(void *mem_ctx, builtin_available_predicate avail);
 ir_function_signature *
 udivmod64(void *mem_ctx, builtin_available_predicate avail);
 
+ir_function_signature *
+fabs64(void *mem_ctx, builtin_available_predicate avail);
+
 }
 
 #endif /* BULITIN_FUNCTIONS_H */
diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl
new file mode 100644
index 0000000000..d798d7e7ac
--- /dev/null
+++ b/src/compiler/glsl/float64.glsl
@@ -0,0 +1,29 @@
+/* Compile with:
+ *
+ * glsl_compiler --version 130 --dump-builder float64.glsl > builtin_float64.h
+ *
+ */
+
+#version 130
+#extension GL_ARB_shader_bit_encoding : enable
+
+/* Software IEEE floating-point rounding mode.
+ * GLSL spec section "4.7.1 Range and Precision":
+ * The rounding mode cannot be set and is undefined.
+ * But here, we are able to define the rounding mode at the compilation time.
+ */
+#define FLOAT_ROUND_NEAREST_EVEN    0
+#define FLOAT_ROUND_TO_ZERO         1
+#define FLOAT_ROUND_DOWN            2
+#define FLOAT_ROUND_UP              3
+#define FLOAT_ROUNDING_MODE         FLOAT_ROUND_NEAREST_EVEN
+
+/* Absolute value of a Float64 :
+ * Clear the sign bit
+ */
+uvec2
+fabs64(uvec2 a)
+{
+   a.y &= 0x7FFFFFFFu;
+   return a;
+}
diff --git a/src/compiler/glsl/generate_ir.cpp 
b/src/compiler/glsl/generate_ir.cpp
index 255b0484f2..e6ece4860f 100644
--- a/src/compiler/glsl/generate_ir.cpp
+++ b/src/compiler/glsl/generate_ir.cpp
@@ -29,5 +29,6 @@ using namespace ir_builder;
 namespace generate_ir {
 
 #include "builtin_int64.h"
+#include "builtin_float64.h"
 
 }
diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y 
b/src/compiler/glsl/glcpp/glcpp-parse.y
index 898a26044f..5b68887c8e 100644
--- a/src/compiler/glsl/glcpp/glcpp-parse.y
+++ b/src/compiler/glsl/glcpp/glcpp-parse.y
@@ -2449,6 +2449,7 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t 
*parser, intmax_t versio
  add_builtin_define(parser, "__have_builtin_builtin_umod64", 1);
  add_builtin_define(parser, "__have_builtin_builtin_idiv64", 1);
  add_builtin_define(parser, "__have_builtin_builtin_imod64", 1);
+ add_builtin_define(parser, "__have_builtin_builtin_fabs64", 1);
   }
}
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 05/47] glsl: Add "built-in" functions to do le(fp64, fp64)

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/builtin_float64.h | 173 
 src/compiler/glsl/builtin_functions.cpp |   4 +
 src/compiler/glsl/builtin_functions.h   |   3 +
 src/compiler/glsl/float64.glsl  |  50 +
 src/compiler/glsl/glcpp/glcpp-parse.y   |   1 +
 5 files changed, 231 insertions(+)

diff --git a/src/compiler/glsl/builtin_float64.h 
b/src/compiler/glsl/builtin_float64.h
index 9fc716871d..aa769d9672 100644
--- a/src/compiler/glsl/builtin_float64.h
+++ b/src/compiler/glsl/builtin_float64.h
@@ -266,3 +266,176 @@ feq64(void *mem_ctx, builtin_available_predicate avail)
sig->replace_parameters(&sig_parameters);
return sig;
 }
+ir_function_signature *
+extractFloat64Sign(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::uint_type, avail);
+   ir_factory body(&sig->body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r004B = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r004B);
+   ir_expression *const r004C = rshift(swizzle_y(r004B), 
body.constant(int(31)));
+   body.emit(ret(r004C));
+
+   sig->replace_parameters(&sig_parameters);
+   return sig;
+}
+ir_function_signature *
+le64(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::bool_type, avail);
+   ir_factory body(&sig->body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r004D = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"a0", ir_var_function_in);
+   sig_parameters.push_tail(r004D);
+   ir_variable *const r004E = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"a1", ir_var_function_in);
+   sig_parameters.push_tail(r004E);
+   ir_variable *const r004F = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"b0", ir_var_function_in);
+   sig_parameters.push_tail(r004F);
+   ir_variable *const r0050 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"b1", ir_var_function_in);
+   sig_parameters.push_tail(r0050);
+   ir_expression *const r0051 = less(r004D, r004F);
+   ir_expression *const r0052 = equal(r004D, r004F);
+   ir_expression *const r0053 = lequal(r004E, r0050);
+   ir_expression *const r0054 = logic_and(r0052, r0053);
+   ir_expression *const r0055 = logic_or(r0051, r0054);
+   body.emit(ret(r0055));
+
+   sig->replace_parameters(&sig_parameters);
+   return sig;
+}
+ir_function_signature *
+fle64(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::bool_type, avail);
+   ir_factory body(&sig->body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r0056 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r0056);
+   ir_variable *const r0057 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"b", ir_var_function_in);
+   sig_parameters.push_tail(r0057);
+   ir_variable *const r0058 = body.make_temp(glsl_type::bool_type, 
"return_value");
+   ir_variable *const r0059 = new(mem_ctx) ir_variable(glsl_type::bool_type, 
"isbNaN", ir_var_auto);
+   body.emit(r0059);
+   ir_variable *const r005A = new(mem_ctx) ir_variable(glsl_type::bool_type, 
"isaNaN", ir_var_auto);
+   body.emit(r005A);
+   ir_expression *const r005B = rshift(swizzle_y(r0056), 
body.constant(int(20)));
+   ir_expression *const r005C = bit_and(r005B, body.constant(2047u));
+   ir_expression *const r005D = expr(ir_unop_u2i, r005C);
+   ir_expression *const r005E = equal(r005D, body.constant(int(2047)));
+   ir_expression *const r005F = bit_and(swizzle_y(r0056), 
body.constant(1048575u));
+   ir_expression *const r0060 = bit_or(r005F, swizzle_x(r0056));
+   ir_expression *const r0061 = nequal(r0060, body.constant(0u));
+   body.emit(assign(r005A, logic_and(r005E, r0061), 0x01));
+
+   ir_expression *const r0062 = rshift(swizzle_y(r0057), 
body.constant(int(20)));
+   ir_expression *const r0063 = bit_and(r0062, body.constant(2047u));
+   ir_expression *const r0064 = expr(ir_unop_u2i, r0063);
+   ir_expression *const r0065 = equal(r0064, body.constant(int(2047)));
+   ir_expression *const r0066 = bit_and(swizzle_y(r0057), 
body.constant(1048575u));
+   ir_expression *const r0067 = bit_or(r0066, swizzle_x(r0057));
+   ir_expression *const r0068 = nequal(r0067, body.constant(0u));
+   body.emit(assign(r0059, logic_and(r0065, r0068), 0x01));
+
+   /* IF CONDITION */
+   ir_expression *const r006A = logic_or(r005A, r0059);
+   ir_if *f0069 = new(mem_ctx) ir_if(operand(r006A).val);
+   exec_list *const f0069_parent_instructions = body.instructions;
+
+  /* THEN INSTRUCTIONS */
+  body.instructions = &f0069->then_instructions;
+
+  body.emit(assign(r0058, body.constant(false), 0x01));

[Mesa-dev] [PATCH 14/47] glsl: Add "built-in" functions to do fp32_to_fp64(fp32)

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/builtin_float64.h | 311 
 src/compiler/glsl/builtin_functions.cpp |   4 +
 src/compiler/glsl/builtin_functions.h   |   3 +
 src/compiler/glsl/float64.glsl  |  38 
 src/compiler/glsl/glcpp/glcpp-parse.y   |   1 +
 5 files changed, 357 insertions(+)

diff --git a/src/compiler/glsl/builtin_float64.h 
b/src/compiler/glsl/builtin_float64.h
index c372de0f4d..62f246ebc1 100644
--- a/src/compiler/glsl/builtin_float64.h
+++ b/src/compiler/glsl/builtin_float64.h
@@ -13665,3 +13665,314 @@ fp64_to_fp32(void *mem_ctx, 
builtin_available_predicate avail)
sig->replace_parameters(&sig_parameters);
return sig;
 }
+ir_function_signature *
+fp32_to_fp64(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail);
+   ir_factory body(&sig->body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r0B83 = new(mem_ctx) ir_variable(glsl_type::float_type, 
"f", ir_var_function_in);
+   sig_parameters.push_tail(r0B83);
+   ir_variable *const r0B84 = body.make_temp(glsl_type::bool_type, 
"execute_flag");
+   body.emit(assign(r0B84, body.constant(true), 0x01));
+
+   ir_variable *const r0B85 = body.make_temp(glsl_type::uvec2_type, 
"return_value");
+   ir_variable *const r0B86 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"aSign", ir_var_auto);
+   body.emit(r0B86);
+   ir_variable *const r0B87 = new(mem_ctx) ir_variable(glsl_type::int_type, 
"aExp", ir_var_auto);
+   body.emit(r0B87);
+   ir_variable *const r0B88 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"aFrac", ir_var_auto);
+   body.emit(r0B88);
+   ir_variable *const r0B89 = body.make_temp(glsl_type::uint_type, 
"floatBitsToUint_retval");
+   body.emit(assign(r0B89, expr(ir_unop_bitcast_f2u, r0B83), 0x01));
+
+   ir_variable *const r0B8A = body.make_temp(glsl_type::uint_type, 
"assignment_tmp");
+   body.emit(assign(r0B8A, bit_and(r0B89, body.constant(8388607u)), 0x01));
+
+   body.emit(assign(r0B88, r0B8A, 0x01));
+
+   ir_variable *const r0B8B = body.make_temp(glsl_type::int_type, 
"assignment_tmp");
+   ir_expression *const r0B8C = rshift(r0B89, body.constant(int(23)));
+   ir_expression *const r0B8D = bit_and(r0B8C, body.constant(255u));
+   body.emit(assign(r0B8B, expr(ir_unop_u2i, r0B8D), 0x01));
+
+   body.emit(assign(r0B87, r0B8B, 0x01));
+
+   body.emit(assign(r0B86, rshift(r0B89, body.constant(int(31))), 0x01));
+
+   /* IF CONDITION */
+   ir_expression *const r0B8F = equal(r0B8B, body.constant(int(255)));
+   ir_if *f0B8E = new(mem_ctx) ir_if(operand(r0B8F).val);
+   exec_list *const f0B8E_parent_instructions = body.instructions;
+
+  /* THEN INSTRUCTIONS */
+  body.instructions = &f0B8E->then_instructions;
+
+  /* IF CONDITION */
+  ir_expression *const r0B91 = nequal(r0B8A, body.constant(0u));
+  ir_if *f0B90 = new(mem_ctx) ir_if(operand(r0B91).val);
+  exec_list *const f0B90_parent_instructions = body.instructions;
+
+ /* THEN INSTRUCTIONS */
+ body.instructions = &f0B90->then_instructions;
+
+ ir_variable *const r0B92 = body.make_temp(glsl_type::uint_type, 
"assignment_tmp");
+ body.emit(assign(r0B92, lshift(r0B89, body.constant(int(9))), 0x01));
+
+ ir_variable *const r0B93 = body.make_temp(glsl_type::uvec2_type, 
"vec_ctor");
+ ir_expression *const r0B94 = lshift(r0B92, body.constant(int(20)));
+ body.emit(assign(r0B93, bit_or(r0B94, body.constant(0u)), 0x01));
+
+ ir_expression *const r0B95 = rshift(r0B92, body.constant(int(12)));
+ ir_expression *const r0B96 = lshift(r0B86, body.constant(int(31)));
+ ir_expression *const r0B97 = bit_or(r0B96, 
body.constant(2146959360u));
+ body.emit(assign(r0B93, bit_or(r0B95, r0B97), 0x02));
+
+ body.emit(assign(r0B85, r0B93, 0x03));
+
+ body.emit(assign(r0B84, body.constant(false), 0x01));
+
+
+ /* ELSE INSTRUCTIONS */
+ body.instructions = &f0B90->else_instructions;
+
+ ir_variable *const r0B98 = new(mem_ctx) 
ir_variable(glsl_type::uvec2_type, "z", ir_var_auto);
+ body.emit(r0B98);
+ ir_expression *const r0B99 = lshift(r0B86, body.constant(int(31)));
+ body.emit(assign(r0B98, add(r0B99, body.constant(2146435072u)), 
0x02));
+
+ body.emit(assign(r0B98, body.constant(0u), 0x01));
+
+ body.emit(assign(r0B85, r0B98, 0x03));
+
+ body.emit(assign(r0B84, body.constant(false), 0x01));
+
+
+  body.instructions = f0B90_parent_instructions;
+  body.emit(f0B90);
+
+  /* END IF */
+
+
+  /* ELSE INSTRUCTIONS */
+  body.instructions = &f0B8E->else_instructions;
+
+  /* IF CONDITION */
+  ir_expression *const r0B9B = equal(r0B8B, body.constant(int(0)));
+  ir_if *f0B9A = new(mem_ctx) ir_if(operand(r0B9B).val);
+  exec_list *const f0B9A_pare

[Mesa-dev] [PATCH 09/47] glsl: Add "built-in" functions to do fp64_to_uint(fp64)

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/builtin_float64.h | 446 
 src/compiler/glsl/builtin_functions.cpp |   4 +
 src/compiler/glsl/builtin_functions.h   |   3 +
 src/compiler/glsl/float64.glsl  |  68 +
 src/compiler/glsl/glcpp/glcpp-parse.y   |   1 +
 5 files changed, 522 insertions(+)

diff --git a/src/compiler/glsl/builtin_float64.h 
b/src/compiler/glsl/builtin_float64.h
index 5213751223..bfe387b0f5 100644
--- a/src/compiler/glsl/builtin_float64.h
+++ b/src/compiler/glsl/builtin_float64.h
@@ -11848,3 +11848,449 @@ fmul64(void *mem_ctx, builtin_available_predicate 
avail)
sig->replace_parameters(&sig_parameters);
return sig;
 }
+ir_function_signature *
+shift64Right(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::void_type, avail);
+   ir_factory body(&sig->body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r0A04 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"a0", ir_var_function_in);
+   sig_parameters.push_tail(r0A04);
+   ir_variable *const r0A05 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"a1", ir_var_function_in);
+   sig_parameters.push_tail(r0A05);
+   ir_variable *const r0A06 = new(mem_ctx) ir_variable(glsl_type::int_type, 
"count", ir_var_function_in);
+   sig_parameters.push_tail(r0A06);
+   ir_variable *const r0A07 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"z0Ptr", ir_var_function_inout);
+   sig_parameters.push_tail(r0A07);
+   ir_variable *const r0A08 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"z1Ptr", ir_var_function_inout);
+   sig_parameters.push_tail(r0A08);
+   ir_variable *const r0A09 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"z1", ir_var_auto);
+   body.emit(r0A09);
+   ir_variable *const r0A0A = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"z0", ir_var_auto);
+   body.emit(r0A0A);
+   ir_variable *const r0A0B = body.make_temp(glsl_type::int_type, 
"assignment_tmp");
+   ir_expression *const r0A0C = neg(r0A06);
+   body.emit(assign(r0A0B, bit_and(r0A0C, body.constant(int(31))), 0x01));
+
+   /* IF CONDITION */
+   ir_expression *const r0A0E = equal(r0A06, body.constant(int(0)));
+   ir_if *f0A0D = new(mem_ctx) ir_if(operand(r0A0E).val);
+   exec_list *const f0A0D_parent_instructions = body.instructions;
+
+  /* THEN INSTRUCTIONS */
+  body.instructions = &f0A0D->then_instructions;
+
+  body.emit(assign(r0A09, r0A05, 0x01));
+
+  body.emit(assign(r0A0A, r0A04, 0x01));
+
+
+  /* ELSE INSTRUCTIONS */
+  body.instructions = &f0A0D->else_instructions;
+
+  /* IF CONDITION */
+  ir_expression *const r0A10 = less(r0A06, body.constant(int(32)));
+  ir_if *f0A0F = new(mem_ctx) ir_if(operand(r0A10).val);
+  exec_list *const f0A0F_parent_instructions = body.instructions;
+
+ /* THEN INSTRUCTIONS */
+ body.instructions = &f0A0F->then_instructions;
+
+ ir_expression *const r0A11 = lshift(r0A04, r0A0B);
+ ir_expression *const r0A12 = rshift(r0A05, r0A06);
+ body.emit(assign(r0A09, bit_or(r0A11, r0A12), 0x01));
+
+ body.emit(assign(r0A0A, rshift(r0A04, r0A06), 0x01));
+
+
+ /* ELSE INSTRUCTIONS */
+ body.instructions = &f0A0F->else_instructions;
+
+ ir_variable *const r0A13 = body.make_temp(glsl_type::uint_type, 
"conditional_tmp");
+ /* IF CONDITION */
+ ir_expression *const r0A15 = less(r0A06, body.constant(int(64)));
+ ir_if *f0A14 = new(mem_ctx) ir_if(operand(r0A15).val);
+ exec_list *const f0A14_parent_instructions = body.instructions;
+
+/* THEN INSTRUCTIONS */
+body.instructions = &f0A14->then_instructions;
+
+ir_expression *const r0A16 = bit_and(r0A06, 
body.constant(int(31)));
+body.emit(assign(r0A13, rshift(r0A04, r0A16), 0x01));
+
+
+/* ELSE INSTRUCTIONS */
+body.instructions = &f0A14->else_instructions;
+
+body.emit(assign(r0A13, body.constant(0u), 0x01));
+
+
+ body.instructions = f0A14_parent_instructions;
+ body.emit(f0A14);
+
+ /* END IF */
+
+ body.emit(assign(r0A09, r0A13, 0x01));
+
+ body.emit(assign(r0A0A, body.constant(0u), 0x01));
+
+
+  body.instructions = f0A0F_parent_instructions;
+  body.emit(f0A0F);
+
+  /* END IF */
+
+
+   body.instructions = f0A0D_parent_instructions;
+   body.emit(f0A0D);
+
+   /* END IF */
+
+   body.emit(assign(r0A08, r0A09, 0x01));
+
+   body.emit(assign(r0A07, r0A0A, 0x01));
+
+   sig->replace_parameters(&sig_parameters);
+   return sig;
+}
+ir_function_signature *
+fp64_to_uint(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::uint_type, avail);
+   ir_factory body(&sig->body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list s

[Mesa-dev] [PATCH 18/47] glsl: Add "built-in" functions to do rcp(fp64)

2017-08-23 Thread Elie Tournier

This algorithm is not accurate.

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/builtin_float64.h | 1829 +++
 src/compiler/glsl/builtin_functions.cpp |4 +
 src/compiler/glsl/builtin_functions.h   |3 +
 src/compiler/glsl/float64.glsl  |   10 +
 src/compiler/glsl/glcpp/glcpp-parse.y   |1 +
 5 files changed, 1847 insertions(+)

diff --git a/src/compiler/glsl/builtin_float64.h 
b/src/compiler/glsl/builtin_float64.h
index 97d6adfd9f..e6b654cb0d 100644
--- a/src/compiler/glsl/builtin_float64.h
+++ b/src/compiler/glsl/builtin_float64.h
@@ -18479,3 +18479,1832 @@ fround64(void *mem_ctx, builtin_available_predicate 
avail)
sig->replace_parameters(&sig_parameters);
return sig;
 }
+ir_function_signature *
+frcp64(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail);
+   ir_factory body(&sig->body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r0F45 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r0F45);
+   ir_variable *const r0F46 = body.make_temp(glsl_type::uint_type, "z1Ptr");
+   body.emit(assign(r0F46, sub(body.constant(2406117202u), swizzle_x(r0F45)), 
0x01));
+
+   ir_expression *const r0F47 = sub(body.constant(3217938081u), 
swizzle_y(r0F45));
+   ir_expression *const r0F48 = less(body.constant(2406117202u), 
swizzle_x(r0F45));
+   ir_expression *const r0F49 = expr(ir_unop_b2i, r0F48);
+   ir_expression *const r0F4A = expr(ir_unop_i2u, r0F49);
+   body.emit(assign(r0F45, sub(r0F47, r0F4A), 0x02));
+
+   body.emit(assign(r0F45, r0F46, 0x01));
+
+   ir_variable *const r0F4B = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"z1", ir_var_auto);
+   body.emit(r0F4B);
+   ir_variable *const r0F4C = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"z0", ir_var_auto);
+   body.emit(r0F4C);
+   ir_expression *const r0F4D = lshift(swizzle_y(r0F45), 
body.constant(int(31)));
+   ir_expression *const r0F4E = rshift(r0F46, body.constant(int(1)));
+   body.emit(assign(r0F4B, bit_or(r0F4D, r0F4E), 0x01));
+
+   body.emit(assign(r0F4C, rshift(swizzle_y(r0F45), body.constant(int(1))), 
0x01));
+
+   body.emit(assign(r0F45, r0F4C, 0x02));
+
+   body.emit(assign(r0F45, r0F4B, 0x01));
+
+   ir_variable *const r0F4F = body.make_temp(glsl_type::bool_type, 
"execute_flag");
+   body.emit(assign(r0F4F, body.constant(true), 0x01));
+
+   ir_variable *const r0F50 = body.make_temp(glsl_type::uvec2_type, 
"return_value");
+   ir_variable *const r0F51 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"zSign", ir_var_auto);
+   body.emit(r0F51);
+   ir_variable *const r0F52 = new(mem_ctx) ir_variable(glsl_type::int_type, 
"bExp", ir_var_auto);
+   body.emit(r0F52);
+   ir_variable *const r0F53 = new(mem_ctx) ir_variable(glsl_type::int_type, 
"aExp", ir_var_auto);
+   body.emit(r0F53);
+   ir_variable *const r0F54 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"bFracHi", ir_var_auto);
+   body.emit(r0F54);
+   ir_variable *const r0F55 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"bFracLo", ir_var_auto);
+   body.emit(r0F55);
+   ir_variable *const r0F56 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"aFracHi", ir_var_auto);
+   body.emit(r0F56);
+   ir_variable *const r0F57 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"aFracLo", ir_var_auto);
+   body.emit(r0F57);
+   ir_variable *const r0F58 = new(mem_ctx) ir_variable(glsl_type::int_type, 
"zExp", ir_var_auto);
+   body.emit(r0F58);
+   ir_variable *const r0F59 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"zFrac2", ir_var_auto);
+   body.emit(r0F59);
+   ir_variable *const r0F5A = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"zFrac1", ir_var_auto);
+   body.emit(r0F5A);
+   ir_variable *const r0F5B = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"zFrac0", ir_var_auto);
+   body.emit(r0F5B);
+   body.emit(assign(r0F5B, body.constant(0u), 0x01));
+
+   body.emit(assign(r0F5A, body.constant(0u), 0x01));
+
+   body.emit(assign(r0F59, body.constant(0u), 0x01));
+
+   ir_variable *const r0F5C = body.make_temp(glsl_type::uint_type, 
"extractFloat64FracLo_retval");
+   body.emit(assign(r0F5C, swizzle_x(r0F45), 0x01));
+
+   body.emit(assign(r0F57, r0F5C, 0x01));
+
+   ir_variable *const r0F5D = body.make_temp(glsl_type::uint_type, 
"extractFloat64FracHi_retval");
+   body.emit(assign(r0F5D, bit_and(r0F4C, body.constant(1048575u)), 0x01));
+
+   body.emit(assign(r0F56, r0F5D, 0x01));
+
+   ir_variable *const r0F5E = body.make_temp(glsl_type::uint_type, 
"extractFloat64FracLo_retval");
+   body.emit(assign(r0F5E, swizzle_x(r0F45), 0x01));
+
+   body.emit(assign(r0F55, r0F5E, 0x01));
+
+   ir_variable *const r0F5F = body.make_temp(glsl_type::uint_type, 
"extractFloat64FracHi_retval");
+   body.emit(assign(r0F5F, bit_and(r0F4C, body.constant(1048575u)), 0x01));
+
+   body.emit(assign(r0F54, r0F5F,

[Mesa-dev] [PATCH 10/47] glsl: Add "built-in" functions to do uint_to_fp64(uint)

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/builtin_float64.h | 259 
 src/compiler/glsl/builtin_functions.cpp |   4 +
 src/compiler/glsl/builtin_functions.h   |   3 +
 src/compiler/glsl/float64.glsl  |  26 
 src/compiler/glsl/glcpp/glcpp-parse.y   |   1 +
 5 files changed, 293 insertions(+)

diff --git a/src/compiler/glsl/builtin_float64.h 
b/src/compiler/glsl/builtin_float64.h
index bfe387b0f5..2c11e463ca 100644
--- a/src/compiler/glsl/builtin_float64.h
+++ b/src/compiler/glsl/builtin_float64.h
@@ -12294,3 +12294,262 @@ fp64_to_uint(void *mem_ctx, 
builtin_available_predicate avail)
sig->replace_parameters(&sig_parameters);
return sig;
 }
+ir_function_signature *
+uint_to_fp64(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail);
+   ir_factory body(&sig->body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r0A5E = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r0A5E);
+   ir_variable *const r0A5F = body.make_temp(glsl_type::uvec2_type, 
"return_value");
+   ir_variable *const r0A60 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"aLow", ir_var_auto);
+   body.emit(r0A60);
+   ir_variable *const r0A61 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"aHigh", ir_var_auto);
+   body.emit(r0A61);
+   /* IF CONDITION */
+   ir_expression *const r0A63 = equal(r0A5E, body.constant(0u));
+   ir_if *f0A62 = new(mem_ctx) ir_if(operand(r0A63).val);
+   exec_list *const f0A62_parent_instructions = body.instructions;
+
+  /* THEN INSTRUCTIONS */
+  body.instructions = &f0A62->then_instructions;
+
+  body.emit(assign(r0A5F, ir_constant::zero(mem_ctx, 
glsl_type::uvec2_type), 0x03));
+
+
+  /* ELSE INSTRUCTIONS */
+  body.instructions = &f0A62->else_instructions;
+
+  ir_variable *const r0A64 = body.make_temp(glsl_type::uint_type, "a");
+  body.emit(assign(r0A64, r0A5E, 0x01));
+
+  ir_variable *const r0A65 = body.make_temp(glsl_type::int_type, 
"return_value");
+  ir_variable *const r0A66 = new(mem_ctx) ir_variable(glsl_type::int_type, 
"shiftCount", ir_var_auto);
+  body.emit(r0A66);
+  /* IF CONDITION */
+  ir_expression *const r0A68 = equal(r0A5E, body.constant(0u));
+  ir_if *f0A67 = new(mem_ctx) ir_if(operand(r0A68).val);
+  exec_list *const f0A67_parent_instructions = body.instructions;
+
+ /* THEN INSTRUCTIONS */
+ body.instructions = &f0A67->then_instructions;
+
+ body.emit(assign(r0A65, body.constant(int(32)), 0x01));
+
+
+ /* ELSE INSTRUCTIONS */
+ body.instructions = &f0A67->else_instructions;
+
+ body.emit(assign(r0A66, body.constant(int(0)), 0x01));
+
+ /* IF CONDITION */
+ ir_expression *const r0A6A = bit_and(r0A5E, 
body.constant(4294901760u));
+ ir_expression *const r0A6B = equal(r0A6A, body.constant(0u));
+ ir_if *f0A69 = new(mem_ctx) ir_if(operand(r0A6B).val);
+ exec_list *const f0A69_parent_instructions = body.instructions;
+
+/* THEN INSTRUCTIONS */
+body.instructions = &f0A69->then_instructions;
+
+body.emit(assign(r0A66, body.constant(int(16)), 0x01));
+
+body.emit(assign(r0A64, lshift(r0A5E, body.constant(int(16))), 
0x01));
+
+
+ body.instructions = f0A69_parent_instructions;
+ body.emit(f0A69);
+
+ /* END IF */
+
+ /* IF CONDITION */
+ ir_expression *const r0A6D = bit_and(r0A64, 
body.constant(4278190080u));
+ ir_expression *const r0A6E = equal(r0A6D, body.constant(0u));
+ ir_if *f0A6C = new(mem_ctx) ir_if(operand(r0A6E).val);
+ exec_list *const f0A6C_parent_instructions = body.instructions;
+
+/* THEN INSTRUCTIONS */
+body.instructions = &f0A6C->then_instructions;
+
+body.emit(assign(r0A66, add(r0A66, body.constant(int(8))), 0x01));
+
+body.emit(assign(r0A64, lshift(r0A64, body.constant(int(8))), 
0x01));
+
+
+ body.instructions = f0A6C_parent_instructions;
+ body.emit(f0A6C);
+
+ /* END IF */
+
+ /* IF CONDITION */
+ ir_expression *const r0A70 = bit_and(r0A64, 
body.constant(4026531840u));
+ ir_expression *const r0A71 = equal(r0A70, body.constant(0u));
+ ir_if *f0A6F = new(mem_ctx) ir_if(operand(r0A71).val);
+ exec_list *const f0A6F_parent_instructions = body.instructions;
+
+/* THEN INSTRUCTIONS */
+body.instructions = &f0A6F->then_instructions;
+
+body.emit(assign(r0A66, add(r0A66, body.constant(int(4))), 0x01));
+
+body.emit(assign(r0A64, lshift(r0A64, body.constant(int(4))), 
0x01));
+
+
+ body.instructions = f0A6F_parent_instructions;
+ body.emit(f0A6F);
+
+ /* END IF */
+
+

[Mesa-dev] [PATCH 12/47] glsl: Add "built-in" functions to do int_to_fp64(int)

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/builtin_float64.h | 334 
 src/compiler/glsl/builtin_functions.cpp |   4 +
 src/compiler/glsl/builtin_functions.h   |   3 +
 src/compiler/glsl/float64.glsl  |  23 +++
 src/compiler/glsl/glcpp/glcpp-parse.y   |   1 +
 5 files changed, 365 insertions(+)

diff --git a/src/compiler/glsl/builtin_float64.h 
b/src/compiler/glsl/builtin_float64.h
index c5640c459a..644407a185 100644
--- a/src/compiler/glsl/builtin_float64.h
+++ b/src/compiler/glsl/builtin_float64.h
@@ -12837,3 +12837,337 @@ fp64_to_int(void *mem_ctx, 
builtin_available_predicate avail)
sig->replace_parameters(&sig_parameters);
return sig;
 }
+ir_function_signature *
+int_to_fp64(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail);
+   ir_factory body(&sig->body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r0ABE = new(mem_ctx) ir_variable(glsl_type::int_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r0ABE);
+   ir_variable *const r0ABF = body.make_temp(glsl_type::uvec2_type, 
"return_value");
+   ir_variable *const r0AC0 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"zSign", ir_var_auto);
+   body.emit(r0AC0);
+   ir_variable *const r0AC1 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"zFrac1", ir_var_auto);
+   body.emit(r0AC1);
+   ir_variable *const r0AC2 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"zFrac0", ir_var_auto);
+   body.emit(r0AC2);
+   body.emit(assign(r0AC2, body.constant(0u), 0x01));
+
+   body.emit(assign(r0AC1, body.constant(0u), 0x01));
+
+   /* IF CONDITION */
+   ir_expression *const r0AC4 = equal(r0ABE, body.constant(int(0)));
+   ir_if *f0AC3 = new(mem_ctx) ir_if(operand(r0AC4).val);
+   exec_list *const f0AC3_parent_instructions = body.instructions;
+
+  /* THEN INSTRUCTIONS */
+  body.instructions = &f0AC3->then_instructions;
+
+  ir_variable *const r0AC5 = new(mem_ctx) 
ir_variable(glsl_type::uvec2_type, "z", ir_var_auto);
+  body.emit(r0AC5);
+  body.emit(assign(r0AC5, body.constant(0u), 0x02));
+
+  body.emit(assign(r0AC5, body.constant(0u), 0x01));
+
+  body.emit(assign(r0ABF, r0AC5, 0x03));
+
+
+  /* ELSE INSTRUCTIONS */
+  body.instructions = &f0AC3->else_instructions;
+
+  ir_expression *const r0AC6 = less(r0ABE, body.constant(int(0)));
+  ir_expression *const r0AC7 = expr(ir_unop_b2i, r0AC6);
+  body.emit(assign(r0AC0, expr(ir_unop_i2u, r0AC7), 0x01));
+
+  ir_variable *const r0AC8 = body.make_temp(glsl_type::uint_type, 
"conditional_tmp");
+  /* IF CONDITION */
+  ir_expression *const r0ACA = less(r0ABE, body.constant(int(0)));
+  ir_if *f0AC9 = new(mem_ctx) ir_if(operand(r0ACA).val);
+  exec_list *const f0AC9_parent_instructions = body.instructions;
+
+ /* THEN INSTRUCTIONS */
+ body.instructions = &f0AC9->then_instructions;
+
+ ir_expression *const r0ACB = neg(r0ABE);
+ body.emit(assign(r0AC8, expr(ir_unop_i2u, r0ACB), 0x01));
+
+
+ /* ELSE INSTRUCTIONS */
+ body.instructions = &f0AC9->else_instructions;
+
+ body.emit(assign(r0AC8, expr(ir_unop_i2u, r0ABE), 0x01));
+
+
+  body.instructions = f0AC9_parent_instructions;
+  body.emit(f0AC9);
+
+  /* END IF */
+
+  ir_variable *const r0ACC = body.make_temp(glsl_type::uint_type, "a");
+  body.emit(assign(r0ACC, r0AC8, 0x01));
+
+  ir_variable *const r0ACD = body.make_temp(glsl_type::int_type, 
"return_value");
+  ir_variable *const r0ACE = new(mem_ctx) ir_variable(glsl_type::int_type, 
"shiftCount", ir_var_auto);
+  body.emit(r0ACE);
+  /* IF CONDITION */
+  ir_expression *const r0AD0 = equal(r0AC8, body.constant(0u));
+  ir_if *f0ACF = new(mem_ctx) ir_if(operand(r0AD0).val);
+  exec_list *const f0ACF_parent_instructions = body.instructions;
+
+ /* THEN INSTRUCTIONS */
+ body.instructions = &f0ACF->then_instructions;
+
+ body.emit(assign(r0ACD, body.constant(int(32)), 0x01));
+
+
+ /* ELSE INSTRUCTIONS */
+ body.instructions = &f0ACF->else_instructions;
+
+ body.emit(assign(r0ACE, body.constant(int(0)), 0x01));
+
+ /* IF CONDITION */
+ ir_expression *const r0AD2 = bit_and(r0AC8, 
body.constant(4294901760u));
+ ir_expression *const r0AD3 = equal(r0AD2, body.constant(0u));
+ ir_if *f0AD1 = new(mem_ctx) ir_if(operand(r0AD3).val);
+ exec_list *const f0AD1_parent_instructions = body.instructions;
+
+/* THEN INSTRUCTIONS */
+body.instructions = &f0AD1->then_instructions;
+
+body.emit(assign(r0ACE, body.constant(int(16)), 0x01));
+
+body.emit(assign(r0ACC, lshift(r0AC8, body.constant(int(16))), 
0x01));
+
+
+ body.instructions = f0AD1_parent_instructions;
+ body.emit(

[Mesa-dev] [PATCH 16/47] glsl: Add "built-in" functions to do trunc(fp64)

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/builtin_float64.h | 121 
 src/compiler/glsl/builtin_functions.cpp |   4 ++
 src/compiler/glsl/builtin_functions.h   |   3 +
 src/compiler/glsl/float64.glsl  |  23 ++
 src/compiler/glsl/glcpp/glcpp-parse.y   |   1 +
 5 files changed, 152 insertions(+)

diff --git a/src/compiler/glsl/builtin_float64.h 
b/src/compiler/glsl/builtin_float64.h
index b16bc5def5..2bce013d0e 100644
--- a/src/compiler/glsl/builtin_float64.h
+++ b/src/compiler/glsl/builtin_float64.h
@@ -18133,3 +18133,124 @@ fsqrt64(void *mem_ctx, builtin_available_predicate 
avail)
sig->replace_parameters(&sig_parameters);
return sig;
 }
+ir_function_signature *
+ftrunc64(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail);
+   ir_factory body(&sig->body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r0F09 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r0F09);
+   ir_variable *const r0F0A = body.make_temp(glsl_type::uvec2_type, 
"return_value");
+   ir_variable *const r0F0B = body.make_temp(glsl_type::int_type, 
"assignment_tmp");
+   ir_expression *const r0F0C = rshift(swizzle_y(r0F09), 
body.constant(int(20)));
+   ir_expression *const r0F0D = bit_and(r0F0C, body.constant(2047u));
+   ir_expression *const r0F0E = expr(ir_unop_u2i, r0F0D);
+   body.emit(assign(r0F0B, add(r0F0E, body.constant(int(-1023))), 0x01));
+
+   /* IF CONDITION */
+   ir_expression *const r0F10 = less(r0F0B, body.constant(int(0)));
+   ir_if *f0F0F = new(mem_ctx) ir_if(operand(r0F10).val);
+   exec_list *const f0F0F_parent_instructions = body.instructions;
+
+  /* THEN INSTRUCTIONS */
+  body.instructions = &f0F0F->then_instructions;
+
+  body.emit(assign(r0F0A, ir_constant::zero(mem_ctx, 
glsl_type::uvec2_type), 0x03));
+
+
+  /* ELSE INSTRUCTIONS */
+  body.instructions = &f0F0F->else_instructions;
+
+  /* IF CONDITION */
+  ir_expression *const r0F12 = greater(r0F0B, body.constant(int(52)));
+  ir_if *f0F11 = new(mem_ctx) ir_if(operand(r0F12).val);
+  exec_list *const f0F11_parent_instructions = body.instructions;
+
+ /* THEN INSTRUCTIONS */
+ body.instructions = &f0F11->then_instructions;
+
+ body.emit(assign(r0F0A, r0F09, 0x03));
+
+
+ /* ELSE INSTRUCTIONS */
+ body.instructions = &f0F11->else_instructions;
+
+ ir_variable *const r0F13 = body.make_temp(glsl_type::int_type, 
"assignment_tmp");
+ body.emit(assign(r0F13, sub(body.constant(int(52)), r0F0B), 0x01));
+
+ ir_variable *const r0F14 = body.make_temp(glsl_type::uint_type, 
"conditional_tmp");
+ /* IF CONDITION */
+ ir_expression *const r0F16 = gequal(r0F13, body.constant(int(32)));
+ ir_if *f0F15 = new(mem_ctx) ir_if(operand(r0F16).val);
+ exec_list *const f0F15_parent_instructions = body.instructions;
+
+/* THEN INSTRUCTIONS */
+body.instructions = &f0F15->then_instructions;
+
+body.emit(assign(r0F14, body.constant(0u), 0x01));
+
+
+/* ELSE INSTRUCTIONS */
+body.instructions = &f0F15->else_instructions;
+
+body.emit(assign(r0F14, lshift(body.constant(4294967295u), r0F13), 
0x01));
+
+
+ body.instructions = f0F15_parent_instructions;
+ body.emit(f0F15);
+
+ /* END IF */
+
+ ir_variable *const r0F17 = body.make_temp(glsl_type::uint_type, 
"conditional_tmp");
+ /* IF CONDITION */
+ ir_expression *const r0F19 = less(r0F13, body.constant(int(33)));
+ ir_if *f0F18 = new(mem_ctx) ir_if(operand(r0F19).val);
+ exec_list *const f0F18_parent_instructions = body.instructions;
+
+/* THEN INSTRUCTIONS */
+body.instructions = &f0F18->then_instructions;
+
+body.emit(assign(r0F17, body.constant(4294967295u), 0x01));
+
+
+/* ELSE INSTRUCTIONS */
+body.instructions = &f0F18->else_instructions;
+
+ir_expression *const r0F1A = add(r0F13, body.constant(int(-32)));
+body.emit(assign(r0F17, lshift(body.constant(4294967295u), r0F1A), 
0x01));
+
+
+ body.instructions = f0F18_parent_instructions;
+ body.emit(f0F18);
+
+ /* END IF */
+
+ ir_variable *const r0F1B = body.make_temp(glsl_type::uvec2_type, 
"vec_ctor");
+ body.emit(assign(r0F1B, bit_and(r0F14, swizzle_x(r0F09)), 0x01));
+
+ body.emit(assign(r0F1B, bit_and(r0F17, swizzle_y(r0F09)), 0x02));
+
+ body.emit(assign(r0F0A, r0F1B, 0x03));
+
+
+  body.instructions = f0F11_parent_instructions;
+  body.emit(f0F11);
+
+  /* END IF */
+
+
+   body.instructions = f0F0F_parent_instructions;
+   body.emit(f0F0F);
+
+   /* END IF */
+
+   body.emit(ret(r0F0A));

[Mesa-dev] [PATCH 19/47] glsl: Add a lowering pass for 64-bit float abs()

2017-08-23 Thread Elie Tournier

Squashed with:
glsl/lower_64bit: fix return type conversion (airlied)

Only do the conversion for the 64-bit types, and add a path
that merges results without conversion.

Signed-off-by: Elie Tournier 
---
 src/compiler/Makefile.sources  |  2 +-
 src/compiler/glsl/ir_optimization.h|  8 +-
 .../glsl/{lower_int64.cpp => lower_64bit.cpp}  | 87 ++
 src/mesa/state_tracker/st_extensions.c |  3 +-
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  9 +++
 5 files changed, 92 insertions(+), 17 deletions(-)
 rename src/compiler/glsl/{lower_int64.cpp => lower_64bit.cpp} (81%)

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 24fa7716de..699133234c 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -92,7 +92,7 @@ LIBGLSL_FILES = \
glsl/lower_distance.cpp \
glsl/lower_if_to_cond_assign.cpp \
glsl/lower_instructions.cpp \
-   glsl/lower_int64.cpp \
+   glsl/lower_64bit.cpp \
glsl/lower_jumps.cpp \
glsl/lower_mat_op_to_vec.cpp \
glsl/lower_noise.cpp \
diff --git a/src/compiler/glsl/ir_optimization.h 
b/src/compiler/glsl/ir_optimization.h
index 573ddb4a8d..6cc0909a80 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -55,11 +55,14 @@
 #define DIV_TO_MUL_RCP(FDIV_TO_MUL_RCP | DDIV_TO_MUL_RCP)
 #define SQRT_TO_ABS_SQRT  0x20
 
-/* Opertaions for lower_64bit_integer_instructions() */
+/* Operations for lower_64bit_integer_instructions()
+ * and lower_64bit_double_instructions()
+ */
 #define MUL64 (1U << 0)
 #define SIGN64(1U << 1)
 #define DIV64 (1U << 2)
 #define MOD64 (1U << 3)
+#define ABS64 (1U << 4)
 
 /**
  * \see class lower_packing_builtins_visitor
@@ -177,4 +180,7 @@ compare_index_block(exec_list *instructions, ir_variable 
*index,
 bool lower_64bit_integer_instructions(exec_list *instructions,
   unsigned what_to_lower);
 
+bool lower_64bit_double_instructions(exec_list *instructions,
+  unsigned what_to_lower);
+
 #endif /* GLSL_IR_OPTIMIZATION_H */
diff --git a/src/compiler/glsl/lower_int64.cpp 
b/src/compiler/glsl/lower_64bit.cpp
similarity index 81%
rename from src/compiler/glsl/lower_int64.cpp
rename to src/compiler/glsl/lower_64bit.cpp
index 2d4fdbb1a5..4887e5538c 100644
--- a/src/compiler/glsl/lower_int64.cpp
+++ b/src/compiler/glsl/lower_64bit.cpp
@@ -22,7 +22,7 @@
  */
 
 /**
- * \file lower_int64.cpp
+ * \file lower_64bit.cpp
  *
  * Lower 64-bit operations to 32-bit operations.  Each 64-bit value is lowered
  * to a uvec2.  For each operation that can be lowered, there is a function
@@ -56,6 +56,9 @@ void expand_source(ir_factory &, ir_rvalue *val, ir_variable 
**expanded_src);
 ir_dereference_variable *compact_destination(ir_factory &,
  const glsl_type *type,
  ir_variable *result[4]);
+ir_dereference_variable *merge_destination(ir_factory &,
+   const glsl_type *type,
+   ir_variable *result[4]);
 
 ir_rvalue *lower_op_to_function_call(ir_instruction *base_ir,
  ir_expression *ir,
@@ -132,7 +135,7 @@ private:
 #define lowering(x) (this->lower & x)
 
 bool
-lower_64bit_integer_instructions(exec_list *instructions,
+lower_64bit_instructions(exec_list *instructions,
  unsigned what_to_lower)
 {
if (instructions->is_empty())
@@ -163,6 +166,19 @@ lower_64bit_integer_instructions(exec_list *instructions,
return v.progress;
 }
 
+bool
+lower_64bit_integer_instructions(exec_list *instructions,
+ unsigned what_to_lower)
+{
+   return lower_64bit_instructions(instructions, what_to_lower);
+}
+
+bool
+lower_64bit_double_instructions(exec_list *instructions,
+ unsigned what_to_lower)
+{
+   return lower_64bit_instructions(instructions, what_to_lower);
+}
 
 /**
  * Expand individual 64-bit values to uvec2 values
@@ -200,18 +216,21 @@ lower_64bit::expand_source(ir_factory &body,
ir_rvalue *val,
ir_variable **expanded_src)
 {
-   assert(val->type->is_integer_64());
+   assert(val->type->is_integer_64() || val->type->is_double());
 
ir_variable *const temp = body.make_temp(val->type, "tmp");
 
body.emit(assign(temp, val));
 
const ir_expression_operation unpack_opcode =
-  val->type->base_type == GLSL_TYPE_UINT64
-  ? ir_unop_unpack_uint_2x32 : ir_unop_unpack_int_2x32;
+  val->type->base_type == GLSL_TYPE_DOUBLE
+  ? ir_unop_unpack_double_2x32 :
+  (val->type->base_type == GLSL_TYPE_UINT64
+  ? ir_unop_u

[Mesa-dev] [PATCH 11/47] glsl: Add "built-in" functions to do fp64_to_int(fp64)

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/builtin_float64.h | 284 
 src/compiler/glsl/builtin_functions.cpp |   4 +
 src/compiler/glsl/builtin_functions.h   |   3 +
 src/compiler/glsl/float64.glsl  |  45 +
 src/compiler/glsl/glcpp/glcpp-parse.y   |   1 +
 5 files changed, 337 insertions(+)

diff --git a/src/compiler/glsl/builtin_float64.h 
b/src/compiler/glsl/builtin_float64.h
index 2c11e463ca..c5640c459a 100644
--- a/src/compiler/glsl/builtin_float64.h
+++ b/src/compiler/glsl/builtin_float64.h
@@ -12553,3 +12553,287 @@ uint_to_fp64(void *mem_ctx, 
builtin_available_predicate avail)
sig->replace_parameters(&sig_parameters);
return sig;
 }
+ir_function_signature *
+fp64_to_int(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::int_type, avail);
+   ir_factory body(&sig->body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r0A87 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r0A87);
+   ir_variable *const r0A88 = body.make_temp(glsl_type::bool_type, 
"execute_flag");
+   body.emit(assign(r0A88, body.constant(true), 0x01));
+
+   ir_variable *const r0A89 = body.make_temp(glsl_type::int_type, 
"return_value");
+   ir_variable *const r0A8A = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"absZ", ir_var_auto);
+   body.emit(r0A8A);
+   ir_variable *const r0A8B = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"aSign", ir_var_auto);
+   body.emit(r0A8B);
+   ir_variable *const r0A8C = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"aFracHi", ir_var_auto);
+   body.emit(r0A8C);
+   ir_variable *const r0A8D = body.make_temp(glsl_type::uint_type, 
"extractFloat64FracHi_retval");
+   body.emit(assign(r0A8D, bit_and(swizzle_y(r0A87), body.constant(1048575u)), 
0x01));
+
+   body.emit(assign(r0A8C, r0A8D, 0x01));
+
+   ir_variable *const r0A8E = body.make_temp(glsl_type::int_type, 
"extractFloat64Exp_retval");
+   ir_expression *const r0A8F = rshift(swizzle_y(r0A87), 
body.constant(int(20)));
+   ir_expression *const r0A90 = bit_and(r0A8F, body.constant(2047u));
+   body.emit(assign(r0A8E, expr(ir_unop_u2i, r0A90), 0x01));
+
+   body.emit(assign(r0A8B, rshift(swizzle_y(r0A87), body.constant(int(31))), 
0x01));
+
+   body.emit(assign(r0A8A, body.constant(0u), 0x01));
+
+   ir_variable *const r0A91 = body.make_temp(glsl_type::int_type, 
"assignment_tmp");
+   body.emit(assign(r0A91, add(r0A8E, body.constant(int(-1043))), 0x01));
+
+   /* IF CONDITION */
+   ir_expression *const r0A93 = lequal(body.constant(int(0)), r0A91);
+   ir_if *f0A92 = new(mem_ctx) ir_if(operand(r0A93).val);
+   exec_list *const f0A92_parent_instructions = body.instructions;
+
+  /* THEN INSTRUCTIONS */
+  body.instructions = &f0A92->then_instructions;
+
+  /* IF CONDITION */
+  ir_expression *const r0A95 = less(body.constant(int(1054)), r0A8E);
+  ir_if *f0A94 = new(mem_ctx) ir_if(operand(r0A95).val);
+  exec_list *const f0A94_parent_instructions = body.instructions;
+
+ /* THEN INSTRUCTIONS */
+ body.instructions = &f0A94->then_instructions;
+
+ /* IF CONDITION */
+ ir_expression *const r0A97 = equal(r0A8E, body.constant(int(2047)));
+ ir_expression *const r0A98 = bit_or(r0A8D, swizzle_x(r0A87));
+ ir_expression *const r0A99 = expr(ir_unop_u2i, r0A98);
+ ir_expression *const r0A9A = expr(ir_unop_i2b, r0A99);
+ ir_expression *const r0A9B = logic_and(r0A97, r0A9A);
+ ir_if *f0A96 = new(mem_ctx) ir_if(operand(r0A9B).val);
+ exec_list *const f0A96_parent_instructions = body.instructions;
+
+/* THEN INSTRUCTIONS */
+body.instructions = &f0A96->then_instructions;
+
+body.emit(assign(r0A8B, body.constant(0u), 0x01));
+
+
+ body.instructions = f0A96_parent_instructions;
+ body.emit(f0A96);
+
+ /* END IF */
+
+ ir_variable *const r0A9C = body.make_temp(glsl_type::int_type, 
"conditional_tmp");
+ /* IF CONDITION */
+ ir_expression *const r0A9E = expr(ir_unop_u2i, r0A8B);
+ ir_expression *const r0A9F = expr(ir_unop_i2b, r0A9E);
+ ir_if *f0A9D = new(mem_ctx) ir_if(operand(r0A9F).val);
+ exec_list *const f0A9D_parent_instructions = body.instructions;
+
+/* THEN INSTRUCTIONS */
+body.instructions = &f0A9D->then_instructions;
+
+body.emit(assign(r0A9C, body.constant(int(-2147483648)), 0x01));
+
+
+/* ELSE INSTRUCTIONS */
+body.instructions = &f0A9D->else_instructions;
+
+body.emit(assign(r0A9C, body.constant(int(2147483647)), 0x01));
+
+
+ body.instructions = f0A9D_parent_instructions;
+ body.emit(f0A9D);
+
+ /* END IF */
+
+ body.emit(assign(r0A89, r0A9C, 0x01));
+
+ body.emit(

[Mesa-dev] [PATCH 17/47] glsl: Add "built-in" functions to do round(fp64)

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/builtin_float64.h | 225 
 src/compiler/glsl/builtin_functions.cpp |   4 +
 src/compiler/glsl/builtin_functions.h   |   3 +
 src/compiler/glsl/float64.glsl  |  41 ++
 src/compiler/glsl/glcpp/glcpp-parse.y   |   1 +
 5 files changed, 274 insertions(+)

diff --git a/src/compiler/glsl/builtin_float64.h 
b/src/compiler/glsl/builtin_float64.h
index 2bce013d0e..97d6adfd9f 100644
--- a/src/compiler/glsl/builtin_float64.h
+++ b/src/compiler/glsl/builtin_float64.h
@@ -18254,3 +18254,228 @@ ftrunc64(void *mem_ctx, builtin_available_predicate 
avail)
sig->replace_parameters(&sig_parameters);
return sig;
 }
+ir_function_signature *
+fround64(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail);
+   ir_factory body(&sig->body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r0F1C = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r0F1C);
+   ir_variable *const r0F1D = body.make_temp(glsl_type::bool_type, 
"execute_flag");
+   body.emit(assign(r0F1D, body.constant(true), 0x01));
+
+   ir_variable *const r0F1E = body.make_temp(glsl_type::uvec2_type, 
"return_value");
+   ir_variable *const r0F1F = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"aLo", ir_var_auto);
+   body.emit(r0F1F);
+   ir_variable *const r0F20 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"aHi", ir_var_auto);
+   body.emit(r0F20);
+   ir_variable *const r0F21 = body.make_temp(glsl_type::int_type, 
"assignment_tmp");
+   ir_expression *const r0F22 = rshift(swizzle_y(r0F1C), 
body.constant(int(20)));
+   ir_expression *const r0F23 = bit_and(r0F22, body.constant(2047u));
+   ir_expression *const r0F24 = expr(ir_unop_u2i, r0F23);
+   body.emit(assign(r0F21, add(r0F24, body.constant(int(-1023))), 0x01));
+
+   body.emit(assign(r0F20, swizzle_y(r0F1C), 0x01));
+
+   body.emit(assign(r0F1F, swizzle_x(r0F1C), 0x01));
+
+   /* IF CONDITION */
+   ir_expression *const r0F26 = less(r0F21, body.constant(int(20)));
+   ir_if *f0F25 = new(mem_ctx) ir_if(operand(r0F26).val);
+   exec_list *const f0F25_parent_instructions = body.instructions;
+
+  /* THEN INSTRUCTIONS */
+  body.instructions = &f0F25->then_instructions;
+
+  /* IF CONDITION */
+  ir_expression *const r0F28 = less(r0F21, body.constant(int(0)));
+  ir_if *f0F27 = new(mem_ctx) ir_if(operand(r0F28).val);
+  exec_list *const f0F27_parent_instructions = body.instructions;
+
+ /* THEN INSTRUCTIONS */
+ body.instructions = &f0F27->then_instructions;
+
+ body.emit(assign(r0F20, bit_and(swizzle_y(r0F1C), 
body.constant(2147483648u)), 0x01));
+
+ /* IF CONDITION */
+ ir_expression *const r0F2A = equal(r0F21, body.constant(int(-1)));
+ ir_expression *const r0F2B = nequal(swizzle_x(r0F1C), 
body.constant(0u));
+ ir_expression *const r0F2C = logic_and(r0F2A, r0F2B);
+ ir_if *f0F29 = new(mem_ctx) ir_if(operand(r0F2C).val);
+ exec_list *const f0F29_parent_instructions = body.instructions;
+
+/* THEN INSTRUCTIONS */
+body.instructions = &f0F29->then_instructions;
+
+body.emit(assign(r0F20, bit_or(r0F20, body.constant(1072693248u)), 
0x01));
+
+
+ body.instructions = f0F29_parent_instructions;
+ body.emit(f0F29);
+
+ /* END IF */
+
+ body.emit(assign(r0F1F, body.constant(0u), 0x01));
+
+
+ /* ELSE INSTRUCTIONS */
+ body.instructions = &f0F27->else_instructions;
+
+ ir_variable *const r0F2D = body.make_temp(glsl_type::uint_type, 
"assignment_tmp");
+ body.emit(assign(r0F2D, rshift(body.constant(1048575u), r0F21), 
0x01));
+
+ /* IF CONDITION */
+ ir_expression *const r0F2F = bit_and(r0F20, r0F2D);
+ ir_expression *const r0F30 = equal(r0F2F, body.constant(0u));
+ ir_expression *const r0F31 = equal(r0F1F, body.constant(0u));
+ ir_expression *const r0F32 = logic_and(r0F30, r0F31);
+ ir_if *f0F2E = new(mem_ctx) ir_if(operand(r0F32).val);
+ exec_list *const f0F2E_parent_instructions = body.instructions;
+
+/* THEN INSTRUCTIONS */
+body.instructions = &f0F2E->then_instructions;
+
+body.emit(assign(r0F1E, r0F1C, 0x03));
+
+body.emit(assign(r0F1D, body.constant(false), 0x01));
+
+
+/* ELSE INSTRUCTIONS */
+body.instructions = &f0F2E->else_instructions;
+
+ir_expression *const r0F33 = rshift(body.constant(524288u), r0F21);
+body.emit(assign(r0F20, add(r0F20, r0F33), 0x01));
+
+ir_expression *const r0F34 = expr(ir_unop_bit_not, r0F2D);
+body.emit(assign(r0F20, bit_and(r0F20, r0F34), 0x01));
+
+body.emit(assign(r0F1F, body.constant(

[Mesa-dev] [PATCH 13/47] glsl: Add "built-in" functions to do fp64_to_fp32(fp64)

2017-08-23 Thread Elie Tournier

This algorithm seems to be OK on softpipe but fails the piglit test on r600g
when we convert the fp64 representing the smallest fp32 into an fp32.
However, the IR is the same in both cases.

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/builtin_float64.h | 494 
 src/compiler/glsl/builtin_functions.cpp |   4 +
 src/compiler/glsl/builtin_functions.h   |   3 +
 src/compiler/glsl/float64.glsl  | 108 +++
 src/compiler/glsl/glcpp/glcpp-parse.y   |   1 +
 5 files changed, 610 insertions(+)

diff --git a/src/compiler/glsl/builtin_float64.h 
b/src/compiler/glsl/builtin_float64.h
index 644407a185..c372de0f4d 100644
--- a/src/compiler/glsl/builtin_float64.h
+++ b/src/compiler/glsl/builtin_float64.h
@@ -13171,3 +13171,497 @@ int_to_fp64(void *mem_ctx, 
builtin_available_predicate avail)
sig->replace_parameters(&sig_parameters);
return sig;
 }
+ir_function_signature *
+packFloat32(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::float_type, avail);
+   ir_factory body(&sig->body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r0AF8 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"zSign", ir_var_function_in);
+   sig_parameters.push_tail(r0AF8);
+   ir_variable *const r0AF9 = new(mem_ctx) ir_variable(glsl_type::int_type, 
"zExp", ir_var_function_in);
+   sig_parameters.push_tail(r0AF9);
+   ir_variable *const r0AFA = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"zFrac", ir_var_function_in);
+   sig_parameters.push_tail(r0AFA);
+   ir_variable *const r0AFB = body.make_temp(glsl_type::float_type, 
"uintBitsToFloat_retval");
+   ir_expression *const r0AFC = lshift(r0AF8, body.constant(int(31)));
+   ir_expression *const r0AFD = expr(ir_unop_i2u, r0AF9);
+   ir_expression *const r0AFE = lshift(r0AFD, body.constant(int(23)));
+   ir_expression *const r0AFF = add(r0AFC, r0AFE);
+   ir_expression *const r0B00 = add(r0AFF, r0AFA);
+   body.emit(assign(r0AFB, expr(ir_unop_bitcast_u2f, r0B00), 0x01));
+
+   body.emit(ret(r0AFB));
+
+   sig->replace_parameters(&sig_parameters);
+   return sig;
+}
+ir_function_signature *
+roundAndPackFloat32(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::float_type, avail);
+   ir_factory body(&sig->body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r0B01 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"zSign", ir_var_function_in);
+   sig_parameters.push_tail(r0B01);
+   ir_variable *const r0B02 = new(mem_ctx) ir_variable(glsl_type::int_type, 
"zExp", ir_var_function_in);
+   sig_parameters.push_tail(r0B02);
+   ir_variable *const r0B03 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"zFrac", ir_var_function_in);
+   sig_parameters.push_tail(r0B03);
+   ir_variable *const r0B04 = body.make_temp(glsl_type::bool_type, 
"execute_flag");
+   body.emit(assign(r0B04, body.constant(true), 0x01));
+
+   ir_variable *const r0B05 = body.make_temp(glsl_type::float_type, 
"return_value");
+   ir_variable *const r0B06 = new(mem_ctx) ir_variable(glsl_type::int_type, 
"roundBits", ir_var_auto);
+   body.emit(r0B06);
+   ir_expression *const r0B07 = bit_and(r0B03, body.constant(127u));
+   body.emit(assign(r0B06, expr(ir_unop_u2i, r0B07), 0x01));
+
+   /* IF CONDITION */
+   ir_expression *const r0B09 = expr(ir_unop_i2u, r0B02);
+   ir_expression *const r0B0A = lequal(body.constant(253u), r0B09);
+   ir_if *f0B08 = new(mem_ctx) ir_if(operand(r0B0A).val);
+   exec_list *const f0B08_parent_instructions = body.instructions;
+
+  /* THEN INSTRUCTIONS */
+  body.instructions = &f0B08->then_instructions;
+
+  /* IF CONDITION */
+  ir_expression *const r0B0C = less(body.constant(int(253)), r0B02);
+  ir_expression *const r0B0D = equal(r0B02, body.constant(int(253)));
+  ir_expression *const r0B0E = expr(ir_unop_u2i, r0B03);
+  ir_expression *const r0B0F = less(r0B0E, body.constant(int(-64)));
+  ir_expression *const r0B10 = logic_and(r0B0D, r0B0F);
+  ir_expression *const r0B11 = logic_or(r0B0C, r0B10);
+  ir_if *f0B0B = new(mem_ctx) ir_if(operand(r0B11).val);
+  exec_list *const f0B0B_parent_instructions = body.instructions;
+
+ /* THEN INSTRUCTIONS */
+ body.instructions = &f0B0B->then_instructions;
+
+ ir_expression *const r0B12 = lshift(r0B01, body.constant(int(31)));
+ ir_expression *const r0B13 = add(r0B12, body.constant(2139095040u));
+ body.emit(assign(r0B05, expr(ir_unop_bitcast_u2f, r0B13), 0x01));
+
+ body.emit(assign(r0B04, body.constant(false), 0x01));
+
+
+ /* ELSE INSTRUCTIONS */
+ body.instructions = &f0B0B->else_instructions;
+
+ /* IF CONDITION */
+ ir_expression *const r0B15 = less(r0B02, body.constant(int(0)));
+ i

[Mesa-dev] [PATCH 21/47] glsl: Add a lowering pass for 64-bit float sign()

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/lower_64bit.cpp  | 5 -
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl/lower_64bit.cpp 
b/src/compiler/glsl/lower_64bit.cpp
index 91b972fe82..86ee13cb89 100644
--- a/src/compiler/glsl/lower_64bit.cpp
+++ b/src/compiler/glsl/lower_64bit.cpp
@@ -421,7 +421,10 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
 
case ir_unop_sign:
   if (lowering(SIGN64)) {
- *rvalue = handle_op(ir, "__builtin_sign64", generate_ir::sign64);
+ if (ir->type->base_type == GLSL_TYPE_DOUBLE)
+*rvalue = handle_op(ir, "__builtin_fsign64", generate_ir::fsign64);
+ else
+*rvalue = handle_op(ir, "__builtin_sign64", generate_ir::sign64);
   }
   break;
 
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 3b0b19be53..1d5e74ab31 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -7059,7 +7059,8 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
   if (!pscreen->get_param(pscreen, PIPE_CAP_DOUBLES) &&
 ctx->Const.GLSLVersion >= 130) {
  unsigned lower_inst = ABS64 |
-   NEG64;
+   NEG64 |
+   SIGN64;
  lower_64bit_double_instructions(ir, lower_inst);
   }
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 30/47] glsl: Add a lowering pass for 64-bit float i2d()

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/ir_optimization.h| 1 +
 src/compiler/glsl/lower_64bit.cpp  | 7 +++
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++-
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/ir_optimization.h 
b/src/compiler/glsl/ir_optimization.h
index 157c7e1a6f..c59d7f6ee2 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -71,6 +71,7 @@
 #define D2U   (1U << 10)
 #define U2D   (1U << 11)
 #define D2I   (1U << 12)
+#define I2D   (1U << 13)
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/src/compiler/glsl/lower_64bit.cpp 
b/src/compiler/glsl/lower_64bit.cpp
index 41255094ec..570f997779 100644
--- a/src/compiler/glsl/lower_64bit.cpp
+++ b/src/compiler/glsl/lower_64bit.cpp
@@ -450,6 +450,13 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
   }
   break;
 
+   case ir_unop_i2d:
+  if (lowering(I2D)) {
+ if (ir->type->base_type == GLSL_TYPE_DOUBLE)
+*rvalue = handle_op(ir, "__builtin_int_to_fp64", 
generate_ir::int_to_fp64, true);
+  }
+  break;
+
case ir_unop_neg:
   if (lowering(NEG64)) {
  if (ir->type->base_type == GLSL_TYPE_DOUBLE)
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 821461e439..1be223a3cb 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -7068,7 +7068,8 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
MUL64 |
D2U |
U2D |
-   D2I;
+   D2I |
+   I2D;
  lower_64bit_double_instructions(ir, lower_inst);
   }
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 20/47] glsl: Add a lowering pass for 64-bit float neg()

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/ir_optimization.h| 1 +
 src/compiler/glsl/lower_64bit.cpp  | 7 +++
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++-
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/ir_optimization.h 
b/src/compiler/glsl/ir_optimization.h
index 6cc0909a80..ad76a7032e 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -63,6 +63,7 @@
 #define DIV64 (1U << 2)
 #define MOD64 (1U << 3)
 #define ABS64 (1U << 4)
+#define NEG64 (1U << 5)
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/src/compiler/glsl/lower_64bit.cpp 
b/src/compiler/glsl/lower_64bit.cpp
index 4887e5538c..91b972fe82 100644
--- a/src/compiler/glsl/lower_64bit.cpp
+++ b/src/compiler/glsl/lower_64bit.cpp
@@ -412,6 +412,13 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
   }
   break;
 
+   case ir_unop_neg:
+  if (lowering(NEG64)) {
+ if (ir->type->base_type == GLSL_TYPE_DOUBLE)
+*rvalue = handle_op(ir, "__builtin_fneg64", generate_ir::fneg64);
+  }
+  break;
+
case ir_unop_sign:
   if (lowering(SIGN64)) {
  *rvalue = handle_op(ir, "__builtin_sign64", generate_ir::sign64);
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 991eae4917..3b0b19be53 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -7058,7 +7058,8 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
*/
   if (!pscreen->get_param(pscreen, PIPE_CAP_DOUBLES) &&
 ctx->Const.GLSLVersion >= 130) {
- unsigned lower_inst = ABS64;
+ unsigned lower_inst = ABS64 |
+   NEG64;
  lower_64bit_double_instructions(ir, lower_inst);
   }
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 23/47] glsl: Add a lowering pass for 64-bit float lequal()

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/ir_optimization.h| 1 +
 src/compiler/glsl/lower_64bit.cpp  | 7 +++
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++-
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/ir_optimization.h 
b/src/compiler/glsl/ir_optimization.h
index 86cfdf7619..271dad77e4 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -65,6 +65,7 @@
 #define ABS64 (1U << 4)
 #define NEG64 (1U << 5)
 #define EQ64  (1U << 6)
+#define LE64  (1U << 7)
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/src/compiler/glsl/lower_64bit.cpp 
b/src/compiler/glsl/lower_64bit.cpp
index 95d7c4c3d4..68ffa8f706 100644
--- a/src/compiler/glsl/lower_64bit.cpp
+++ b/src/compiler/glsl/lower_64bit.cpp
@@ -445,6 +445,13 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
   }
   break;
 
+   case ir_binop_lequal:
+  if (lowering(LE64)) {
+ if (ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE)
+*rvalue = handle_op(ir, "__builtin_fle64", generate_ir::fle64);
+  }
+  break;
+
case ir_binop_mod:
   if (lowering(MOD64)) {
  if (ir->type->base_type == GLSL_TYPE_UINT64) {
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index e7b848872b..78fa1e3a76 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -7061,7 +7061,8 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
  unsigned lower_inst = ABS64 |
NEG64 |
SIGN64 |
-   EQ64;
+   EQ64 |
+   LE64;
  lower_64bit_double_instructions(ir, lower_inst);
   }
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 25/47] glsl: Add a lowering pass for 64-bit float add()

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/ir_optimization.h| 1 +
 src/compiler/glsl/lower_64bit.cpp  | 7 +++
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++-
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/ir_optimization.h 
b/src/compiler/glsl/ir_optimization.h
index ee8cd92269..81ffe67077 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -67,6 +67,7 @@
 #define EQ64  (1U << 6)
 #define LE64  (1U << 7)
 #define LT64  (1U << 8)
+#define ADD64 (1U << 9)
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/src/compiler/glsl/lower_64bit.cpp 
b/src/compiler/glsl/lower_64bit.cpp
index 2b6580d293..414e8414f9 100644
--- a/src/compiler/glsl/lower_64bit.cpp
+++ b/src/compiler/glsl/lower_64bit.cpp
@@ -428,6 +428,13 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
   }
   break;
 
+   case ir_binop_add:
+  if (lowering(ADD64)) {
+ if (ir->type->base_type == GLSL_TYPE_DOUBLE)
+*rvalue = handle_op(ir, "__builtin_fadd64", generate_ir::fadd64);
+  }
+  break;
+
case ir_binop_div:
   if (lowering(DIV64)) {
  if (ir->type->base_type == GLSL_TYPE_UINT64) {
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 59f896a785..fab331edea 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -7063,7 +7063,8 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
SIGN64 |
EQ64 |
LE64 |
-   LT64;
+   LT64 |
+   ADD64;
  lower_64bit_double_instructions(ir, lower_inst);
   }
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 22/47] glsl: Add a lowering pass for 64-bit float equal()

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/ir_optimization.h| 1 +
 src/compiler/glsl/lower_64bit.cpp  | 7 +++
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++-
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/ir_optimization.h 
b/src/compiler/glsl/ir_optimization.h
index ad76a7032e..86cfdf7619 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -64,6 +64,7 @@
 #define MOD64 (1U << 3)
 #define ABS64 (1U << 4)
 #define NEG64 (1U << 5)
+#define EQ64  (1U << 6)
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/src/compiler/glsl/lower_64bit.cpp 
b/src/compiler/glsl/lower_64bit.cpp
index 86ee13cb89..95d7c4c3d4 100644
--- a/src/compiler/glsl/lower_64bit.cpp
+++ b/src/compiler/glsl/lower_64bit.cpp
@@ -438,6 +438,13 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
   }
   break;
 
+   case ir_binop_equal:
+  if (lowering(EQ64)) {
+ if (ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE)
+*rvalue = handle_op(ir, "__builtin_feq64", generate_ir::feq64);
+  }
+  break;
+
case ir_binop_mod:
   if (lowering(MOD64)) {
  if (ir->type->base_type == GLSL_TYPE_UINT64) {
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 1d5e74ab31..e7b848872b 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -7060,7 +7060,8 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
 ctx->Const.GLSLVersion >= 130) {
  unsigned lower_inst = ABS64 |
NEG64 |
-   SIGN64;
+   SIGN64 |
+   EQ64;
  lower_64bit_double_instructions(ir, lower_inst);
   }
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 29/47] glsl: Add a lowering pass for 64-bit float d2i()

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/ir_optimization.h| 1 +
 src/compiler/glsl/lower_64bit.cpp  | 7 +++
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++-
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/ir_optimization.h 
b/src/compiler/glsl/ir_optimization.h
index 5d1118d0cf..157c7e1a6f 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -70,6 +70,7 @@
 #define ADD64 (1U << 9)
 #define D2U   (1U << 10)
 #define U2D   (1U << 11)
+#define D2I   (1U << 12)
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/src/compiler/glsl/lower_64bit.cpp 
b/src/compiler/glsl/lower_64bit.cpp
index 38b0420baa..41255094ec 100644
--- a/src/compiler/glsl/lower_64bit.cpp
+++ b/src/compiler/glsl/lower_64bit.cpp
@@ -436,6 +436,13 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
   }
   break;
 
+   case ir_unop_d2i:
+  if (lowering(D2I)) {
+ if (ir->type->base_type == GLSL_TYPE_INT)
+*rvalue = handle_op(ir, "__builtin_fp64_to_int", 
generate_ir::fp64_to_int);
+  }
+  break;
+
case ir_unop_d2u:
   if (lowering(D2U)) {
  if (ir->type->base_type == GLSL_TYPE_UINT)
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index df815a86d1..821461e439 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -7067,7 +7067,8 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
ADD64 |
MUL64 |
D2U |
-   U2D;
+   U2D |
+   D2I;
  lower_64bit_double_instructions(ir, lower_inst);
   }
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 28/47] glsl: Add a lowering pass for 64-bit float u2d()

2017-08-23 Thread Elie Tournier

Handle non 64bit sources (airlied)

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/ir_optimization.h|  1 +
 src/compiler/glsl/lower_64bit.cpp  | 49 --
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  3 +-
 3 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/src/compiler/glsl/ir_optimization.h 
b/src/compiler/glsl/ir_optimization.h
index aa71dfdb39..5d1118d0cf 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -69,6 +69,7 @@
 #define LT64  (1U << 8)
 #define ADD64 (1U << 9)
 #define D2U   (1U << 10)
+#define U2D   (1U << 11)
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/src/compiler/glsl/lower_64bit.cpp 
b/src/compiler/glsl/lower_64bit.cpp
index 0b9d466b9d..38b0420baa 100644
--- a/src/compiler/glsl/lower_64bit.cpp
+++ b/src/compiler/glsl/lower_64bit.cpp
@@ -52,7 +52,7 @@ using namespace ir_builder;
 
 namespace lower_64bit {
 void expand_source(ir_factory &, ir_rvalue *val, ir_variable **expanded_src);
-
+void extract_source(ir_factory &, ir_rvalue *val, ir_variable **extracted_src);
 ir_dereference_variable *compact_destination(ir_factory &,
  const glsl_type *type,
  ir_variable *result[4]);
@@ -124,7 +124,7 @@ private:
ir_factory added_functions;
 
ir_rvalue *handle_op(ir_expression *ir, const char *function_name,
-function_generator generator);
+function_generator generator, bool conv_to_double = 
false);
 };
 
 } /* anonymous namespace */
@@ -245,6 +245,25 @@ lower_64bit::expand_source(ir_factory &body,
   expanded_src[i] = expanded_src[0];
 }
 
+void
+lower_64bit::extract_source(ir_factory &body,
+ir_rvalue *val,
+ir_variable **extracted_src)
+{
+   ir_variable *const temp = body.make_temp(val->type, "tmp");
+
+   body.emit(assign(temp, val));
+   unsigned i;
+   for (i = 0; i < val->type->vector_elements; i++) {
+  extracted_src[i] = body.make_temp(val->type->get_scalar_type(), 
"extracted_source");
+
+  body.emit(assign(extracted_src[i], swizzle(temp, i, 1)));
+   }
+
+   for (/* empty */; i < 4; i++)
+  extracted_src[i] = extracted_src[0];
+}
+
 /**
  * Convert a series of uvec2 results into a single 64-bit integer vector
  */
@@ -315,7 +334,10 @@ lower_64bit::lower_op_to_function_call(ir_instruction 
*base_ir,
ir_factory body(&instructions, mem_ctx);
 
for (unsigned i = 0; i < num_operands; i++) {
-  expand_source(body, ir->operands[i], src[i]);
+  if (ir->operands[i]->type->is_64bit())
+ expand_source(body, ir->operands[i], src[i]);
+  else
+ extract_source(body, ir->operands[i], src[i]);
 
   if (ir->operands[i]->type->vector_elements > source_components)
  source_components = ir->operands[i]->type->vector_elements;
@@ -365,13 +387,15 @@ lower_64bit::lower_op_to_function_call(ir_instruction 
*base_ir,
 ir_rvalue *
 lower_64bit_visitor::handle_op(ir_expression *ir,
const char *function_name,
-   function_generator generator)
+   function_generator generator,
+   bool conv_to_double)
 {
-   for (unsigned i = 0; i < ir->get_num_operands(); i++)
-  if (!ir->operands[i]->type->is_integer_64() &&
-  !ir->operands[i]->type->is_double())
- return ir;
-
+   if (conv_to_double == false) {
+  for (unsigned i = 0; i < ir->num_operands; i++)
+ if (!ir->operands[i]->type->is_integer_64() &&
+ !ir->operands[i]->type->is_double())
+return ir;
+   }
/* Get a handle to the correct ir_function_signature for the core
 * operation.
 */
@@ -435,6 +459,13 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
   }
   break;
 
+   case ir_unop_u2d:
+  if (lowering(U2D)) {
+ if (ir->type->base_type == GLSL_TYPE_DOUBLE)
+*rvalue = handle_op(ir, "__builtin_uint_to_fp64", 
generate_ir::uint_to_fp64, true);
+  }
+  break;
+
case ir_binop_add:
   if (lowering(ADD64)) {
  if (ir->type->base_type == GLSL_TYPE_DOUBLE)
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index de5a499f8d..df815a86d1 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -7066,7 +7066,8 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
LT64 |
ADD64 |
MUL64 |
-   D2U;
+   D2U |
+   U2D;
  lower_64bit_double_instructions(ir, lower_inst);

[Mesa-dev] [PATCH 33/47] glsl: Add a lowering pass for 64-bit float sqrt()

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/ir_optimization.h| 1 +
 src/compiler/glsl/lower_64bit.cpp  | 7 +++
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++-
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/ir_optimization.h 
b/src/compiler/glsl/ir_optimization.h
index dce0bf4a9f..7a5a30541f 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -74,6 +74,7 @@
 #define I2D   (1U << 13)
 #define D2F   (1U << 14)
 #define F2D   (1U << 15)
+#define SQRT64 (1U << 16)
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/src/compiler/glsl/lower_64bit.cpp 
b/src/compiler/glsl/lower_64bit.cpp
index 054cdcb50a..3681c4f0c5 100644
--- a/src/compiler/glsl/lower_64bit.cpp
+++ b/src/compiler/glsl/lower_64bit.cpp
@@ -487,6 +487,13 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
   }
   break;
 
+   case ir_unop_sqrt:
+  if (lowering(SQRT64)) {
+ if (ir->type->base_type == GLSL_TYPE_DOUBLE)
+*rvalue = handle_op(ir, "__builtin_fsqrt64", generate_ir::fsqrt64);
+  }
+  break;
+
case ir_unop_u2d:
   if (lowering(U2D)) {
  if (ir->type->base_type == GLSL_TYPE_DOUBLE)
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index f26368812f..474e9bcdea 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -7071,7 +7071,8 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
D2I |
I2D |
D2F |
-   F2D;
+   F2D |
+   SQRT64;
  lower_64bit_double_instructions(ir, lower_inst);
   }
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 24/47] glsl: Add a lowering pass for 64-bit float less()

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/ir_optimization.h| 1 +
 src/compiler/glsl/lower_64bit.cpp  | 7 +++
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++-
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/ir_optimization.h 
b/src/compiler/glsl/ir_optimization.h
index 271dad77e4..ee8cd92269 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -66,6 +66,7 @@
 #define NEG64 (1U << 5)
 #define EQ64  (1U << 6)
 #define LE64  (1U << 7)
+#define LT64  (1U << 8)
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/src/compiler/glsl/lower_64bit.cpp 
b/src/compiler/glsl/lower_64bit.cpp
index 68ffa8f706..2b6580d293 100644
--- a/src/compiler/glsl/lower_64bit.cpp
+++ b/src/compiler/glsl/lower_64bit.cpp
@@ -452,6 +452,13 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
   }
   break;
 
+   case ir_binop_less:
+  if (lowering(LT64)) {
+ if (ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE)
+*rvalue = handle_op(ir, "__builtin_flt64", generate_ir::flt64);
+  }
+  break;
+
case ir_binop_mod:
   if (lowering(MOD64)) {
  if (ir->type->base_type == GLSL_TYPE_UINT64) {
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 78fa1e3a76..59f896a785 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -7062,7 +7062,8 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
NEG64 |
SIGN64 |
EQ64 |
-   LE64;
+   LE64 |
+   LT64;
  lower_64bit_double_instructions(ir, lower_inst);
   }
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 37/47] glsl/lower_64bit: handle any/all operations

2017-08-23 Thread Elie Tournier

From: Dave Airlie 

This just splits them out and combines the results.

Signed-off-by: Dave Airlie 
---
 src/compiler/glsl/lower_64bit.cpp | 61 ++-
 1 file changed, 60 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/lower_64bit.cpp 
b/src/compiler/glsl/lower_64bit.cpp
index f8fcf9fd0a..79148bb169 100644
--- a/src/compiler/glsl/lower_64bit.cpp
+++ b/src/compiler/glsl/lower_64bit.cpp
@@ -59,6 +59,12 @@ ir_dereference_variable *compact_destination(ir_factory &,
 ir_dereference_variable *merge_destination(ir_factory &,
const glsl_type *type,
ir_variable *result[4]);
+ir_dereference_variable *all_equal_destination(ir_factory &,
+  const glsl_type *type,
+  ir_variable *result[4]);
+ir_dereference_variable *any_nequal_destination(ir_factory &,
+   const glsl_type *type,
+   ir_variable *result[4]);
 
 ir_rvalue *lower_op_to_function_call(ir_instruction *base_ir,
  ir_expression *ir,
@@ -309,6 +315,47 @@ lower_64bit::merge_destination(ir_factory &body,
return new(mem_ctx) ir_dereference_variable(merged_result);
 }
 
+/*
+ * and the results from each comparison.
+ */
+ir_dereference_variable *
+lower_64bit::all_equal_destination(ir_factory &body,
+const glsl_type *type,
+ir_variable *result[4])
+{
+   ir_variable *const merged_result =
+  body.make_temp(glsl_type::bool_type, "all_result");
+
+   body.emit(assign(merged_result, result[0]));
+   for (unsigned i = 1; i < type->vector_elements; i++) {
+  body.emit(assign(merged_result, logic_and(merged_result, result[i])));
+   }
+
+   void *const mem_ctx = ralloc_parent(merged_result);
+   return new(mem_ctx) ir_dereference_variable(merged_result);
+}
+
+/*
+ * AND the results from each comparison, then NOT the result
+ */
+ir_dereference_variable *
+lower_64bit::any_nequal_destination(ir_factory &body,
+const glsl_type *type,
+ir_variable *result[4])
+{
+   ir_variable *const merged_result =
+  body.make_temp(glsl_type::bool_type, "any_result");
+
+   body.emit(assign(merged_result, result[0]));
+   for (unsigned i = 1; i < type->vector_elements; i++) {
+  body.emit(assign(merged_result, logic_and(merged_result, result[i])));
+   }
+
+   body.emit(assign(merged_result, logic_not(merged_result)));
+   void *const mem_ctx = ralloc_parent(merged_result);
+   return new(mem_ctx) ir_dereference_variable(merged_result);
+}
+
 ir_rvalue *
 lower_64bit::lower_op_to_function_call(ir_instruction *base_ir,
ir_expression *ir,
@@ -362,7 +409,11 @@ lower_64bit::lower_op_to_function_call(ir_instruction 
*base_ir,
}
 
ir_rvalue *rv;
-   if (ir->type->is_64bit())
+   if (ir->operation == ir_binop_all_equal)
+  rv = all_equal_destination(body, ir->type, dst);
+   else if (ir->operation == ir_binop_any_nequal)
+  rv = any_nequal_destination(body, ir->type, dst);
+   else if (ir->type->is_64bit())
   rv = compact_destination(body, ir->type, dst);
else
   rv = merge_destination(body, ir->type, dst);
@@ -579,6 +630,14 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
   }
   break;
 
+   case ir_binop_all_equal:
+   case ir_binop_any_nequal:
+  if (lowering(EQ64)) {
+if (ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE) {
+*rvalue = handle_op(ir, "__builtin_feq64", generate_ir::feq64);
+}
+  }
+  break;
default:
   break;
}
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 39/47] glsl: Add a lowering pass for 64-bit float gequal()

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/lower_64bit.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/lower_64bit.cpp 
b/src/compiler/glsl/lower_64bit.cpp
index 4978759eb9..07d39de79d 100644
--- a/src/compiler/glsl/lower_64bit.cpp
+++ b/src/compiler/glsl/lower_64bit.cpp
@@ -417,7 +417,8 @@ lower_64bit::lower_op_to_function_call(ir_instruction 
*base_ir,
 
   body.emit(c);
 
-  if (ir->operation == ir_unop_d2b)
+  if (ir->operation == ir_unop_d2b ||
+  ir->operation == ir_binop_gequal)
  body.emit(assign(dst[i], logic_not(dst[i])));
}
 
@@ -624,6 +625,7 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
   }
   break;
 
+   case ir_binop_gequal:
case ir_binop_less:
   if (lowering(LT64)) {
  if (ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE)
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 38/47] glsl/lower_64bit: lower d2b using comparison

2017-08-23 Thread Elie Tournier

From: Dave Airlie 

This just does a compare to 0 and inverts the result to lower
d2b. Not 100% sure this is always correct, but it passes piglit
---
 src/compiler/glsl/lower_64bit.cpp | 22 +-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/lower_64bit.cpp 
b/src/compiler/glsl/lower_64bit.cpp
index 79148bb169..4978759eb9 100644
--- a/src/compiler/glsl/lower_64bit.cpp
+++ b/src/compiler/glsl/lower_64bit.cpp
@@ -361,7 +361,7 @@ lower_64bit::lower_op_to_function_call(ir_instruction 
*base_ir,
ir_expression *ir,
ir_function_signature *callee)
 {
-   const unsigned num_operands = ir->num_operands;
+   unsigned num_operands = ir->num_operands;
ir_variable *src[4][4];
ir_variable *dst[4];
void *const mem_ctx = ralloc_parent(ir);
@@ -390,6 +390,16 @@ lower_64bit::lower_op_to_function_call(ir_instruction 
*base_ir,
  source_components = ir->operands[i]->type->vector_elements;
}
 
+   if (ir->operation == ir_unop_d2b) {
+  for (unsigned i = 0; i < source_components; i++) {
+ src[1][i] = body.make_temp(glsl_type::uvec2_type, "zero");
+
+ body.emit(assign(src[1][i], body.constant(0u), 1));
+ body.emit(assign(src[1][i], body.constant(0u), 2));
+  }
+  num_operands++;
+   }
+
for (unsigned i = 0; i < source_components; i++) {
   dst[i] = body.make_temp(result_type, "expanded_64bit_result");
 
@@ -406,6 +416,9 @@ lower_64bit::lower_op_to_function_call(ir_instruction 
*base_ir,
   &parameters);
 
   body.emit(c);
+
+  if (ir->operation == ir_unop_d2b)
+ body.emit(assign(dst[i], logic_not(dst[i])));
}
 
ir_rvalue *rv;
@@ -487,6 +500,13 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
   }
   break;
 
+   case ir_unop_d2b:
+  if (lowering(EQ64)) {
+ if (ir->type->base_type == GLSL_TYPE_BOOL)
+*rvalue = handle_op(ir, "__builtin_feq64", generate_ir::feq64);
+  }
+  break;
+
case ir_unop_d2f:
   if (lowering(D2F)) {
  if (ir->type->base_type == GLSL_TYPE_FLOAT)
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 46/47] glsl: Add a lowering pass for 64-bit float frac()

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/lower_instructions.cpp | 25 +
 1 file changed, 25 insertions(+)

diff --git a/src/compiler/glsl/lower_instructions.cpp 
b/src/compiler/glsl/lower_instructions.cpp
index 3ef13d1166..9bb9a32bc6 100644
--- a/src/compiler/glsl/lower_instructions.cpp
+++ b/src/compiler/glsl/lower_instructions.cpp
@@ -181,6 +181,7 @@ private:
void max_to_less(ir_expression *ir);
void dfloor_to_dtrunc(ir_expression *ir);
void dceil_to_dtrunc(ir_expression *ir);
+   void dfrac_to_dtrunc(ir_expression *ir);
 
ir_expression *_carry(operand a, operand b);
 };
@@ -1707,6 +1708,24 @@ 
lower_instructions_visitor::dceil_to_dtrunc(ir_expression *ir)
this->progress = true;
 }
 
+void
+lower_instructions_visitor::dfrac_to_dtrunc(ir_expression *ir)
+{
+   ir_expression *const floor_expr =
+  new(ir) ir_expression(ir_unop_floor,
+ir->operands[0]->type, ir->operands[0]);
+   dfloor_to_dtrunc(floor_expr);
+   ir_expression *const neg_expr =
+  new(ir) ir_expression(ir_unop_neg,
+ir->operands[0]->type, floor_expr);
+
+   ir->operation = ir_binop_add;
+   ir->init_num_operands();
+   ir->operands[1] = neg_expr;
+
+   this->progress = true;
+}
+
 ir_visitor_status
 lower_instructions_visitor::visit_leave(ir_expression *ir)
 {
@@ -1872,6 +1891,12 @@ lower_instructions_visitor::visit_leave(ir_expression 
*ir)
  max_to_less(ir);
   break;
 
+   case ir_unop_fract:
+  if (lowering(DOPS_TO_DTRUNC) &&
+  ir->type->is_double() && ir->type->is_scalar())
+ dfrac_to_dtrunc(ir);
+  break;
+
default:
   return visit_continue;
}
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 40/47] glsl: Add a lowering pass for 64-bit float greater()

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/lower_64bit.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/lower_64bit.cpp 
b/src/compiler/glsl/lower_64bit.cpp
index 07d39de79d..f9e565600b 100644
--- a/src/compiler/glsl/lower_64bit.cpp
+++ b/src/compiler/glsl/lower_64bit.cpp
@@ -418,7 +418,8 @@ lower_64bit::lower_op_to_function_call(ir_instruction 
*base_ir,
   body.emit(c);
 
   if (ir->operation == ir_unop_d2b ||
-  ir->operation == ir_binop_gequal)
+  ir->operation == ir_binop_gequal ||
+  ir->operation == ir_binop_greater)
  body.emit(assign(dst[i], logic_not(dst[i])));
}
 
@@ -618,6 +619,7 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
   }
   break;
 
+   case ir_binop_greater:
case ir_binop_lequal:
   if (lowering(LE64)) {
  if (ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE)
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 27/47] glsl: Add a lowering pass for 64-bit float d2u()

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/ir_optimization.h| 1 +
 src/compiler/glsl/lower_64bit.cpp  | 7 +++
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++-
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/ir_optimization.h 
b/src/compiler/glsl/ir_optimization.h
index 81ffe67077..aa71dfdb39 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -68,6 +68,7 @@
 #define LE64  (1U << 7)
 #define LT64  (1U << 8)
 #define ADD64 (1U << 9)
+#define D2U   (1U << 10)
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/src/compiler/glsl/lower_64bit.cpp 
b/src/compiler/glsl/lower_64bit.cpp
index 9087cabda2..0b9d466b9d 100644
--- a/src/compiler/glsl/lower_64bit.cpp
+++ b/src/compiler/glsl/lower_64bit.cpp
@@ -412,6 +412,13 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
   }
   break;
 
+   case ir_unop_d2u:
+  if (lowering(D2U)) {
+ if (ir->type->base_type == GLSL_TYPE_UINT)
+*rvalue = handle_op(ir, "__builtin_fp64_to_uint", 
generate_ir::fp64_to_uint);
+  }
+  break;
+
case ir_unop_neg:
   if (lowering(NEG64)) {
  if (ir->type->base_type == GLSL_TYPE_DOUBLE)
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 488d052336..de5a499f8d 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -7065,7 +7065,8 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
LE64 |
LT64 |
ADD64 |
-   MUL64;
+   MUL64 |
+   D2U;
  lower_64bit_double_instructions(ir, lower_inst);
   }
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 26/47] glsl: Add a lowering pass for 64-bit float mul()

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/lower_64bit.cpp  | 5 -
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl/lower_64bit.cpp 
b/src/compiler/glsl/lower_64bit.cpp
index 414e8414f9..9087cabda2 100644
--- a/src/compiler/glsl/lower_64bit.cpp
+++ b/src/compiler/glsl/lower_64bit.cpp
@@ -478,7 +478,10 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
 
case ir_binop_mul:
   if (lowering(MUL64)) {
- *rvalue = handle_op(ir, "__builtin_umul64", generate_ir::umul64);
+ if (ir->type->base_type == GLSL_TYPE_DOUBLE)
+*rvalue = handle_op(ir, "__builtin_fmul64", generate_ir::fmul64);
+ else
+*rvalue = handle_op(ir, "__builtin_umul64", generate_ir::umul64);
   }
   break;
 
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index fab331edea..488d052336 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -7064,7 +7064,8 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
EQ64 |
LE64 |
LT64 |
-   ADD64;
+   ADD64 |
+   MUL64;
  lower_64bit_double_instructions(ir, lower_inst);
   }
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 32/47] glsl: Add a lowering pass for 64-bit float f2d()

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/ir_optimization.h| 1 +
 src/compiler/glsl/lower_64bit.cpp  | 7 +++
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++-
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/ir_optimization.h 
b/src/compiler/glsl/ir_optimization.h
index 2d5a210dca..dce0bf4a9f 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -73,6 +73,7 @@
 #define D2I   (1U << 12)
 #define I2D   (1U << 13)
 #define D2F   (1U << 14)
+#define F2D   (1U << 15)
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/src/compiler/glsl/lower_64bit.cpp 
b/src/compiler/glsl/lower_64bit.cpp
index 48c053b02b..054cdcb50a 100644
--- a/src/compiler/glsl/lower_64bit.cpp
+++ b/src/compiler/glsl/lower_64bit.cpp
@@ -457,6 +457,13 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
   }
   break;
 
+   case ir_unop_f2d:
+  if (lowering(F2D)) {
+ if (ir->type->base_type == GLSL_TYPE_DOUBLE)
+*rvalue = handle_op(ir, "__builtin_fp32_to_fp64", 
generate_ir::fp32_to_fp64, true);
+  }
+  break;
+
case ir_unop_i2d:
   if (lowering(I2D)) {
  if (ir->type->base_type == GLSL_TYPE_DOUBLE)
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 53d85360b2..f26368812f 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -7070,7 +7070,8 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
U2D |
D2I |
I2D |
-   D2F;
+   D2F |
+   F2D;
  lower_64bit_double_instructions(ir, lower_inst);
   }
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 34/47] glsl: Add a lowering pass for 64-bit float trunc()

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/ir_optimization.h| 1 +
 src/compiler/glsl/lower_64bit.cpp  | 7 +++
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++-
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/ir_optimization.h 
b/src/compiler/glsl/ir_optimization.h
index 7a5a30541f..3566673d30 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -75,6 +75,7 @@
 #define D2F   (1U << 14)
 #define F2D   (1U << 15)
 #define SQRT64 (1U << 16)
+#define TRUNC64   (1U << 17)
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/src/compiler/glsl/lower_64bit.cpp 
b/src/compiler/glsl/lower_64bit.cpp
index 3681c4f0c5..0bbcfacf2a 100644
--- a/src/compiler/glsl/lower_64bit.cpp
+++ b/src/compiler/glsl/lower_64bit.cpp
@@ -494,6 +494,13 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
   }
   break;
 
+   case ir_unop_trunc:
+  if (lowering(TRUNC64)) {
+ if (ir->type->base_type == GLSL_TYPE_DOUBLE)
+*rvalue = handle_op(ir, "__builtin_ftrunc64", 
generate_ir::ftrunc64);
+  }
+  break;
+
case ir_unop_u2d:
   if (lowering(U2D)) {
  if (ir->type->base_type == GLSL_TYPE_DOUBLE)
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 474e9bcdea..f746c5531a 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -7072,7 +7072,8 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
I2D |
D2F |
F2D |
-   SQRT64;
+   SQRT64 |
+   TRUNC64;
  lower_64bit_double_instructions(ir, lower_inst);
   }
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 45/47] glsl: Add a lowering pass for 64-bit float ceil()

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/lower_instructions.cpp | 31 +--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl/lower_instructions.cpp 
b/src/compiler/glsl/lower_instructions.cpp
index 50c35124fb..3ef13d1166 100644
--- a/src/compiler/glsl/lower_instructions.cpp
+++ b/src/compiler/glsl/lower_instructions.cpp
@@ -180,6 +180,7 @@ private:
void min_to_less(ir_expression *ir);
void max_to_less(ir_expression *ir);
void dfloor_to_dtrunc(ir_expression *ir);
+   void dceil_to_dtrunc(ir_expression *ir);
 
ir_expression *_carry(operand a, operand b);
 };
@@ -1685,6 +1686,27 @@ 
lower_instructions_visitor::dfloor_to_dtrunc(ir_expression *ir)
this->progress = true;
 }
 
+void
+lower_instructions_visitor::dceil_to_dtrunc(ir_expression *ir)
+{
+   /* if x < 0,ceil(x) = trunc(x)
+* else if (x - trunc(x) == 0), ceil(x) = x
+* else,ceil(x) = trunc(x) + 1
+*/
+
+   ir_rvalue *src = ir->operands[0]->clone(ir, NULL);
+   ir_rvalue *tr = trunc(src);
+
+   ir->operation = ir_triop_csel;
+   ir->init_num_operands();
+   ir->operands[0] = logic_or(less(src, new(ir) ir_constant(0.0, 1)),
+  equal(src, tr));
+   ir->operands[1] = tr;
+   ir->operands[2] = add(tr, new(ir) ir_constant(1.0, 1));
+
+   this->progress = true;
+}
+
 ir_visitor_status
 lower_instructions_visitor::visit_leave(ir_expression *ir)
 {
@@ -1768,8 +1790,13 @@ lower_instructions_visitor::visit_leave(ir_expression 
*ir)
   break;
 
case ir_unop_ceil:
-  if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
- dceil_to_dfrac(ir);
+  if (ir->type->is_double()) {
+ if (lowering(DOPS_TO_DFRAC)) {
+dceil_to_dfrac(ir);
+ } else if (lowering(DOPS_TO_DTRUNC) && ir->type->is_scalar()) {
+dceil_to_dtrunc(ir);
+ }
+  }
   break;
 
case ir_unop_floor:
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 43/47] glsl: Add a lowering pass for 64-bit float max()

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/lower_instructions.cpp | 21 +
 1 file changed, 21 insertions(+)

diff --git a/src/compiler/glsl/lower_instructions.cpp 
b/src/compiler/glsl/lower_instructions.cpp
index 76dde68c23..c6c3a627d3 100644
--- a/src/compiler/glsl/lower_instructions.cpp
+++ b/src/compiler/glsl/lower_instructions.cpp
@@ -177,6 +177,7 @@ private:
void imul_high_to_mul(ir_expression *ir);
void sqrt_to_abs_sqrt(ir_expression *ir);
void min_to_less(ir_expression *ir);
+   void max_to_less(ir_expression *ir);
 
ir_expression *_carry(operand a, operand b);
 };
@@ -1645,6 +1646,20 @@ lower_instructions_visitor::min_to_less(ir_expression 
*ir)
this->progress = true;
 }
 
+void
+lower_instructions_visitor::max_to_less(ir_expression *ir)
+{
+   ir_rvalue *x_clone = ir->operands[0]->clone(ir, NULL);
+   ir_rvalue *y_clone = ir->operands[1]->clone(ir, NULL);
+   ir->operation = ir_triop_csel;
+   ir->init_num_operands();
+   ir->operands[0] = less(ir->operands[0], ir->operands[1]);
+   ir->operands[1] = y_clone;
+   ir->operands[2] = x_clone;
+
+   this->progress = true;
+}
+
 ir_visitor_status
 lower_instructions_visitor::visit_leave(ir_expression *ir)
 {
@@ -1794,6 +1809,12 @@ lower_instructions_visitor::visit_leave(ir_expression 
*ir)
  min_to_less(ir);
   break;
 
+   case ir_binop_max:
+  if (lowering(MIN_MAX_TO_LESS) &&
+  ir->type->is_double() && ir->type->is_scalar())
+ max_to_less(ir);
+  break;
+
default:
   return visit_continue;
}
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 41/47] glsl: Add a lowering pass for 64-bit float nequal()

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/lower_64bit.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/lower_64bit.cpp 
b/src/compiler/glsl/lower_64bit.cpp
index f9e565600b..441440d729 100644
--- a/src/compiler/glsl/lower_64bit.cpp
+++ b/src/compiler/glsl/lower_64bit.cpp
@@ -419,7 +419,8 @@ lower_64bit::lower_op_to_function_call(ir_instruction 
*base_ir,
 
   if (ir->operation == ir_unop_d2b ||
   ir->operation == ir_binop_gequal ||
-  ir->operation == ir_binop_greater)
+  ir->operation == ir_binop_greater ||
+  ir->operation == ir_binop_nequal)
  body.emit(assign(dst[i], logic_not(dst[i])));
}
 
@@ -612,6 +613,7 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
   }
   break;
 
+   case ir_binop_nequal:
case ir_binop_equal:
   if (lowering(EQ64)) {
  if (ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE)
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 31/47] glsl: Add a lowering pass for 64-bit float d2f()

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/ir_optimization.h| 1 +
 src/compiler/glsl/lower_64bit.cpp  | 7 +++
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++-
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/ir_optimization.h 
b/src/compiler/glsl/ir_optimization.h
index c59d7f6ee2..2d5a210dca 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -72,6 +72,7 @@
 #define U2D   (1U << 11)
 #define D2I   (1U << 12)
 #define I2D   (1U << 13)
+#define D2F   (1U << 14)
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/src/compiler/glsl/lower_64bit.cpp 
b/src/compiler/glsl/lower_64bit.cpp
index 570f997779..48c053b02b 100644
--- a/src/compiler/glsl/lower_64bit.cpp
+++ b/src/compiler/glsl/lower_64bit.cpp
@@ -436,6 +436,13 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
   }
   break;
 
+   case ir_unop_d2f:
+  if (lowering(D2F)) {
+ if (ir->type->base_type == GLSL_TYPE_FLOAT)
+*rvalue = handle_op(ir, "__builtin_fp64_to_fp32", 
generate_ir::fp64_to_fp32);
+  }
+  break;
+
case ir_unop_d2i:
   if (lowering(D2I)) {
  if (ir->type->base_type == GLSL_TYPE_INT)
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 1be223a3cb..53d85360b2 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -7069,7 +7069,8 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
D2U |
U2D |
D2I |
-   I2D;
+   I2D |
+   D2F;
  lower_64bit_double_instructions(ir, lower_inst);
   }
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 36/47] glsl: Add a lowering pass for 64-bit float rcp()

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/ir_optimization.h| 1 +
 src/compiler/glsl/lower_64bit.cpp  | 7 +++
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++-
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/ir_optimization.h 
b/src/compiler/glsl/ir_optimization.h
index 4a44ee7660..e18924d62b 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -77,6 +77,7 @@
 #define SQRT64(1U << 16)
 #define TRUNC64   (1U << 17)
 #define ROUND64   (1U << 18)
+#define RCP64 (1U << 19)
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/src/compiler/glsl/lower_64bit.cpp 
b/src/compiler/glsl/lower_64bit.cpp
index 894503a14f..f8fcf9fd0a 100644
--- a/src/compiler/glsl/lower_64bit.cpp
+++ b/src/compiler/glsl/lower_64bit.cpp
@@ -478,6 +478,13 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
   }
   break;
 
+   case ir_unop_rcp:
+  if (lowering(RCP64)) {
+ if (ir->type->base_type == GLSL_TYPE_DOUBLE)
+*rvalue = handle_op(ir, "__builtin_frcp64", generate_ir::frcp64);
+  }
+  break;
+
case ir_unop_round_even:
   if (lowering(ROUND64)) {
  if (ir->type->base_type == GLSL_TYPE_DOUBLE)
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 760ffd594e..dc88a881f6 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -7074,7 +7074,8 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
F2D |
SQRT64 |
TRUNC64 |
-   ROUND64;
+   ROUND64 |
+   RCP64;
  lower_64bit_double_instructions(ir, lower_inst);
   }
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 47/47] glsl: Add a lowering pass for 64-bit float div()

2017-08-23 Thread Elie Tournier

This is probably not the correct way to lower the division.
The lowering should be enabled only for r600g and not in the parser.

The division doesn't pass the piglit test due to the rcp implementation.

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/glsl_parser_extras.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/compiler/glsl/glsl_parser_extras.cpp 
b/src/compiler/glsl/glsl_parser_extras.cpp
index cfb214e440..ac50737986 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -2169,6 +2169,7 @@ do_common_optimization(exec_list *ir, bool linked,
} while (false)
 
OPT(lower_instructions, ir, SUB_TO_ADD_NEG);
+   OPT(lower_instructions, ir, DDIV_TO_MUL_RCP);
 
if (linked) {
   OPT(do_function_inlining, ir);
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 42/47] glsl: Add a lowering pass for 64-bit float min()

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/ir_optimization.h|  1 +
 src/compiler/glsl/lower_instructions.cpp   | 28 
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  1 +
 3 files changed, 30 insertions(+)

diff --git a/src/compiler/glsl/ir_optimization.h 
b/src/compiler/glsl/ir_optimization.h
index e18924d62b..a2ebc16e93 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -54,6 +54,7 @@
 #define DDIV_TO_MUL_RCP   0x10
 #define DIV_TO_MUL_RCP(FDIV_TO_MUL_RCP | DDIV_TO_MUL_RCP)
 #define SQRT_TO_ABS_SQRT  0x20
+#define MIN_MAX_TO_LESS   0x40
 
 /* Operations for lower_64bit_integer_instructions()
  * and lower_64bit_double_instructions()
diff --git a/src/compiler/glsl/lower_instructions.cpp 
b/src/compiler/glsl/lower_instructions.cpp
index 0c1408911d..76dde68c23 100644
--- a/src/compiler/glsl/lower_instructions.cpp
+++ b/src/compiler/glsl/lower_instructions.cpp
@@ -43,6 +43,7 @@
  * - BORROW_TO_ARITH
  * - SAT_TO_CLAMP
  * - DOPS_TO_DFRAC
+ * - MIN_MAX_TO_LESS
  *
  * SUB_TO_ADD_NEG:
  * ---
@@ -115,6 +116,12 @@
  * DOPS_TO_DFRAC:
  * --
  * Converts double trunc, ceil, floor, round to fract
+ *
+ * MIN_MAX_TO_LESS:
+ * 
+ * Converts min, max into less.
+ * min(x,y) = less(x,y) ? x : y;
+ * max(x,y) = less(x,y) ? y : x;
  */
 
 #include "c99_math.h"
@@ -169,6 +176,7 @@ private:
void find_msb_to_float_cast(ir_expression *ir);
void imul_high_to_mul(ir_expression *ir);
void sqrt_to_abs_sqrt(ir_expression *ir);
+   void min_to_less(ir_expression *ir);
 
ir_expression *_carry(operand a, operand b);
 };
@@ -1623,6 +1631,20 @@ 
lower_instructions_visitor::sqrt_to_abs_sqrt(ir_expression *ir)
this->progress = true;
 }
 
+void
+lower_instructions_visitor::min_to_less(ir_expression *ir)
+{
+   ir_rvalue *x_clone = ir->operands[0]->clone(ir, NULL);
+   ir_rvalue *y_clone = ir->operands[1]->clone(ir, NULL);
+   ir->operation = ir_triop_csel;
+   ir->init_num_operands();
+   ir->operands[0] = less(ir->operands[0], ir->operands[1]);
+   ir->operands[1] = x_clone;
+   ir->operands[2] = y_clone;
+
+   this->progress = true;
+}
+
 ir_visitor_status
 lower_instructions_visitor::visit_leave(ir_expression *ir)
 {
@@ -1766,6 +1788,12 @@ lower_instructions_visitor::visit_leave(ir_expression 
*ir)
  sqrt_to_abs_sqrt(ir);
   break;
 
+   case ir_binop_min:
+  if (lowering(MIN_MAX_TO_LESS) &&
+  ir->type->is_double() && ir->type->is_scalar())
+ min_to_less(ir);
+  break;
+
default:
   return visit_continue;
}
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index dc88a881f6..ef3a15932a 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -7058,6 +7058,7 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
*/
   if (!pscreen->get_param(pscreen, PIPE_CAP_DOUBLES) &&
 ctx->Const.GLSLVersion >= 130) {
+ lower_instructions(ir, MIN_MAX_TO_LESS);
  unsigned lower_inst = ABS64 |
NEG64 |
SIGN64 |
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] gallium/docs: improve docs for SAMPLE_POS, SAMPLE_INFO, TXQS, MSAA semantics

2017-08-23 Thread Marek Olšák

On Wed, Aug 23, 2017 at 1:04 PM, Marek Olšák  wrote:
> On Wed, Aug 23, 2017 at 2:49 AM, Roland Scheidegger  
> wrote:
>> Am 23.08.2017 um 01:59 schrieb Marek Olšák:
>>> On Wed, Aug 23, 2017 at 12:30 AM, Roland Scheidegger  
>>> wrote:
 Am 22.08.2017 um 17:15 schrieb Marek Olšák:
> On Sun, Aug 20, 2017 at 12:32 AM, Roland Scheidegger  
> wrote:
>> Am 19.08.2017 um 21:32 schrieb Marek Olšák:
>>> How about we remove all opcodes that are unused? Like:
>>>
>>> SAMPLE_POS
>>> SAMPLE_INFO
>>> SAMPLE
>>> SAMPLE_I
>>> SAMPLE_I_MS
>>> SAMPLE_B
>>> SAMPLE_C
>>> SAMPLE_C_LZ
>>> SAMPLE_D
>>> SAMPLE_L
>>> GATHER4
>>> SVIEWINFO
>> These are all d3d10 opcodes, and we need them (llvmpipe supports all of
>> them with the exception of sample_pos and sample_info, right now). (It's
>
> SAMPLE_INFO is almost the same as TXQS and given the current state of
> driver support, it would be better to remove SAMPLE_INFO and keep
> TXQS.
>
> SAMPLE_INFO returns (samples, 0, 0, 0), while TXQS returns (samples,
> undef, undef, undef).
>
> There is also RESQ, which returns (w, h, d|layers, samples).
>

 They take different register types, however.
>>>
>>> Most instructions support multiple register types. MOV supports TEMP,
>>> CONST, IN, OUT. LOAD supports IMAGE, BUFFER, and in the future maybe
>>> also CONSTBUF and SAMP.
>>>
>> That's true, but there aren't really any opcodes which could take either
>> sampler view reg file or sampler. Albeit I suppose it would be doable.
>> Though it looks to me like you could easily ditch TXQS in favor of RESQ
>> too then...
>
> and TXF could be ditched in favor of LOAD.

I also forgot to say that TXF, TXQ, TEX and all tex opcodes, LOAD,
STORE, and ATOMIC* accept IN, CONST, TEMP, OUT (if reads are allowed)
as the resource, which means it's a bindless resource handle.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 44/47] glsl: Add a lowering pass for 64-bit float floor()

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/ir_optimization.h|  1 +
 src/compiler/glsl/lower_instructions.cpp   | 34 --
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  2 +-
 3 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/src/compiler/glsl/ir_optimization.h 
b/src/compiler/glsl/ir_optimization.h
index a2ebc16e93..f004380670 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -55,6 +55,7 @@
 #define DIV_TO_MUL_RCP(FDIV_TO_MUL_RCP | DDIV_TO_MUL_RCP)
 #define SQRT_TO_ABS_SQRT  0x20
 #define MIN_MAX_TO_LESS   0x40
+#define DOPS_TO_DTRUNC0x80
 
 /* Operations for lower_64bit_integer_instructions()
  * and lower_64bit_double_instructions()
diff --git a/src/compiler/glsl/lower_instructions.cpp 
b/src/compiler/glsl/lower_instructions.cpp
index c6c3a627d3..50c35124fb 100644
--- a/src/compiler/glsl/lower_instructions.cpp
+++ b/src/compiler/glsl/lower_instructions.cpp
@@ -44,6 +44,7 @@
  * - SAT_TO_CLAMP
  * - DOPS_TO_DFRAC
  * - MIN_MAX_TO_LESS
+ * - DOPS_TO_DTRUNC
  *
  * SUB_TO_ADD_NEG:
  * ---
@@ -178,6 +179,7 @@ private:
void sqrt_to_abs_sqrt(ir_expression *ir);
void min_to_less(ir_expression *ir);
void max_to_less(ir_expression *ir);
+   void dfloor_to_dtrunc(ir_expression *ir);
 
ir_expression *_carry(operand a, operand b);
 };
@@ -1660,6 +1662,29 @@ lower_instructions_visitor::max_to_less(ir_expression 
*ir)
this->progress = true;
 }
 
+void
+lower_instructions_visitor::dfloor_to_dtrunc(ir_expression *ir)
+{
+   /*
+* For x >= 0, floor(x) = trunc(x)
+* For x < 0,
+*- if x is integer, floor(x) = x
+*- otherwise, floor(x) = trunc(x) - 1
+*/
+
+   ir_rvalue *src = ir->operands[0]->clone(ir, NULL);
+   ir_rvalue *tr = trunc(src);
+
+   ir->operation = ir_triop_csel;
+   ir->init_num_operands();
+   ir->operands[0] = logic_or(gequal(src, new(ir) ir_constant(0.0, 1)),
+  equal(src, tr));
+   ir->operands[1] = tr;
+   ir->operands[2] = add(tr, new(ir) ir_constant(-1.0, 1));
+
+   this->progress = true;
+}
+
 ir_visitor_status
 lower_instructions_visitor::visit_leave(ir_expression *ir)
 {
@@ -1748,8 +1773,13 @@ lower_instructions_visitor::visit_leave(ir_expression 
*ir)
   break;
 
case ir_unop_floor:
-  if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
- dfloor_to_dfrac(ir);
+  if (ir->type->is_double()) {
+ if (lowering(DOPS_TO_DFRAC)) {
+dfloor_to_dfrac(ir);
+ } else if (lowering(DOPS_TO_DTRUNC) && ir->type->is_scalar()) {
+dfloor_to_dtrunc(ir);
+ }
+  }
   break;
 
case ir_unop_round_even:
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index ef3a15932a..4fbf2c0a99 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -7058,7 +7058,7 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
*/
   if (!pscreen->get_param(pscreen, PIPE_CAP_DOUBLES) &&
 ctx->Const.GLSLVersion >= 130) {
- lower_instructions(ir, MIN_MAX_TO_LESS);
+ lower_instructions(ir, MIN_MAX_TO_LESS | DOPS_TO_DTRUNC);
  unsigned lower_inst = ABS64 |
NEG64 |
SIGN64 |
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 35/47] glsl: Add a lowering pass for 64-bit float round()

2017-08-23 Thread Elie Tournier

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/ir_optimization.h| 1 +
 src/compiler/glsl/lower_64bit.cpp  | 7 +++
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++-
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/ir_optimization.h 
b/src/compiler/glsl/ir_optimization.h
index 3566673d30..4a44ee7660 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -76,6 +76,7 @@
 #define F2D   (1U << 15)
 #define SQRT64(1U << 16)
 #define TRUNC64   (1U << 17)
+#define ROUND64   (1U << 18)
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/src/compiler/glsl/lower_64bit.cpp 
b/src/compiler/glsl/lower_64bit.cpp
index 0bbcfacf2a..894503a14f 100644
--- a/src/compiler/glsl/lower_64bit.cpp
+++ b/src/compiler/glsl/lower_64bit.cpp
@@ -478,6 +478,13 @@ lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
   }
   break;
 
+   case ir_unop_round_even:
+  if (lowering(ROUND64)) {
+ if (ir->type->base_type == GLSL_TYPE_DOUBLE)
+*rvalue = handle_op(ir, "__builtin_fround64", 
generate_ir::fround64);
+  }
+  break;
+
case ir_unop_sign:
   if (lowering(SIGN64)) {
  if (ir->type->base_type == GLSL_TYPE_DOUBLE)
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index f746c5531a..760ffd594e 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -7073,7 +7073,8 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
D2F |
F2D |
SQRT64 |
-   TRUNC64;
+   TRUNC64 |
+   ROUND64;
  lower_64bit_double_instructions(ir, lower_inst);
   }
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] vbo: fix glVertexAttrib(index=0)

2017-08-23 Thread Marek Olšák

Reviewed-by: Marek Olšák 

Marek

On Tue, Aug 22, 2017 at 10:21 PM, Brian Paul  wrote:
> Depending on which extension or GL spec you read the behavior of
> glVertexAttrib(index=0) either sets the current value for generic
> attribute 0, or it emits a vertex just like glVertex().  I believe
> it should do either, depending on context (see below).
>
> The piglit gl-2.0-vertex-const-attr test declares two vertex attributes:
>   attribute vec2 vertex;
>   attribute vec4 attr;
> and the GLSL linker assigns "vertex" to location 0 and "attr" to location 1.
> The test passes.
>
> But if the declarations were reversed such that "attr" was location 0 and
> "vertex" was location 1, the test would fail to draw properly.
>
> The problem is the call to glVertexAttrib(index=0) to set attr's value
> was interpreted as glVertex() and did not set generic attribute[0]'s value.
> Interestingly, calling glVertex() outside glBegin/End (which is effectively
> what the piglit test does) does not generate a GL error.
>
> I believe the behavior of glVertexAttrib(index=0) should depend on
> whether it's called inside or outside of glBegin/glEnd().  If inside
> glBegin/End(), it should act like glVertex().  Else, it should behave
> like glVertexAttrib(index > 0).  This seems to be what NVIDIA does.
>
> This patch makes two changes:
>
> 1. Check if we're inside glBegin/End for glVertexAttrib()
> 2. Fix the vertex array binding for recalculate_input_bindings().  As it was,
>we were using &vbo->currval[VBO_ATTRIB_POS], but that's interpreted
>as a zero-stride attribute and doesn't make sense for array drawing.
>
> No Piglit regressions.  Fixes updated gl-2.0-vertex-const-attr test and
> passes new gl-2.0-vertex-attrib-0 test.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101941
> ---
>  src/mesa/vbo/vbo_attrib_tmp.h | 7 +--
>  src/mesa/vbo/vbo_exec_array.c | 2 +-
>  2 files changed, 6 insertions(+), 3 deletions(-)
>
> diff --git a/src/mesa/vbo/vbo_attrib_tmp.h b/src/mesa/vbo/vbo_attrib_tmp.h
> index 5718ac5..126e4ef 100644
> --- a/src/mesa/vbo/vbo_attrib_tmp.h
> +++ b/src/mesa/vbo/vbo_attrib_tmp.h
> @@ -524,15 +524,18 @@ TAG(MultiTexCoord4fv)(GLenum target, const GLfloat * v)
>
>  /**
>   * If index=0, does glVertexAttrib*() alias glVertex() to emit a vertex?
> + * It depends on a few things, including whether we're inside or outside
> + * of glBegin/glEnd.
>   */
>  static inline bool
>  is_vertex_position(const struct gl_context *ctx, GLuint index)
>  {
> -   return index == 0 && _mesa_attr_zero_aliases_vertex(ctx);
> +   return (index == 0 &&
> +   _mesa_attr_zero_aliases_vertex(ctx) &&
> +   _mesa_inside_begin_end(ctx));
>  }
>
>
> -
>  static void GLAPIENTRY
>  TAG(VertexAttrib1fARB)(GLuint index, GLfloat x)
>  {
> diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
> index edd55ce..e3421fa 100644
> --- a/src/mesa/vbo/vbo_exec_array.c
> +++ b/src/mesa/vbo/vbo_exec_array.c
> @@ -356,7 +356,7 @@ recalculate_input_bindings(struct gl_context *ctx)
>   else if (array[VERT_ATTRIB_POS].Enabled)
>  inputs[0] = &vertexAttrib[VERT_ATTRIB_POS];
>   else {
> -inputs[0] = &vbo->currval[VBO_ATTRIB_POS];
> +inputs[0] = &vbo->currval[VBO_ATTRIB_GENERIC0];
>  const_inputs |= VERT_BIT_POS;
>   }
>
> --
> 1.9.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 1/5] util/disk_cache: rename mesa cache dir and introduce cache versioning

2017-08-23 Thread Vedran Miletić

On 08/23/2017 08:32 AM, Timothy Arceri wrote:
> Steam is already analysing cache items, unfortunately we did not
> introduce a versioning mechanism for identifying structural changes
> to cache entries earlier so the only way to do so is to rename the
> cache directory.
> 
> Since we are renaming it we take the opportunity to give the directory
> a more meaningful name.
> 
> Adding a version field to the header of cache entries will help us to
> avoid having to rename the directory in future. Please note this is
> versioning for the internal structure of the entries as defined in
> disk_cache.{c,h} as opposed to the structure of the data provided to
> the disk cache by the GLSL compiler and the various driver backends.
> ---
>  src/compiler/glsl/tests/cache_test.c |  6 +++--
>  src/util/disk_cache.c| 46 
> ++--
>  src/util/disk_cache.h|  2 ++
>  3 files changed, 40 insertions(+), 14 deletions(-)
> 
> diff --git a/src/compiler/glsl/tests/cache_test.c 
> b/src/compiler/glsl/tests/cache_test.c
> index af1b66fb3d..3796ce6170 100644
> --- a/src/compiler/glsl/tests/cache_test.c
> +++ b/src/compiler/glsl/tests/cache_test.c
> @@ -178,38 +178,40 @@ test_disk_cache_create(void)
> /* Test with XDG_CACHE_HOME set */
> setenv("XDG_CACHE_HOME", CACHE_TEST_TMP "/xdg-cache-home", 1);
> cache = disk_cache_create("test", "make_check", 0);
> expect_null(cache, "disk_cache_create with XDG_CACHE_HOME set with"
> "a non-existing parent directory");
>  
> mkdir(CACHE_TEST_TMP, 0755);
> cache = disk_cache_create("test", "make_check", 0);
> expect_non_null(cache, "disk_cache_create with XDG_CACHE_HOME set");
>  
> -   check_directories_created(CACHE_TEST_TMP "/xdg-cache-home/mesa");
> +   check_directories_created(CACHE_TEST_TMP "/xdg-cache-home/"
> + CACHE_DIR_NAME);
>  
> disk_cache_destroy(cache);
>  
> /* Test with MESA_GLSL_CACHE_DIR set */
> err = rmrf_local(CACHE_TEST_TMP);
> expect_equal(err, 0, "Removing " CACHE_TEST_TMP);
>  
> setenv("MESA_GLSL_CACHE_DIR", CACHE_TEST_TMP "/mesa-glsl-cache-dir", 1);
> cache = disk_cache_create("test", "make_check", 0);
> expect_null(cache, "disk_cache_create with MESA_GLSL_CACHE_DIR set with"
> "a non-existing parent directory");
>  
> mkdir(CACHE_TEST_TMP, 0755);
> cache = disk_cache_create("test", "make_check", 0);
> expect_non_null(cache, "disk_cache_create with MESA_GLSL_CACHE_DIR set");
>  
> -   check_directories_created(CACHE_TEST_TMP "/mesa-glsl-cache-dir/mesa");
> +   check_directories_created(CACHE_TEST_TMP "/mesa-glsl-cache-dir/"
> + CACHE_DIR_NAME);
>  
> disk_cache_destroy(cache);
>  }
>  
>  static bool
>  does_cache_contain(struct disk_cache *cache, const cache_key key)
>  {
> void *result;
>  
> result = disk_cache_get(cache, key, NULL);
> diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
> index b2229874e0..644a911e53 100644
> --- a/src/util/disk_cache.c
> +++ b/src/util/disk_cache.c
> @@ -51,20 +51,34 @@
>  
>  /* Number of bits to mask off from a cache key to get an index. */
>  #define CACHE_INDEX_KEY_BITS 16
>  
>  /* Mask for computing an index from a key. */
>  #define CACHE_INDEX_KEY_MASK ((1 << CACHE_INDEX_KEY_BITS) - 1)
>  
>  /* The number of keys that can be stored in the index. */
>  #define CACHE_INDEX_MAX_KEYS (1 << CACHE_INDEX_KEY_BITS)
>  
> +/* The cache version should be bumped whenever a change is made to the
> + * structure of cache entries or the index. This will give any 3rd party
> + * applications reading the cache entries a chance to adjust to the changes.
> + *
> + * - The cache version is checked internally when reading a cache entry. If 
> we
> + *   ever have a mismatch we are in big trouble as this means we had a cache
> + *   collision. In case of such an event please check the skys for giant
> + *   asteroids and that the entire Mesa team hasn't been eaten by wolves.
> + *
> + * - There is no strict requirement that cache versions be backwards
> + *   compatible but effort should be taken to limit disruption where 
> possible.
> + */
> +#define CACHE_VERSION 1
> +
>  struct disk_cache {
> /* The path to the cache directory. */
> char *path;
>  
> /* Thread queue for compressing and writing cache entries to disk */
> struct util_queue cache_queue;
>  
> /* Seed for rand, which is used to pick a random directory */
> uint64_t seed_xorshift128plus[2];
>  
> @@ -153,20 +167,25 @@ concatenate_and_mkdir(void *ctx, const char *path, 
> const char *name)
>return NULL;
>  
> new_path = ralloc_asprintf(ctx, "%s/%s", path, name);
>  
> if (mkdir_if_needed(new_path) == 0)
>return new_path;
> else
>return NULL;
>  }
>  
> +#define DRV_KEY_CPY(_dst, _src, _src_size) { \
> +   memcpy(_dst, _src, _src_size);\
> +   _dst += _src_size;

[Mesa-dev] [Bug 100199] [performance] clover: implement non-blocking clEnqueueWriteBuffer and clEnqueueReadBuffer

2017-08-23 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=100199

Vedran Miletić  changed:

   What|Removed |Added

Summary|clover: implement   |[performance] clover:
   |non-blocking|implement non-blocking
   |clEnqueueWriteBuffer and|clEnqueueWriteBuffer and
   |clEnqueueReadBuffer |clEnqueueReadBuffer
 Blocks||99553


Referenced Bugs:

https://bugs.freedesktop.org/show_bug.cgi?id=99553
[Bug 99553] Tracker bug for runnning OpenCL applications on Clover
-- 
You are receiving this mail because:
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH V2 1/2] mesa: Fix backward compatibility for XML parser

2017-08-23 Thread Nicolai Hähnle


On 19.08.2017 10:01, QuRyu wrote:

From: Quentin Liu 

If the type of a drirc option is changed, the parser will not be able to
recognize XML files that were present before the change. To achieve
backward compatibility, the parser is relaxed to recognize boolean type
options with enum values.
---
  src/util/xmlconfig.c | 17 +++--
  1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/util/xmlconfig.c b/src/util/xmlconfig.c
index d3f47ec..d81a07b 100644
--- a/src/util/xmlconfig.c
+++ b/src/util/xmlconfig.c
@@ -317,8 +317,21 @@ parseValue(driOptionValue *v, driOptionType type, const 
XML_Char *string)
  v->_bool = true;
  tail = string + 4;
  }
-else
-return false;
+else {
+/* Some drirc options, such as pp_celshade, were formerly enum
+ * values. Now that they have been turned into boolean values,
+ * to achieve backward compatibility relax the check here a
+ * little bit */
+int value = strToI(str...ing, &tail, 0);
+if (value == 1)
+v->_bool = true;
+else if (value == 0)
+v->_bool = false;
+else
+return false; /* wrong value here */
+}


Erm... this doesn't even compile.

Please make sure to always do at least this minimal amount of 
verification on patches you send out.


Another thing I noticed is that your patch has trailing whitespace. 
Please remove that (you should be able to configure your editor to do it 
automatically). Actually, Git should warn you about this.


Anyway, apart from these two issues that are really simple to fix, your 
changes look good to me, thanks!


Cheers,
Nicolai

P.S.: It is customary to add a line like

v2: brief explanation of changes

when sending out updated patches. Not really a big deal in this case, 
just thought you should know for the future :)




+   }
+
  break;
case DRI_ENUM: /* enum is just a special integer */
case DRI_INT:




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] gallium/docs: Add missing word "Not"

2017-08-23 Thread Nicolai Hähnle


On 22.08.2017 17:29, Gwan-gyeong Mun wrote:

Signed-off-by: Mun Gwan-gyeong 


R-b and pushed, thanks.



---
  src/gallium/docs/source/tgsi.rst | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index b148c3c939..f9b1385e55 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -1762,7 +1762,7 @@ two-component vectors with doubled precision in each 
component.
  
dst.z = src0.zw == src1.zw ? \sim 0 : 0
  
-.. opcode:: DSNE - Set on Equal

+.. opcode:: DSNE - Set on Not Equal
  
  .. math::
  




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] gallium/docs: Fix the math formula of U2I64

2017-08-23 Thread Nicolai Hähnle


On 22.08.2017 17:57, Gwan-gyeong Mun wrote:

before:
   dst.xy = (uint64_t) src0.x
   dst.zw = (uint64_t) src0.y

after:
   dst.xy = (int64_t) src0.x
   dst.zw = (int64_t) src0.y

Signed-off-by: Mun Gwan-gyeong 


R-b and pushed, thanks.



---
  src/gallium/docs/source/tgsi.rst | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index f9b1385e55..31331ef511 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -2199,9 +2199,9 @@ two-component vectors with 64-bits in each component.
  
  .. math::
  
-   dst.xy = (uint64_t) src0.x

+   dst.xy = (int64_t) src0.x
  
-   dst.zw = (uint64_t) src0.y

+   dst.zw = (int64_t) src0.y
  
  .. opcode:: I2I64 - Signed Integer to 64-bit Integer
  




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 00/47] WIP: fp64 support for r600g

2017-08-23 Thread Nicolai Hähnle


On 23.08.2017 13:07, Elie Tournier wrote:

From: Elie Tournier 

TL;DR
This series is a "status update" of my work done for adding fp64 support on 
r600g.
One of the biggest issues is due to a lack of accuracy in the rcp implementation.
Divide relies on rcp.

A branch is available on 
https://github.com/Hopetech/mesa/tree/glsl_arb_gpu_shader_fp64_v3
Comments and reviews are welcome.

Patches 1-18:
These few patches implement the basic fp64 operations.

Patches 19-47:
Lower operations using the builtin functions previously implemented.

Known issues:
- operations on matrix crash the system.
- sqrt and d2f are not accurate enough, so the piglit tests are failing.
   But sqrt and d2f are working correctly using softpipe.
   However, implementing sqrt64 as f2d(sqrt32(d2f())) seems to be good enough 
for Piglit.
- rcp is defined as pow(pow(x, -0.5), 2)
   NIR and NV convert the input to fp32, perform an rcp, convert back to 
fp64 and perform some Newton-Raphson steps.
   This is not possible with GLSL IR because using fma will generate a massive 
builtin_float64.h file.


I don't understand this part. You need multiplication and addition 
anyway. So if it's only fma which is the problem (why?), then why not 
just use non-fused multiply-add? It may end up being slightly less 
accurate, but we don't give any strong guarantees about rcp accuracy 
anyway, do we?


Cheers,
Nicolai



- dot is failing
- piglit report crashes on mod. This is a regression, I'm currently working on 
it.

Piglit result:
Pass: 917 Fail: 211 Crash: 241

Dave Airlie (2):
   glsl/lower_64bit: handle any/all operations
   glsl/lower_64bit: lower d2b using comparison

Elie Tournier (45):
   glsl: Add "built-in" function to do abs(fp64)
   glsl: Add "built-in" functions to do neg(fp64)
   glsl: Add "built-in" function to do sign(fp64)
   glsl: Add "built-in" functions to do eq(fp64, fp64)
   glsl: Add "built-in" functions to do le(fp64, fp64)
   glsl: Add "built-in" functions to do lt(fp64, fp64)
   glsl: Add "built-in" functions to do add(fp64, fp64)
   glsl: Add "built-in" functions to do mul(fp64, fp64)
   glsl: Add "built-in" functions to do fp64_to_uint(fp64)
   glsl: Add "built-in" functions to do uint_to_fp64(uint)
   glsl: Add "built-in" functions to do fp64_to_int(fp64)
   glsl: Add "built-in" functions to do int_to_fp64(int)
   glsl: Add "built-in" functions to do fp64_to_fp32(fp64)
   glsl: Add "built-in" functions to do fp32_to_fp64(fp32)
   glsl: Add "built-in" functions to do sqrt(fp64)
   glsl: Add "built-in" functions to do trunc(fp64)
   glsl: Add "built-in" functions to do round(fp64)
   glsl: Add "built-in" functions to do rcp(fp64)
   glsl: Add a lowering pass for 64-bit float abs()
   glsl: Add a lowering pass for 64-bit float neg()
   glsl: Add a lowering pass for 64-bit float sign()
   glsl: Add a lowering pass for 64-bit float equal()
   glsl: Add a lowering pass for 64-bit float lequal()
   glsl: Add a lowering pass for 64-bit float less()
   glsl: Add a lowering pass for 64-bit float add()
   glsl: Add a lowering pass for 64-bit float mul()
   glsl: Add a lowering pass for 64-bit float d2u()
   glsl: Add a lowering pass for 64-bit float u2d()
   glsl: Add a lowering pass for 64-bit float d2i()
   glsl: Add a lowering pass for 64-bit float i2d()
   glsl: Add a lowering pass for 64-bit float d2f()
   glsl: Add a lowering pass for 64-bit float f2d()
   glsl: Add a lowering pass for 64-bit float sqrt()
   glsl: Add a lowering pass for 64-bit float trunc()
   glsl: Add a lowering pass for 64-bit float round()
   glsl: Add a lowering pass for 64-bit float rcp()
   glsl: Add a lowering pass for 64-bit float gequal()
   glsl: Add a lowering pass for 64-bit float greater()
   glsl: Add a lowering pass for 64-bit float nequal()
   glsl: Add a lowering pass for 64-bit float min()
   glsl: Add a lowering pass for 64-bit float max()
   glsl: Add a lowering pass for 64-bit float floor()
   glsl: Add a lowering pass for 64-bit float ceil()
   glsl: Add a lowering pass for 64-bit float frac()
   glsl: Add a lowering pass for 64-bit float div()

  src/compiler/Makefile.sources  | 3 +-
  src/compiler/glsl/builtin_float64.h| 20310 +++
  src/compiler/glsl/builtin_functions.cpp|72 +
  src/compiler/glsl/builtin_functions.h  |54 +
  src/compiler/glsl/float64.glsl |  1494 ++
  src/compiler/glsl/generate_ir.cpp  | 1 +
  src/compiler/glsl/glcpp/glcpp-parse.y  |18 +
  src/compiler/glsl/glsl_parser_extras.cpp   | 1 +
  src/compiler/glsl/ir_optimization.h|25 +-
  .../glsl/{lower_int64.cpp => lower_64bit.cpp}  |   325 +-
  src/compiler/glsl/lower_instructions.cpp   |   139 +-
  src/mesa/state_tracker/st_extensions.c | 3 +-
  src/mesa/state_tracker/st_glsl_to_tgsi.cpp |27 +
  13 files changed, 22442 insertions(+), 30 deletions(-)
  crea

Re: [Mesa-dev] [PATCH 10/45] spirv: Enable FPRoundingMode decorator to nir operations

2017-08-23 Thread Chema Casanova

El 17/08/17 a las 21:16, Jason Ekstrand escribió:
> On Thu, Jul 13, 2017 at 7:35 AM, Alejandro Piñeiro
> mailto:apinhe...@igalia.com>> wrote:
>
> From: Jose Maria Casanova Crespo  >
>
> SpvOpFConvert now manages the FPRoundingMode decorator for the
> returning values enabling the nir_rounding_mode in the conversion
> operation to fp16 values.
> ---
>  src/compiler/spirv/vtn_alu.c | 33 -
>  1 file changed, 32 insertions(+), 1 deletion(-)
>
> diff --git a/src/compiler/spirv/vtn_alu.c
> b/src/compiler/spirv/vtn_alu.c
> index 7ec30b8..5a6cc1b 100644
> --- a/src/compiler/spirv/vtn_alu.c
> +++ b/src/compiler/spirv/vtn_alu.c
> @@ -354,7 +354,6 @@ vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode,
> bool *swap,
> case SpvOpConvertSToF:
> case SpvOpConvertUToF:
> case SpvOpSConvert:
> -   case SpvOpFConvert:
>
>
> Getting rid of this will break specialization constants.  Also, can
> you apply a rounding mode to an OpSpecConstantOp?  I'm guessing the
> spec doesn't say...

Good catch. This line removal wasn't needed at all, and we can just use
the nir_rounding_mode_undef for SpvOpFConvert. As you said the
VK_KHR_16bit_storage extension doesn't have impact on specialization
constants because it doesn't apply to conversions of constants that
could be specialized. It just applies to data to be loaded or stored using
push constants, uniforms, ssbo and input/outputs.

Locally fixed.

>  
>
>return nir_type_conversion_op(src, dst,
> nir_rounding_mode_undef);
>
> /* Derivatives: */
> @@ -381,6 +380,26 @@ handle_no_contraction(struct vtn_builder *b,
> struct vtn_value *val, int member,
> b->nb.exact = true;
>  }
>
> +static void
> +handle_rounding_mode(struct vtn_builder *b, struct vtn_value
> *val, int member,
> + const struct vtn_decoration *dec, void
> *_out_rounding_mode)
> +{
> +assert(dec->scope == VTN_DEC_DECORATION);
> +if (dec->decoration != SpvDecorationFPRoundingMode)
> +   return;
> +switch (dec->literals[0]) {
> +case SpvFPRoundingModeRTE:
> +   *((nir_rounding_mode *) _out_rounding_mode) =
> nir_rounding_mode_rtne;
> +   break;
> +case SpvFPRoundingModeRTZ:
> +   *((nir_rounding_mode *) _out_rounding_mode) =
> nir_rounding_mode_rtz;
> +   break;
> +default:
> +   unreachable("Not supported rounding mode");
> +   break;
> +}
> +}
> +
>  void
>  vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
> const uint32_t *w, unsigned count)
> @@ -568,6 +587,18 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp
> opcode,
>vtn_handle_bitcast(b, val->ssa, src[0]);
>break;
>
> +   case SpvOpFConvert: {
> +  nir_alu_type src_alu_type =
> nir_get_nir_type_for_glsl_type(vtn_src[0]->type);
> +  nir_alu_type dst_alu_type =
> nir_get_nir_type_for_glsl_type(type);
> +  nir_rounding_mode rounding_mode = nir_rounding_mode_undef;
> +
> +  vtn_foreach_decoration(b, val, handle_rounding_mode,
> &rounding_mode);
> +  nir_op op = nir_type_conversion_op(src_alu_type,
> dst_alu_type, rounding_mode);
> +
> +  val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1],
> NULL, NULL);
> +  break;
> +   }
> +
> default: {
>bool swap;
>nir_alu_type src_alu_type =
> nir_get_nir_type_for_glsl_type(vtn_src[0]->type);
> --
> 2.9.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org 
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
>
>

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] TGSI 16-bit support

2017-08-23 Thread Nicolai Hähnle


On 22.08.2017 22:39, Roland Scheidegger wrote:

Am 22.08.2017 um 19:10 schrieb Marek Olšák:

Hi,

I'd like to discuss 16-bit float and integer support in TGSI. I'm
proposing this:

  struct tgsi_instruction
  {
 unsigned Type   : 4;  /* TGSI_TOKEN_TYPE_INSTRUCTION */
 unsigned NrTokens   : 8;  /* UINT */
 unsigned Opcode : 8;  /* TGSI_OPCODE_ */
 unsigned Saturate   : 1;  /* BOOL */
 unsigned NumDstRegs : 2;  /* UINT */
 unsigned NumSrcRegs : 4;  /* UINT */
 unsigned Label  : 1;
 unsigned Texture: 1;
 unsigned Memory : 1;
 unsigned Precise: 1;
-   unsigned Padding: 1;
+   unsigned HalfPrecision : 1;
  };

There won't be any 16-bit TEMPs in TGSI, but each instruction will
have the HalfPrecision flag, which is a hint for drivers that they can
use a 16-bit opcode. Even texture, load, and store instructions can
set HalfPrecision, which means they can accept and return 16-bit
values.

The catch is that drivers will have to insert 16-bit <-> 32-bit
conversions manually, because they won't be present in TGSI. The
advantage is that we don't have to add 200 new opcodes for the 3 new
16-bit types.

What do you think?



Flagging instructions as 16bit doesn't look too bad to me, but I'm
wondering if this isn't a bit problematic wrt register files. Clearly,
this is a restriction of tgsi "everything is a 32x4 value". Doubles, of
course, have a similar problem, but in the end they still have
well-defined interactions with the register files, because it's defined
what bits ultimately represent a 64bit value (at least in theory from
tgsi's point of view, it is perfectly valid to use some 32bit
calculations to set some reg, then just use double instructions directly
without conversion on these values - it may not be meaningful but it is
well defined).
But it looks like you want to avoid to have a well-defined mapping of
the registers to 16bit types (and with 16 bits instruction just being
hints, I can't see how it could exist).
Note that being able to flag instructions as HalfPrecision does not
necessarily mean you can't have any explicit 16bit conversion
instructions too.


Those already exist: PK2H and UP2H. Or did you have something else in mind?

More generally, there are really two use cases for this, and we need to 
be careful not to mix them up:


- transparent downgrading to 16-bit of lowp and mediump
- support for extensions that explicitly introduce 16-bit types

For lowp and mediump, the approach of just having a HalfPrecision bit on 
the instructions is probably fine.


The second case is different. I don't think there are ARB extensions for 
that yet, but there are AMD_gpu_shader_{int16,half_float} with 
explicitly 16-bit types. (There's also NV_half_float, but that's from 
earlier days without GLSL.) For those, we'd really need to provide 
exactly the required operation. No special handling of TGSI temporaries 
is needed: an f16vec4 is represented as a normal 4-component vector in 
TGSI, just that the upper 16 bits of each component are ignored.


Here's another question: What does "low precision" mean on a texture 
instruction? Are the offsets low precision or is it the output? Maybe we 
can punt on this for now -- at least GCN doesn't have low precision 
there anyway.


To sum it up:
- I think there have to be separate flags for "this is a true 16-bit 
instruction" and for "optional low precision" -- in the latter, the 
driver is responsible for on-the-fly conversion between half and full types
- Apart from potential future issues with texture instructions, I think 
the flags on instructions are fine. So the plan is fine for GLES 
lowp/mediump.


Also, we're running out of bits here, but some of those bits can be 
moved into a separate instruction flags word when the time comes.


Cheers,
Nicolai




Roland
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] TGSI 16-bit support

2017-08-23 Thread Nicolai Hähnle


On 22.08.2017 19:32, Marek Olšák wrote:

On Tue, Aug 22, 2017 at 7:28 PM, Ilia Mirkin  wrote:

How do you propose defining the semantics for e.g. loading a 16-bit
value from a constbuf/ssbo? Would those get separate instructions?


st/mesa should use UP2H, PK2H and similar opcodes for I16 and U16, and
drivers can replace them with MOV if HalfPrecision == 1.


You mean, if HalfPrecision == 1 for subsequent operations?

How *do* we implement this for LLVM, anyway? Downcast (fptrunc) from 
float to half whenever we're loading operands of a HalfPrecision == 1 
instruction, and then casting (fpext) back up before storing the result?


LLVM instcombine seems quite capable of seeing through that in simple 
code, but I worry about control flow.


Cheers,
Nicolai



Marek



On Tue, Aug 22, 2017 at 1:10 PM, Marek Olšák  wrote:

Hi,

I'd like to discuss 16-bit float and integer support in TGSI. I'm
proposing this:

  struct tgsi_instruction
  {
 unsigned Type   : 4;  /* TGSI_TOKEN_TYPE_INSTRUCTION */
 unsigned NrTokens   : 8;  /* UINT */
 unsigned Opcode : 8;  /* TGSI_OPCODE_ */
 unsigned Saturate   : 1;  /* BOOL */
 unsigned NumDstRegs : 2;  /* UINT */
 unsigned NumSrcRegs : 4;  /* UINT */
 unsigned Label  : 1;
 unsigned Texture: 1;
 unsigned Memory : 1;
 unsigned Precise: 1;
-   unsigned Padding: 1;
+   unsigned HalfPrecision : 1;
  };

There won't be any 16-bit TEMPs in TGSI, but each instruction will
have the HalfPrecision flag, which is a hint for drivers that they can
use a 16-bit opcode. Even texture, load, and store instructions can
set HalfPrecision, which means they can accept and return 16-bit
values.

The catch is that drivers will have to insert 16-bit <-> 32-bit
conversions manually, because they won't be present in TGSI. The
advantage is that we don't have to add 200 new opcodes for the 3 new
16-bit types.

What do you think?

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/7] gallium: add CONSTBUF type to tgsi_file_type

2017-08-23 Thread Nicolai Hähnle


On 22.08.2017 16:56, Ilia Mirkin wrote:

On Tue, Aug 22, 2017 at 10:51 AM, Roland Scheidegger  wrote:

I am probably missing something here, but why do you need a new register
file? Since you couldn't use LOAD with TGSI_FILE_CONSTANT before, can't
you just allow LOAD with TGSI_FILE_CONSTANT and achieve the same thing?
Or do you need to know how it's going to be accessed in advance?


With bindless, LOAD can take a CONST I believe [which contains the
value of the bindless id]. I think it's nice to keep those concepts
separate... having CONST sometimes mean the value and other times mean
the address is a bit weird. This way CONSTBUF[0] is the address of the
0th constbuf.


I'm still not quite convinced. The levels of indirection should clarify 
the meaning, shouldn't they?


You get

  LOAD dst, CONST[0][0], IMM[0]

when loading from offset IMM[0] of a bindless buffer whose handle is at 
the beginning of the buffer CONST[0].


You get

  LOAD dst, CONST[0], IMM[0]

when loading from offset IMM[0] of non-bindless buffer 0.

Is there ever really a situation where the two could be confused?

Cheers,
Nicolai




   -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 00/47] WIP: fp64 support for r600g

2017-08-23 Thread Emil Velikov

On 23 August 2017 at 13:23, Nicolai Hähnle  wrote:
> On 23.08.2017 13:07, Elie Tournier wrote:
>>
>> From: Elie Tournier 
>>
>> TL;DR
>> This series is a "status update" of my work done for adding fp64 support
>> on r600g.
>> One of the biggest issue is due to a lake of accuracy on the rcp
>> implementation.
>> Divide relay on rcp.
>>
>> A branch is available on
>> https://github.com/Hopetech/mesa/tree/glsl_arb_gpu_shader_fp64_v3
>> Comments and reviews are welcome.
>>
>> Patches 1-18:
>> These few patches implement the basic fp64 operations.
>>
>> Patches 19-47:
>> Lower operations using the builtin functions previously implemented.
>>
>> Known issues:
>> - operations on matrix crash the system.
>> - sqrt and d2f are not accurate enought so the piglit tests are failling.
>>But sqrt and d2f are working correctly using softpipe.
>>However, implementing sqrt64 as f2d(sqrt32(d2f()) seems to be good
>> enought for Piglit.
>> - rcp is define as pow(pow(x, -0.5), 2)
>>NIR and NV convert the input in a fp32, realize a rcp, convert back to
>> a fp64 and realize some Newton-Raphson step.
>>This is not possible with GLSL IR because using fma will generate a
>> massive builtin_float64.h file.
>
>
> I don't understand this part. You need multiplication and addition anyway.
> So if it's only fma which is the problem (why?), then why not just use
> non-fused multiply-add? It may end up being slightly less accurate, but we
> don't give any strong guarantees about rcp accuracy anyway, do we?
>
Pardon for dropping it like that. I'll try to explain things in a
slightly different way.

Due to the fp64 <> fp32 conversion the accuracy of RCP is pretty bad.

Thus a couple of Newton-Raphson steps are used. Each one implemented via fma.
There's no native fma thus we use normal multiply and add.

As those get added to the generated file of built-ins
(builtin_float64.h), it grows by ~20k LoC making compilation/linking
quite slow.
Noticeably bloating the final binary size as well (Elie has some crazy
numbers from the very first experiments).

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 18/47] glsl: Add "built-in" functions to do rcp(fp64)

2017-08-23 Thread Nicolai Hähnle


On 23.08.2017 13:07, Elie Tournier wrote:

This algorithm is not accurate.

Signed-off-by: Elie Tournier 
---
  src/compiler/glsl/builtin_float64.h | 1829 +++
  src/compiler/glsl/builtin_functions.cpp |4 +
  src/compiler/glsl/builtin_functions.h   |3 +
  src/compiler/glsl/float64.glsl  |   10 +
  src/compiler/glsl/glcpp/glcpp-parse.y   |1 +
  5 files changed, 1847 insertions(+)

diff --git a/src/compiler/glsl/builtin_float64.h 
b/src/compiler/glsl/builtin_float64.h
index 97d6adfd9f..e6b654cb0d 100644
--- a/src/compiler/glsl/builtin_float64.h
+++ b/src/compiler/glsl/builtin_float64.h
@@ -18479,3 +18479,1832 @@ fround64(void *mem_ctx, builtin_available_predicate 
avail)
 sig->replace_parameters(&sig_parameters);
 return sig;
  }
+ir_function_signature *
+frcp64(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail);
+   ir_factory body(&sig->body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r0F45 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r0F45);
+   ir_variable *const r0F46 = body.make_temp(glsl_type::uint_type, "z1Ptr");
+   body.emit(assign(r0F46, sub(body.constant(2406117202u), swizzle_x(r0F45)), 
0x01));
+
+   ir_expression *const r0F47 = sub(body.constant(3217938081u), 
swizzle_y(r0F45));
+   ir_expression *const r0F48 = less(body.constant(2406117202u), 
swizzle_x(r0F45));
+   ir_expression *const r0F49 = expr(ir_unop_b2i, r0F48);
+   ir_expression *const r0F4A = expr(ir_unop_i2u, r0F49);
+   body.emit(assign(r0F45, sub(r0F47, r0F4A), 0x02));
+
+   body.emit(assign(r0F45, r0F46, 0x01));
+
+   ir_variable *const r0F4B = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"z1", ir_var_auto);
+   body.emit(r0F4B);
+   ir_variable *const r0F4C = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"z0", ir_var_auto);
+   body.emit(r0F4C);
+   ir_expression *const r0F4D = lshift(swizzle_y(r0F45), 
body.constant(int(31)));
+   ir_expression *const r0F4E = rshift(r0F46, body.constant(int(1)));
+   body.emit(assign(r0F4B, bit_or(r0F4D, r0F4E), 0x01));
+
+   body.emit(assign(r0F4C, rshift(swizzle_y(r0F45), body.constant(int(1))), 
0x01));
+
+   body.emit(assign(r0F45, r0F4C, 0x02));
+
+   body.emit(assign(r0F45, r0F4B, 0x01));
+
+   ir_variable *const r0F4F = body.make_temp(glsl_type::bool_type, 
"execute_flag");
+   body.emit(assign(r0F4F, body.constant(true), 0x01));
+
+   ir_variable *const r0F50 = body.make_temp(glsl_type::uvec2_type, 
"return_value");
+   ir_variable *const r0F51 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"zSign", ir_var_auto);
+   body.emit(r0F51);
+   ir_variable *const r0F52 = new(mem_ctx) ir_variable(glsl_type::int_type, 
"bExp", ir_var_auto);
+   body.emit(r0F52);
+   ir_variable *const r0F53 = new(mem_ctx) ir_variable(glsl_type::int_type, 
"aExp", ir_var_auto);
+   body.emit(r0F53);
+   ir_variable *const r0F54 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"bFracHi", ir_var_auto);
+   body.emit(r0F54);
+   ir_variable *const r0F55 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"bFracLo", ir_var_auto);
+   body.emit(r0F55);
+   ir_variable *const r0F56 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"aFracHi", ir_var_auto);
+   body.emit(r0F56);
+   ir_variable *const r0F57 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"aFracLo", ir_var_auto);
+   body.emit(r0F57);
+   ir_variable *const r0F58 = new(mem_ctx) ir_variable(glsl_type::int_type, 
"zExp", ir_var_auto);
+   body.emit(r0F58);
+   ir_variable *const r0F59 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"zFrac2", ir_var_auto);
+   body.emit(r0F59);
+   ir_variable *const r0F5A = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"zFrac1", ir_var_auto);
+   body.emit(r0F5A);
+   ir_variable *const r0F5B = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"zFrac0", ir_var_auto);
+   body.emit(r0F5B);
+   body.emit(assign(r0F5B, body.constant(0u), 0x01));
+
+   body.emit(assign(r0F5A, body.constant(0u), 0x01));
+
+   body.emit(assign(r0F59, body.constant(0u), 0x01));
+
+   ir_variable *const r0F5C = body.make_temp(glsl_type::uint_type, 
"extractFloat64FracLo_retval");
+   body.emit(assign(r0F5C, swizzle_x(r0F45), 0x01));
+
+   body.emit(assign(r0F57, r0F5C, 0x01));
+
+   ir_variable *const r0F5D = body.make_temp(glsl_type::uint_type, 
"extractFloat64FracHi_retval");
+   body.emit(assign(r0F5D, bit_and(r0F4C, body.constant(1048575u)), 0x01));
+
+   body.emit(assign(r0F56, r0F5D, 0x01));
+
+   ir_variable *const r0F5E = body.make_temp(glsl_type::uint_type, 
"extractFloat64FracLo_retval");
+   body.emit(assign(r0F5E, swizzle_x(r0F45), 0x01));
+
+   body.emit(assign(r0F55, r0F5E, 0x01));
+
+   ir_variable *const r0F5F = body.make_temp(glsl_type::uint_type, 
"extractFloat64FracHi_retval");
+   body.emit(assign(r0F5F, bit_and(r0F4C, body.constant(104

Re: [Mesa-dev] [PATCH 00/47] WIP: fp64 support for r600g

2017-08-23 Thread Nicolai Hähnle


On 23.08.2017 15:26, Emil Velikov wrote:

On 23 August 2017 at 13:23, Nicolai Hähnle  wrote:

On 23.08.2017 13:07, Elie Tournier wrote:


From: Elie Tournier 

TL;DR
This series is a "status update" of my work done for adding fp64 support
on r600g.
One of the biggest issue is due to a lake of accuracy on the rcp
implementation.
Divide relay on rcp.

A branch is available on
https://github.com/Hopetech/mesa/tree/glsl_arb_gpu_shader_fp64_v3
Comments and reviews are welcome.

Patches 1-18:
These few patches implement the basic fp64 operations.

Patches 19-47:
Lower operations using the builtin functions previously implemented.

Known issues:
- operations on matrix crash the system.
- sqrt and d2f are not accurate enought so the piglit tests are failling.
But sqrt and d2f are working correctly using softpipe.
However, implementing sqrt64 as f2d(sqrt32(d2f()) seems to be good
enought for Piglit.
- rcp is define as pow(pow(x, -0.5), 2)
NIR and NV convert the input in a fp32, realize a rcp, convert back to
a fp64 and realize some Newton-Raphson step.
This is not possible with GLSL IR because using fma will generate a
massive builtin_float64.h file.



I don't understand this part. You need multiplication and addition anyway.
So if it's only fma which is the problem (why?), then why not just use
non-fused multiply-add? It may end up being slightly less accurate, but we
don't give any strong guarantees about rcp accuracy anyway, do we?


Pardon for dropping it like that. I'll try to explain things in a
slightly different way.

Due to the fp64 <> fp32 conversion the accuracy of RCP is pretty bad.

Thus a couple of Newton-Ralphson steps are used. Each one implemented via fma.
There's no native fma thus we use normal multiply and add.

As those get added to the generated file of built-ins
(builtin_float64.h), it grows by ~20k LoC making compilation/linking
quite slow.
Noticeably bloating the final binary size as well (Elie has some crazy
numbers from the very first experiments).


Oh, I think I get it now. The issue is that the mul+add gets inlined 
into the rcp in builtin_float64.h? Can that be avoided? Although I guess 
that just bloats the final shader, to questionable effects...


Thanks for helping me get it :)

Cheers,
Nicolai



-Emil




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 1/5] util/disk_cache: rename mesa cache dir and introduce cache versioning

2017-08-23 Thread Nicolai Hähnle


On 23.08.2017 08:32, Timothy Arceri wrote:

Steam is already analysing cache items. Unfortunately, we did not
introduce a versioning mechanism for identifying structural changes
to cache entries earlier, so the only way to do so is to rename the
cache directory.

Since we are renaming it we take the opportunity to give the directory
a more meaningful name.

Adding a version field to the header of cache entries will help us to
avoid having to rename the directory in future. Please note this is
versioning for the internal structure of the entries as defined in
disk_cache.{c,h} as opposed to the structure of the data provided to
the disk cache by the GLSL compiler and the various driver backends.
---
  src/compiler/glsl/tests/cache_test.c |  6 +++--
  src/util/disk_cache.c| 46 ++--
  src/util/disk_cache.h|  2 ++
  3 files changed, 40 insertions(+), 14 deletions(-)

diff --git a/src/compiler/glsl/tests/cache_test.c 
b/src/compiler/glsl/tests/cache_test.c
index af1b66fb3d..3796ce6170 100644
--- a/src/compiler/glsl/tests/cache_test.c
+++ b/src/compiler/glsl/tests/cache_test.c
@@ -178,38 +178,40 @@ test_disk_cache_create(void)
 /* Test with XDG_CACHE_HOME set */
 setenv("XDG_CACHE_HOME", CACHE_TEST_TMP "/xdg-cache-home", 1);
 cache = disk_cache_create("test", "make_check", 0);
 expect_null(cache, "disk_cache_create with XDG_CACHE_HOME set with"
 "a non-existing parent directory");
  
 mkdir(CACHE_TEST_TMP, 0755);

 cache = disk_cache_create("test", "make_check", 0);
 expect_non_null(cache, "disk_cache_create with XDG_CACHE_HOME set");
  
-   check_directories_created(CACHE_TEST_TMP "/xdg-cache-home/mesa");

+   check_directories_created(CACHE_TEST_TMP "/xdg-cache-home/"
+ CACHE_DIR_NAME);
  
 disk_cache_destroy(cache);
  
 /* Test with MESA_GLSL_CACHE_DIR set */

 err = rmrf_local(CACHE_TEST_TMP);
 expect_equal(err, 0, "Removing " CACHE_TEST_TMP);
  
 setenv("MESA_GLSL_CACHE_DIR", CACHE_TEST_TMP "/mesa-glsl-cache-dir", 1);

 cache = disk_cache_create("test", "make_check", 0);
 expect_null(cache, "disk_cache_create with MESA_GLSL_CACHE_DIR set with"
 "a non-existing parent directory");
  
 mkdir(CACHE_TEST_TMP, 0755);

 cache = disk_cache_create("test", "make_check", 0);
 expect_non_null(cache, "disk_cache_create with MESA_GLSL_CACHE_DIR set");
  
-   check_directories_created(CACHE_TEST_TMP "/mesa-glsl-cache-dir/mesa");

+   check_directories_created(CACHE_TEST_TMP "/mesa-glsl-cache-dir/"
+ CACHE_DIR_NAME);
  
 disk_cache_destroy(cache);

  }
  
  static bool

  does_cache_contain(struct disk_cache *cache, const cache_key key)
  {
 void *result;
  
 result = disk_cache_get(cache, key, NULL);

diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
index b2229874e0..644a911e53 100644
--- a/src/util/disk_cache.c
+++ b/src/util/disk_cache.c
@@ -51,20 +51,34 @@
  
  /* Number of bits to mask off from a cache key to get an index. */

  #define CACHE_INDEX_KEY_BITS 16
  
  /* Mask for computing an index from a key. */

  #define CACHE_INDEX_KEY_MASK ((1 << CACHE_INDEX_KEY_BITS) - 1)
  
  /* The number of keys that can be stored in the index. */

  #define CACHE_INDEX_MAX_KEYS (1 << CACHE_INDEX_KEY_BITS)
  
+/* The cache version should be bumped whenever a change is made to the

+ * structure of cache entries or the index. This will give any 3rd party
+ * applications reading the cache entries a chance to adjust to the changes.
+ *
+ * - The cache version is checked internally when reading a cache entry. If we
+ *   ever have a mismatch we are in big trouble as this means we had a cache
+ *   collision. In case of such an event please check the skies for giant
+ *   asteroids and that the entire Mesa team hasn't been eaten by wolves.
+ *
+ * - There is no strict requirement that cache versions be backwards
+ *   compatible but effort should be taken to limit disruption where possible.
+ */
+#define CACHE_VERSION 1
+
  struct disk_cache {
 /* The path to the cache directory. */
 char *path;
  
 /* Thread queue for compressing and writing cache entries to disk */

 struct util_queue cache_queue;
  
 /* Seed for rand, which is used to pick a random directory */

 uint64_t seed_xorshift128plus[2];
  
@@ -153,20 +167,25 @@ concatenate_and_mkdir(void *ctx, const char *path, const char *name)

return NULL;
  
 new_path = ralloc_asprintf(ctx, "%s/%s", path, name);
  
 if (mkdir_if_needed(new_path) == 0)

return new_path;
 else
return NULL;
  }
  
+#define DRV_KEY_CPY(_dst, _src, _src_size) { \

+   memcpy(_dst, _src, _src_size);\
+   _dst += _src_size;\
+} while (0)
+
  struct disk_cache *
  disk_cache_create(const char *gpu_name, const char *timestamp,
uint64_t driver_flags)
  {
 v

Re: [Mesa-dev] [PATCH 00/47] WIP: fp64 support for r600g

2017-08-23 Thread Emil Velikov

On 23 August 2017 at 14:31, Nicolai Hähnle  wrote:
> On 23.08.2017 15:26, Emil Velikov wrote:
>>
>> On 23 August 2017 at 13:23, Nicolai Hähnle  wrote:
>>>
>>> On 23.08.2017 13:07, Elie Tournier wrote:


 From: Elie Tournier 

 TL;DR
 This series is a "status update" of my work done for adding fp64 support
 on r600g.
 One of the biggest issues is a lack of accuracy in the rcp
 implementation.
 Division relies on rcp.

 A branch is available on
 https://github.com/Hopetech/mesa/tree/glsl_arb_gpu_shader_fp64_v3
 Comments and reviews are welcome.

 Patches 1-18:
 These few patches implement the basic fp64 operations.

 Patches 19-47:
 Lower operations using the builtin functions previously implemented.

 Known issues:
 - operations on matrix crash the system.
 - sqrt and d2f are not accurate enough, so the piglit tests are
 failing.
 But sqrt and d2f are working correctly using softpipe.
 However, implementing sqrt64 as f2d(sqrt32(d2f(x))) seems to be good
 enough for Piglit.
 - rcp is defined as pow(pow(x, -0.5), 2)
 NIR and NV convert the input to fp32, perform an rcp, convert back
 to
 fp64 and perform some Newton-Raphson steps.
 This is not possible with GLSL IR because using fma will generate a
 massive builtin_float64.h file.
>>>
>>>
>>>
>>> I don't understand this part. You need multiplication and addition
>>> anyway.
>>> So if it's only fma which is the problem (why?), then why not just use
>>> non-fused multiply-add? It may end up being slightly less accurate, but
>>> we
>>> don't give any strong guarantees about rcp accuracy anyway, do we?
>>>
>> Pardon for dropping it like that. I'll try to explain things in a
>> slightly different way.
>>
>> Due to the fp64 <> fp32 conversion the accuracy of RCP is pretty bad.
>>
>> Thus a couple of Newton-Raphson steps are used. Each one implemented via
>> fma.
>> There's no native fma thus we use normal multiply and add.
>>
>> As those get added to the generated file of built-ins
>> (builtin_float64.h), it grows by ~20k LoC making compilation/linking
>> quite slow.
>> Noticeably bloating the final binary size as well (Elie has some crazy
>> numbers from the very first experiments).
>
>
> Oh, I think I get it now. The issue is that the mul+add gets inlined into
> the rcp in builtin_float64.h?
Precisely. Note that pretty much _everything_ gets inlined. Which is
why the file is so big at the moment 20k.

> Can that be avoided?
AFAICT that's not possible atm.

> Although I guess that
> just bloats the final shader, to questionable effects...
>
Haven't looked at the final shader - Elie should have some numbers here.

At some point the binary size of generate_ir.cpp (the one that
includes builtin_float64.h) was ~1/3 of the total driver size.

> Thanks for helping me get it :)
>
Yw. I'm pretty sure Elie will correct me, since I'm not that much of an
expert in this stuff.
Just helping him see the light [at the end of the tunnel].

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/7] gallium: add CONSTBUF type to tgsi_file_type

2017-08-23 Thread Ilia Mirkin

On Wed, Aug 23, 2017 at 9:20 AM, Nicolai Hähnle  wrote:
> On 22.08.2017 16:56, Ilia Mirkin wrote:
>>
>> On Tue, Aug 22, 2017 at 10:51 AM, Roland Scheidegger 
>> wrote:
>>>
>>> I am probably missing something here, but why do you need a new register
>>> file? Since you couldn't use LOAD with TGSI_FILE_CONSTANT before, can't
>>> you just allow LOAD with TGSI_FILE_CONSTANT and achieve the same thing?
>>> Or do you need to know how it's going to be accessed in advance?
>>
>>
>> With bindless, LOAD can take a CONST I believe [which contains the
>> value of the bindless id]. I think it's nice to keep those concepts
>> separate... having CONST sometimes mean the value and other times mean
>> the address is a bit weird. This way CONSTBUF[0] is the address of the
>> 0th constbuf.
>
>
> I'm still not quite convinced. The levels of indirection should clarify the
> meaning, shouldn't they?
>
> You get
>
>   LOAD dst, CONST[0][0], IMM[0]
>
> when loading from offset IMM[0] of a bindless buffer whose handle is at the
> beginning of the buffer CONST[0].
>
> You get
>
>   LOAD dst, CONST[0], IMM[0]
>
> when loading from offset IMM[0] of non-bindless buffer 0.
>
> Is there ever really a situation where the two could be confused?

I always considered CONST[0] == CONST[0][0]. Technically they're not,
since one has the second dimension in the TGSI encoding while the
other doesn't. But practically,

MOV TEMP[0], CONST[0]

and

MOV TEMP[0], CONST[0][0]

are in every way identical. Currently st/mesa will just use CONST[0]
everywhere, never adding the 2nd dimension. As such, I don't think we
should start having behavioural differences for those on some
instructions.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 5/5] util/disk_cache: write cache item metadata to disk

2017-08-23 Thread Nicolai Hähnle


On 23.08.2017 08:33, Timothy Arceri wrote:

---
  src/util/disk_cache.c | 89 ++-
  1 file changed, 88 insertions(+), 1 deletion(-)

diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
index c1b7429b92..dd12a09841 100644
--- a/src/util/disk_cache.c
+++ b/src/util/disk_cache.c
@@ -105,20 +105,22 @@ struct disk_cache_put_job {
  
 struct disk_cache *cache;
  
 cache_key key;
  
 /* Copy of cache data to be compressed and written. */

 void *data;
  
 /* Size of data to be compressed and written. */

 size_t size;
+
+   struct cache_item_metadata cache_item_metadata;
  };
  
  /* Create a directory named 'path' if it does not already exist.

   *
   * Returns: 0 if path already exists as a directory or if created.
   * -1 in all other cases.
   */
  static int
  mkdir_if_needed(const char *path)
  {
@@ -771,29 +773,60 @@ create_put_job(struct disk_cache *cache, const cache_key 
key,
  {
 struct disk_cache_put_job *dc_job = (struct disk_cache_put_job *)
malloc(sizeof(struct disk_cache_put_job) + size);
  
 if (dc_job) {

dc_job->cache = cache;
memcpy(dc_job->key, key, sizeof(cache_key));
dc_job->data = dc_job + 1;
memcpy(dc_job->data, data, size);
dc_job->size = size;
+
+  /* Copy the cache item metadata */
+  if (cache_item_metadata) {
+ dc_job->cache_item_metadata.type = cache_item_metadata->type;
+ if (cache_item_metadata->type == CACHE_ITEM_TYPE_GLSL) {
+dc_job->cache_item_metadata.num_keys =
+   cache_item_metadata->num_keys;
+dc_job->cache_item_metadata.keys = (cache_key *)
+   malloc(cache_item_metadata->num_keys * sizeof(cache_key));
+
+if (!dc_job->cache_item_metadata.keys)
+   goto fail;
+
+for (unsigned i = 0; i < cache_item_metadata->num_keys; i++) {
+   memcpy(dc_job->cache_item_metadata.keys[i],
+  cache_item_metadata->keys[i], sizeof(cache_key));
+}


You can just use a single memcpy.

With that fixed, patches 2-5 are

Reviewed-by: Nicolai Hähnle 



+ }
+  } else {
+ dc_job->cache_item_metadata.type = CACHE_ITEM_TYPE_UNKNOWN;
+ dc_job->cache_item_metadata.keys = NULL;
+  }
 }
  
 return dc_job;

+
+fail:
+   free(dc_job->cache_item_metadata.keys);
+   free(dc_job);
+
+   return NULL;
  }
  
  static void

  destroy_put_job(void *job, int thread_index)
  {
 if (job) {
+  struct disk_cache_put_job *dc_job = (struct disk_cache_put_job *) job;
+  free(dc_job->cache_item_metadata.keys);
+
free(job);
 }
  }
  
  struct cache_entry_file_data {

 uint32_t crc32;
 uint32_t uncompressed_size;
  };
  
  static void

@@ -868,20 +901,48 @@ cache_put(void *job, int thread_index)
  * mesa version that produced the entry or deal with hash collisions,
  * should that ever become a real problem.
  */
 ret = write_all(fd, dc_job->cache->driver_keys_blob,
 dc_job->cache->driver_keys_blob_size);
 if (ret == -1) {
unlink(filename_tmp);
goto done;
 }
  
+   /* Write the cache item metadata. This data can be used to deal with

+* hash collisions, as well as providing useful information to 3rd party
+* tools reading the cache files.
+*/
+   ret = write_all(fd, &dc_job->cache_item_metadata.type,
+   sizeof(uint32_t));
+   if (ret == -1) {
+  unlink(filename_tmp);
+  goto done;
+   }
+
+   if (dc_job->cache_item_metadata.type == CACHE_ITEM_TYPE_GLSL) {
+  ret = write_all(fd, &dc_job->cache_item_metadata.num_keys,
+  sizeof(uint32_t));
+  if (ret == -1) {
+ unlink(filename_tmp);
+ goto done;
+  }
+
+  ret = write_all(fd, dc_job->cache_item_metadata.keys[0],
+  dc_job->cache_item_metadata.num_keys *
+  sizeof(cache_key));
+  if (ret == -1) {
+ unlink(filename_tmp);
+ goto done;
+  }
+   }
+
 /* Create CRC of the data. We will read this when restoring the cache and
  * use it to check for corruption.
  */
 struct cache_entry_file_data cf_data;
 cf_data.crc32 = util_hash_crc32(dc_job->data, dc_job->size);
 cf_data.uncompressed_size = dc_job->size;
  
 size_t cf_data_size = sizeof(cf_data);

 ret = write_all(fd, &cf_data, cf_data_size);
 if (ret == -1) {
@@ -1019,29 +1080,55 @@ disk_cache_get(struct disk_cache *cache, const 
cache_key key, size_t *size)
goto fail;
  
 ret = read_all(fd, file_header, ck_size);

 if (ret == -1)
goto fail;
  
 /* Check for extremely unlikely hash collisions */

 if (memcmp(cache->driver_keys_blob, file_header, ck_size) != 0)
goto fail;
  
+   size_t cache_item_md_size = sizeof(uint32_t);

+   uint32_t md_type;
+   ret = read_all(fd, &md_type, cache_item

Re: [Mesa-dev] TGSI 16-bit support

2017-08-23 Thread Roland Scheidegger

Am 23.08.2017 um 15:08 schrieb Nicolai Hähnle:
> On 22.08.2017 22:39, Roland Scheidegger wrote:
>> Am 22.08.2017 um 19:10 schrieb Marek Olšák:
>>> Hi,
>>>
>>> I'd like to discuss 16-bit float and integer support in TGSI. I'm
>>> proposing this:
>>>
>>>   struct tgsi_instruction
>>>   {
>>>  unsigned Type   : 4;  /* TGSI_TOKEN_TYPE_INSTRUCTION */
>>>  unsigned NrTokens   : 8;  /* UINT */
>>>  unsigned Opcode : 8;  /* TGSI_OPCODE_ */
>>>  unsigned Saturate   : 1;  /* BOOL */
>>>  unsigned NumDstRegs : 2;  /* UINT */
>>>  unsigned NumSrcRegs : 4;  /* UINT */
>>>  unsigned Label  : 1;
>>>  unsigned Texture    : 1;
>>>  unsigned Memory : 1;
>>>  unsigned Precise    : 1;
>>> -   unsigned Padding    : 1;
>>> +   unsigned HalfPrecision : 1;
>>>   };
>>>
>>> There won't be any 16-bit TEMPs in TGSI, but each instruction will
>>> have the HalfPrecision flag, which is a hint for drivers that they can
>>> use a 16-bit opcode. Even texture, load, and store instructions can
>>> set HalfPrecision, which means they can accept and return 16-bit
>>> values.
>>>
>>> The catch is that drivers will have to insert 16-bit <-> 32-bit
>>> conversions manually, because they won't be present in TGSI. The
>>> advantage is that we don't have to add 200 new opcodes for the 3 new
>>> 16-bit types.
>>>
>>> What do you think?
>>>
>>
>> Flagging instructions as 16bit doesn't look too bad to me, but I'm
>> wondering if this isn't a bit problematic wrt register files. Clearly,
>> this is a restriction of tgsi "everything is a 32x4 value". Doubles, of
>> course, have a similar problem, but in the end they still have
>> well-defined interactions with the register files, because it's defined
>> what bits ultimately represent a 64bit value (at least in theory from
>> tgsi's point of view, it is perfectly valid to use some 32bit
>> calculations to set some reg, then just use double instructions directly
>> without conversion on these values - it may not be meaningful but it is
>> well defined).
>> But it looks like you want to avoid to have a well-defined mapping of
>> the registers to 16bit types (and with 16 bits instruction just being
>> hints, I can't see how it could exist).
>> Note that being able to flag instructions as HalfPrecision does not
>> necessarily mean you can't have any explicit 16bit conversion
>> instructions too.
> 
> Those already exist: PK2H and UP2H. Or did you have something else in mind?
> 
> More generally, there are really two use cases for this, and we need to
> be careful not to mix them up:
> 
> - transparent downgrading to 16-bit of lowp and mediump
> - support for extensions that explicitly introduce 16-bit types
> 
> For lowp and mediump, the approach of just having a HalfPrecision bit on
> the instructions is probably fine.
> 
> The second case is different. I don't think there are ARB extensions for
> that yet, but there are AMD_gpu_shader_{int16,half_float} with
> explicitly 16-bit types. (There's also NV_half_float, but that's from
> earlier days without GLSL.) For those, we'd really need to provide
> exactly the required operation. No special handling of TGSI temporaries
> is needed: an f16vec4 is represented as a normal 4-component vector in
> TGSI, just that the upper 16 bits of each component are ignored.
That looks ok to me, albeit you could choose that differently, hence why
I mentioned it (you could pack your 4 16bit members into the x/y
components of the 4x32bit vector).

> 
> Here's another question: What does "low precision" mean on a texture
> instruction? Are the offsets low precision or is it the output? Maybe we
> can punt on this for now -- at least GCN doesn't have low precision
> there anyway.
> 
> To sum it up:
> - I think there have to be separate flags for "this is a true 16-bit
> instruction" and for "optional low precision" -- in the latter, the
> driver is responsible for on-the-fly conversion between half and full types
> - Apart from potential future issues with texture instructions, I think
> the flags on instructions are fine. So the plan is fine for GLES
> lowp/mediump.
> 
> Also, we're running out of bits here, but some of those bits can be
> moved into a separate instruction flags word when the time comes.
> 

There's still some bits left in the instruction token if you really
really need them. Type doesn't need to be 4 bits (at least one bit can
go, even 2 is sufficient at least now, albeit you'd need to change all
tokens), the same is true for NumSrcRegs, where 4 bits is at least one
too many.

I am however still wondering if it really makes sense to have both
hinted and explicit 16bit instructions (because it looks like eventually
it's going to be more work for drivers, having to handle both some day).

Roland

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/7] gallium: add CONSTBUF type to tgsi_file_type

2017-08-23 Thread Nicolai Hähnle


On 23.08.2017 15:49, Ilia Mirkin wrote:

On Wed, Aug 23, 2017 at 9:20 AM, Nicolai Hähnle  wrote:

On 22.08.2017 16:56, Ilia Mirkin wrote:


On Tue, Aug 22, 2017 at 10:51 AM, Roland Scheidegger 
wrote:


I am probably missing something here, but why do you need a new register
file? Since you couldn't use LOAD with TGSI_FILE_CONSTANT before, can't
you just allow LOAD with TGSI_FILE_CONSTANT and achieve the same thing?
Or do you need to know how it's going to be accessed in advance?



With bindless, LOAD can take a CONST I believe [which contains the
value of the bindless id]. I think it's nice to keep those concepts
separate... having CONST sometimes mean the value and other times mean
the address is a bit weird. This way CONSTBUF[0] is the address of the
0th constbuf.



I'm still not quite convinced. The levels of indirection should clarify the
meaning, shouldn't they?

You get

   LOAD dst, CONST[0][0], IMM[0]

when loading from offset IMM[0] of a bindless buffer whose handle is at the
beginning of the buffer CONST[0].

You get

   LOAD dst, CONST[0], IMM[0]

when loading from offset IMM[0] of non-bindless buffer 0.

Is there ever really a situation where the two could be confused?


I always considered CONST[0] == CONST[0][0]. Technically they're not,
since one has the second dimension in the TGSI encoding while the
other doesn't. But practically,

MOV TEMP[0], CONST[0]

and

MOV TEMP[0], CONST[0][0]

are in every way identical. Currently st/mesa will just use CONST[0]
everywhere, never adding the 2nd dimension. As such, I don't think we
should start having behavioural differences for those on some
instructions.


Oh, you're right, CONST[n] and CONST[0][n] are treated the same. That's 
pretty inconsistent and unfortunate :/


I suppose the CONSTBUF thing is fine, then.

Cheers,
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/7] gallium: add CONSTBUF type to tgsi_file_type

2017-08-23 Thread Roland Scheidegger

Am 23.08.2017 um 15:49 schrieb Ilia Mirkin:
> On Wed, Aug 23, 2017 at 9:20 AM, Nicolai Hähnle  wrote:
>> On 22.08.2017 16:56, Ilia Mirkin wrote:
>>>
>>> On Tue, Aug 22, 2017 at 10:51 AM, Roland Scheidegger 
>>> wrote:

 I am probably missing something here, but why do you need a new register
 file? Since you couldn't use LOAD with TGSI_FILE_CONSTANT before, can't
 you just allow LOAD with TGSI_FILE_CONSTANT and achieve the same thing?
 Or do you need to know how it's going to be accessed in advance?
>>>
>>>
>>> With bindless, LOAD can take a CONST I believe [which contains the
>>> value of the bindless id]. I think it's nice to keep those concepts
>>> separate... having CONST sometimes mean the value and other times mean
>>> the address is a bit weird. This way CONSTBUF[0] is the address of the
>>> 0th constbuf.
>>
>>
>> I'm still not quite convinced. The levels of indirection should clarify the
>> meaning, shouldn't they?
>>
>> You get
>>
>>   LOAD dst, CONST[0][0], IMM[0]
>>
>> when loading from offset IMM[0] of a bindless buffer whose handle is at the
>> beginning of the buffer CONST[0].
>>
>> You get
>>
>>   LOAD dst, CONST[0], IMM[0]
>>
>> when loading from offset IMM[0] of non-bindless buffer 0.
>>
>> Is there ever really a situation where the two could be confused?
> 
> I always considered CONST[0] == CONST[0][0]. Technically they're not,
> since one has the second dimension in the TGSI encoding while the
> other doesn't. But practically,
> 
> MOV TEMP[0], CONST[0]
> 
> and
> 
> MOV TEMP[0], CONST[0][0]
> 
> are in every way identical. Currently st/mesa will just use CONST[0]
> everywhere, never adding the 2nd dimension.
Maybe it would be worth the effort to fix this?

Roland


 As such, I don't think we
> should start having behavioural differences for those on some
> instructions.
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radeon/video: Return false explicitly for HEVC if not the case

2017-08-23 Thread Leo Liu

Signed-off-by: Leo Liu 
---
 src/gallium/drivers/radeon/radeon_video.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/radeon/radeon_video.c 
b/src/gallium/drivers/radeon/radeon_video.c
index 7c2553364e..99b6676fee 100644
--- a/src/gallium/drivers/radeon/radeon_video.c
+++ b/src/gallium/drivers/radeon/radeon_video.c
@@ -280,6 +280,7 @@ int rvid_get_video_param(struct pipe_screen *screen,
profile == 
PIPE_VIDEO_PROFILE_HEVC_MAIN_10);
else if (rscreen->family >= CHIP_CARRIZO)
return profile == PIPE_VIDEO_PROFILE_HEVC_MAIN;
+   return false;
case PIPE_VIDEO_FORMAT_JPEG:
if (rscreen->family < CHIP_CARRIZO || rscreen->family 
>= CHIP_VEGA10)
return false;
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/1] clover: Fix build after llvm r309911

2017-08-23 Thread Vedran Miletić

On 08/04/2017 12:22 AM, Jan Vesely wrote:
> Signed-off-by: Jan Vesely 
> ---
>  src/gallium/state_trackers/clover/llvm/codegen/native.cpp | 2 +-
>  src/gallium/state_trackers/clover/llvm/compat.hpp | 6 ++
>  2 files changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/src/gallium/state_trackers/clover/llvm/codegen/native.cpp 
> b/src/gallium/state_trackers/clover/llvm/codegen/native.cpp
> index b9e6750719..12c83a92b6 100644
> --- a/src/gallium/state_trackers/clover/llvm/codegen/native.cpp
> +++ b/src/gallium/state_trackers/clover/llvm/codegen/native.cpp
> @@ -115,7 +115,7 @@ namespace {
>std::unique_ptr tm {
>   t->createTargetMachine(target.triple, target.cpu, "", {},
>  compat::default_reloc_model,
> -::llvm::CodeModel::Default,
> +compat::default_code_model,
>  ::llvm::CodeGenOpt::Default) };
>if (!tm)
>   fail(r_log, build_error(),
> diff --git a/src/gallium/state_trackers/clover/llvm/compat.hpp 
> b/src/gallium/state_trackers/clover/llvm/compat.hpp
> index 06d3b6ffe6..b22ecfa6b2 100644
> --- a/src/gallium/state_trackers/clover/llvm/compat.hpp
> +++ b/src/gallium/state_trackers/clover/llvm/compat.hpp
> @@ -174,6 +174,12 @@ namespace clover {
>  #endif
>   }
>  
> +#if HAVE_LLVM >= 0x0600
> + const auto default_code_model = ::llvm::None;
> +#else
> + const auto default_code_model = ::llvm::CodeModel::Default;
> +#endif
> +
>  #if HAVE_LLVM >= 0x0309
>   const auto default_reloc_model = ::llvm::None;
>  #else
> 

This patch ended up being ignored, though Mesa seems to compile fine.
Did r310200 address it?

Regards,
Vedran

-- 
Vedran Miletić
vedran.miletic.net
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radeon/video: Return false explicitly for HEVC if not the case

2017-08-23 Thread Christian König


Am 23.08.2017 um 15:55 schrieb Leo Liu:

Signed-off-by: Leo Liu 


Reviewed-by: Christian König 


---
  src/gallium/drivers/radeon/radeon_video.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/radeon/radeon_video.c 
b/src/gallium/drivers/radeon/radeon_video.c
index 7c2553364e..99b6676fee 100644
--- a/src/gallium/drivers/radeon/radeon_video.c
+++ b/src/gallium/drivers/radeon/radeon_video.c
@@ -280,6 +280,7 @@ int rvid_get_video_param(struct pipe_screen *screen,
profile == 
PIPE_VIDEO_PROFILE_HEVC_MAIN_10);
else if (rscreen->family >= CHIP_CARRIZO)
return profile == PIPE_VIDEO_PROFILE_HEVC_MAIN;
+   return false;
case PIPE_VIDEO_FORMAT_JPEG:
if (rscreen->family < CHIP_CARRIZO || rscreen->family 
>= CHIP_VEGA10)
return false;



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] TGSI 16-bit support

2017-08-23 Thread Nicolai Hähnle


On 23.08.2017 16:00, Roland Scheidegger wrote:

Am 23.08.2017 um 15:08 schrieb Nicolai Hähnle:

On 22.08.2017 22:39, Roland Scheidegger wrote:

Am 22.08.2017 um 19:10 schrieb Marek Olšák:

Hi,

I'd like to discuss 16-bit float and integer support in TGSI. I'm
proposing this:

   struct tgsi_instruction
   {
  unsigned Type   : 4;  /* TGSI_TOKEN_TYPE_INSTRUCTION */
  unsigned NrTokens   : 8;  /* UINT */
  unsigned Opcode : 8;  /* TGSI_OPCODE_ */
  unsigned Saturate   : 1;  /* BOOL */
  unsigned NumDstRegs : 2;  /* UINT */
  unsigned NumSrcRegs : 4;  /* UINT */
  unsigned Label  : 1;
  unsigned Texture: 1;
  unsigned Memory : 1;
  unsigned Precise: 1;
-   unsigned Padding: 1;
+   unsigned HalfPrecision : 1;
   };

There won't be any 16-bit TEMPs in TGSI, but each instruction will
have the HalfPrecision flag, which is a hint for drivers that they can
use a 16-bit opcode. Even texture, load, and store instructions can
set HalfPrecision, which means they can accept and return 16-bit
values.

The catch is that drivers will have to insert 16-bit <-> 32-bit
conversions manually, because they won't be present in TGSI. The
advantage is that we don't have to add 200 new opcodes for the 3 new
16-bit types.

What do you think?



Flagging instructions as 16bit doesn't look too bad to me, but I'm
wondering if this isn't a bit problematic wrt register files. Clearly,
this is a restriction of tgsi "everything is a 32x4 value". Doubles, of
course, have a similar problem, but in the end they still have
well-defined interactions with the register files, because it's defined
what bits ultimately represent a 64bit value (at least in theory from
tgsi's point of view, it is perfectly valid to use some 32bit
calculations to set some reg, then just use double instructions directly
without conversion on these values - it may not be meaningful but it is
well defined).
But it looks like you want to avoid to have a well-defined mapping of
the registers to 16bit types (and with 16 bits instruction just being
hints, I can't see how it could exist).
Note that being able to flag instructions as HalfPrecision does not
necessarily mean you can't have any explicit 16bit conversion
instructions too.


Those already exist: PK2H and UP2H. Or did you have something else in mind?

More generally, there are really two use cases for this, and we need to
be careful not to mix them up:

- transparent downgrading to 16-bit of lowp and mediump
- support for extensions that explicitly introduce 16-bit types

For lowp and mediump, the approach of just having a HalfPrecision bit on
the instructions is probably fine.

The second case is different. I don't think there are ARB extensions for
that yet, but there are AMD_gpu_shader_{int16,half_float} with
explicitly 16-bit types. (There's also NV_half_float, but that's from
earlier days without GLSL.) For those, we'd really need to provide
exactly the required operation. No special handling of TGSI temporaries
is needed: an f16vec4 is represented as a normal 4-component vector in
TGSI, just that the upper 16 bits of each component are ignored.

That looks ok to me, albeit you could choose that differently, hence why
I mentioned it (you could pack your 4 16bit members into the x/y
components of the 4x32bit vector).


I thought about this as well, but packing 4 components into x/y would 
make swizzling a nightmare.




Here's another question: What does "low precision" mean on a texture
instruction? Are the offsets low precision or is it the output? Maybe we
can punt on this for now -- at least GCN doesn't have low precision
there anyway.

To sum it up:
- I think there have to be separate flags for "this is a true 16-bit
instruction" and for "optional low precision" -- in the latter, the
driver is responsible for on-the-fly conversion between half and full types
- Apart from potential future issues with texture instructions, I think
the flags on instructions are fine. So the plan is fine for GLES
lowp/mediump.

Also, we're running out of bits here, but some of those bits can be
moved into a separate instruction flags word when the time comes.



There's still some bits left in the instruction token if you really
really need them. Type doesn't need to be 4 bits (at least one bit can
go, even 2 is sufficient at least now, albeit you'd need to change all
tokens), the same is true for NumSrcRegs, where 4 bits is at least one
too many.

I am however still wondering if it really makes sense to have both
hinted and explicit 16bit instructions (because it looks like eventually
it's going to be more work for drivers, having to handle both some day).


I know, it's not a completely clear-cut decision.

The main thing is that truly going to 16-bits may not always be 
beneficial because we need to introduce the conversion instruction(s), 
so it'd be neat to communicate the optionality to the driver.


Cheers,
Nicolai
--
Lerne, wie die Welt wirklich ist,
Ab

Re: [Mesa-dev] TGSI 16-bit support

2017-08-23 Thread Nicolai Hähnle


On 23.08.2017 15:15, Nicolai Hähnle wrote:

On 22.08.2017 19:32, Marek Olšák wrote:
On Tue, Aug 22, 2017 at 7:28 PM, Ilia Mirkin  
wrote:

How do you propose defining the semantics for e.g. loading a 16-bit
value from a constbuf/ssbo? Would those get separate instructions?


st/mesa should use UP2H, PK2H and similar opcodes for I16 and U16, and
drivers can replace them with MOV if HalfPrecision == 1.


You mean, if HalfPrecision == 1 for subsequent operations?

How *do* we implement this for LLVM, anyway? Downcast (fptrunc) from 
float to half whenever we're loading operands of a HalfPrecision == 1 
instruction, and then casting (fpext) back up before storing the result?


LLVM instcombine seems quite capable of seeing through that in simple 
code, but I worry about control flow.


Thinking about this some more, having the precision a property of the 
temporaries like Ilia suggested would probably help with emitting LLVM 
IR that behaves well across control flow, but complicate 
st_glsl_to_tgsi. Hard to say what the tradeoff is there.


Cheers,
Nicolai



Cheers,
Nicolai



Marek



On Tue, Aug 22, 2017 at 1:10 PM, Marek Olšák  wrote:

Hi,

I'd like to discuss 16-bit float and integer support in TGSI. I'm
proposing this:

  struct tgsi_instruction
  {
 unsigned Type   : 4;  /* TGSI_TOKEN_TYPE_INSTRUCTION */
 unsigned NrTokens   : 8;  /* UINT */
 unsigned Opcode : 8;  /* TGSI_OPCODE_ */
 unsigned Saturate   : 1;  /* BOOL */
 unsigned NumDstRegs : 2;  /* UINT */
 unsigned NumSrcRegs : 4;  /* UINT */
 unsigned Label  : 1;
 unsigned Texture: 1;
 unsigned Memory : 1;
 unsigned Precise: 1;
-   unsigned Padding: 1;
+   unsigned HalfPrecision : 1;
  };

There won't be any 16-bit TEMPs in TGSI, but each instruction will
have the HalfPrecision flag, which is a hint for drivers that they can
use a 16-bit opcode. Even texture, load, and store instructions can
set HalfPrecision, which means they can accept and return 16-bit
values.

The catch is that drivers will have to insert 16-bit <-> 32-bit
conversions manually, because they won't be present in TGSI. The
advantage is that we don't have to add 200 new opcodes for the 3 new
16-bit types.

What do you think?

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev







--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] TGSI 16-bit support

2017-08-23 Thread Ilia Mirkin

On Wed, Aug 23, 2017 at 10:30 AM, Nicolai Hähnle  wrote:
> On 23.08.2017 15:15, Nicolai Hähnle wrote:
>>
>> On 22.08.2017 19:32, Marek Olšák wrote:
>>>
>>> On Tue, Aug 22, 2017 at 7:28 PM, Ilia Mirkin 
>>> wrote:

 How do you propose defining the semantics for e.g. loading a 16-bit
 value from a constbuf/ssbo? Would those get separate instructions?
>>>
>>>
>>> st/mesa should use UP2H, PK2H and similar opcodes for I16 and U16, and
>>> drivers can replace them with MOV if HalfPrecision == 1.
>>
>>
>> You mean, if HalfPrecision == 1 for subsequent operations?
>>
>> How *do* we implement this for LLVM, anyway? Downcast (fptrunc) from float
>> to half whenever we're loading operands of a HalfPrecision == 1 instruction,
>> and then casting (fpext) back up before storing the result?
>>
>> LLVM instcombine seems quite capable of seeing through that in simple
>> code, but I worry about control flow.
>
>
> Thinking about this some more, having the precision a property of the
> temporaries like Ilia suggested would probably help with emitting LLVM IR
> that behaves well across control flow, but complicate st_glsl_to_tgsi. Hard
> to say what the tradeoff is there.

Why would it complicate glsl_to_tgsi? At the GLSL level, it's not the
operations that have precision, but variables. And those variables map
to temp's... we'd have to create a separate pool of high- vs
low-precision temps, but that's about it.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radeon/video: Return false explicitly for HEVC if not the case

2017-08-23 Thread Nicolai Hähnle


On 23.08.2017 15:55, Leo Liu wrote:

Signed-off-by: Leo Liu 


Reviewed-by: Nicolai Hähnle 



---
  src/gallium/drivers/radeon/radeon_video.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/radeon/radeon_video.c 
b/src/gallium/drivers/radeon/radeon_video.c
index 7c2553364e..99b6676fee 100644
--- a/src/gallium/drivers/radeon/radeon_video.c
+++ b/src/gallium/drivers/radeon/radeon_video.c
@@ -280,6 +280,7 @@ int rvid_get_video_param(struct pipe_screen *screen,
profile == 
PIPE_VIDEO_PROFILE_HEVC_MAIN_10);
else if (rscreen->family >= CHIP_CARRIZO)
return profile == PIPE_VIDEO_PROFILE_HEVC_MAIN;
+   return false;
case PIPE_VIDEO_FORMAT_JPEG:
if (rscreen->family < CHIP_CARRIZO || rscreen->family 
>= CHIP_VEGA10)
return false;




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/3] mesa: only check errors when the state changes in glDepthBoundsEXT()

2017-08-23 Thread Samuel Pitoiset

Signed-off-by: Samuel Pitoiset 
---
 src/mesa/main/depth.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/mesa/main/depth.c b/src/mesa/main/depth.c
index 930f5e816f..ddd91481cd 100644
--- a/src/mesa/main/depth.c
+++ b/src/mesa/main/depth.c
@@ -146,17 +146,17 @@ _mesa_DepthBoundsEXT( GLclampd zmin, GLclampd zmax )
if (MESA_VERBOSE & VERBOSE_API)
   _mesa_debug(ctx, "glDepthBounds(%f, %f)\n", zmin, zmax);
 
-   if (zmin > zmax) {
-  _mesa_error(ctx, GL_INVALID_VALUE, "glDepthBoundsEXT(zmin > zmax)");
-  return;
-   }
-
zmin = CLAMP(zmin, 0.0, 1.0);
zmax = CLAMP(zmax, 0.0, 1.0);
 
if (ctx->Depth.BoundsMin == zmin && ctx->Depth.BoundsMax == zmax)
   return;
 
+   if (zmin > zmax) {
+  _mesa_error(ctx, GL_INVALID_VALUE, "glDepthBoundsEXT(zmin > zmax)");
+  return;
+   }
+
FLUSH_VERTICES(ctx, ctx->DriverFlags.NewDepth ? 0 : _NEW_DEPTH);
ctx->NewDriverState |= ctx->DriverFlags.NewDepth;
ctx->Depth.BoundsMin = (GLfloat) zmin;
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/3] mesa: remove duplicate assignments in bind_xfb_buffers()

2017-08-23 Thread Samuel Pitoiset

Useless to do that before checking errors. It's now similar to
the other bind_XXX_buffers() helpers.

Signed-off-by: Samuel Pitoiset 
---
 src/mesa/main/bufferobj.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 099648f419..2da2128081 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -4055,9 +4055,6 @@ bind_xfb_buffers(struct gl_context *ctx,
   GLsizeiptr size = 0;
 
   if (range) {
- offset = offsets[i];
- size = sizes[i];
-
  if (!bind_buffers_check_offset_and_size(ctx, i, offsets, sizes))
 continue;
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radeon/video: Return false explicitly for HEVC if not the case

2017-08-23 Thread Leo Liu


Hi Nicolai,

Thanks for pointing out the issue. Appreciated.

Leo


On 08/23/2017 10:38 AM, Nicolai Hähnle wrote:

On 23.08.2017 15:55, Leo Liu wrote:

Signed-off-by: Leo Liu 


Reviewed-by: Nicolai Hähnle 



---
  src/gallium/drivers/radeon/radeon_video.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/radeon/radeon_video.c 
b/src/gallium/drivers/radeon/radeon_video.c

index 7c2553364e..99b6676fee 100644
--- a/src/gallium/drivers/radeon/radeon_video.c
+++ b/src/gallium/drivers/radeon/radeon_video.c
@@ -280,6 +280,7 @@ int rvid_get_video_param(struct pipe_screen *screen,
  profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10);
  else if (rscreen->family >= CHIP_CARRIZO)
  return profile == PIPE_VIDEO_PROFILE_HEVC_MAIN;
+return false;
  case PIPE_VIDEO_FORMAT_JPEG:
  if (rscreen->family < CHIP_CARRIZO || rscreen->family 
>= CHIP_VEGA10)

  return false;






___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/3] mesa: fix debug/error messages in glColorMaski()

2017-08-23 Thread Samuel Pitoiset

Trivial. While we are at it, adjust indentation.

Signed-off-by: Samuel Pitoiset 
---
 src/mesa/main/blend.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/mesa/main/blend.c b/src/mesa/main/blend.c
index 5c496d9970..01721ab615 100644
--- a/src/mesa/main/blend.c
+++ b/src/mesa/main/blend.c
@@ -990,18 +990,18 @@ _mesa_ColorMask( GLboolean red, GLboolean green,
  * For GL_EXT_draw_buffers2 and GL3
  */
 void GLAPIENTRY
-_mesa_ColorMaski( GLuint buf, GLboolean red, GLboolean green,
-GLboolean blue, GLboolean alpha )
+_mesa_ColorMaski(GLuint buf, GLboolean red, GLboolean green,
+ GLboolean blue, GLboolean alpha)
 {
GLubyte tmp[4];
GET_CURRENT_CONTEXT(ctx);
 
if (MESA_VERBOSE & VERBOSE_API)
-  _mesa_debug(ctx, "glColorMaskIndexed %u %d %d %d %d\n",
+  _mesa_debug(ctx, "glColorMaski %u %d %d %d %d\n",
   buf, red, green, blue, alpha);
 
if (buf >= ctx->Const.MaxDrawBuffers) {
-  _mesa_error(ctx, GL_INVALID_VALUE, "glColorMaskIndexed(buf=%u)", buf);
+  _mesa_error(ctx, GL_INVALID_VALUE, "glColorMaski(buf=%u)", buf);
   return;
}
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v6.1] egl: Allow creation of per surface out fence

2017-08-23 Thread Tomasz Figa

Hi Yogesh,

Sorry for being late with review. Please see some comments inline.

On Fri, Aug 18, 2017 at 7:08 PM,   wrote:
> From: Zhongmin Wu 
>
> Add plumbing to allow creation of per display surface out fence.
>
> Currently enabled only on android, since the system expects a valid
> fd in ANativeWindow::{queue,cancel}Buffer. We pass a fd of -1 with
> which native applications such as flatland fail. The patch enables
> explicit sync on android and fixes one of the functional issues for
> apps or buffer consumers which depend upon fence and its timestamp.
>
> v2: a) Also implement the fence in cancelBuffer.
> b) The last sync fence is stored in drawable object
>rather than brw context.
> c) format clear.
>
> v3: a) Save the last fence fd in DRI Context object.
> b) Return the last fence if the batch buffer is empty and
>nothing to be flushed when _intel_batchbuffer_flush_fence
> c) Add the new interface in vtbl to set the retrieve fence
>
> v3.1 a) close fd in the new vtbl interface on non-Android platforms
>
> v4: a) The last fence is saved in brw context.
> b) The retrieve fd is for all the platform but not just Android
> c) Add a uniform dri2 interface to initialize the surface.
>
> v4.1: a) make some changes of variable name.
>   b) the patch is broken into two patches.
>
> v4.2: a) Add a deinit interface for surface to clear the out fence
>
> v5: a) Add enable_out_fence to init, platform sets it true or
>false
> b) Change get fd to update fd and check for fence
> c) Commit description updated
>
> v6: a) Heading and commit description updated
> b) enable_out_fence is set only if fence is supported
> c) Review comments on function names
> d) Test with standalone patch, resolves the bug
>
> v6.1 a) Check for old display fence reverted back
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101655
>
> Signed-off-by: Zhongmin Wu 
> Signed-off-by: Yogesh Marathe 
> ---
>  src/egl/drivers/dri2/egl_dri2.c | 69 
> +
>  src/egl/drivers/dri2/egl_dri2.h |  9 
>  src/egl/drivers/dri2/platform_android.c | 29 ++--
>  src/egl/drivers/dri2/platform_drm.c |  3 +-
>  src/egl/drivers/dri2/platform_surfaceless.c |  3 +-
>  src/egl/drivers/dri2/platform_wayland.c |  3 +-
>  src/egl/drivers/dri2/platform_x11.c |  3 +-
>  src/egl/drivers/dri2/platform_x11_dri3.c|  3 +-
>  8 files changed, 104 insertions(+), 18 deletions(-)
>
> diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
> index ed79e0d..04d0332 100644
> --- a/src/egl/drivers/dri2/egl_dri2.c
> +++ b/src/egl/drivers/dri2/egl_dri2.c
> @@ -1354,6 +1354,44 @@ dri2_destroy_context(_EGLDriver *drv, _EGLDisplay 
> *disp, _EGLContext *ctx)
> return EGL_TRUE;
>  }
>
> +EGLBoolean
> +dri2_init_surface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type,
> +_EGLConfig *conf, const EGLint *attrib_list, EGLBoolean 
> enable_out_fence)
> +{
> +   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
> +   struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
> +
> +   dri2_surf->out_fence_fd = -1;
> +   if (dri2_dpy->fence && dri2_dpy->fence->base.version >= 2 &&
> +   dri2_dpy->fence->get_capabilities &&
> +   (dri2_dpy->fence->get_capabilities(dri2_dpy->dri_screen) &
> +__DRI_FENCE_CAP_NATIVE_FD)) {
> +  dri2_surf->enable_out_fence = enable_out_fence;
> +   }

nit: It might not change anything in practice, but it would be more
logical if the code always initialized enable_out_fence to some value.
So maybe let's add dri2_surf->enable_out_fence = 0; above the if.

> +
> +   return _eglInitSurface(surf, dpy, type, conf, attrib_list);
> +}
> +
> +static void
> +dri2_surface_set_out_fence_fd( _EGLSurface *surf, int fence_fd)
> +{
> +   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
> +
> +   if (dri2_surf->out_fence_fd >=0)
> +  close(dri2_surf->out_fence_fd);
> +
> +   dri2_surf->out_fence_fd = fence_fd;
> +}
> +
> +void
> +dri2_deinit_surface(_EGLSurface *surf)
> +{
> +   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
> +
> +   dri2_surface_set_out_fence_fd(surf, -1);
> +   dri2_surf->enable_out_fence = false;
> +}
> +
>  static EGLBoolean
>  dri2_destroy_surface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf)
>  {
> @@ -1365,6 +1403,27 @@ dri2_destroy_surface(_EGLDriver *drv, _EGLDisplay 
> *dpy, _EGLSurface *surf)
> return dri2_dpy->vtbl->destroy_surface(drv, dpy, surf);
>  }
>
> +static void
> +dri2_surf_update_fence_fd(_EGLContext *ctx,
> +  _EGLDisplay *dpy, _EGLSurface *surf)
> +{
> +   __DRIcontext *dri_ctx = dri2_egl_context(ctx)->dri_context;
> +   struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
> +   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
> +   int fence_fd = -1;
> +   void *fence;
> +
> +   if (dri2_surf->enable_out_fence) {
> +  fence

Re: [Mesa-dev] [PATCH 1/1] clover: Fix build after llvm r309911

2017-08-23 Thread Jan Vesely

On Wed, 2017-08-23 at 16:14 +0200, Vedran Miletić wrote:
> On 08/04/2017 12:22 AM, Jan Vesely wrote:
> > Signed-off-by: Jan Vesely 
> > ---
> >  src/gallium/state_trackers/clover/llvm/codegen/native.cpp | 2 +-
> >  src/gallium/state_trackers/clover/llvm/compat.hpp | 6 ++
> >  2 files changed, 7 insertions(+), 1 deletion(-)
> > 
> > diff --git a/src/gallium/state_trackers/clover/llvm/codegen/native.cpp 
> > b/src/gallium/state_trackers/clover/llvm/codegen/native.cpp
> > index b9e6750719..12c83a92b6 100644
> > --- a/src/gallium/state_trackers/clover/llvm/codegen/native.cpp
> > +++ b/src/gallium/state_trackers/clover/llvm/codegen/native.cpp
> > @@ -115,7 +115,7 @@ namespace {
> >std::unique_ptr tm {
> >   t->createTargetMachine(target.triple, target.cpu, "", {},
> >  compat::default_reloc_model,
> > -::llvm::CodeModel::Default,
> > +compat::default_code_model,
> >  ::llvm::CodeGenOpt::Default) };
> >if (!tm)
> >   fail(r_log, build_error(),
> > diff --git a/src/gallium/state_trackers/clover/llvm/compat.hpp 
> > b/src/gallium/state_trackers/clover/llvm/compat.hpp
> > index 06d3b6ffe6..b22ecfa6b2 100644
> > --- a/src/gallium/state_trackers/clover/llvm/compat.hpp
> > +++ b/src/gallium/state_trackers/clover/llvm/compat.hpp
> > @@ -174,6 +174,12 @@ namespace clover {
> >  #endif
> >   }
> >  
> > +#if HAVE_LLVM >= 0x0600
> > + const auto default_code_model = ::llvm::None;
> > +#else
> > + const auto default_code_model = ::llvm::CodeModel::Default;
> > +#endif
> > +
> >  #if HAVE_LLVM >= 0x0309
> >   const auto default_reloc_model = ::llvm::None;
> >  #else
> > 
> 
> This patch ended up being ignored, though Mesa seems to compile fine.
> Did r310200 address it?

the patch has been committed as
08f44a497c97de6162a95c06c902e47bfedad5fe

Jan

> 
> Regards,
> Vedran
> 


signature.asc
Description: This is a digitally signed message part
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] TGSI 16-bit support

2017-08-23 Thread Nicolai Hähnle


On 23.08.2017 16:36, Ilia Mirkin wrote:

On Wed, Aug 23, 2017 at 10:30 AM, Nicolai Hähnle  wrote:

On 23.08.2017 15:15, Nicolai Hähnle wrote:


On 22.08.2017 19:32, Marek Olšák wrote:


On Tue, Aug 22, 2017 at 7:28 PM, Ilia Mirkin 
wrote:


How do you propose defining the semantics for e.g. loading a 16-bit
value from a constbuf/ssbo? Would those get separate instructions?



st/mesa should use UP2H, PK2H and similar opcodes for I16 and U16, and
drivers can replace them with MOV if HalfPrecision == 1.



You mean, if HalfPrecision == 1 for subsequent operations?

How *do* we implement this for LLVM, anyway? Downcast (fptrunc) from float
to half whenever we're loading operands of a HalfPrecision == 1 instruction,
and then casting (fpext) back up before storing the result?

LLVM instcombine seems quite capable of seeing through that in simple
code, but I worry about control flow.



Thinking about this some more, having the precision a property of the
temporaries like Ilia suggested would probably help with emitting LLVM IR
that behaves well across control flow, but complicate st_glsl_to_tgsi. Hard
to say what the tradeoff is there.


Why would it complicate glsl_to_tgsi? At the GLSL level, it's not the
operations that have precision, but variables. And those variables map
to temp's... we'd have to create a separate pool of high- vs
low-precision temps, but that's about it.


Well, it may not be so bad in the end. But we definitely have to be more 
careful with things like temporary remapping, peephole, etc.


Also, at least OUT variables could also be affected, right?

Cheers,
Nicolai



   -ilia




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] TGSI 16-bit support

2017-08-23 Thread Ilia Mirkin

On Wed, Aug 23, 2017 at 10:48 AM, Nicolai Hähnle  wrote:
> On 23.08.2017 16:36, Ilia Mirkin wrote:
>>
>> On Wed, Aug 23, 2017 at 10:30 AM, Nicolai Hähnle 
>> wrote:
>>>
>>> On 23.08.2017 15:15, Nicolai Hähnle wrote:


>>>> On 22.08.2017 19:32, Marek Olšák wrote:
>>>>>
>>>>>
>>>>> On Tue, Aug 22, 2017 at 7:28 PM, Ilia Mirkin 
>>>>> wrote:
>>>>>>
>>>>>>
>>>>>> How do you propose defining the semantics for e.g. loading a 16-bit
>>>>>> value from a constbuf/ssbo? Would those get separate instructions?
>>>>>
>>>>>
>>>>>
>>>>> st/mesa should use UP2H, PK2H and similar opcodes for I16 and U16, and
>>>>> drivers can replace them with MOV if HalfPrecision == 1.
>>>>
>>>>
>>>>
>>>> You mean, if HalfPrecision == 1 for subsequent operations?
>>>>
>>>> How *do* we implement this for LLVM, anyway? Downcast (fptrunc) from
>>>> float
>>>> to half whenever we're loading operands of a HalfPrecision == 1
>>>> instruction,
>>>> and then casting (fpext) back up before storing the result?
>>>>
>>>> LLVM instcombine seems quite capable of seeing through that in simple
>>>> code, but I worry about control flow.
>>>
>>>
>>>
>>> Thinking about this some more, having the precision a property of the
>>> temporaries like Ilia suggested would probably help with emitting LLVM IR
>>> that behaves well across control flow, but complicate st_glsl_to_tgsi.
>>> Hard
>>> to say what the tradeoff is there.
>>
>>
>> Why would it complicate glsl_to_tgsi? At the GLSL level, it's not the
>> operations that have precision, but variables. And those variables map
>> to temp's... we'd have to create a separate pool of high- vs
>> low-precision temps, but that's about it.
>
>
> Well, it may not be so bad in the end. But we definitely have to be more
> careful with things like temporary remapping, peephole, etc.
>
> Also, at least OUT variables could also be affected, right?

And IN as well. Note that some hardware has support for fp16 varyings
and fp16 color outputs (the GLES end of things).
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] mesa: only check errors when the state changes in glDepthBoundsEXT()

2017-08-23 Thread Ilia Mirkin

This is a functional change, e.g. what if

glDepthBoundsEXT(2, 1)

is called? Either way, I suspect it's fine, but just pointing it out
in case it wasn't considered.

On Wed, Aug 23, 2017 at 10:43 AM, Samuel Pitoiset
 wrote:
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/mesa/main/depth.c | 10 +-
>  1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/src/mesa/main/depth.c b/src/mesa/main/depth.c
> index 930f5e816f..ddd91481cd 100644
> --- a/src/mesa/main/depth.c
> +++ b/src/mesa/main/depth.c
> @@ -146,17 +146,17 @@ _mesa_DepthBoundsEXT( GLclampd zmin, GLclampd zmax )
> if (MESA_VERBOSE & VERBOSE_API)
>_mesa_debug(ctx, "glDepthBounds(%f, %f)\n", zmin, zmax);
>
> -   if (zmin > zmax) {
> -  _mesa_error(ctx, GL_INVALID_VALUE, "glDepthBoundsEXT(zmin > zmax)");
> -  return;
> -   }
> -
> zmin = CLAMP(zmin, 0.0, 1.0);
> zmax = CLAMP(zmax, 0.0, 1.0);
>
> if (ctx->Depth.BoundsMin == zmin && ctx->Depth.BoundsMax == zmax)
>return;
>
> +   if (zmin > zmax) {
> +  _mesa_error(ctx, GL_INVALID_VALUE, "glDepthBoundsEXT(zmin > zmax)");
> +  return;
> +   }
> +
> FLUSH_VERTICES(ctx, ctx->DriverFlags.NewDepth ? 0 : _NEW_DEPTH);
> ctx->NewDriverState |= ctx->DriverFlags.NewDepth;
> ctx->Depth.BoundsMin = (GLfloat) zmin;
> --
> 2.14.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/7] gallium: add CONSTBUF type to tgsi_file_type

2017-08-23 Thread Nicolai Hähnle


On 23.08.2017 16:03, Roland Scheidegger wrote:

Am 23.08.2017 um 15:49 schrieb Ilia Mirkin:

On Wed, Aug 23, 2017 at 9:20 AM, Nicolai Hähnle  wrote:

On 22.08.2017 16:56, Ilia Mirkin wrote:


On Tue, Aug 22, 2017 at 10:51 AM, Roland Scheidegger 
wrote:


I am probably missing something here, but why do you need a new register
file? Since you couldn't use LOAD with TGSI_FILE_CONSTANT before, can't
you just allow LOAD with TGSI_FILE_CONSTANT and achieve the same thing?
Or do you need to know how it's going to be accessed in advance?



With bindless, LOAD can take a CONST I believe [which contains the
value of the bindless id]. I think it's nice to keep those concepts
separate... having CONST sometimes mean the value and other times mean
the address is a bit weird. This way CONSTBUF[0] is the address of the
0th constbuf.



I'm still not quite convinced. The levels of indirection should clarify the
meaning, shouldn't they?

You get

   LOAD dst, CONST[0][0], IMM[0]

when loading from offset IMM[0] of a bindless buffer whose handle is at the
beginning of the buffer CONST[0].

You get

   LOAD dst, CONST[0], IMM[0]

when loading from offset IMM[0] of non-bindless buffer 0.

Is there ever really a situation where the two could be confused?


I always considered CONST[0] == CONST[0][0]. Technically they're not,
since one has the second dimension in the TGSI encoding while the
other doesn't. But practically,

MOV TEMP[0], CONST[0]

and

MOV TEMP[0], CONST[0][0]

are in every way identical. Currently st/mesa will just use CONST[0]
everywhere, never adding the 2nd dimension.

Maybe it would be worth the effort to fix this?


Would be nice. One thing that makes this a bit awkward is that older 
drivers just don't support two-dimensional CONST at all -- see 
PIPE_SHADER_CAP_MAX_CONST_BUFFERS. Giving them a shader that loads 
CONST[0][n] is going to fail.


Basically, changing this is a backward-compatible change to state 
trackers, which would have to promise not to produce one-dimensional 
CONST for the usual, vec4-based constant fetching.


On the other hand, maybe we're over-complicating this. The only 
instruction that is really affected is LOAD. And for LOAD, there 
shouldn't be a compatibility problem. Hmm...


Cheers,
Nicolai



Roland


  As such, I don't think we

should start having behavioural differences for those on some
instructions.






--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v6.1] egl: Allow creation of per surface out fence

2017-08-23 Thread Marathe, Yogesh

> -Original Message-
> From: Tomasz Figa [mailto:tf...@chromium.org]
> Sent: Wednesday, August 23, 2017 8:17 PM
> To: Marathe, Yogesh 
> Cc: ML mesa-dev ; Emil Velikov
> ; Gao, Shuo ; Liu, Zhiquan
> ; Daniel Stone ; Nicolai
> Hähnle ; Antognolli, Rafael
> ; Eric Engestrom ; Kenneth
> Graunke ; Rainer Hochecker
> ; Kondapally, Kalyan ;
> Timothy Arceri ; Varad Gautam
> ; Wu, Zhongmin 
> Subject: Re: [PATCH v6.1] egl: Allow creation of per surface out fence
> 
> Hi Yogesh,
> 
> Sorry for being late with review. Please see some comments inline.
> 

No problem.

> On Fri, Aug 18, 2017 at 7:08 PM,   wrote:
> > From: Zhongmin Wu 
> >
> > Add plumbing to allow creation of per display surface out fence.
> >
> > Currently enabled only on android, since the system expects a valid fd
> > in ANativeWindow::{queue,cancel}Buffer. We pass a fd of -1 with which
> > native applications such as flatland fail. The patch enables explicit
> > sync on android and fixes one of the functional issues for apps or
> > buffer consumers which depend upon fence and its timestamp.
> >
> > v2: a) Also implement the fence in cancelBuffer.
> > b) The last sync fence is stored in drawable object
> >rather than brw context.
> > c) format clear.
> >
> > v3: a) Save the last fence fd in DRI Context object.
> > b) Return the last fence if the batch buffer is empty and
> >nothing to be flushed when _intel_batchbuffer_flush_fence
> > c) Add the new interface in vtbl to set the retrieve fence
> >
> > v3.1 a) close fd in the new vtbl interface on non-Android platforms
> >
> > v4: a) The last fence is saved in brw context.
> > b) The retrieve fd is for all the platform but not just Android
> > c) Add a uniform dri2 interface to initialize the surface.
> >
> > v4.1: a) make some changes of variable name.
> >   b) the patch is broken into two patches.
> >
> > v4.2: a) Add a deinit interface for surface to clear the out fence
> >
> > v5: a) Add enable_out_fence to init, platform sets it true or
> >false
> > b) Change get fd to update fd and check for fence
> > c) Commit description updated
> >
> > v6: a) Heading and commit description updated
> > b) enable_out_fence is set only if fence is supported
> > c) Review comments on function names
> > d) Test with standalone patch, resolves the bug
> >
> > v6.1 a) Check for old display fence reverted back
> >
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101655
> >
> > Signed-off-by: Zhongmin Wu 
> > Signed-off-by: Yogesh Marathe 
> > ---
> >  src/egl/drivers/dri2/egl_dri2.c | 69 
> > +
> >  src/egl/drivers/dri2/egl_dri2.h |  9 
> >  src/egl/drivers/dri2/platform_android.c | 29 ++--
> >  src/egl/drivers/dri2/platform_drm.c |  3 +-
> >  src/egl/drivers/dri2/platform_surfaceless.c |  3 +-
> >  src/egl/drivers/dri2/platform_wayland.c |  3 +-
> >  src/egl/drivers/dri2/platform_x11.c |  3 +-
> >  src/egl/drivers/dri2/platform_x11_dri3.c|  3 +-
> >  8 files changed, 104 insertions(+), 18 deletions(-)
> >
> > diff --git a/src/egl/drivers/dri2/egl_dri2.c
> > b/src/egl/drivers/dri2/egl_dri2.c index ed79e0d..04d0332 100644
> > --- a/src/egl/drivers/dri2/egl_dri2.c
> > +++ b/src/egl/drivers/dri2/egl_dri2.c
> > @@ -1354,6 +1354,44 @@ dri2_destroy_context(_EGLDriver *drv,
> _EGLDisplay *disp, _EGLContext *ctx)
> > return EGL_TRUE;
> >  }
> >
> > +EGLBoolean
> > +dri2_init_surface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type,
> > +_EGLConfig *conf, const EGLint *attrib_list, EGLBoolean
> > +enable_out_fence) {
> > +   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
> > +   struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
> > +
> > +   dri2_surf->out_fence_fd = -1;
> > +   if (dri2_dpy->fence && dri2_dpy->fence->base.version >= 2 &&
> > +   dri2_dpy->fence->get_capabilities &&
> > +   (dri2_dpy->fence->get_capabilities(dri2_dpy->dri_screen) &
> > +__DRI_FENCE_CAP_NATIVE_FD)) {
> > +  dri2_surf->enable_out_fence = enable_out_fence;
> > +   }
> 
> nit: It might not change anything in practice, but it would be more logical 
> if the
> code always initialized enable_out_fence to some value.
> So maybe let's add dri2_surf->enable_out_fence = 0; above the if.
> 

Ok.

> > +
> > +   return _eglInitSurface(surf, dpy, type, conf, attrib_list); }
> > +
> > +static void
> > +dri2_surface_set_out_fence_fd( _EGLSurface *surf, int fence_fd) {
> > +   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
> > +
> > +   if (dri2_surf->out_fence_fd >=0)
> > +  close(dri2_surf->out_fence_fd);
> > +
> > +   dri2_surf->out_fence_fd = fence_fd; }
> > +
> > +void
> > +dri2_deinit_surface(_EGLSurface *surf) {
> > +   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
> > +
> > +   dri2_surface_set_out_fence_fd(surf, -1);
> > +   dri2_surf->enable_out_fence = false; }
> > +
> >  static EGLB

1 2 3 >

1 - 100 of 202 matches

Mail list logo