[Mesa-dev] [RFC v2] nir: Add a uniformity analysis pass

2017-03-03 Thread Jason Ekstrand
v2: Start with everything uniform and mark non-uniformity.  This is
required in order to properly handle loops.

Cc: Kenneth Graunke 
Cc: Connor Abbott 
---
 src/compiler/Makefile.sources |   1 +
 src/compiler/nir/nir.c|   3 +
 src/compiler/nir/nir.h|  18 +++
 src/compiler/nir/nir_analyze_uniformity.c | 188 ++
 4 files changed, 210 insertions(+)
 create mode 100644 src/compiler/nir/nir_analyze_uniformity.c

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 2455d4e..407197c 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -186,6 +186,7 @@ NIR_GENERATED_FILES = \
 NIR_FILES = \
nir/nir.c \
nir/nir.h \
+   nir/nir_analyze_uniformity.c \
nir/nir_array.h \
nir/nir_builder.h \
nir/nir_clone.c \
diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
index a9fac96..fd64ccd 100644
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -357,6 +357,8 @@ nir_block_create(nir_shader *shader)
 
exec_list_make_empty(>instr_list);
 
+   block->uniform = false;
+
return block;
 }
 
@@ -1518,6 +1520,7 @@ nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
list_inithead(>if_uses);
def->num_components = num_components;
def->bit_size = bit_size;
+   def->uniform = false;
 
if (instr->block) {
   nir_function_impl *impl =
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index dd1e407..4da318d 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -462,6 +462,14 @@ typedef struct nir_ssa_def {
 
/* The bit-size of each channel; must be one of 8, 16, 32, or 64 */
uint8_t bit_size;
+
+   /** True if this SSA def has the same value for all invocations
+*
+* An SSA def can be uniform even if it is defined in non-uniform
+* control-flow.  In this case, it has the same value for all invocations
+* which reach its definition.
+*/
+   bool uniform;
 } nir_ssa_def;
 
 struct nir_src;
@@ -1480,6 +1488,14 @@ typedef struct nir_block {
/** generic block index; generated by nir_index_blocks */
unsigned index;
 
+   /** True if this block is only executed uniformly
+*
+* A block is said to execute uniformly if, whenever one invocation enters
+* the block, all invocations enter the block and have all taken the same
+* path from the start block to this block.
+*/
+   bool uniform;
+
/*
 * Each block can only have up to 2 successors, so we put them in a simple
 * array - no need for anything more complicated.
@@ -2567,6 +2583,8 @@ bool nir_normalize_cubemap_coords(nir_shader *shader);
 
 void nir_live_ssa_defs_impl(nir_function_impl *impl);
 
+void nir_analyze_uniformity(nir_shader *shader);
+
 void nir_loop_analyze_impl(nir_function_impl *impl,
nir_variable_mode indirect_mask);
 
diff --git a/src/compiler/nir/nir_analyze_uniformity.c 
b/src/compiler/nir/nir_analyze_uniformity.c
new file mode 100644
index 000..42693f3
--- /dev/null
+++ b/src/compiler/nir/nir_analyze_uniformity.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+
+/**
+ * Uniform analysis
+ */
+
+static bool
+src_is_uniform(nir_src *src, void *state)
+{
+   return src->is_ssa && src->ssa->uniform;
+}
+
+static bool
+mark_def_non_uniform(nir_ssa_def *def, void *state)
+{
+   bool *progress = state;
+
+   if (def->uniform) {
+  *progress = true;
+  def->uniform = false;
+   }
+
+   return true;
+}
+
+static void
+mark_block_non_uniform(nir_block *block, bool *progress)
+{
+   if (block->uniform) {
+  *progress = true;
+  block->uniform = false;
+   }
+}
+
+static bool
+block_all_predecessors_uniform(nir_block *block)
+{
+   

Re: [Mesa-dev] [PATCH] ralloc: don't leave out the alignment factor

2017-03-03 Thread Mike Lothian
I've spent the whole night tracking down this bug

I thought it was an issue with a recent upgrade of binutils

Please add my:

Tested-by: Mike Lothian

On Sat, 4 Mar 2017 at 00:49 Grazvydas Ignotas  wrote:

> Experimentation shows that without alignment factor gcc and clang choose
> a factor of 16 even on IA-32, which doesn't match what malloc() uses (8).
> The problem is it makes gcc assume the pointer is 16 byte aligned, so
> with -O3 it starts using aligned SSE instructions that later fault,
> so always specify a suitable alignment factor.
>
> Cc: Jonas Pfeil 
> Fixes: cd2b55e5 "ralloc: Make sure ralloc() allocations match malloc()'s
> alignment."
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100049
> Signed-off-by: Grazvydas Ignotas 
> ---
> no commit access
>
>  src/util/ralloc.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/src/util/ralloc.c b/src/util/ralloc.c
> index 03283de..7bf192e 100644
> --- a/src/util/ralloc.c
> +++ b/src/util/ralloc.c
> @@ -59,8 +59,10 @@ _CRTIMP int _vscprintf(const char *format, va_list
> argptr);
>  struct
>  #ifdef _MSC_VER
>   __declspec(align(8))
> +#elif defined(__LP64__)
> + __attribute__((aligned(16)))
>  #else
> - __attribute__((aligned))
> + __attribute__((aligned(8)))
>  #endif
> ralloc_header
>  {
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC] nir: Add a uniformity analysis pass

2017-03-03 Thread Jason Ekstrand
Cc: Kenneth Graunke 
Cc: Connor Abbott 
---
 src/compiler/Makefile.sources |   1 +
 src/compiler/nir/nir.c|   3 +
 src/compiler/nir/nir.h|  18 +++
 src/compiler/nir/nir_analyze_uniformity.c | 211 ++
 4 files changed, 233 insertions(+)
 create mode 100644 src/compiler/nir/nir_analyze_uniformity.c

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 2455d4e..407197c 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -186,6 +186,7 @@ NIR_GENERATED_FILES = \
 NIR_FILES = \
nir/nir.c \
nir/nir.h \
+   nir/nir_analyze_uniformity.c \
nir/nir_array.h \
nir/nir_builder.h \
nir/nir_clone.c \
diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
index a9fac96..fd64ccd 100644
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -357,6 +357,8 @@ nir_block_create(nir_shader *shader)
 
exec_list_make_empty(>instr_list);
 
+   block->uniform = false;
+
return block;
 }
 
@@ -1518,6 +1520,7 @@ nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
list_inithead(>if_uses);
def->num_components = num_components;
def->bit_size = bit_size;
+   def->uniform = false;
 
if (instr->block) {
   nir_function_impl *impl =
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index dd1e407..4da318d 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -462,6 +462,14 @@ typedef struct nir_ssa_def {
 
/* The bit-size of each channel; must be one of 8, 16, 32, or 64 */
uint8_t bit_size;
+
+   /** True if this SSA def has the same value for all invocations
+*
+* An SSA def can be uniform even if it is defined in non-uniform
+* control-flow.  In this case, it has the same value for all invocations
+* which reach its definition.
+*/
+   bool uniform;
 } nir_ssa_def;
 
 struct nir_src;
@@ -1480,6 +1488,14 @@ typedef struct nir_block {
/** generic block index; generated by nir_index_blocks */
unsigned index;
 
+   /** True if this block is only executed uniformly
+*
+* A block is said to execute uniformly if, whenever one invocation enters
+* the block, all invocations enter the block and have all taken the same
+* path from the start block to this block.
+*/
+   bool uniform;
+
/*
 * Each block can only have up to 2 successors, so we put them in a simple
 * array - no need for anything more complicated.
@@ -2567,6 +2583,8 @@ bool nir_normalize_cubemap_coords(nir_shader *shader);
 
 void nir_live_ssa_defs_impl(nir_function_impl *impl);
 
+void nir_analyze_uniformity(nir_shader *shader);
+
 void nir_loop_analyze_impl(nir_function_impl *impl,
nir_variable_mode indirect_mask);
 
diff --git a/src/compiler/nir/nir_analyze_uniformity.c 
b/src/compiler/nir/nir_analyze_uniformity.c
new file mode 100644
index 000..e6e1f93
--- /dev/null
+++ b/src/compiler/nir/nir_analyze_uniformity.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+
+/**
+ * Uniform analysis
+ */
+
+static bool
+src_is_uniform(nir_src *src, void *state)
+{
+   return src->is_ssa && src->ssa->uniform;
+}
+
+static bool
+mark_def_uniform(nir_ssa_def *def, void *state)
+{
+   bool *progress = state;
+
+   if (!def->uniform) {
+  *progress = true;
+  def->uniform = true;
+   }
+
+   return true;
+}
+
+static void
+analyze_uniformity_block(nir_block *block, bool *progress)
+{
+   nir_foreach_instr(instr, block) {
+  switch (instr->type) {
+  case nir_instr_type_load_const:
+ /* load_const is always uniform */
+ mark_def_uniform(_instr_as_load_const(instr)->def, progress);
+ break;
+
+  case nir_instr_type_phi: {
+

[Mesa-dev] [PATCH 7/8] nir: Make image_size a variable-width intrinsic

2017-03-03 Thread Jason Ekstrand
---
 src/compiler/glsl/glsl_to_nir.cpp |  7 ---
 src/compiler/nir/nir_intrinsics.h |  2 +-
 src/compiler/spirv/spirv_to_nir.c | 18 +++---
 3 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index fc2a2c4..3a71afd 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -865,10 +865,11 @@ nir_visitor::visit(ir_call *ir)
 
  /* Set the intrinsic destination. */
  if (ir->return_deref) {
-const nir_intrinsic_info *info =
-_intrinsic_infos[instr->intrinsic];
+unsigned num_components = ir->return_deref->type->vector_elements;
+if (instr->intrinsic == nir_intrinsic_image_size)
+   instr->num_components = num_components;
 nir_ssa_dest_init(>instr, >dest,
-  info->dest_components, 32, NULL);
+  num_components, 32, NULL);
  }
 
  if (op == nir_intrinsic_image_size ||
diff --git a/src/compiler/nir/nir_intrinsics.h 
b/src/compiler/nir/nir_intrinsics.h
index 5c8f283..105c56f 100644
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -186,7 +186,7 @@ INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, 
xx, xx, xx, 0)
 INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
 INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
 INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, xx, xx, 
xx, 0)
-INTRINSIC(image_size, 0, ARR(0), true, 4, 1, 0, xx, xx, xx,
+INTRINSIC(image_size, 0, ARR(0), true, 0, 1, 0, xx, xx, xx,
   NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 INTRINSIC(image_samples, 0, ARR(0), true, 1, 1, 0, xx, xx, xx,
   NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index fa28576..356f19d 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -1997,17 +1997,21 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
if (opcode != SpvOpImageWrite) {
   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
   struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type;
-  nir_ssa_dest_init(>instr, >dest, 4, 32, NULL);
+
+  unsigned dest_components =
+ nir_intrinsic_infos[intrin->intrinsic].dest_components;
+  if (intrin->intrinsic == nir_intrinsic_image_size) {
+ dest_components = intrin->num_components =
+glsl_get_vector_elements(type->type);
+  }
+
+  nir_ssa_dest_init(>instr, >dest,
+dest_components, 32, NULL);
 
   nir_builder_instr_insert(>nb, >instr);
 
-  /* The image intrinsics always return 4 channels but we may not want
-   * that many.  Emit a mov to trim it down.
-   */
-  unsigned swiz[4] = {0, 1, 2, 3};
   val->ssa = vtn_create_ssa_value(b, type->type);
-  val->ssa->def = nir_swizzle(>nb, >dest.ssa, swiz,
-  glsl_get_vector_elements(type->type), false);
+  val->ssa->def = >dest.ssa;
} else {
   nir_builder_instr_insert(>nb, >instr);
}
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/8] nir: Require numbers of components to always match

2017-03-03 Thread Jason Ekstrand
+correct connor

On Fri, Mar 3, 2017 at 5:12 PM, Jason Ekstrand  wrote:

> When NIR was first created, we were a bit lazy about numbers of components.
> The rule was that a source couldn't consume more components than the thing
> it was reading from.  However, this leads to a lot of confusion because you
> now have a thing sourcing from a vec4 but only reading two of the
> components.
>
> The solution to this is to disallow that case and require that the number
> of components always match.  The one exception is ALU instructions because
> they're designed to naturally swizzle things around like mad.  We already
> require this restriction for phi instructions.  This series adds it for
> intrinsics, texture instructions, and deref indirects.
>
> Cc: Kenneth Graunke 
> Cc: Connor Abbott 
>
> Jason Ekstrand (8):
>   nir/intrinsics: Make load_barycentric_input take a 2-component coor
>   nir/copy_prop: Respect the source's number of components
>   nir/spirv: Restrict the number of channels in texture coordinates
>   nir/lower_tex: Use tex_instr_dest_size for txs destinations
>   anv/apply_dynamic_offsets: Only use one channel for computed offsets
>   i965/fs: Use num_components from the SSA def in image intrinsics
>   nir: Make image_size a variable-width intrinsic
>   nir/validate: Validate that bit sizes and components always match
>
>  src/compiler/glsl/glsl_to_nir.cpp|   7 +-
>  src/compiler/nir/nir_intrinsics.h|   6 +-
>  src/compiler/nir/nir_lower_tex.c |   3 +-
>  src/compiler/nir/nir_opt_copy_propagate.c| 129
> +--
>  src/compiler/nir/nir_validate.c  | 101 +++---
>  src/compiler/spirv/spirv_to_nir.c|  21 ++--
>  src/intel/vulkan/anv_nir_apply_dynamic_offsets.c |   3 +-
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp |   3 +-
>  8 files changed, 185 insertions(+), 88 deletions(-)
>
> --
> 2.5.0.400.gff86faf
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/8] nir: Require numbers of components to always match

2017-03-03 Thread Jason Ekstrand
When NIR was first created, we were a bit lazy about numbers of components.
The rule was that a source couldn't consume more components than the thing
it was reading from.  However, this leads to a lot of confusion because you
now have a thing sourcing from a vec4 but only reading two of the
components.

The solution to this is to disallow that case and require that the number
of components always match.  The one exception is ALU instructions because
they're designed to naturally swizzle things around like mad.  We already
require this restriction for phi instructions.  This series adds it for
intrinsics, texture instructions, and deref indirects.

Cc: Kenneth Graunke 
Cc: Connor Abbott 

Jason Ekstrand (8):
  nir/intrinsics: Make load_barycentric_input take a 2-component coor
  nir/copy_prop: Respect the source's number of components
  nir/spirv: Restrict the number of channels in texture coordinates
  nir/lower_tex: Use tex_instr_dest_size for txs destinations
  anv/apply_dynamic_offsets: Only use one channel for computed offsets
  i965/fs: Use num_components from the SSA def in image intrinsics
  nir: Make image_size a variable-width intrinsic
  nir/validate: Validate that bit sizes and components always match

 src/compiler/glsl/glsl_to_nir.cpp|   7 +-
 src/compiler/nir/nir_intrinsics.h|   6 +-
 src/compiler/nir/nir_lower_tex.c |   3 +-
 src/compiler/nir/nir_opt_copy_propagate.c| 129 +--
 src/compiler/nir/nir_validate.c  | 101 +++---
 src/compiler/spirv/spirv_to_nir.c|  21 ++--
 src/intel/vulkan/anv_nir_apply_dynamic_offsets.c |   3 +-
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp |   3 +-
 8 files changed, 185 insertions(+), 88 deletions(-)

-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/8] nir/intrinsics: Make load_barycentric_input take a 2-component coor

2017-03-03 Thread Jason Ekstrand
Cc: "17.0 13.0" 
---
 src/compiler/nir/nir_intrinsics.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_intrinsics.h 
b/src/compiler/nir/nir_intrinsics.h
index f45bfe2..5c8f283 100644
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -401,7 +401,9 @@ LOAD(input, 1, 2, BASE, COMPONENT, xx, 
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINS
 /* src[] = { vertex, offset }. const_index[] = { base, component } */
 LOAD(per_vertex_input, 2, 2, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE 
| NIR_INTRINSIC_CAN_REORDER)
 /* src[] = { barycoord, offset }. const_index[] = { base, component } */
-LOAD(interpolated_input, 2, 2, BASE, COMPONENT, xx, 
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+INTRINSIC(load_interpolated_input, 2, ARR(2, 1), true, 0, 0,
+  2, BASE, COMPONENT, xx,
+  NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 
 /* src[] = { buffer_index, offset }. No const_index */
 LOAD(ssbo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/8] i965/fs: Use num_components from the SSA def in image intrinsics

2017-03-03 Thread Jason Ekstrand
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 3d5967a..fc85f0e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -3760,8 +3760,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , 
nir_intrinsic_instr *instr
   type->sampler_array;
 
   /* Copy all the components. */
-  const nir_intrinsic_info *info = _intrinsic_infos[instr->intrinsic];
-  for (unsigned c = 0; c < info->dest_components; ++c) {
+  for (unsigned c = 0; c < instr->dest.ssa.num_components; ++c) {
  if ((int)c >= type->coordinate_components()) {
  bld.MOV(offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c),
  brw_imm_d(1));
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/8] nir/spirv: Restrict the number of channels in texture coordinates

2017-03-03 Thread Jason Ekstrand
Some SPIR-V texturing instructions pack more than the texture coordinate
into the coordinate source.  We need to mask off the unused channels.
---
 src/compiler/spirv/spirv_to_nir.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index 1dd6651..fa28576 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -1567,7 +1567,8 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
  coord_components++;
 
   coord = vtn_ssa_value(b, w[idx++])->def;
-  p->src = nir_src_for_ssa(coord);
+  p->src = nir_src_for_ssa(nir_channels(>nb, coord,
+(1 << coord_components) - 1));
   p->src_type = nir_tex_src_coord;
   p++;
   break;
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/8] nir/copy_prop: Respect the source's number of components

2017-03-03 Thread Jason Ekstrand
Because we suddenly have to know how many components each source has,
this makes the pass a bit more complicated.  Fortunately, copy
propagation is the only pass that cares about the number of components
read by any given source, so it's fairly contained.

Shader-db results on Sky Lake:

   total instructions in shared programs: 13318947 -> 13320265 (0.01%)
   instructions in affected programs: 260633 -> 261951 (0.51%)
   helped: 324
   HURT: 1027

Looking through the hurt programs, about a dozen are hurt by 3
instructions and the rest are all hurt by 2 instructions.  From a
spot-check of the shaders, the story is always the same:  They get a
vec4 from somewhere (frequently an input) and use the first two or three
components as a texture coordinate.  Because of the vector component
mismatch, we have a mov or, more likely, a vecN sitting between the
texture instruction and the input.  This means that the back-end inserts
a bunch of MOVs and split_virtual_grfs() goes to town.  Because the
texture coordinate is also used by some other calculation, register
coalesce can't combine them back together and we end up with an extra 2
MOV instructions in our shader.
---
 src/compiler/nir/nir_opt_copy_propagate.c | 129 ++
 1 file changed, 96 insertions(+), 33 deletions(-)

diff --git a/src/compiler/nir/nir_opt_copy_propagate.c 
b/src/compiler/nir/nir_opt_copy_propagate.c
index c26e07f..c4001fa 100644
--- a/src/compiler/nir/nir_opt_copy_propagate.c
+++ b/src/compiler/nir/nir_opt_copy_propagate.c
@@ -99,11 +99,12 @@ is_swizzleless_move(nir_alu_instr *instr)
 }
 
 static bool
-copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
+copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if,
+  unsigned num_components)
 {
if (!src->is_ssa) {
   if (src->reg.indirect)
- return copy_prop_src(src->reg.indirect, parent_instr, parent_if);
+ return copy_prop_src(src->reg.indirect, parent_instr, parent_if, 1);
   return false;
}
 
@@ -115,17 +116,8 @@ copy_prop_src(nir_src *src, nir_instr *parent_instr, 
nir_if *parent_if)
if (!is_swizzleless_move(alu_instr))
   return false;
 
-   /* Don't let copy propagation land us with a phi that has more
-* components in its source than it has in its destination.  That badly
-* messes up out-of-ssa.
-*/
-   if (parent_instr && parent_instr->type == nir_instr_type_phi) {
-  nir_phi_instr *phi = nir_instr_as_phi(parent_instr);
-  assert(phi->dest.is_ssa);
-  if (phi->dest.ssa.num_components !=
-  alu_instr->src[0].src.ssa->num_components)
- return false;
-   }
+   if (alu_instr->src[0].src.ssa->num_components != num_components)
+  return false;
 
if (parent_instr) {
   nir_instr_rewrite_src(parent_instr, src,
@@ -146,7 +138,7 @@ copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned 
index)
if (!src->src.is_ssa) {
   if (src->src.reg.indirect)
  return copy_prop_src(src->src.reg.indirect, _alu_instr->instr,
-  NULL);
+  NULL, 1);
   return false;
}
 
@@ -193,51 +185,122 @@ copy_prop_alu_src(nir_alu_instr *parent_alu_instr, 
unsigned index)
return true;
 }
 
-typedef struct {
-   nir_instr *parent_instr;
-   bool progress;
-} copy_prop_state;
+static bool
+copy_prop_dest(nir_dest *dest, nir_instr *instr)
+{
+   if (!dest->is_ssa && dest->reg.indirect)
+  return copy_prop_src(dest->reg.indirect, instr, NULL, 1);
+
+   return false;
+}
 
 static bool
-copy_prop_src_cb(nir_src *src, void *_state)
+copy_prop_deref_var(nir_instr *instr, nir_deref_var *deref_var)
 {
-   copy_prop_state *state = (copy_prop_state *) _state;
-   while (copy_prop_src(src, state->parent_instr, NULL))
-  state->progress = true;
+   if (!deref_var)
+  return false;
 
-   return true;
+   bool progress = false;
+   for (nir_deref *deref = deref_var->deref.child;
+deref; deref = deref->child) {
+  if (deref->deref_type != nir_deref_type_array)
+ continue;
+
+  nir_deref_array *arr = nir_deref_as_array(deref);
+  if (arr->deref_array_type != nir_deref_array_type_indirect)
+ continue;
+
+  while (copy_prop_src(>indirect, instr, NULL, 1))
+ progress = true;
+   }
+   return progress;
 }
 
 static bool
 copy_prop_instr(nir_instr *instr)
 {
-   if (instr->type == nir_instr_type_alu) {
+   bool progress = false;
+   switch (instr->type) {
+   case nir_instr_type_alu: {
   nir_alu_instr *alu_instr = nir_instr_as_alu(instr);
-  bool progress = false;
 
   for (unsigned i = 0; i < nir_op_infos[alu_instr->op].num_inputs; i++)
  while (copy_prop_alu_src(alu_instr, i))
 progress = true;
 
-  if (!alu_instr->dest.dest.is_ssa && alu_instr->dest.dest.reg.indirect)
- while (copy_prop_src(alu_instr->dest.dest.reg.indirect, instr, NULL))
+  while (copy_prop_dest(_instr->dest.dest, 

[Mesa-dev] [PATCH 8/8] nir/validate: Validate that bit sizes and components always match

2017-03-03 Thread Jason Ekstrand
---
 src/compiler/nir/nir_validate.c | 101 +---
 1 file changed, 63 insertions(+), 38 deletions(-)

diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c
index 15ba65f..16efcb2 100644
--- a/src/compiler/nir/nir_validate.c
+++ b/src/compiler/nir/nir_validate.c
@@ -126,10 +126,12 @@ log_error(validate_state *state, const char *cond, const 
char *file, int line)
  log_error(state, #cond, __FILE__, __LINE__); \
} while (0)
 
-static void validate_src(nir_src *src, validate_state *state);
+static void validate_src(nir_src *src, validate_state *state,
+ unsigned bit_size, unsigned num_components);
 
 static void
-validate_reg_src(nir_src *src, validate_state *state)
+validate_reg_src(nir_src *src, validate_state *state,
+ unsigned bit_size, unsigned num_components)
 {
validate_assert(state, src->reg.reg != NULL);
 
@@ -151,6 +153,13 @@ validate_reg_src(nir_src *src, validate_state *state)
  "using a register declared in a different function");
}
 
+   if (!src->reg.reg->is_packed) {
+  if (bit_size)
+ validate_assert(state, src->reg.reg->bit_size == bit_size);
+  if (num_components)
+ validate_assert(state, src->reg.reg->num_components == 
num_components);
+   }
+
validate_assert(state, (src->reg.reg->num_array_elems == 0 ||
   src->reg.base_offset < src->reg.reg->num_array_elems) &&
   "definitely out-of-bounds array access");
@@ -160,12 +169,13 @@ validate_reg_src(nir_src *src, validate_state *state)
   validate_assert(state, (src->reg.indirect->is_ssa ||
   src->reg.indirect->reg.indirect == NULL) &&
  "only one level of indirection allowed");
-  validate_src(src->reg.indirect, state);
+  validate_src(src->reg.indirect, state, 32, 1);
}
 }
 
 static void
-validate_ssa_src(nir_src *src, validate_state *state)
+validate_ssa_src(nir_src *src, validate_state *state,
+ unsigned bit_size, unsigned num_components)
 {
validate_assert(state, src->ssa != NULL);
 
@@ -188,11 +198,17 @@ validate_ssa_src(nir_src *src, validate_state *state)
   _mesa_set_add(def_state->if_uses, src);
}
 
+   if (bit_size)
+  validate_assert(state, src->ssa->bit_size == bit_size);
+   if (num_components)
+  validate_assert(state, src->ssa->num_components == num_components);
+
/* TODO validate that the use is dominated by the definition */
 }
 
 static void
-validate_src(nir_src *src, validate_state *state)
+validate_src(nir_src *src, validate_state *state,
+ unsigned bit_size, unsigned num_components)
 {
if (state->instr)
   validate_assert(state, src->parent_instr == state->instr);
@@ -200,9 +216,9 @@ validate_src(nir_src *src, validate_state *state)
   validate_assert(state, src->parent_if == state->if_stmt);
 
if (src->is_ssa)
-  validate_ssa_src(src, state);
+  validate_ssa_src(src, state, bit_size, num_components);
else
-  validate_reg_src(src, state);
+  validate_reg_src(src, state, bit_size, num_components);
 }
 
 static void
@@ -247,11 +263,12 @@ validate_alu_src(nir_alu_instr *instr, unsigned index, 
validate_state *state)
   }
}
 
-   validate_src(>src, state);
+   validate_src(>src, state, 0, 0);
 }
 
 static void
-validate_reg_dest(nir_reg_dest *dest, validate_state *state)
+validate_reg_dest(nir_reg_dest *dest, validate_state *state,
+  unsigned bit_size, unsigned num_components)
 {
validate_assert(state, dest->reg != NULL);
 
@@ -270,6 +287,13 @@ validate_reg_dest(nir_reg_dest *dest, validate_state 
*state)
  "writing to a register declared in a different function");
}
 
+   if (!dest->reg->is_packed) {
+  if (bit_size)
+ validate_assert(state, dest->reg->bit_size == bit_size);
+  if (num_components)
+ validate_assert(state, dest->reg->num_components == num_components);
+   }
+
validate_assert(state, (dest->reg->num_array_elems == 0 ||
   dest->base_offset < dest->reg->num_array_elems) &&
   "definitely out-of-bounds array access");
@@ -278,7 +302,7 @@ validate_reg_dest(nir_reg_dest *dest, validate_state *state)
   validate_assert(state, dest->reg->num_array_elems != 0);
   validate_assert(state, (dest->indirect->is_ssa || 
dest->indirect->reg.indirect == NULL) &&
  "only one level of indirection allowed");
-  validate_src(dest->indirect, state);
+  validate_src(dest->indirect, state, 32, 1);
}
 }
 
@@ -307,12 +331,18 @@ validate_ssa_def(nir_ssa_def *def, validate_state *state)
 }
 
 static void
-validate_dest(nir_dest *dest, validate_state *state)
+validate_dest(nir_dest *dest, validate_state *state,
+  unsigned bit_size, unsigned num_components)
 {
-   if (dest->is_ssa)
+   if (dest->is_ssa) {
+  if (bit_size)
+ validate_assert(state, dest->ssa.bit_size == bit_size);
+  if 

[Mesa-dev] [PATCH 5/8] anv/apply_dynamic_offsets: Only use one channel for computed offsets

2017-03-03 Thread Jason Ekstrand
The offset source of the UBO and SSBO intrinsics is only one channel.
---
 src/intel/vulkan/anv_nir_apply_dynamic_offsets.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c 
b/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c
index 80ef8ee..f0f33ec 100644
--- a/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c
+++ b/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c
@@ -83,7 +83,8 @@ apply_dynamic_offsets_block(nir_block *block, nir_builder *b,
 
   nir_src *offset_src = nir_get_io_offset_src(intrin);
   nir_ssa_def *old_offset = nir_ssa_for_src(b, *offset_src, 1);
-  nir_ssa_def *new_offset = nir_iadd(b, old_offset, 
_load->dest.ssa);
+  nir_ssa_def *new_offset =
+ nir_iadd(b, old_offset, nir_channel(b, _load->dest.ssa, 0));
   nir_instr_rewrite_src(>instr, offset_src,
 nir_src_for_ssa(new_offset));
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/8] nir/lower_tex: Use tex_instr_dest_size for txs destinations

2017-03-03 Thread Jason Ekstrand
Using coord_components of the source texture is correct for everything
except cube maps where it's off by one.
---
 src/compiler/nir/nir_lower_tex.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c
index 66e2317..213406a 100644
--- a/src/compiler/nir/nir_lower_tex.c
+++ b/src/compiler/nir/nir_lower_tex.c
@@ -172,7 +172,8 @@ get_texture_size(nir_builder *b, nir_tex_instr *tex)
txs->src[0].src = nir_src_for_ssa(nir_imm_int(b, 0));
txs->src[0].src_type = nir_tex_src_lod;
 
-   nir_ssa_dest_init(>instr, >dest, tex->coord_components, 32, NULL);
+   nir_ssa_dest_init(>instr, >dest,
+ nir_tex_instr_dest_size(txs), 32, NULL);
nir_builder_instr_insert(b, >instr);
 
return nir_i2f(b, >dest.ssa);
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] anv/blorp: Only set a clear color for resolves if fast-cleared

2017-03-03 Thread Jason Ekstrand
Cc: "17.0" 
---
 src/intel/vulkan/anv_blorp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index d79c5e0..72a5980 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1489,7 +1489,8 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
struct blorp_surf surf;
get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
 att_state->aux_usage, );
-   surf.clear_color = vk_to_isl_color(att_state->clear_value.color);
+   if (att_state->fast_clear)
+  surf.clear_color = vk_to_isl_color(att_state->clear_value.color);
 
/* From the Sky Lake PRM Vol. 7, "Render Target Resolve":
 *
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 24/24] docs: mark GL_ARB_gpu_shader_fp64 and OpenGL 4.0 as supported by i965/gen7+

2017-03-03 Thread Francisco Jerez
Samuel Iglesias Gonsálvez  writes:

> Signed-off-by: Samuel Iglesias Gonsálvez 

Acked-by: Francisco Jerez 

> ---
>  docs/features.txt | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/docs/features.txt b/docs/features.txt
> index 5905dba9b39..bb2bf884626 100644
> --- a/docs/features.txt
> +++ b/docs/features.txt
> @@ -107,7 +107,7 @@ GL 3.3, GLSL 3.30 --- all DONE: i965, nv50, nvc0, r600, 
> radeonsi, llvmpipe, soft
>GL_ARB_vertex_type_2_10_10_10_rev DONE (freedreno, swr)
>  
>  
> -GL 4.0, GLSL 4.00 --- all DONE: i965/hsw+, nvc0, r600, radeonsi
> +GL 4.0, GLSL 4.00 --- all DONE: i965/gen7+, nvc0, r600, radeonsi
>  
>GL_ARB_draw_buffers_blend DONE (freedreno, 
> i965/gen6+, nv50, llvmpipe, softpipe, swr)
>GL_ARB_draw_indirect  DONE (i965/gen7+, 
> llvmpipe, softpipe, swr)
> @@ -124,7 +124,7 @@ GL 4.0, GLSL 4.00 --- all DONE: i965/hsw+, nvc0, r600, 
> radeonsi
>- Enhanced per-sample shading DONE ()
>- Interpolation functions DONE ()
>- New overload resolution rules   DONE
> -  GL_ARB_gpu_shader_fp64DONE (i965/hsw+, 
> llvmpipe, softpipe)
> +  GL_ARB_gpu_shader_fp64DONE (i965/gen7+, 
> llvmpipe, softpipe)
>GL_ARB_sample_shading DONE (i965/gen6+, 
> nv50)
>GL_ARB_shader_subroutine  DONE (i965/gen6+, 
> nv50, llvmpipe, softpipe, swr)
>GL_ARB_tessellation_shaderDONE (i965/gen7+)
> -- 
> 2.11.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 23/24] i965: enable OpenGL 4.0 to Ivybridge/Baytrail

2017-03-03 Thread Francisco Jerez
Samuel Iglesias Gonsálvez  writes:

> Signed-off-by: Samuel Iglesias Gonsálvez 

Reviewed-by: Francisco Jerez 

> ---
>  src/mesa/drivers/dri/i965/intel_extensions.c | 2 ++
>  src/mesa/drivers/dri/i965/intel_screen.c | 6 --
>  2 files changed, 6 insertions(+), 2 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
> b/src/mesa/drivers/dri/i965/intel_extensions.c
> index 9d4b109ac3f..05bc5b92187 100644
> --- a/src/mesa/drivers/dri/i965/intel_extensions.c
> +++ b/src/mesa/drivers/dri/i965/intel_extensions.c
> @@ -138,6 +138,8 @@ intelInitExtensions(struct gl_context *ctx)
>ctx->Const.GLSLVersion = 450;
> else if (brw->is_haswell && can_do_pipelined_register_writes(brw->screen))
>ctx->Const.GLSLVersion = 450;
> +   else if (brw->gen >= 7 && can_do_pipelined_register_writes(brw->screen))
> +  ctx->Const.GLSLVersion = 400;
> else if (brw->gen >= 6)
>ctx->Const.GLSLVersion = 330;
> else
> diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
> b/src/mesa/drivers/dri/i965/intel_screen.c
> index 83b1f3ff556..1afa74a24d6 100644
> --- a/src/mesa/drivers/dri/i965/intel_screen.c
> +++ b/src/mesa/drivers/dri/i965/intel_screen.c
> @@ -1545,8 +1545,10 @@ set_max_gl_versions(struct intel_screen *screen)
>dri_screen->max_gl_es2_version = has_astc ? 32 : 31;
>break;
> case 7:
> -  dri_screen->max_gl_core_version = screen->devinfo.is_haswell &&
> - can_do_pipelined_register_writes(screen) ? 45 : 33;
> +  if (can_do_pipelined_register_writes(screen))
> + dri_screen->max_gl_core_version = screen->devinfo.is_haswell ? 45 : 
> 40;
> +  else
> + dri_screen->max_gl_core_version = 33;
>dri_screen->max_gl_compat_version = 30;
>dri_screen->max_gl_es1_version = 11;
>dri_screen->max_gl_es2_version = screen->devinfo.is_haswell ? 31 : 30;
> -- 
> 2.11.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 22/24] i965: enable ARB_gpu_shader_fp64 for Ivybridge/Baytrail

2017-03-03 Thread Francisco Jerez
Samuel Iglesias Gonsálvez  writes:

> Signed-off-by: Samuel Iglesias Gonsálvez 

Reviewed-by: Francisco Jerez 

> ---
>  src/mesa/drivers/dri/i965/intel_extensions.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
> b/src/mesa/drivers/dri/i965/intel_extensions.c
> index f1290bf7b49..9d4b109ac3f 100644
> --- a/src/mesa/drivers/dri/i965/intel_extensions.c
> +++ b/src/mesa/drivers/dri/i965/intel_extensions.c
> @@ -204,6 +204,7 @@ intelInitExtensions(struct gl_context *ctx)
>ctx->Extensions.ARB_derivative_control = true;
>ctx->Extensions.ARB_framebuffer_no_attachments = true;
>ctx->Extensions.ARB_gpu_shader5 = true;
> +  ctx->Extensions.ARB_gpu_shader_fp64 = true;
>ctx->Extensions.ARB_shader_atomic_counters = true;
>ctx->Extensions.ARB_shader_atomic_counter_ops = true;
>ctx->Extensions.ARB_shader_clock = true;
> @@ -237,7 +238,6 @@ intelInitExtensions(struct gl_context *ctx)
> }
>  
> if (brw->gen >= 8 || brw->is_haswell) {
> -  ctx->Extensions.ARB_gpu_shader_fp64 = true;
>ctx->Extensions.ARB_shader_precision = true;
>ctx->Extensions.ARB_stencil_texturing = true;
>ctx->Extensions.ARB_texture_stencil8 = true;
> -- 
> 2.11.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 21/24] i965: Use correct VertStride on align16 instructions.

2017-03-03 Thread Francisco Jerez
Samuel Iglesias Gonsálvez  writes:

> From: Matt Turner 
>
> In commit c35fa7a, we changed the "width" of DF source registers to 2,
> which is conceptually fine. Unfortunately a VertStride of 2 is not
> allowed by align16 instructions on IVB/BYT, and the regular VertStride
> of 4 works fine in any case.
>
> See 
> generated_tests/spec/arb_gpu_shader_fp64/execution/built-in-functions/vs-round-double.shader_test
> for example:
>
> cmp.ge.f0(8)g18<1>DFg1<0>.xyxyDF-g8<2>DF{ align16 1Q 
> };
> ERROR: In Align16 mode, only VertStride of 0 or 4 is allowed
> cmp.ge.f0(8)g19<1>DFg1<0>.xyxyDF-g9<2>DF{ align16 2N 
> };
> ERROR: In Align16 mode, only VertStride of 0 or 4 is allowed
>
> v2:
> - Add spec quote (Curro).
> - Change the condition to only BRW_VERTICAL_STRIDE_2 (Curro)
>
> Reviewed-by: Samuel Iglesias Gonsálvez 
> ---
>  src/mesa/drivers/dri/i965/brw_eu_emit.c | 44 
> +
>  1 file changed, 34 insertions(+), 10 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
> b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> index 03aaa760163..d221405db4d 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
> +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> @@ -512,13 +512,25 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, 
> struct brw_reg reg)
>   brw_inst_set_src0_da16_swiz_w(devinfo, inst,
>  BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));
>  
> -  /* This is an oddity of the fact we're using the same
> -   * descriptions for registers in align_16 as align_1:
> -   */
> -  if (reg.vstride == BRW_VERTICAL_STRIDE_8)
> + if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
> +/* This is an oddity of the fact we're using the same
> + * descriptions for registers in align_16 as align_1:
> + */
> +brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
> + } else if (devinfo->gen == 7 && !devinfo->is_haswell &&
> +reg.type == BRW_REGISTER_TYPE_DF &&
> +reg.vstride == BRW_VERTICAL_STRIDE_2) {
> +/* From HSW PRM:

This workaround is IVB-specific, so the HSW PRM quotation doesn't seem
particularly relevant.  With that fixed here and below, the patch is:

Reviewed-by: Francisco Jerez 

> + *
> + * "For Align16 access mode, only encodings of 0000, 0010
> + *  and 0011 are allowed. Other codes are reserved."
> + *
> + * Presumably the DevSNB behavior applies to IVB as well.
> + */
>  brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
> -  else
> + } else {
>  brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
> + }
>}
> }
>  }
> @@ -594,13 +606,25 @@ brw_set_src1(struct brw_codegen *p, brw_inst *inst, 
> struct brw_reg reg)
>   brw_inst_set_src1_da16_swiz_w(devinfo, inst,
>  BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));
>  
> -  /* This is an oddity of the fact we're using the same
> -   * descriptions for registers in align_16 as align_1:
> -   */
> -  if (reg.vstride == BRW_VERTICAL_STRIDE_8)
> + if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
> +/* This is an oddity of the fact we're using the same
> + * descriptions for registers in align_16 as align_1:
> + */
> +brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
> + } else if (devinfo->gen == 7 && !devinfo->is_haswell &&
> +reg.type == BRW_REGISTER_TYPE_DF &&
> +reg.vstride == BRW_VERTICAL_STRIDE_2) {
> +/* From HSW PRM:
> + *
> + * "For Align16 access mode, only encodings of 0000, 0010
> + *  and 0011 are allowed. Other codes are reserved."
> + *
> + * Presumably the DevSNB behavior applies to IVB as well.
> + */
>  brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
> -  else
> + } else {
>  brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
> + }
>}
> }
>  }
> -- 
> 2.11.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 100049] "ralloc: Make sure ralloc() allocations match malloc()'s alignment." causes seg fault in 32bit build

2017-03-03 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=100049

--- Comment #1 from Grazvydas Ignotas  ---
Patch sent:
https://patchwork.freedesktop.org/patch/142123/

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] ralloc: don't leave out the alignment factor

2017-03-03 Thread Grazvydas Ignotas
Experimentation shows that without an alignment factor, gcc and clang choose
a factor of 16 even on IA-32, which doesn't match what malloc() uses (8).
The problem is that this makes gcc assume the pointer is 16-byte aligned, so
with -O3 it starts using aligned SSE instructions that later fault.
Always specify a suitable alignment factor to avoid this.

Cc: Jonas Pfeil 
Fixes: cd2b55e5 "ralloc: Make sure ralloc() allocations match malloc()'s 
alignment."
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100049
Signed-off-by: Grazvydas Ignotas 
---
no commit access

 src/util/ralloc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/util/ralloc.c b/src/util/ralloc.c
index 03283de..7bf192e 100644
--- a/src/util/ralloc.c
+++ b/src/util/ralloc.c
@@ -59,8 +59,10 @@ _CRTIMP int _vscprintf(const char *format, va_list argptr);
 struct
 #ifdef _MSC_VER
  __declspec(align(8))
+#elif defined(__LP64__)
+ __attribute__((aligned(16)))
 #else
- __attribute__((aligned))
+ __attribute__((aligned(8)))
 #endif
ralloc_header
 {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 19/24] i965/vec4: fix SIMD-width lowering for CMP/MOV instructions with conditional modifiers

2017-03-03 Thread Francisco Jerez
Samuel Iglesias Gonsálvez  writes:

> From: "Juan A. Suarez Romero" 
>
> When splitting a CMP/MOV instruction with a NULL dest, DF sources, and a
> conditional modifier, we can't use the flag registers directly, as they will
> have the wrong results on IVB/BYT after the scalarization.
>
> Rather, we need to store the result in a temporary register, and then use
> that register to set the flag values properly.
>
> If a MOV has a null destination register and a conditional modifier, it
> can be replaced with a CMP against zero with the same conditional
> modifier. By doing this replacement, we can do the SIMD lowering
> without any problem.
>
> v2:
> - Fix typo (Matt)
>
> Signed-off-by: Samuel Iglesias Gonsálvez 
> Signed-off-by: Juan A. Suarez Romero 
> ---
>  src/mesa/drivers/dri/i965/brw_vec4.cpp | 80 
> +++---
>  1 file changed, 74 insertions(+), 6 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index adcde085305..819674e8cb9 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -2177,6 +2177,46 @@ vec4_visitor::lower_simd_width()
> * value of the instruction's dst.
> */
>bool needs_temp = dst_src_regions_overlap(inst);
> +
> +  /* When splitting instructions with conditional modifiers and NULL
> +   * dest we can't rely directly on the flags to store the result. 
> Rather,
> +   * we need first to enqueue the result in a temporary register, and 
> then
> +   * move those values into flags.
> +   */
> +  bool inst_df_dst_null =
> + inst->dst.is_null() && get_exec_type_size(inst) == 8 &&
> + inst->conditional_mod != BRW_CONDITIONAL_NONE;
> +
> +  if (inst_df_dst_null) {
> + /* If there are other DF instructions with NULL destination,
> +  * we need to verify if we can use the temporary register or
> +  * if we need an extra lowering step.
> +  */
> + assert(inst->opcode == BRW_OPCODE_MOV ||
> +inst->opcode == BRW_OPCODE_CMP);
> +
> + /* Replace MOV.XX with null destination with the equivalent CMP.XX
> +  * with null destination, so we can lower it as explained before.
> +  */
> + if (inst->opcode == BRW_OPCODE_MOV) {
> +vec4_instruction *cmp =
> +   new(mem_ctx) vec4_instruction(BRW_OPCODE_CMP, dst_null_df(),
> + inst->src[0],
> + setup_imm_df(0.0, block, inst));
> +cmp->conditional_mod = inst->conditional_mod;
> +cmp->exec_size = inst->exec_size;
> +cmp->group = inst->group;
> +cmp->size_written = inst->size_written;
> +inst->insert_before(block, cmp);
> +inst->remove(block);
> +inst = cmp;
> + }
> +  }
> +  dst_reg inst_dst;
> +  if (inst_df_dst_null)
> + inst_dst =
> +retype(dst_reg(VGRF, alloc.allocate(1)), BRW_REGISTER_TYPE_F);
> +
>for (unsigned n = 0; n < inst->exec_size / lowered_width; n++)  {
>   unsigned channel_offset = lowered_width * n;
>  
> @@ -2199,7 +2239,7 @@ vec4_visitor::lower_simd_width()
>   bool d2f_pass = (inst->opcode == VEC4_OPCODE_FROM_DOUBLE && n > 0);
>   /* Compute split dst region */
>   dst_reg dst;
> - if (needs_temp || d2f_pass) {
> + if (needs_temp || d2f_pass || inst_df_dst_null) {
>  unsigned num_regs = DIV_ROUND_UP(size_written, REG_SIZE);
>  dst = retype(dst_reg(VGRF, alloc.allocate(num_regs)),
>   inst->dst.type);
> @@ -2229,24 +2269,52 @@ vec4_visitor::lower_simd_width()
>  
>   inst->insert_before(block, linst);
>  
> + dst_reg d2f_dst;
> + if (inst_df_dst_null) {
> +unsigned num_regs = DIV_ROUND_UP(lowered_width, 
> type_sz(BRW_REGISTER_TYPE_F));
> +d2f_dst = retype(dst_reg(VGRF, alloc.allocate(num_regs)), 
> BRW_REGISTER_TYPE_F);
> +vec4_instruction *d2f = new(mem_ctx) 
> vec4_instruction(VEC4_OPCODE_FROM_DOUBLE, d2f_dst, src_reg(dst));
> +d2f->group = channel_offset;
> +d2f->exec_size = lowered_width;
> +d2f->size_written = lowered_width * type_sz(d2f_dst.type);
> +d2f->predicate = inst->predicate;
> +inst->insert_before(block, d2f);
> + }
> +
>   /* If we used a temporary to store the result of the split
>* instruction, copy the result to the original destination
>*/
> - if (needs_temp || d2f_pass) {
> + if (needs_temp || d2f_pass || inst_df_dst_null) {
>  vec4_instruction *mov;
> -if (d2f_pass)
> +if (d2f_pass) {
> mov = 

Re: [Mesa-dev] [PATCH v3 20/24] i965/vec4: Fix exec size for MOVs SET_{HIGH, LOW}_32BIT.

2017-03-03 Thread Francisco Jerez
Samuel Iglesias Gonsálvez  writes:

> From: Matt Turner 
>
> Otherwise for a pack_double_2x32_split opcode, we emit:
>
>vec1 64 ssa_135 = pack_double_2x32_split ssa_133, ssa_134
> mov(8)  g5<1>UD g5<4>.xUD   { align16 1Q 
> compacted };
> mov(8)  g7<2>UD g5<4,4,1>UD { align1 1Q };
> ERROR: When the destination spans two registers, the source must span 
> two registers
>(exceptions for scalar source and packed-word to packed-dword 
> expansion)
> mov(8)  g8<2>UD g5.4<4,4,1>UD   { align1 2N };
> ERROR: The offset from the two source registers must be the same
> mov(8)  g5<1>UD g6<4>.xUD   { align16 1Q 
> compacted };
> mov(8)  g7.1<2>UD   g5<4,4,1>UD { align1 1Q };
> ERROR: When the destination spans two registers, the source must span 
> two registers
>(exceptions for scalar source and packed-word to packed-dword 
> expansion)
> mov(8)  g8.1<2>UD   g5.4<4,4,1>UD   { align1 2N };
> ERROR: The offset from the two source registers must be the same
>
> The intention was to emit mov(4)s for the instructions that have ERROR
> annotations.
>
> See tests/spec/arb_gpu_shader_fp64/execution/vs-isinf-dvec.shader_test
> for example.
>
> Reviewed-by: Samuel Iglesias Gonsálvez 
> ---
>  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> index b570792badd..f6034bc8b76 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> @@ -2025,6 +2025,7 @@ generate_code(struct brw_codegen *p,
>   assert(type_sz(dst.type) == 8);
>  
>   brw_set_default_access_mode(p, BRW_ALIGN_1);
> + brw_set_default_exec_size(p, BRW_EXECUTE_4);
>  

NAK, we're missing a bug elsewhere if the exec_size coming in from the
IR is not accurate.  You don't happen to be doubling the execution size
of this single-precision instruction, do you?

>   dst = retype(dst, BRW_REGISTER_TYPE_UD);
>   if (inst->opcode == VEC4_OPCODE_SET_HIGH_32BIT)
> @@ -2037,6 +2038,7 @@ generate_code(struct brw_codegen *p,
>   src[0].hstride = BRW_HORIZONTAL_STRIDE_1;
>   brw_MOV(p, dst, src[0]);
>  
> + brw_set_default_exec_size(p, BRW_EXECUTE_8);
>   brw_set_default_access_mode(p, BRW_ALIGN_16);
>   break;
>}
> -- 
> 2.11.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 18/24] i965/vec4: adapt setup_imm_df() to allow inserting instructions before another one

2017-03-03 Thread Francisco Jerez
Samuel Iglesias Gonsálvez  writes:

> Add a new setup_imm_df() that allows the insertion of the instructions
> before another one. This will be used in the lowering passes for DF
> instructions.
>
> v2:
> - Adapt emission of DIM instruction too.
>
> Signed-off-by: Samuel Iglesias Gonsálvez 
> ---
>  src/mesa/drivers/dri/i965/brw_vec4.h   |  2 ++
>  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 26 +-
>  2 files changed, 23 insertions(+), 5 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
> b/src/mesa/drivers/dri/i965/brw_vec4.h
> index 29b203af89e..01b928ef4a7 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
> @@ -328,6 +328,8 @@ public:
>brw_reg_type single_type);
>  
> src_reg setup_imm_df(double v);
> +   src_reg setup_imm_df(double v, struct bblock_t *block,
> +vec4_instruction *inst);
>  
> vec4_instruction *shuffle_64bit_data(dst_reg dst, src_reg src,
>  bool for_write,
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> index 2127415be7a..e99db2cef8e 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> @@ -1213,6 +1213,12 @@ vec4_visitor::emit_conversion_to_double(dst_reg dst, 
> src_reg src,
>  src_reg
>  vec4_visitor::setup_imm_df(double v)
>  {
> +   return setup_imm_df(v, NULL, NULL);
> +}
> +
> +src_reg
> +vec4_visitor::setup_imm_df(double v, struct bblock_t *block, 
> brw::vec4_instruction *inst)
> +{
> assert(devinfo->gen >= 7);
>  
> if (devinfo->gen >= 8)
> @@ -1223,7 +1229,10 @@ vec4_visitor::setup_imm_df(double v)
>  */
> if (devinfo->is_haswell) {
>dst_reg dst = retype(dst_reg(VGRF, alloc.allocate(2)), 
> BRW_REGISTER_TYPE_DF);
> -  emit(DIM(dst, brw_imm_df(v)))->force_writemask_all = true;
> +  if (block)
> + emit_before(block, inst, DIM(dst, 
> brw_imm_df(v)))->force_writemask_all = true;
> +  else
> + emit(DIM(dst, brw_imm_df(v)))->force_writemask_all = true;
>return swizzle(src_reg(retype(dst, BRW_REGISTER_TYPE_DF)), 
> BRW_SWIZZLE_);
> }
>  
> @@ -1248,10 +1257,17 @@ vec4_visitor::setup_imm_df(double v)
> const dst_reg tmp =
>retype(dst_reg(VGRF, alloc.allocate(2)), BRW_REGISTER_TYPE_UD);
> for (int n = 0; n < 2; n++) {
> -  emit(MOV(writemask(offset(tmp, 8, n), WRITEMASK_X), brw_imm_ud(di.i1)))
> - ->force_writemask_all = true;
> -  emit(MOV(writemask(offset(tmp, 8, n), WRITEMASK_Y), brw_imm_ud(di.i2)))
> - ->force_writemask_all = true;
> +  if (block) {
> + emit_before(block, inst, MOV(writemask(offset(tmp, 8, n), 
> WRITEMASK_X), brw_imm_ud(di.i1)))
> +->force_writemask_all = true;
> + emit_before(block, inst, MOV(writemask(offset(tmp, 8, n), 
> WRITEMASK_Y), brw_imm_ud(di.i2)))
> +->force_writemask_all = true;
> +  } else {
> + emit(MOV(writemask(offset(tmp, 8, n), WRITEMASK_X), 
> brw_imm_ud(di.i1)))
> +->force_writemask_all = true;
> + emit(MOV(writemask(offset(tmp, 8, n), WRITEMASK_Y), 
> brw_imm_ud(di.i2)))
> +->force_writemask_all = true;
> +  }

This would be substantially easier if you used the builder interface:
you wouldn't even need to add any additional arguments to setup_imm_df(),
and the conditional block would be unnecessary.

> }
>  
> return swizzle(src_reg(retype(tmp, BRW_REGISTER_TYPE_DF)), 
> BRW_SWIZZLE_);
> -- 
> 2.11.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 17/24] i965/vec4: consider subregister offset in live variables

2017-03-03 Thread Francisco Jerez
Samuel Iglesias Gonsálvez  writes:

> From: "Juan A. Suarez Romero" 
>
> Take into account offset values less than a full register (32 bytes)
> when getting the var from register.
>
> This is required when dealing with an operation that writes half of the
> register (like one d2x in IVB/BYT, which uses exec_size == 4).
>
> - v2: take into account this offset < 32 in liveness analysis too (Curro)
> ---
>  src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp | 12 
>  src/mesa/drivers/dri/i965/brw_vec4_live_variables.h   |  6 --
>  2 files changed, 12 insertions(+), 6 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
> index 73f658cd8fa..dc1ad21038c 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
> @@ -78,7 +78,8 @@ vec4_live_variables::setup_def_use()
>   if (inst->src[i].file == VGRF) {
> for (unsigned j = 0; j < DIV_ROUND_UP(inst->size_read(i), 
> 16); j++) {
>for (int c = 0; c < 4; c++) {
> - const unsigned v = var_from_reg(alloc, inst->src[i], c, 
> j);
> + const unsigned v =
> +var_from_reg(alloc, inst->src[i], c, j);

Neither this nor the four subsequent hunks seem to be doing anything,
please drop them.

>   if (!BITSET_TEST(bd->def, v))
>  BITSET_SET(bd->use, v);
>}
> @@ -101,7 +102,8 @@ vec4_live_variables::setup_def_use()
>  for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); 
> i++) {
> for (int c = 0; c < 4; c++) {
>if (inst->dst.writemask & (1 << c)) {
> - const unsigned v = var_from_reg(alloc, inst->dst, c, i);
> + const unsigned v =
> +var_from_reg(alloc, inst->dst, c, i);
>   if (!BITSET_TEST(bd->use, v))
>  BITSET_SET(bd->def, v);
>}
> @@ -257,7 +259,8 @@ vec4_visitor::calculate_live_intervals()
>if (inst->src[i].file == VGRF) {
>  for (unsigned j = 0; j < DIV_ROUND_UP(inst->size_read(i), 16); 
> j++) {
> for (int c = 0; c < 4; c++) {
> -  const unsigned v = var_from_reg(alloc, inst->src[i], c, j);
> +  const unsigned v =
> + var_from_reg(alloc, inst->src[i], c, j);
>start[v] = MIN2(start[v], ip);
>end[v] = ip;
> }
> @@ -269,7 +272,8 @@ vec4_visitor::calculate_live_intervals()
>   for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); i++) 
> {
>  for (int c = 0; c < 4; c++) {
> if (inst->dst.writemask & (1 << c)) {
> -  const unsigned v = var_from_reg(alloc, inst->dst, c, i);
> +  const unsigned v =
> + var_from_reg(alloc, inst->dst, c, i);
>start[v] = MIN2(start[v], ip);
>end[v] = ip;
> }
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h 
> b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
> index 8807c453743..b23df650c11 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
> @@ -89,7 +89,8 @@ var_from_reg(const simple_allocator , const src_reg 
> ,
> const unsigned csize = DIV_ROUND_UP(type_sz(reg.type), 4);
> unsigned result =
>8 * (alloc.offsets[reg.nr] + reg.offset / REG_SIZE) +
> -  (BRW_GET_SWZ(reg.swizzle, c) + k / csize * 4) * csize + k % csize;
> +  (BRW_GET_SWZ(reg.swizzle, c) + k / csize * 4) * csize + k % csize +
> +  (reg.offset % REG_SIZE) / type_sz(reg.type);

Looks bogus to me: the result is expressed in dwords, not in type_sz
units (because the live analysis pass has dword granularity).  Instead
of adding new terms to the expression, you could just take the
'reg.offset / REG_SIZE' term out of the first parentheses and replace it
with 'reg.offset / 4'.

> /* Do not exceed the limit for this register */
> assert(result < 8 * (alloc.offsets[reg.nr] + alloc.sizes[reg.nr]));
> return result;
> @@ -103,7 +104,8 @@ var_from_reg(const simple_allocator , const dst_reg 
> ,
> const unsigned csize = DIV_ROUND_UP(type_sz(reg.type), 4);
> unsigned result =
>8 * (alloc.offsets[reg.nr] + reg.offset / REG_SIZE) +
> -  (c + k / csize * 4) * csize + k % csize;
> +  (c + k / csize * 4) * csize + k % csize +
> +  (reg.offset % REG_SIZE) / type_sz(reg.type);

Same here.

> /* Do not exceed the limit for this register */
> assert(result < 8 * (alloc.offsets[reg.nr] + alloc.sizes[reg.nr]));
> return result;
> -- 
> 2.11.0
>
> 

Re: [Mesa-dev] [PATCH v3 16/24] i965/vec4: fix SIMD-width lowering for VEC4_OPCODE_FROM_DOUBLE in IVB/BYT

2017-03-03 Thread Francisco Jerez
Samuel Iglesias Gonsálvez  writes:

> From: "Juan A. Suarez Romero" 
>
> When splitting VEC4_OPCODE_FROM_DOUBLE in Ivybridge/Baytrail, the second
> part should use a temporary register, and then move the values to the
> second half of the original destination, so we get all the results in the
> same register.
>
> v2:
> - Fix typos (Matt).
> ---
>  src/mesa/drivers/dri/i965/brw_vec4.cpp   | 17 +
>  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp |  1 +
>  2 files changed, 14 insertions(+), 4 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index 64b435f3ec4..adcde085305 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -2191,9 +2191,15 @@ vec4_visitor::lower_simd_width()
>   linst->group = channel_offset;
>   linst->size_written = size_written;
>  
> + /* When splitting VEC4_OPCODE_FROM_DOUBLE on Ivybridge, the second 
> part
> +  * should use in a temporal register. Later we will move the values
> +  * to the second half of the original destination, so we get all the
> +  * results in the same register. We use d2f_pass to detect this 
> case.
> +  */
> + bool d2f_pass = (inst->opcode == VEC4_OPCODE_FROM_DOUBLE && n > 0);
>   /* Compute split dst region */
>   dst_reg dst;
> - if (needs_temp) {
> + if (needs_temp || d2f_pass) {
>  unsigned num_regs = DIV_ROUND_UP(size_written, REG_SIZE);
>  dst = retype(dst_reg(VGRF, alloc.allocate(num_regs)),
>   inst->dst.type);
> @@ -2226,9 +2232,12 @@ vec4_visitor::lower_simd_width()
>   /* If we used a temporary to store the result of the split
>* instruction, copy the result to the original destination
>*/
> - if (needs_temp) {
> -vec4_instruction *mov =
> -   MOV(offset(inst->dst, lowered_width, n), src_reg(dst));
> + if (needs_temp || d2f_pass) {
> +vec4_instruction *mov;
> +if (d2f_pass)
> +   mov = MOV(horiz_offset(inst->dst, n * 
> type_sz(inst->dst.type)), src_reg(dst));

I have no idea how this could possibly work...  horiz_offset() expects a
number of scalar components, not bytes.  Anyway, I have a hunch this is
trying to work around the bug I pointed out in PATCH 15...

> +else
> +   mov = MOV(offset(inst->dst, lowered_width, n), src_reg(dst));
>  mov->exec_size = lowered_width;
>  mov->group = channel_offset;
>  mov->size_written = size_written;
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> index 7fa1afc9073..b570792badd 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> @@ -1532,6 +1532,7 @@ generate_code(struct brw_codegen *p,
>   is_ivb_df);
>  
>assert(inst->group % 8 == 0 ||
> + (inst->exec_size == 4 && inst->group % 4 == 0) ||
>   inst->dst.type == BRW_REGISTER_TYPE_DF ||
>   inst->src[0].type == BRW_REGISTER_TYPE_DF ||
>   inst->src[1].type == BRW_REGISTER_TYPE_DF ||
> -- 
> 2.11.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 15/24] i965/vec4: fix VEC4_OPCODE_FROM_DOUBLE for IVB/BYT

2017-03-03 Thread Francisco Jerez
Samuel Iglesias Gonsálvez  writes:

> From: "Juan A. Suarez Romero" 
>
> In the generator we must generate slightly different code for
> Ivybridge/Baytrail, because of the way the stride works in
> this hardware.
> ---
>  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 17 -
>  1 file changed, 16 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> index 7bb1ab1879c..7fa1afc9073 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> @@ -1948,13 +1948,28 @@ generate_code(struct brw_codegen *p,
>  
>   brw_set_default_access_mode(p, BRW_ALIGN_1);
>  
> - dst.hstride = BRW_HORIZONTAL_STRIDE_2;

How would this know whether there was enough space allocated in the
destination to hold the destination value with a stride of two?  What if
the following SIMD vector in the same GRF allocation actually contained
useful single-precision data that is getting corrupted by the oversized
strided destination?  I think we should be doing this pre-regalloc so we
can allocate a temporary large enough to hold the strided value...

> + /* When converting from DF->F, we set destination's stride as 2 as 
> an
> +  * aligment requirement. But in IVB/BYT, each DF implicitly writes
> +  * two floats, being the first one the converted value. So we don't
> +  * need to explicitly set stride 2, but 1.
> +  */
> + if (devinfo->gen == 7 && !devinfo->is_haswell)
> +dst.hstride = BRW_HORIZONTAL_STRIDE_1;
> + else
> +dst.hstride = BRW_HORIZONTAL_STRIDE_2;
> +
>   dst.width = BRW_WIDTH_4;
>   src[0].vstride = BRW_VERTICAL_STRIDE_4;
>   src[0].width = BRW_WIDTH_4;
>   brw_MOV(p, dst, src[0]);
>  
spread(dst, desired-stride) so you don't mess up the original
destination brw_reg and have to fix it up again later.

>   struct brw_reg dst_as_src = dst;
> + /* As we have set horizontal stride 1 instead of 2 in IVB/BYT, we
> +  * need to fix it here to have the expected value.
> +  */
> + if (devinfo->gen == 7 && !devinfo->is_haswell)
> +dst_as_src.hstride = BRW_HORIZONTAL_STRIDE_2;
> +
>   dst.hstride = BRW_HORIZONTAL_STRIDE_1;
>   dst.width = BRW_WIDTH_8;
>   brw_MOV(p, dst, dst_as_src);
> -- 
> 2.11.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 14/24] i965/vec4: keep original type when dealing with null registers

2017-03-03 Thread Francisco Jerez
Samuel Iglesias Gonsálvez  writes:

> From: "Juan A. Suarez Romero" 
>
> Keep the original type when dealing with null registers. Specially

s/specially/especially/ here and below.

> because we do no want to introduce an implicit conversion between
> types that could affect the conditional flags.
>
> This affects specially when the original type is DF, and we are working
> on Ivybridge/Baytrail.
> ---
>  src/mesa/drivers/dri/i965/brw_vec4.cpp | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index 7080c93e550..64b435f3ec4 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -1983,6 +1983,7 @@ vec4_visitor::convert_to_hw_regs()
>   case BAD_FILE:
>  /* Probably unused. */
>  reg = brw_null_reg();
> +reg.type = src.type;

You could use retype() here and below.

>  break;
>  
>   case MRF:
> @@ -2033,6 +2034,7 @@ vec4_visitor::convert_to_hw_regs()
>  
>case BAD_FILE:
>   reg = brw_null_reg();
> + reg.type = dst.type;
>   break;
>  
>case IMM:
> @@ -2205,6 +2207,8 @@ vec4_visitor::lower_simd_width()
>   } else {
>  if (inst->dst.file != ARF)
> dst = horiz_offset(inst->dst, channel_offset);
> +else
> +   dst.type = inst->dst.type;

This shouldn't be necessary if you take into account my feedback to
PATCH 13.  With that fixed:

Reviewed-by: Francisco Jerez 

>   }
>   linst->dst = dst;
>  
> -- 
> 2.11.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 13/24] i965/vec4: split DF instructions and later double its execsize in IVB/BYT

2017-03-03 Thread Francisco Jerez
Samuel Iglesias Gonsálvez  writes:

> We need to split DF instructions in two on IVB/BYT as it needs an
> execsize 8 to process 4 DF values (one GRF in total).
>
> v2:
> - Rename helper and make it static inline function (Matt).
> - Fix indention and add braces (Matt).
>
> Signed-off-by: Samuel Iglesias Gonsálvez 
> ---
>  src/mesa/drivers/dri/i965/brw_ir_vec4.h  | 14 ++
>  src/mesa/drivers/dri/i965/brw_vec4.cpp   |  7 ++-
>  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 15 +--
>  3 files changed, 33 insertions(+), 3 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h 
> b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> index 57fc6be8f89..9d29c3fb944 100644
> --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
> @@ -405,6 +405,20 @@ regs_read(const vec4_instruction *inst, unsigned i)
> reg_size);
>  }
>  
> +static inline unsigned
> +get_exec_type_size(const vec4_instruction *inst)
> +{
> +   unsigned exec_type_size = 0;
> +
> +   for (int i = 0; i < 3; i++) {
> +  if (inst->src[i].type != BAD_FILE) {
> + exec_type_size = MAX2(exec_type_size, type_sz(inst->src[i].type));
> +  }
> +   }
> +
> +   return exec_type_size;
> +}
> +
>  } /* namespace brw */
>  
>  #endif
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index 5e60eb657a7..7080c93e550 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -2092,6 +2092,10 @@ get_lowered_simd_width(const struct gen_device_info 
> *devinfo,
>if (inst->opcode == BRW_OPCODE_SEL && type_sz(inst->dst.type) == 8)
>   lowered_width = MIN2(lowered_width, 4);
>  

Maybe add a short comment here explaining why you need to do this?

> +  if (devinfo->gen == 7 && !devinfo->is_haswell &&
> +  (get_exec_type_size(inst) == 8 || type_sz(inst->dst.type) == 8))
> + lowered_width = MIN2(lowered_width, 4);
> +
>/* HSW PRM, 3D Media GPGPU Engine, Region Alignment Rules for Direct
> * Register Addressing:
> *
> @@ -2199,7 +2203,8 @@ vec4_visitor::lower_simd_width()
> inst->insert_before(block, copy);
>  }
>   } else {
> -dst = horiz_offset(inst->dst, channel_offset);
> +if (inst->dst.file != ARF)
> +   dst = horiz_offset(inst->dst, channel_offset);

This doesn't look right, you need to give the same treatment to ARF
registers as to other registers.  If what you're trying to avoid here is
shifting the null register incorrectly, I suggest you fix horiz_offset()
to return the argument unchanged if it's the null register, because the
null register logically behaves like a scalar register (this is also
consistent with the way the FS back-end handles the same situation).

>   }
>   linst->dst = dst;
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> index 847a01bd43c..7bb1ab1879c 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> @@ -1522,14 +1522,25 @@ generate_code(struct brw_codegen *p,
>brw_set_default_saturate(p, inst->saturate);
>brw_set_default_mask_control(p, inst->force_writemask_all);
>brw_set_default_acc_write_control(p, inst->writes_accumulator);
> -  brw_set_default_exec_size(p, cvt(inst->exec_size) - 1);
>  
> -  assert(inst->group % inst->exec_size == 0);
> +  bool is_ivb_df = devinfo->gen == 7 &&
> + !devinfo->is_haswell &&
> + (get_exec_type_size(inst) == 8 ||
> +  inst->dst.type == BRW_REGISTER_TYPE_DF);
> +
> +  assert(inst->group % inst->exec_size == 0 ||
> + is_ivb_df);
> +
>assert(inst->group % 8 == 0 ||
>   inst->dst.type == BRW_REGISTER_TYPE_DF ||
>   inst->src[0].type == BRW_REGISTER_TYPE_DF ||
>   inst->src[1].type == BRW_REGISTER_TYPE_DF ||
>   inst->src[2].type == BRW_REGISTER_TYPE_DF);
> +
> +  if (is_ivb_df && inst->exec_size < 8)
> + inst->exec_size *= 2;
> +  brw_set_default_exec_size(p, cvt(inst->exec_size) - 1);
> +

Same comment here as for its FS counterpart...  Please let's not modify
the IR from the generator.

>if (!inst->force_writemask_all)
>   brw_set_default_group(p, inst->group);
>  
> -- 
> 2.11.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2] util/disk_cache: support caches for multiple architectures

2017-03-03 Thread Timothy Arceri



On 03/03/17 23:27, Grazvydas Ignotas wrote:

On Fri, Mar 3, 2017 at 5:27 AM, Timothy Arceri  wrote:

Previously we were deleting the entire cache if a user switched
between 32 and 64 bit applications.

V2: make the check more generic, it should now work with any
platform we are likely to support.
---
 src/util/disk_cache.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
index 3abdec4..92528a9 100644
--- a/src/util/disk_cache.c
+++ b/src/util/disk_cache.c
@@ -40,20 +40,30 @@
 #include "zlib.h"

 #include "util/crc32.h"
 #include "util/u_atomic.h"
 #include "util/mesa-sha1.h"
 #include "util/ralloc.h"
 #include "main/errors.h"

 #include "disk_cache.h"

+#if defined(__ILP32__)
+#if defined(__x86_64__) || defined(__arm__)
+#define CACHE_ARCH "ilp-32"
+#else
+#define CACHE_ARCH "32"
+#endif
+#else
+#define CACHE_ARCH "64"
+#endif


That reports "64" for me on gcc -m32; I think only clang sets
__ILP32__ for non-x32 32-bit builds.


Well that's annoying.


I'd still suggest using sizeof(void *) directly in the code, perhaps
within some "const char *get_arch_bitness_string()" helper; that should
be more reliable.


I'm tempted to just push this:
https://patchwork.freedesktop.org/patch/141891/

And worry about issues later if that's not good enough.



Gražvydas


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] util/disk_cache: support caches for multiple architectures

2017-03-03 Thread Timothy Arceri



On 04/03/17 04:05, Alan Swanson wrote:

On Fri, 2017-03-03 at 12:24 +1100, Timothy Arceri wrote:

On 03/03/17 11:53, Marek Olšák wrote:


OK.

I also wonder if 1GB isn't too conservative. Today’s games take up a
lot of space. My installed games occupy 480 GB. I could certainly
spare 10 GB for a shader cache if it improves the gaming experience.
For example, my ccache size is set to 27 GB, because 1 or 5 or 10 GB
wasn't enough for my use case. I assume some gamers would have a
similar attitude.


Yeah, I agree that 1GB is probably too small. This was set by Carl
before we even knew how much data we needed to cache.

I'm happy to set it at 4GB which would be a possible 8GB total.

We may need to cap it at 4GB for some platforms anyway, or at least
figure out a workaround for this:
https://bugs.freedesktop.org/show_bug.cgi?id=93089


I wouldn't say that 1G was too small currently as, for example, the
cache for shader heavy DeusEx:MD is ~50M compressed per your commit
message. There is the mythical quote of 640K being enough but how many
games and applications do you need cached at once?


Well, it's not like we reserve the space; we would just not be imposing a
small limit. The Dolphin emu is an example of an app that apparently
creates a very large number of shaders.




A more relevant issue would then be the random eviction rather than
using LRU eviction.


Happy to accept patches. The random evict code was written before my 
time on this.




However perhaps we could dynamically scale by checking statvfs and
quotactl to choose MAX[1G, MIN[10% user home filesystem, 10% user home
quota]]?


Again patches welcome :)




--
Alan.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 07/24] i965/fs: generalize the legalization d2x pass

2017-03-03 Thread Francisco Jerez
Samuel Iglesias Gonsálvez  writes:

> Add support to SEL instruction and add an assert to detect unsupported
> instructions than do d2x conversions.
>
> Signed-off-by: Samuel Iglesias Gonsálvez 
> ---
>
> Curro, this patch legalizes SEL instruction too. If other optimizations
> modify later any SEL's (or any other instruction's) destination type
> (hence, producing a non-lowered d2x conversion), we can call it again
> around the end of fs_visitor::optimize(). Possibly together with
> lower_simd_width() just in case it was added later.
>

This sounds rather scary...  How do you make sure that this doesn't lead
to an infinite legalization-optimization loop in which copy propagation
reverses the effect of lower_d2x making double conversions illegal
again?  If you do already, why do you need to run lower_d2x multiple
times?  Wouldn't it be sufficient to run it once near the end of
optimize(), and then re-run copy propagation and possibly DCE?

> For that reason there is the inst->dst.stride > 1 condition in the
> test. This detects if either we emitted a strided destination in
> purpose or it was as a result of a previous lower_d2x run, we don't
> want to lowered it.
>
The problem with this is that if you ended up with dst.stride > 1 due to
different fields of the same scalar quantity being defined by two
separate instructions (e.g. by using subscript(dst, ..., i)), you *need*
to apply the lowering pass regardless, because otherwise the second
instruction will corrupt the data written by the first instruction.

> However, as I have not hit that case yet, I prefer to wait for your
> opinion. What do you think?
>
>
>  src/mesa/drivers/dri/i965/brw_fs_lower_d2x.cpp | 57 
> ++
>  1 file changed, 41 insertions(+), 16 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_lower_d2x.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_lower_d2x.cpp
> index a2db1154615..330f2552929 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_lower_d2x.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_lower_d2x.cpp
> @@ -33,17 +33,9 @@ fs_visitor::lower_d2x()
> bool progress = false;
>  
> foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
> -  if (inst->opcode != BRW_OPCODE_MOV)
> - continue;
> -
> -  if (inst->dst.type != BRW_REGISTER_TYPE_F &&
> -  inst->dst.type != BRW_REGISTER_TYPE_D &&
> -  inst->dst.type != BRW_REGISTER_TYPE_UD)
> - continue;
> -
> -  if (inst->src[0].type != BRW_REGISTER_TYPE_DF &&
> -  inst->src[0].type != BRW_REGISTER_TYPE_UQ &&
> -  inst->src[0].type != BRW_REGISTER_TYPE_Q)
> +  if (get_exec_type_size(inst) != 8 ||
> +  type_sz(inst->dst.type) >= get_exec_type_size(inst) ||

Note that some type conversion restrictions apply even if the execution
type is single-precision, and even if the destination type size is not
less than the execution type, e.g. according to the hardware docs SEL
doesn't support F->UD or F->DF conversions which the condition above
would consider okay.

> +  inst->dst.stride > 1)
>   continue;
>  
>assert(inst->dst.file == VGRF);
> @@ -61,13 +53,46 @@ fs_visitor::lower_d2x()
> * So we need to allocate a temporary that's two registers, and then do
> * a strided MOV to get the lower DWord of every Qword that has the
> * result.
> +   *
> +   * This pass legalizes all the DF conversions to narrower types.
> */
> -  fs_reg temp = ibld.vgrf(inst->src[0].type, 1);
> -  fs_reg strided_temp = subscript(temp, inst->dst.type, 0);
> -  ibld.MOV(strided_temp, inst->src[0]);
> -  ibld.MOV(dst, strided_temp);
> +  switch (inst->opcode) {

I suggest you refactor this into a helper function 'bool
supports_type_conversion(inst, dst_type, exec_type)' that returns false
for SEL and likely other things.  It might be a useful thing to have in
other places, e.g. for late optimization passes like copy propagation
where we need to make sure that no additional illegal conversions are
introduced.  If the value returned is false you'd do what you have below
for the SEL instruction, if it's true you'd do nothing unless the
instruction is double-precision and the destination type is smaller than
the execution type, in which case you'd do what you have below for
MOV/MOV_INDIRECT.

> +  case SHADER_OPCODE_MOV_INDIRECT:
> +  case BRW_OPCODE_MOV: {
> + fs_reg temp = ibld.vgrf(inst->src[0].type, 1);
> + fs_reg strided_temp = subscript(temp, inst->dst.type, 0);
> + /* We clone the original instruction as we are going to modify it
> +  * and emit another one after it.
> +  */
> + fs_inst *strided_inst = new(ibld.shader->mem_ctx) fs_inst(*inst);

Why don't you just modify the original instruction instead of cloning
it, modifying the clone, and then removing the original?

> + strided_inst->dst = strided_temp;
> + /* As it 

Re: [Mesa-dev] [PATCH RFC] clover: clone pipe-resource if root-buffer already exists

2017-03-03 Thread Francisco Jerez
Jan Vesely  writes:

> Fixes cl-api-enqueue-read_write-buffer on carrizo+topaz machine
>
> Signed-off-by: Jan Vesely 
> ---
> I'm not sure if cloning is the right thing to do.
> Should we copy the pointer and increase ref count instead?
>
> PS: the assert did not trigger despite building mesa with --enable-debug
>
> Jan
>
>  src/gallium/state_trackers/clover/core/resource.cpp | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/gallium/state_trackers/clover/core/resource.cpp 
> b/src/gallium/state_trackers/clover/core/resource.cpp
> index 06fd3f6..71414f4 100644
> --- a/src/gallium/state_trackers/clover/core/resource.cpp
> +++ b/src/gallium/state_trackers/clover/core/resource.cpp
> @@ -172,7 +172,7 @@ root_resource::root_resource(clover::device , 
> memory_obj ,
>  root_resource::root_resource(clover::device , memory_obj ,
>   root_resource ) :
> resource(dev, obj) {
> -   assert(0); // XXX -- resource shared among dev and r.dev
> +   pipe = dev.pipe->resource_create(dev.pipe, r.pipe);

AFAIK this doesn't actually create a resource shared among the two
devices, which is what you need here.

>  }
>  
>  root_resource::~root_resource() {
> -- 
> 2.9.3


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: Rename brw_format_for_mesa_format() to brw_isl_format_for_mesa_format()

2017-03-03 Thread Anuj Phogat
Signed-off-by: Anuj Phogat 
---
 src/mesa/drivers/dri/i965/brw_blorp.c|  2 +-
 src/mesa/drivers/dri/i965/brw_context.c  |  2 +-
 src/mesa/drivers/dri/i965/brw_meta_util.c|  2 +-
 src/mesa/drivers/dri/i965/brw_state.h|  2 +-
 src/mesa/drivers/dri/i965/brw_surface_formats.c  | 14 +++---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  4 ++--
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c|  2 +-
 7 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
b/src/mesa/drivers/dri/i965/brw_blorp.c
index 9f7ba3d..fdc9dd1 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -294,7 +294,7 @@ brw_blorp_to_isl_format(struct brw_context *brw, 
mesa_format format,
  assert(brw->format_supported_as_render_target[format]);
  return brw->render_target_format[format];
   } else {
- return brw_format_for_mesa_format(format);
+ return brw_isl_format_for_mesa_format(format);
   }
   break;
}
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 3688ba4..42dfed0 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -205,7 +205,7 @@ intel_texture_view_requires_resolve(struct brw_context *brw,
!intel_miptree_is_lossless_compressed(brw, intel_tex->mt))
  return false;
 
-   const uint32_t brw_format = brw_format_for_mesa_format(intel_tex->_Format);
+   const uint32_t brw_format = 
brw_isl_format_for_mesa_format(intel_tex->_Format);
 
if (isl_format_supports_ccs_e(>screen->devinfo, brw_format))
   return false;
diff --git a/src/mesa/drivers/dri/i965/brw_meta_util.c 
b/src/mesa/drivers/dri/i965/brw_meta_util.c
index 07a160f..cbc2ded 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_util.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_util.c
@@ -288,7 +288,7 @@ brw_is_color_fast_clear_compatible(struct brw_context *brw,
 * this case. At least on Gen9 this really does seem to cause problems.
 */
if (brw->gen >= 9 &&
-   brw_format_for_mesa_format(mt->format) !=
+   brw_isl_format_for_mesa_format(mt->format) !=
brw->render_target_format[mt->format])
   return false;
 
diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index 4b7e3c2..bd05b60 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -267,7 +267,7 @@ void gen4_init_vtable_surface_functions(struct brw_context 
*brw);
 uint32_t brw_get_surface_tiling_bits(uint32_t tiling);
 uint32_t brw_get_surface_num_multisamples(unsigned num_samples);
 
-uint32_t brw_format_for_mesa_format(mesa_format mesa_format);
+uint32_t brw_isl_format_for_mesa_format(mesa_format mesa_format);
 
 GLuint translate_tex_target(GLenum target);
 
diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c 
b/src/mesa/drivers/dri/i965/brw_surface_formats.c
index 706818d..7b17e11 100644
--- a/src/mesa/drivers/dri/i965/brw_surface_formats.c
+++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c
@@ -29,7 +29,7 @@
 #include "brw_defines.h"
 
 uint32_t
-brw_format_for_mesa_format(mesa_format mesa_format)
+brw_isl_format_for_mesa_format(mesa_format mesa_format)
 {
/* This table is ordered according to the enum ordering in formats.h.  We do
 * expect that enum to be extended without our explicit initialization
@@ -303,7 +303,7 @@ brw_init_surface_formats(struct brw_context *brw)
   uint32_t texture, render;
   bool is_integer = _mesa_is_format_integer_color(format);
 
-  render = texture = brw_format_for_mesa_format(format);
+  render = texture = brw_isl_format_for_mesa_format(format);
 
   /* The value of ISL_FORMAT_R32G32B32A32_FLOAT is 0, so don't skip
* it.
@@ -536,7 +536,7 @@ translate_tex_format(struct brw_context *brw,
   return ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS;
 
case MESA_FORMAT_RGBA_FLOAT32:
-  /* The value of this BRW_SURFACEFORMAT is 0, which tricks the
+  /* The value of this ISL surface format is 0, which tricks the
* assertion below.
*/
   return ISL_FORMAT_R32G32B32A32_FLOAT;
@@ -550,7 +550,7 @@ translate_tex_format(struct brw_context *brw,
  WARN_ONCE(true, "Demoting sRGB DXT1 texture to non-sRGB\n");
  mesa_format = MESA_FORMAT_RGB_DXT1;
   }
-  return brw_format_for_mesa_format(mesa_format);
+  return brw_isl_format_for_mesa_format(mesa_format);
 
case MESA_FORMAT_RGBA_ASTC_4x4:
case MESA_FORMAT_RGBA_ASTC_5x4:
@@ -566,7 +566,7 @@ translate_tex_format(struct brw_context *brw,
case MESA_FORMAT_RGBA_ASTC_10x10:
case MESA_FORMAT_RGBA_ASTC_12x10:
case MESA_FORMAT_RGBA_ASTC_12x12: {
-  GLuint brw_fmt = brw_format_for_mesa_format(mesa_format);
+  GLuint brw_fmt = brw_isl_format_for_mesa_format(mesa_format);
 
   /**
* It is 

Re: [Mesa-dev] [PATCH] nir/builder: Add an int46 immediate helper

2017-03-03 Thread Matt Turner
with s/int46/int64/,

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir/int64: Properly handle imod/irem

2017-03-03 Thread Matt Turner
Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Clamp texture buffer size to GL_MAX_TEXTURE_BUFFER_SIZE.

2017-03-03 Thread Anuj Phogat
On Fri, Mar 3, 2017 at 11:32 AM, Kenneth Graunke  wrote:
> The OpenGL 4.5 specification's description of TexBuffer says:
>
> "The number of texels in the texture image is then clamped to an
>  implementation-dependent limit, the value of MAX_TEXTURE_BUFFER_SIZE."
>
> We set GL_MAX_TEXTURE_BUFFER_SIZE to 2^27.  For buffers with a byte
> element size, this is the maximum possible size we can encode in
> SURFACE_STATE.  If you bind a buffer object larger than this as a
> texture buffer object, we'll exceed that limit and hit an isl assert:
>
>assert(num_elements <= (1ull << 27));
>
> To fix this, clamp the size in bytes to MaxTextureSize / texel_size.
>
> Signed-off-by: Kenneth Graunke 
> ---
>  src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 18 ++
>  1 file changed, 18 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
> b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index 8ffbc0a5fd5..e48b1e1d2d1 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> @@ -695,6 +695,24 @@ brw_update_buffer_texture_surface(struct gl_context *ctx,
>bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
> }
>
> +   /* The ARB_texture_buffer_object specification says:
> +*
> +*"The number of texels in the buffer texture's texel array is given 
> by
> +*
> +*   floor(<buffer_size> / (<components> * sizeof(<base_type>)),
> +*
> +* where <buffer_size> is the size of the buffer object, in basic
> +* machine units and <components> and <base_type> are the element 
> count
> +* and base data type for elements, as specified in Table X.1.  The
> +* number of texels in the texel array is then clamped to the
> +* implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
> +*
> +* We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
> +* so that when ISL divides by stride to obtain the number of texels, that
> +* texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
> +*/
> +   size = MIN2(size, ctx->Const.MaxTextureBufferSize * (unsigned) 
> texel_size);
> +
> if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
>_mesa_problem(NULL, "bad format %s for texture buffer\n",
> _mesa_get_format_name(format));
> --
> 2.11.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

LGTM.
Reviewed-by: Anuj Phogat 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 0/5] swr: geometry shaders

2017-03-03 Thread Cherniak, Bruce
Maybe a little cleaner wording on the relnotes:  “Geometry shaders on on swr”; 
perhaps “Geometry shaders enabled on swr”?

With that…
Reviewed-by: Bruce Cherniak 

> On Mar 3, 2017, at 2:24 PM, Tim Rowley  wrote:
> 
> This patch set implements geometry shaders for the swr driver.
> 
> Probably still some problems (some hidden by transform feedback bugs),
> but an additional 1719 piglit tests pass, and the VTK tests using
> geometry shaders now pass.
> 
> v2:
>  * swr: remove SWR_NEW_ALL, make new vs dirty gs state
>  * configure.ac: modify commit message to mention scons/automake match
>  * relnotes: mention swr gs addition, updated llvm requirements
> 
> Tim Rowley (5):
>  configure.ac: increase required swr llvm to 3.9.0
>  swr: implement geometry shaders
>  swr: [rasterizer core] fix primID provoking vertex for GS
>  docs: update features.txt for swr geometry shaders
>  relnotes: [swr] note addition of gs, increased llvm requirement
> 
> configure.ac   |   2 +-
> docs/features.txt  |  18 +-
> docs/relnotes/17.1.0.html  |   4 +
> .../drivers/swr/rasterizer/core/frontend.cpp   |   4 +-
> src/gallium/drivers/swr/swr_context.cpp|   2 +-
> src/gallium/drivers/swr/swr_context.h  |  26 +-
> src/gallium/drivers/swr/swr_draw.cpp   |  50 +--
> src/gallium/drivers/swr/swr_fence_work.cpp |  21 +
> src/gallium/drivers/swr/swr_fence_work.h   |   3 +
> src/gallium/drivers/swr/swr_scratch.cpp|   1 +
> src/gallium/drivers/swr/swr_scratch.h  |   1 +
> src/gallium/drivers/swr/swr_screen.cpp |   6 +-
> src/gallium/drivers/swr/swr_shader.cpp | 475 -
> src/gallium/drivers/swr/swr_shader.h   |  22 +
> src/gallium/drivers/swr/swr_state.cpp  | 100 -
> src/gallium/drivers/swr/swr_state.h|  50 +++
> src/gallium/drivers/swr/swr_tex_sample.cpp |   6 +
> 17 files changed, 716 insertions(+), 75 deletions(-)
> 
> -- 
> 2.7.4
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] fixup! i965: Move the back-end compiler to src/intel/compiler

2017-03-03 Thread Jason Ekstrand
On Fri, Mar 3, 2017 at 11:41 AM, Emil Velikov 
wrote:

> On 3 March 2017 at 18:42, Mauro Rossi  wrote:
> >> ok .. now I got some fixes for this here:
> >>
> >> https://github.com/tpalli/external-mesa/commits/move_compiler
> >>
> >> but yeah .. more work required :/ I don't understand why adding
> >> MESA_GEN_GLSL_H to LOCAL_GENERATED_SOURCES does not help.
> >
> > Hi Emil, Tapani,
> >
> > The problem is just that the  LOCAL_C_INCLUDES for new library are
> > missing one line that was present in
> > src/mesa/drivers/dri/i965/Android.mk, the following one:
> >
> >$(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_glsl,
> ,)/glsl
> >
> Yeah, noticed a similar one missing on the automake/autoconf side.
>
> Jason, how would you like the fixes to squash - separate patches,
> branch with separate "squash !..." or a branch with them squashed
> already ?
>

I don't care
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] configure.ac: increase required swr llvm to 3.9.0

2017-03-03 Thread Rowley, Timothy O

> On Mar 3, 2017, at 5:55 AM, Emil Velikov  wrote:
> 
> On 3 March 2017 at 01:16, Tim Rowley  wrote:
>> GS implementation uses the masked.{gather,store} intrinsics,
>> introduced in llvm-3.9.0.
> 
> Please mention in the commit message that the SCons build already
> requires 3.9 or later.
> Can you add a note about the LLVM requirement and GS support in
> docs/relnotes/17.1.0.html, with a separate commit on top ?

Both of these are in v2 of the patch set.

> With this we have some ~20 preprocessor conditionals which want to be
> cleaned up. Look for
> $ git grep  "LLVM_.*VERSION\|HAVE_LLVM" -- src/gallium/drivers/swr/

Ah, good catch.  We’ve been ratcheting up our required llvm version without 
cleaning out some of the cruft.  Internally we’re still using 3.8 so not all of 
these can be removed.  I’ll work on that in a follow-up patch, as it’s 
unrelated to the geometry shader implementation.

-Tim

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 00/11] GL_ARB_gpu_shader_fp64

2017-03-03 Thread Jason Ekstrand
On Fri, Mar 3, 2017 at 11:41 AM, Ilia Mirkin  wrote:

> On Fri, Mar 3, 2017 at 2:16 PM, Jason Ekstrand 
> wrote:
> > Hey Elie!
> >
> > On Fri, Mar 3, 2017 at 8:22 AM, Elie Tournier 
> > wrote:
> >>
> >> From: Elie Tournier 
> >>
> >> This series is based on Ian's work about GL_ARB_gpu_shader_int64 [1].
> >> The goal is to expose GL_ARB_shader_fp64 to OpenGL 3.0 GPUs.
> >>
> >> Each function can be independently tested using shader_runner from
> piglit.
> >> The piglit files are stored on github [2].
> >>
> >> [1]
> >> https://lists.freedesktop.org/archives/mesa-dev/2016-
> November/136718.html
> >> [2] https://github.com/Hopetech/libSoftFloat
> >
> >
> > Glad to see this finally turning into code.
> >
> > Before we get too far into things, I'd like to talk about the approach a
> > bit.  First off, if we (Intel) are going to use this on any hardware, we
> > would really like it to be in NIR.  The reason for this is that NIR has a
> > much more powerful algebraic optimizer than GLSL IR and we would like to
> > have as few fp64 instructions as possible before we start lowering them
> to
> > piles of integer math.  I believe Ian's plan for this was that someone
> would
> > write a nir_builder back-end for the stand-alone compiler.
> Unfortunately,
> > he sort-of left that as "an exercise to the reader" and no code exists
> to my
> > knowledge.  If we're going to write things in GLSL, we really need that
> NIR
> > back-end.
>
> I'm not sure what the impetus was for developing a softfloat library
> (but I'm a big fan), but the current situation is that it will largely
> just be useful for AMD Evergreen/Northern Islands chips, which consume
> TGSI produced from GLSL. (Aside: [1].) As such, I'm not sure if a push
> towards NIR is warranted -- it would cause a more convoluted path
> towards the intended target.
>

Whether or not i965 wants softfloat is an ongoing debate.  On the one hand,
we have "hardware support" for it starting with ivy bridge.  On the other
hand, early hardware support is sufficiently terrible that softfloat may
end up being a better plan.  Also, I wouldn't be surprised if, at some
point in the future, some hardware engineer decides they can save a bunch
of power on low-power parts if they delete the fp64 hardware.  Since we
ship desktop GL on those parts, losing 4.0 would be bad.  I don't want to
paint ourselves into a corner on fp64.


> I do agree with the larger point - the lowering should be done as late
> as possible in order to enable algebraic-style optimizations. (This is
> also why I've argued that optimizing in the frontend is too early - it
> should be all just be done in the backend, as additional calculations
> can easily make their way into the flow. I realize that's impractical
> for i965 as the backend is not SSA though, and some opts are
> necessary in GLSL in order to perform the necessary validation.)
>

That's not really an accurate account of why we do it in NIR for i965...
By the time we get done with all the lowering we do in NIR, the NIR code
looks a lot like back-end code.  Certainly, any optimizations on fp64
operations will already have been done.  It's just that anything that looks
too much like i965 hardware will be a pain to optimize.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 4/5] docs: update features.txt for swr geometry shaders

2017-03-03 Thread Tim Rowley
---
 docs/features.txt | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/docs/features.txt b/docs/features.txt
index 67d177d..df1860d 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -78,18 +78,18 @@ GL 3.1, GLSL 1.40 --- all DONE: freedreno, i965, nv50, 
nvc0, r600, radeonsi, llv
   GL_EXT_texture_snorm (Signed normalized textures) DONE ()
 
 
-GL 3.2, GLSL 1.50 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, 
softpipe
+GL 3.2, GLSL 1.50 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, 
softpipe, swr
 
   Core/compatibility profiles   DONE
   Geometry shaders  DONE ()
-  GL_ARB_vertex_array_bgra (BGRA vertex order)  DONE (freedreno, swr)
-  GL_ARB_draw_elements_base_vertex (Base vertex offset) DONE (freedreno, swr)
-  GL_ARB_fragment_coord_conventions (Frag shader coord) DONE (freedreno, swr)
-  GL_ARB_provoking_vertex (Provoking vertex)DONE (freedreno, swr)
-  GL_ARB_seamless_cube_map (Seamless cubemaps)  DONE (freedreno, swr)
-  GL_ARB_texture_multisample (Multisample textures) DONE (swr)
-  GL_ARB_depth_clamp (Frag depth clamp) DONE (freedreno, swr)
-  GL_ARB_sync (Fence objects)   DONE (freedreno, swr)
+  GL_ARB_vertex_array_bgra (BGRA vertex order)  DONE (freedreno)
+  GL_ARB_draw_elements_base_vertex (Base vertex offset) DONE (freedreno)
+  GL_ARB_fragment_coord_conventions (Frag shader coord) DONE (freedreno)
+  GL_ARB_provoking_vertex (Provoking vertex)DONE (freedreno)
+  GL_ARB_seamless_cube_map (Seamless cubemaps)  DONE (freedreno)
+  GL_ARB_texture_multisample (Multisample textures) DONE ()
+  GL_ARB_depth_clamp (Frag depth clamp) DONE (freedreno)
+  GL_ARB_sync (Fence objects)   DONE (freedreno)
   GLX_ARB_create_context_profileDONE
 
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 5/5] relnotes: [swr] note addition of gs, increased llvm requirement

2017-03-03 Thread Tim Rowley
---
 docs/relnotes/17.1.0.html | 4 
 1 file changed, 4 insertions(+)

diff --git a/docs/relnotes/17.1.0.html b/docs/relnotes/17.1.0.html
index 9ababcc..3678173 100644
--- a/docs/relnotes/17.1.0.html
+++ b/docs/relnotes/17.1.0.html
@@ -47,6 +47,7 @@ Note: some of the new features are only available with 
certain drivers.
 GL_ARB_gpu_shader_int64 on i965/gen8+, nvc0, radeonsi, softpipe, 
llvmpipe
 GL_ARB_transform_feedback2 on i965/gen6
 GL_ARB_transform_feedback_overflow_query on i965/gen6+
+Geometry shaders on on swr
 
 
 Bug fixes
@@ -56,8 +57,11 @@ Note: some of the new features are only available with 
certain drivers.
 
 Changes
 
+
 Removed the ilo gallium driver.
 The configure option --enable-gallium-llvm is superseded by 
--enable-llvm.
+swr driver now requires llvm = 3.9.0.
+
 
 
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 1/5] configure.ac: increase required swr llvm to 3.9.0

2017-03-03 Thread Tim Rowley
GS implementation uses the masked.{gather,store} intrinsics,
introduced in llvm-3.9.0.  swr llvm version requirement in
automake and scons now match (scons already needed >= 3.9).
---
 configure.ac | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure.ac b/configure.ac
index 943bc05..73bd749 100644
--- a/configure.ac
+++ b/configure.ac
@@ -107,7 +107,7 @@ LLVM_REQUIRED_OPENCL=3.6.0
 LLVM_REQUIRED_R600=3.6.0
 LLVM_REQUIRED_RADEONSI=3.6.0
 LLVM_REQUIRED_RADV=3.9.0
-LLVM_REQUIRED_SWR=3.6.0
+LLVM_REQUIRED_SWR=3.9.0
 
 dnl Check for progs
 AC_PROG_CPP
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 3/5] swr: [rasterizer core] fix primID provoking vertex for GS

2017-03-03 Thread Tim Rowley
---
 src/gallium/drivers/swr/rasterizer/core/frontend.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp 
b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index c150c51..18728e5 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -815,7 +815,7 @@ static void GeometryShaderStage(
 {
 simdvector primIdAttrib[3];
 gsPa.Assemble(VERTEX_PRIMID_SLOT, 
primIdAttrib);
-vPrimId = 
_simd_castps_si(primIdAttrib[0].x);
+vPrimId = 
_simd_castps_si(primIdAttrib[state.frontendState.topologyProvokingVertex].x);
 }
 else
 {
@@ -1682,4 +1682,4 @@ PFN_FE_WORK_FUNC GetProcessDrawFunc(
 bool HasRasterization)
 {
 return TemplateArgUnroller::GetFunc(IsIndexed, 
IsCutIndexEnabled, HasTessellation, HasGeometryShader, HasStreamOut, 
HasRasterization);
-}
\ No newline at end of file
+}
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/5] swr: implement geometry shaders

2017-03-03 Thread Tim Rowley
---
 src/gallium/drivers/swr/swr_context.cpp|   2 +-
 src/gallium/drivers/swr/swr_context.h  |  26 +-
 src/gallium/drivers/swr/swr_draw.cpp   |  50 +--
 src/gallium/drivers/swr/swr_fence_work.cpp |  21 ++
 src/gallium/drivers/swr/swr_fence_work.h   |   3 +
 src/gallium/drivers/swr/swr_scratch.cpp|   1 +
 src/gallium/drivers/swr/swr_scratch.h  |   1 +
 src/gallium/drivers/swr/swr_screen.cpp |   6 +-
 src/gallium/drivers/swr/swr_shader.cpp | 475 -
 src/gallium/drivers/swr/swr_shader.h   |  22 ++
 src/gallium/drivers/swr/swr_state.cpp  | 100 +-
 src/gallium/drivers/swr/swr_state.h|  50 +++
 src/gallium/drivers/swr/swr_tex_sample.cpp |   6 +
 13 files changed, 700 insertions(+), 63 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_context.cpp 
b/src/gallium/drivers/swr/swr_context.cpp
index b89ce1b..1c98ac2 100644
--- a/src/gallium/drivers/swr/swr_context.cpp
+++ b/src/gallium/drivers/swr/swr_context.cpp
@@ -307,7 +307,7 @@ swr_blit(struct pipe_context *pipe, const struct 
pipe_blit_info *blit_info)
util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vertex_buffer);
util_blitter_save_vertex_elements(ctx->blitter, (void *)ctx->velems);
util_blitter_save_vertex_shader(ctx->blitter, (void *)ctx->vs);
-   /*util_blitter_save_geometry_shader(ctx->blitter, (void*)ctx->gs);*/
+   util_blitter_save_geometry_shader(ctx->blitter, (void*)ctx->gs);
util_blitter_save_so_targets(
   ctx->blitter,
   ctx->num_so_targets,
diff --git a/src/gallium/drivers/swr/swr_context.h 
b/src/gallium/drivers/swr/swr_context.h
index 04e11fe..46ca611 100644
--- a/src/gallium/drivers/swr/swr_context.h
+++ b/src/gallium/drivers/swr/swr_context.h
@@ -40,16 +40,17 @@
 #define SWR_NEW_SAMPLER_VIEW (1 << 4)
 #define SWR_NEW_VS (1 << 5)
 #define SWR_NEW_FS (1 << 6)
-#define SWR_NEW_VSCONSTANTS (1 << 7)
-#define SWR_NEW_FSCONSTANTS (1 << 8)
-#define SWR_NEW_VERTEX (1 << 9)
-#define SWR_NEW_STIPPLE (1 << 10)
-#define SWR_NEW_SCISSOR (1 << 11)
-#define SWR_NEW_VIEWPORT (1 << 12)
-#define SWR_NEW_FRAMEBUFFER (1 << 13)
-#define SWR_NEW_CLIP (1 << 14)
-#define SWR_NEW_SO (1 << 15)
-#define SWR_NEW_ALL 0x
+#define SWR_NEW_GS (1 << 7)
+#define SWR_NEW_VSCONSTANTS (1 << 8)
+#define SWR_NEW_FSCONSTANTS (1 << 9)
+#define SWR_NEW_GSCONSTANTS (1 << 10)
+#define SWR_NEW_VERTEX (1 << 11)
+#define SWR_NEW_STIPPLE (1 << 12)
+#define SWR_NEW_SCISSOR (1 << 13)
+#define SWR_NEW_VIEWPORT (1 << 14)
+#define SWR_NEW_FRAMEBUFFER (1 << 15)
+#define SWR_NEW_CLIP (1 << 16)
+#define SWR_NEW_SO (1 << 17)
 
 namespace std
 {
@@ -85,11 +86,15 @@ struct swr_draw_context {
uint32_t num_constantsVS[PIPE_MAX_CONSTANT_BUFFERS];
const float *constantFS[PIPE_MAX_CONSTANT_BUFFERS];
uint32_t num_constantsFS[PIPE_MAX_CONSTANT_BUFFERS];
+   const float *constantGS[PIPE_MAX_CONSTANT_BUFFERS];
+   uint32_t num_constantsGS[PIPE_MAX_CONSTANT_BUFFERS];
 
swr_jit_texture texturesVS[PIPE_MAX_SHADER_SAMPLER_VIEWS];
swr_jit_sampler samplersVS[PIPE_MAX_SAMPLERS];
swr_jit_texture texturesFS[PIPE_MAX_SHADER_SAMPLER_VIEWS];
swr_jit_sampler samplersFS[PIPE_MAX_SAMPLERS];
+   swr_jit_texture texturesGS[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+   swr_jit_sampler samplersGS[PIPE_MAX_SAMPLERS];
 
float userClipPlanes[PIPE_MAX_CLIP_PLANES][4];
 
@@ -112,6 +117,7 @@ struct swr_context {
 
struct swr_vertex_shader *vs;
struct swr_fragment_shader *fs;
+   struct swr_geometry_shader *gs;
struct swr_vertex_element_state *velems;
 
/** Other rendering state */
diff --git a/src/gallium/drivers/swr/swr_draw.cpp 
b/src/gallium/drivers/swr/swr_draw.cpp
index f764efe..c43f4a5 100644
--- a/src/gallium/drivers/swr/swr_draw.cpp
+++ b/src/gallium/drivers/swr/swr_draw.cpp
@@ -32,48 +32,6 @@
 #include "util/u_prim.h"
 
 /*
- * Convert mesa PIPE_PRIM_X to SWR enum PRIMITIVE_TOPOLOGY
- */
-static INLINE enum PRIMITIVE_TOPOLOGY
-swr_convert_prim_topology(const unsigned mode)
-{
-   switch (mode) {
-   case PIPE_PRIM_POINTS:
-  return TOP_POINT_LIST;
-   case PIPE_PRIM_LINES:
-  return TOP_LINE_LIST;
-   case PIPE_PRIM_LINE_LOOP:
-  return TOP_LINE_LOOP;
-   case PIPE_PRIM_LINE_STRIP:
-  return TOP_LINE_STRIP;
-   case PIPE_PRIM_TRIANGLES:
-  return TOP_TRIANGLE_LIST;
-   case PIPE_PRIM_TRIANGLE_STRIP:
-  return TOP_TRIANGLE_STRIP;
-   case PIPE_PRIM_TRIANGLE_FAN:
-  return TOP_TRIANGLE_FAN;
-   case PIPE_PRIM_QUADS:
-  return TOP_QUAD_LIST;
-   case PIPE_PRIM_QUAD_STRIP:
-  return TOP_QUAD_STRIP;
-   case PIPE_PRIM_POLYGON:
-  return TOP_TRIANGLE_FAN; /* XXX TOP_POLYGON; */
-   case PIPE_PRIM_LINES_ADJACENCY:
-  return TOP_LINE_LIST_ADJ;
-   case PIPE_PRIM_LINE_STRIP_ADJACENCY:
-  return TOP_LISTSTRIP_ADJ;
-   case PIPE_PRIM_TRIANGLES_ADJACENCY:
-  return TOP_TRI_LIST_ADJ;
-   case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
-  return TOP_TRI_STRIP_ADJ;
-   default:
-  assert(0 && "Unknown topology");
-  return 

[Mesa-dev] [PATCH v2 0/5] swr: geometry shaders

2017-03-03 Thread Tim Rowley
This patch set implements geometry shaders for the swr driver.

Probably still some problems (some hidden by transform feedback bugs),
but an additional 1719 piglit tests pass, and the VTK tests using
geometry shaders now pass.

v2:
  * swr: remove SWR_NEW_ALL, make new vs dirty gs state
  * configure.ac: modify commit message to mention scons/automake match
  * relnotes: mention swr gs addition, updated llvm requirements

Tim Rowley (5):
  configure.ac: increase required swr llvm to 3.9.0
  swr: implement geometry shaders
  swr: [rasterizer core] fix primID provoking vertex for GS
  docs: update features.txt for swr geometry shaders
  relnotes: [swr] note addition of gs, increased llvm requirement

 configure.ac   |   2 +-
 docs/features.txt  |  18 +-
 docs/relnotes/17.1.0.html  |   4 +
 .../drivers/swr/rasterizer/core/frontend.cpp   |   4 +-
 src/gallium/drivers/swr/swr_context.cpp|   2 +-
 src/gallium/drivers/swr/swr_context.h  |  26 +-
 src/gallium/drivers/swr/swr_draw.cpp   |  50 +--
 src/gallium/drivers/swr/swr_fence_work.cpp |  21 +
 src/gallium/drivers/swr/swr_fence_work.h   |   3 +
 src/gallium/drivers/swr/swr_scratch.cpp|   1 +
 src/gallium/drivers/swr/swr_scratch.h  |   1 +
 src/gallium/drivers/swr/swr_screen.cpp |   6 +-
 src/gallium/drivers/swr/swr_shader.cpp | 475 -
 src/gallium/drivers/swr/swr_shader.h   |  22 +
 src/gallium/drivers/swr/swr_state.cpp  | 100 -
 src/gallium/drivers/swr/swr_state.h|  50 +++
 src/gallium/drivers/swr/swr_tex_sample.cpp |   6 +
 17 files changed, 716 insertions(+), 75 deletions(-)

-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 00/11] GL_ARB_gpu_shader_fp64

2017-03-03 Thread Ilia Mirkin
On Fri, Mar 3, 2017 at 2:16 PM, Jason Ekstrand  wrote:
> Hey Elie!
>
> On Fri, Mar 3, 2017 at 8:22 AM, Elie Tournier 
> wrote:
>>
>> From: Elie Tournier 
>>
>> This series is based on Ian's work about GL_ARB_gpu_shader_int64 [1].
>> The goal is to expose GL_ARB_shader_fp64 to OpenGL 3.0 GPUs.
>>
>> Each function can be independently tested using shader_runner from piglit.
>> The piglit files are stored on github [2].
>>
>> [1]
>> https://lists.freedesktop.org/archives/mesa-dev/2016-November/136718.html
>> [2] https://github.com/Hopetech/libSoftFloat
>
>
> Glad to see this finally turning into code.
>
> Before we get too far into things, I'd like to talk about the approach a
> bit.  First off, if we (Intel) are going to use this on any hardware, we
> would really like it to be in NIR.  The reason for this is that NIR has a
> much more powerful algebraic optimizer than GLSL IR and we would like to
> have as few fp64 instructions as possible before we start lowering them to
> piles of integer math.  I believe Ian's plan for this was that someone would
> write a nir_builder back-end for the stand-alone compiler.  Unfortunately,
> he sort-of left that as "an exercise to the reader" and no code exists to my
> knowledge.  If we're going to write things in GLSL, we really need that NIR
> back-end.

I'm not sure what the impetus was for developing a softfloat library
(but I'm a big fan), but the current situation is that it will largely
just be useful for AMD Evergreen/Northern Islands chips, which consume
TGSI produced from GLSL. (Aside: [1].) As such, I'm not sure if a push
towards NIR is warranted -- it would cause a more convoluted path
towards the intended target.

I do agree with the larger point - the lowering should be done as late
as possible in order to enable algebraic-style optimizations. (This is
also why I've argued that optimizing in the frontend is too early - it
should all just be done in the backend, as additional calculations
can easily make their way into the flow. I realize that's impractical
for i965 though, as the backend is not SSA, and some opts are
necessary in GLSL in order to perform the necessary validation.)

Cheers,

  -ilia

[1] There's also an effort currently underway to implement proper
accuracy fp64 rcp/rsq/sqrt for Fermi and newer chips, but that will
likely end up as library functions in codegen, esp in part because it
will make use of nvidia-specific shader opcodes. I guess this may be
useful for the NVIDIA G200 chip to be able to expose
ARB_gpu_shader_fp64 (as it only supports addition and multiplication
natively), but I doubt there's a lot of demand for that.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] fixup! i965: Move the back-end compiler to src/intel/compiler

2017-03-03 Thread Emil Velikov
On 3 March 2017 at 18:42, Mauro Rossi  wrote:
>> ok .. now I got some fixes for this here:
>>
>> https://github.com/tpalli/external-mesa/commits/move_compiler
>>
>> but yeah .. more work required :/ I don't understand why adding
>> MESA_GEN_GLSL_H to LOCAL_GENERATED_SOURCES does not help.
>
> Hi Emil, Tapani,
>
> The problem is just that the  LOCAL_C_INCLUDES for new library are
> missing one line that was present in
> src/mesa/drivers/dri/i965/Android.mk, the following one:
>
>$(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_glsl,,)/glsl
>
Yeah, noticed a similar one missing on the automake/autoconf side.

Jason, how would you like the fixes to squash - separate patches,
branch with separate "squash !..." or a branch with them squashed
already ?

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] EGL/Android: Add EGL_EXT_buffer_age extension

2017-03-03 Thread Emil Velikov
On 3 March 2017 at 08:30, Tapani Pälli  wrote:
> As discussed offline, this passes all dEQP tests for the extension on
> Android-IA (let's add this to commit message when pushing).
>
> Reviewed-by: Tapani Pälli 
>
OK, let's land this. Kalyan promised [over at #android-ia] that the
team will follow up with de-duplicating the code in due time.
Atm we have code [nearly] identical to this in platform_gbm and
platform_wayland.

Acked-by: Emil Velikov 

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] gallium/targets: don't leave an empty target directory(ies)

2017-03-03 Thread Matt Turner
Reviewed-by: Matt Turner 

Thanks Emil!
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: Clamp texture buffer size to GL_MAX_TEXTURE_BUFFER_SIZE.

2017-03-03 Thread Kenneth Graunke
The OpenGL 4.5 specification's description of TexBuffer says:

"The number of texels in the texture image is then clamped to an
 implementation-dependent limit, the value of MAX_TEXTURE_BUFFER_SIZE."

We set GL_MAX_TEXTURE_BUFFER_SIZE to 2^27.  For buffers with a byte
element size, this is the maximum possible size we can encode in
SURFACE_STATE.  If you bind a buffer object larger than this as a
texture buffer object, we'll exceed that limit and hit an isl assert:

   assert(num_elements <= (1ull << 27));

To fix this, clamp the size in bytes to MaxTextureBufferSize * texel_size.

Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 8ffbc0a5fd5..e48b1e1d2d1 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -695,6 +695,24 @@ brw_update_buffer_texture_surface(struct gl_context *ctx,
   bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
}
 
+   /* The ARB_texture_buffer_specification says:
+*
+*"The number of texels in the buffer texture's texel array is given by
+*
+*   floor(<buffer_size> / (<components> * sizeof(<base_type>)),
+*
+* where <buffer_size> is the size of the buffer object, in basic
+* machine units and <components> and <base_type> are the element count
+* and base data type for elements, as specified in Table X.1.  The
+* number of texels in the texel array is then clamped to the
+* implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
+*
+* We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
+* so that when ISL divides by stride to obtain the number of texels, that
+* texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
+*/
+   size = MIN2(size, ctx->Const.MaxTextureBufferSize * (unsigned) texel_size);
+
if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
   _mesa_problem(NULL, "bad format %s for texture buffer\n",
_mesa_get_format_name(format));
-- 
2.11.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] gallium/targets: don't leave an empty target directory(ies)

2017-03-03 Thread Emil Velikov
Some drivers do not support certain targets - for example nouveau
doesn't do VAAPI, while freedreno doesn't do any of the video backends.

As such, if we enter vdpau when building freedreno/ilo/etc, a vdpau/
folder will be created, an empty library will be built and almost
immediately removed, thus keeping an empty vdpau/ folder around.

There are two ways to fix this.

 * add substantial tracking in configure/makefiles so that we never end
up in targets/vdpau
 Downsides:
Error prone, as the configure checks and the 'include
gallium/drivers/foo/Automake.inc' can easily get out of sync.

 * remove the folder, if empty, alongside the empty library.
 Downsides:
In the latter case vdpau/ might be empty before the mesa build has
started, yet we'll remove it either way.

This patch implements the latter option, as the downside isn't that
significant, plus the patch is way shorter ;-)

v2: use has_drivers to track since TARGET_DRIVERS can contain space,
hence neither string comparison nor -n/-z works correctly.

Cc: Matt Turner 
Gentoo Bugzilla: https://bugs.gentoo.org/545230
Signed-off-by: Emil Velikov 
---
Matt, I've dropped your r-b since the method was completely reworked.
As a nice bonus we can now drop the silly stderr/stdout redirection and
the "|| true" part. Previously we needed them both.
---
 src/gallium/targets/dri/Makefile.am   | 4 +++-
 src/gallium/targets/vdpau/Makefile.am | 4 +++-
 src/gallium/targets/xvmc/Makefile.am  | 4 +++-
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/gallium/targets/dri/Makefile.am 
b/src/gallium/targets/dri/Makefile.am
index bca747faa4..bcbf5dfa9b 100644
--- a/src/gallium/targets/dri/Makefile.am
+++ b/src/gallium/targets/dri/Makefile.am
@@ -141,10 +141,12 @@ endif
 # gallium_dri.so in the set of final installed files.
 install-data-hook:
for i in $(TARGET_DRIVERS); do  \
+   has_drivers=1;  \
ln -f $(DESTDIR)$(dridir)/gallium_dri.so\
  $(DESTDIR)$(dridir)/$${i}_dri.so; \
done;   \
-   $(RM) $(DESTDIR)$(dridir)/gallium_dri.*
+   $(RM) $(DESTDIR)$(dridir)/gallium_dri.*;\
+   test $${has_drivers} -eq 1 || $(RM) -d $(DESTDIR)$(dridir)
 
 uninstall-hook:
for i in $(TARGET_DRIVERS); do  \
diff --git a/src/gallium/targets/vdpau/Makefile.am 
b/src/gallium/targets/vdpau/Makefile.am
index 97c0ab27f5..a0f310bc59 100644
--- a/src/gallium/targets/vdpau/Makefile.am
+++ b/src/gallium/targets/vdpau/Makefile.am
@@ -108,6 +108,7 @@ endif
 install-data-hook:
$(AM_V_GEN)dest_dir=$(DESTDIR)/$(vdpaudir); \
for i in $(TARGET_DRIVERS); do  \
+   has_drivers=1;  \
j=libvdpau_gallium.$(LIB_EXT);  \
k=libvdpau_$${i}.$(LIB_EXT);\
l=$${k}.$(VDPAU_MAJOR).$(VDPAU_MINOR).0;\
@@ -120,7 +121,8 @@ install-data-hook:
ln -sf $${l}\
   $${dest_dir}/$${k};  \
done;   \
-   $(RM) $${dest_dir}/libvdpau_gallium.*
+   $(RM) $${dest_dir}/libvdpau_gallium.*;  \
+   test $${has_drivers} -eq 1 || $(RM) -d $${dest_dir}
 
 uninstall-hook:
for i in $(TARGET_DRIVERS); do  \
diff --git a/src/gallium/targets/xvmc/Makefile.am 
b/src/gallium/targets/xvmc/Makefile.am
index c8bac58961..b65a191eab 100644
--- a/src/gallium/targets/xvmc/Makefile.am
+++ b/src/gallium/targets/xvmc/Makefile.am
@@ -70,6 +70,7 @@ endif
 install-data-hook:
$(AM_V_GEN)dest_dir=$(DESTDIR)/$(xvmcdir);  \
for i in $(TARGET_DRIVERS); do  \
+   has_drivers=1;  \
j=libXvMCgallium.$(LIB_EXT);\
k=libXvMC$${i}.$(LIB_EXT);  \
l=$${k}.$(XVMC_MAJOR).$(XVMC_MINOR).0;  \
@@ -82,7 +83,8 @@ install-data-hook:
ln -sf $${l}\
   $${dest_dir}/$${k};  \
done;   \
-   $(RM) $${dest_dir}/libXvMCgallium.*
+   $(RM) $${dest_dir}/libXvMCgallium.*;\
+   test $${has_drivers} -eq 1 || $(RM) -d $${dest_dir}
 
 uninstall-hook:
for i in $(TARGET_DRIVERS); do  \
-- 
2.11.1


Re: [Mesa-dev] [RFC 00/11] GL_ARB_gpu_shader_fp64

2017-03-03 Thread Jason Ekstrand
One other comment.  I'm not sure if you've seen it but, if you haven't, you
should check out what Connor and the Igalia guys already did for NIR:

https://cgit.freedesktop.org/mesa/mesa/tree/src/compiler/nir/nir_lower_double_ops.c

It's not full soft-float but there's some very nice algorithms in there for
things such as rcp.

On Fri, Mar 3, 2017 at 11:16 AM, Jason Ekstrand 
wrote:

> Hey Elie!
>
> On Fri, Mar 3, 2017 at 8:22 AM, Elie Tournier 
> wrote:
>
>> From: Elie Tournier 
>>
>> This series is based on Ian's work about GL_ARB_gpu_shader_int64 [1].
>> The goal is to expose GL_ARB_shader_fp64 to OpenGL 3.0 GPUs.
>>
>> Each function can be independently tested using shader_runner from piglit.
>> The piglit files are stored on github [2].
>>
>> [1] https://lists.freedesktop.org/archives/mesa-dev/2016-Novembe
>> r/136718.html
>> [2] https://github.com/Hopetech/libSoftFloat
>>
>
> Glad to see this finally turning into code.
>
> Before we get too far into things, I'd like to talk about the approach a
> bit.  First off, if we (Intel) are going to use this on any hardware, we
> would really like it to be in NIR.  The reason for this is that NIR has a
> much more powerful algebraic optimizer than GLSL IR and we would like to
> have as few fp64 instructions as possible before we start lowering them to
> piles of integer math.  I believe Ian's plan for this was that someone
> would write a nir_builder back-end for the stand-alone compiler.
> Unfortunately, he sort-of left that as "an exercise to the reader" and no
> code exists to my knowledge.  If we're going to write things in GLSL, we
> really need that NIR back-end.
>
> When implementing int64 (which needs similar lowering) for the Vulkan
> driver, I took the opportunity to try doing it directly in nir_builder
> instead of writing back-end code for the stand-alone compiler.  All in all,
> I'm fairly happy with the result.  You can find my (almost finished) branch
> here:
>
> https://cgit.freedesktop.org/~jekstrand/mesa/log/?h=wip/nir-int64
>
> This approach had several advantages:
>
>  1. The compiler does less work.  Loops can be automatically unrolled, you
> can choose to use select instead of control-flow, it doesn't generate
> functions that have to be inlined, etc.  Now, in GLSL IR, using functions
> may actually be a requirement because it's a tree-based IR and adding stuff
> to the middle of the tree can be tricky.  Also, I'm pretty sure they're a
> requirement for control-flow.  NIR is flat so it's a bit nicer in that
> regard.
>
>  2. It doesn't require additional compiler infrastructure for converting
> GLSL to compiler code.  We've gone back-and-forth over the years about how
> much is too much codegen.  At one point, the build process built the GLSL
> compiler and used it to compile GLSL to compiler code for the built-ins and
> then built that into the compiler.  The build system for doing this was a
> mess.  The result was that Eric wrote ir_builder and all the code was moved
> over to that.  A quick look at either GLSL IR or NIR will show you that we
> haven't completely rejected codegen but one always has to ask if it's
> really the best solution.  Running the stand-alone compiler to generate
> code and then checking it in isn't a terrible solution, but it does seem
> like it could be at least one too many levels of abstraction.
>
>  3. It's actually less code.  The nir_builder code is approximately 50%
> larger than the GLSL code but, because you don't have to add built-in
> functions and do all of the other plumbing per-opcode, it actually ends up
> being smaller.  Due to the way vectorization is handled (see next point),
> it also involves a lot less infrastructure in the lowering pass.  Also, it
> doesn't need 750 lines of standalone compiler code.
>
>  4. Because I used the "split" pack/unpack opcodes and bcsel instead of
> "if", everything vectorizes automatically.  It turns a i64vec4 iadd, for
> instance, into a bunch of ivec4 operations and kicks out a i32vec4 result
> in the end without ever splitting into 4 int64's.  (The one exception to
> this is the if statement in the division lowering which required a little
> special care).  This means that we don't have to carry extra code to split
> all "dvec4" values into 4 "double" values because it gets handled by the
> normal nir_alu_to_scalar pass that we already have.  Also, because it uses
> entirely vector instructions, it can work on an entire dvec4 at a time on
> vec4 hardware (all geometry stages on Intel Haswell and earlier).  This
> should make it about 4x as fast on vec4 hardware.
>
> The downside, of course, to writing it in nir_builder was that I duplicated
> Ian's GLSL IR pass.  I'm not a fan of duplicating code but, if int64 on
> gen8+ was all I cared about, I think the end result is nice enough that I
> don't really care about the code duplication.  If, on the other hand, we're
> going to have full 

Re: [Mesa-dev] [PATCH] genxml: Depend on Makefile.am for generated sources.

2017-03-03 Thread Emil Velikov
On 2 March 2017 at 19:06, Matt Turner  wrote:
> Depending on the generated Makefile means that all generated sources are
> recreated after ./configure.
> ---
>  src/intel/Makefile.genxml.am | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/intel/Makefile.genxml.am b/src/intel/Makefile.genxml.am
> index 2ed2741..20e4b15 100644
> --- a/src/intel/Makefile.genxml.am
> +++ b/src/intel/Makefile.genxml.am
> @@ -39,7 +39,7 @@ $(GENXML_GENERATED_FILES): genxml/gen_pack_header.py
>  # prefer to generate our own name here, so it doesn't vary from
>  # in/out-of-tree builds.
>
> -$(GENXML_GENERATED_FILES): Makefile
> +$(GENXML_GENERATED_FILES): Makefile.am
>
Yes please
Reviewed-by: Emil Velikov 

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 00/11] GL_ARB_gpu_shader_fp64

2017-03-03 Thread Jason Ekstrand
Hey Elie!

On Fri, Mar 3, 2017 at 8:22 AM, Elie Tournier 
wrote:

> From: Elie Tournier 
>
> This series is based on Ian's work about GL_ARB_gpu_shader_int64 [1].
> The goal is to expose GL_ARB_shader_fp64 to OpenGL 3.0 GPUs.
>
> Each function can be independently tested using shader_runner from piglit.
> The piglit files are stored on github [2].
>
> [1] https://lists.freedesktop.org/archives/mesa-dev/2016-
> November/136718.html
> [2] https://github.com/Hopetech/libSoftFloat
>

Glad to see this finally turning into code.

Before we get too far into things, I'd like to talk about the approach a
bit.  First off, if we (Intel) are going to use this on any hardware, we
would really like it to be in NIR.  The reason for this is that NIR has a
much more powerful algebraic optimizer than GLSL IR and we would like to
have as few fp64 instructions as possible before we start lowering them to
piles of integer math.  I believe Ian's plan for this was that someone
would write a nir_builder back-end for the stand-alone compiler.
Unfortunately, he sort-of left that as "an exercise to the reader" and no
code exists to my knowledge.  If we're going to write things in GLSL, we
really need that NIR back-end.

When implementing int64 (which needs similar lowering) for the Vulkan
driver, I took the opportunity to try doing it directly in nir_builder
instead of writing back-end code for the stand-alone compiler.  All in all,
I'm fairly happy with the result.  You can find my (almost finished) branch
here:

https://cgit.freedesktop.org/~jekstrand/mesa/log/?h=wip/nir-int64

This approach had several advantages:

 1. The compiler does less work.  Loops can be automatically unrolled, you
can choose to use select instead of control-flow, it doesn't generate
functions that have to be inlined, etc.  Now, in GLSL IR, using functions
may actually be a requirement because it's a tree-based IR and adding stuff
to the middle of the tree can be tricky.  Also, I'm pretty sure they're a
requirement for control-flow.  NIR is flat so it's a bit nicer in that
regard.

 2. It doesn't require additional compiler infrastructure for converting
GLSL to compiler code.  We've gone back-and-forth over the years about how
much is too much codegen.  At one point, the build process built the GLSL
compiler and used it to compile GLSL to compiler code for the built-ins and
then built that into the compiler.  The build system for doing this was a
mess.  The result was that Eric wrote ir_builder and all the code was moved
over to that.  A quick look at either GLSL IR or NIR will show you that we
haven't completely rejected codegen but one always has to ask if it's
really the best solution.  Running the stand-alone compiler to generate
code and then checking it in isn't a terrible solution, but it does seem
like it could be at least one too many levels of abstraction.

 3. It's actually less code.  The nir_builder code is approximately 50%
larger than the GLSL code but, because you don't have to add built-in
functions and do all of the other plumbing per-opcode, it actually ends up
being smaller.  Due to the way vectorization is handled (see next point),
it also involves a lot less infrastructure in the lowering pass.  Also, it
doesn't need 750 lines of standalone compiler code.

 4. Because I used the "split" pack/unpack opcodes and bcsel instead of
"if", everything vectorizes automatically.  It turns a i64vec4 iadd, for
instance, into a bunch of ivec4 operations and kicks out a i32vec4 result
in the end without ever splitting into 4 int64's.  (The one exception to
this is the if statement in the division lowering which required a little
special care).  This means that we don't have to carry extra code to split
all "dvec4" values into 4 "double" values because it gets handled by the
normal nir_alu_to_scalar pass that we already have.  Also, because it uses
entirely vector instructions, it can work on an entire dvec4 at a time on
vec4 hardware (all geometry stages on Intel Haswell and earlier).  This
should make it about 4x as fast on vec4 hardware.

The downside, of course, to writing it in nir_builder was that I duplicated
Ian's GLSL IR pass.  I'm not a fan of duplicating code but, if int64 on
gen8+ was all I cared about, I think the end result is nice enough that I
don't really care about the code duplication.  If, on the other hand, we're
going to have full int64 and fp64 lowering and want to provide both in both
IR's, then maybe we should reconsider. :-)  It's worth noting that, without
adding more GLSL built-ins for the split pack/unpack opcodes, point 4 above
will always be a problem if we use GLSL as the base language.

One solution is to just do it in NIR and tell people that, if they want the
lowering, they need to support NIR.  Surprisingly, I'm not the one who is
going to push too hard for this approach.  If we can come up with a
reasonable way to do it in both, I'm moderately ok with doing 

Re: [Mesa-dev] [RFC 07/11] glsl: Add "built-in" functions to do mul(fp64, fp64)

2017-03-03 Thread Matt Turner
On Fri, Mar 3, 2017 at 10:51 AM, tournier.elie  wrote:
> On 3 March 2017 at 17:46, Eric Engestrom  wrote:
>> On Friday, 2017-03-03 16:23:03 +, Elie Tournier wrote:
>>> Signed-off-by: Elie Tournier 
>>> ---
>>>  src/compiler/glsl/builtin_float64.h | 2558 
>>> +++
>>>  src/compiler/glsl/builtin_functions.cpp |4 +
>>>  src/compiler/glsl/builtin_functions.h   |3 +
>>>  src/compiler/glsl/float64.glsl  |  172 +++
>>>  4 files changed, 2737 insertions(+)
>>>
>>> diff --git a/src/compiler/glsl/builtin_float64.h 
>>> b/src/compiler/glsl/builtin_float64.h
>>> index bf0953e5d6..0a363bd27a 100644
>>> --- a/src/compiler/glsl/builtin_float64.h
>>> +++ b/src/compiler/glsl/builtin_float64.h
>> [snip]
>>> +
>>> +/* THEN INSTRUCTIONS */
>>> +body.instructions = >then_instructions;
>>> +
>>> +ir_constant_data r0EFD_data;
>>> +memset(_data, 0, sizeof(ir_constant_data));
>>> +r0EFD_data.u[0] = 4294967295;
>>> +r0EFD_data.u[1] = 4294967295;
>>
>> Looks like some debug/testing left-overs?
>
> No. It's the IR representation of "return uvec2(0xFFFFFFFFu, 0xFFFFFFFFu);"

Ah, it just looks like debugging code because it's not indented
properly. Looks like a bug in the compiler.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] android: fix outdir for gen_enum_to_str files

2017-03-03 Thread Mauro Rossi
2017-03-03 11:52 GMT+01:00 Tapani Pälli :
> when files are being generated the value of $intermediates var content can be
> completely random, this makes sure that outdir is the wanted one.

The value of intermediates variable is local to the module and is set
at the line:

intermediates := $(call local-generated-sources-dir)

For confirmation, why is $(vulkan_api_xml) variable ok
and $(intermediates) not ok in the generation rules?

Mauro

>
> Fixes: 3f2cb699 ("android: vulkan: add support for libmesa_vulkan_util")
> Signed-off-by: Tapani Pälli 
> ---
>  src/vulkan/Android.mk | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/src/vulkan/Android.mk b/src/vulkan/Android.mk
> index 9f71d8f..7653f34 100644
> --- a/src/vulkan/Android.mk
> +++ b/src/vulkan/Android.mk
> @@ -1,4 +1,5 @@
>  # Copyright © 2017 Mauro Rossi 
> +# Copyright © 2017 Intel Corporation
>  #
>  # Permission is hereby granted, free of charge, to any person obtaining a
>  # copy of this software and associated documentation files (the "Software"),
> @@ -45,7 +46,7 @@ vulkan_api_xml = $(MESA_TOP)/src/vulkan/registry/vk.xml
>  $(LOCAL_GENERATED_SOURCES): $(MESA_TOP)/src/vulkan/util/gen_enum_to_str.py 
> $(vulkan_api_xml)
> @echo "target Generated: $(PRIVATE_MODULE) <= $(notdir $(@))"
> @mkdir -p $(dir $@)
> -   $(hide) $(MESA_PYTHON2) 
> $(MESA_TOP)/src/vulkan/util/gen_enum_to_str.py --xml $(vulkan_api_xml) 
> --outdir $(intermediates)/util
> +   $(hide) $(MESA_PYTHON2) 
> $(MESA_TOP)/src/vulkan/util/gen_enum_to_str.py --xml $(vulkan_api_xml) 
> --outdir $(dir $@)
>
>  LOCAL_EXPORT_C_INCLUDE_DIRS := \
>  $(intermediates)
> --
> 2.9.3
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 07/11] glsl: Add "built-in" functions to do mul(fp64, fp64)

2017-03-03 Thread tournier.elie
On 3 March 2017 at 17:46, Eric Engestrom  wrote:
> On Friday, 2017-03-03 16:23:03 +, Elie Tournier wrote:
>> Signed-off-by: Elie Tournier 
>> ---
>>  src/compiler/glsl/builtin_float64.h | 2558 
>> +++
>>  src/compiler/glsl/builtin_functions.cpp |4 +
>>  src/compiler/glsl/builtin_functions.h   |3 +
>>  src/compiler/glsl/float64.glsl  |  172 +++
>>  4 files changed, 2737 insertions(+)
>>
>> diff --git a/src/compiler/glsl/builtin_float64.h 
>> b/src/compiler/glsl/builtin_float64.h
>> index bf0953e5d6..0a363bd27a 100644
>> --- a/src/compiler/glsl/builtin_float64.h
>> +++ b/src/compiler/glsl/builtin_float64.h
> [snip]
>> +
>> +/* THEN INSTRUCTIONS */
>> +body.instructions = >then_instructions;
>> +
>> +ir_constant_data r0EFD_data;
>> +memset(_data, 0, sizeof(ir_constant_data));
>> +r0EFD_data.u[0] = 4294967295;
>> +r0EFD_data.u[1] = 4294967295;
>
> Looks like some debug/testing left-overs?

No. It's the IR representation of "return uvec2(0xFFFFFFFFu, 0xFFFFFFFFu);"
Multiplying 0 by Inf (or Inf by 0) is an invalid operation.
So when we have this kind of operation,  we return a NaN.

IEEE 754:
" 7.2 Invalid operation

For operations producing results in floating-point format, the default
result of an operation that signals the
invalid operation exception shall be a quiet NaN that should provide
some diagnostic information. "


>
> [snip]
>> +
>> +   /* THEN INSTRUCTIONS */
>> +   body.instructions = >then_instructions;
>> +
>> +   ir_constant_data r0F26_data;
>> +   memset(_data, 0, sizeof(ir_constant_data));
>> +r0F26_data.u[0] = 4294967295;
>> +r0F26_data.u[1] = 4294967295;
>
> Ditto
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] fixup! i965: Move the back-end compiler to src/intel/compiler

2017-03-03 Thread Mauro Rossi
> ok .. now I got some fixes for this here:
>
> https://github.com/tpalli/external-mesa/commits/move_compiler
>
> but yeah .. more work required :/ I don't understand why adding
> MESA_GEN_GLSL_H to LOCAL_GENERATED_SOURCES does not help.

Hi Emil, Tapani,

The problem is just that the  LOCAL_C_INCLUDES for new library are
missing one line that was present in
src/mesa/drivers/dri/i965/Android.mk, the following one:

   $(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_glsl,,)/glsl

Mauro
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Moving i965 driver to genxml commands and structures

2017-03-03 Thread Kenneth Graunke
On Friday, March 3, 2017 10:27:51 AM PST Kenneth Graunke wrote:
> On Friday, March 3, 2017 10:16:57 AM PST Jason Ekstrand wrote:
> > Also, you can find most of the gen4-5 XML here:
> > 
> > https://cgit.freedesktop.org/~jekstrand/mesa/log/?h=wip/blorp-gen4
> > 
> > That should save you some typing. :-)
> 
> I typed up Gen4-4.5 XML a few months ago, and Jason fixed a bunch of
> things, but we never landed it as we weren't using it for anything yet.
> I fixed a few more things beyond the above branch, and pushed what we
> have to master.  Hopefully it saves you some effort.
> 
> Gen5 XML is still missing.  The Gen5 docs are in really bad shape,
> so it's probably trickier.
> 
> Also, here's the code I started in case it's useful as a reference
> (it definitely won't apply anymore):
> https://cgit.freedesktop.org/~kwg/mesa/log/?h=brwxml
> 
> and also the code Lionel started a while back, for reference:
> https://github.com/djdeath/mesa/commits/i965-genxml

Also...in case the gen4-4.5 XML I pushed *isn't* useful, and you
already had a better version typed up...feel free to throw it out
and replace it with your copy.

--Ken


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Moving i965 driver to genxml commands and structures

2017-03-03 Thread Kenneth Graunke
On Friday, March 3, 2017 10:16:57 AM PST Jason Ekstrand wrote:
> Also, you can find most of the gen4-5 XML here:
> 
> https://cgit.freedesktop.org/~jekstrand/mesa/log/?h=wip/blorp-gen4
> 
> That should save you some typing. :-)

I typed up Gen4-4.5 XML a few months ago, and Jason fixed a bunch of
things, but we never landed it as we weren't using it for anything yet.
I fixed a few more things beyond the above branch, and pushed what we
have to master.  Hopefully it saves you some effort.

Gen5 XML is still missing.  The Gen5 docs are in really bad shape,
so it's probably trickier.

Also, here's the code I started in case it's useful as a reference
(it definitely won't apply anymore):
https://cgit.freedesktop.org/~kwg/mesa/log/?h=brwxml

and also the code Lionel started a while back, for reference:
https://github.com/djdeath/mesa/commits/i965-genxml


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Moving i965 driver to genxml commands and structures

2017-03-03 Thread Jason Ekstrand
Also, you can find most of the gen4-5 XML here:

https://cgit.freedesktop.org/~jekstrand/mesa/log/?h=wip/blorp-gen4

That should save you some typing. :-)

On Fri, Mar 3, 2017 at 8:57 AM, Jason Ekstrand  wrote:

> Louis-Francis,
>
> Thanks for taking a crack at this!  There are a couple other people (I put
> them in the Cc) who have talked about doing this so thank you for sending
> the announcement!
>
> My primary request as you dive into this would be that you do things in
> such a way that we can have a fairly tight feedback loop.  I think both Ken
> and myself have a pretty clear idea of what things should look like when
> you're done and I'd like to avoid any chance that you send a 50 patch
> series and then get told to go back through the whole thing and make some
> mechanical change.
>
> One other comment: look at blorp.  It's already using genxml with the i965
> driver although there's a bit of an abstraction layer there.  It should
> give you a decent idea about how to make a brw_emit() macro by combining
> things and removing the abstraction.
>
> --Jason
>
> Sent with AquaMail for Android
> http://www.aqua-mail.com
>
>
>
> On March 3, 2017 6:41:59 AM Louis-Francis Ratté-Boulianne <
> l...@collabora.com> wrote:
>
> Hi,
>>
>> As to avoid any duplicate work, I want to inform everyone that I'm in
>> the process of modifying the i965 driver so that it uses the same
>> genxml infrastructure that the one used by the Intel Vulkan driver. The
>> task has been proposed by Jason Ekstrand.
>>
>> I will probably post my first patchset today that will contain the
>> basic call to replace the BEGIN_BATCH/ADVANCE_BATCH macros and the XML
>> specification files for generations 4, 4.5 and 5.
>>
>> Let me know if you have any question or suggestion related to this
>> work.
>>
>> --
>> Louis-Francis Ratté-Boulianne
>>
>
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] util/disk_cache: support caches for multiple architectures

2017-03-03 Thread Alan Swanson
On Fri, 2017-03-03 at 12:24 +1100, Timothy Arceri wrote:
> On 03/03/17 11:53, Marek Olšák wrote:
> > 
> > OK.
> > 
> > I also wonder if 1GB isn't too conservative. Today’s games take up
> > a
> > lot of space. My installed games occupy 480 GB. I could certainly
> > spare 10 GB for a shader cache if it improves gaming experience.
> > For
> > example, my ccache size is set to 27 GB, because 1 or 5 or 10 GB
> > wasn't enough for my use case. I assume some gamers would have a
> > similar attitude.
> 
> Yeah I agree that 1GB is probably too small. This was set by Carl
> before 
> we even knew how much data we needed to cache.
> 
> I'm happy to set it at 4GB which would be a possible 8GB total.
> 
> We may need to cap it at 4GB for some platforms anyway, or at least 
> figure out a work around for this:
> https://bugs.freedesktop.org/show_bug.cgi?id=93089

I wouldn't say that 1G was too small currently as, for example, the
cache for shader heavy DeusEx:MD is ~50M compressed per your commit
message. There is the mythical quote of 640K being enough but how many
games and applications do you need cached at once?

A more relevant issue would then be the random eviction rather than
using LRU eviction.

However perhaps we could dynamically scale by checking statvfs and
quotactl to choose MAX[1G, MIN[10% user home filesystem, 10% user home
quota]]?

-- 
Alan.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 07/11] glsl: Add "built-in" functions to do mul(fp64, fp64)

2017-03-03 Thread Eric Engestrom
On Friday, 2017-03-03 16:23:03 +, Elie Tournier wrote:
> Signed-off-by: Elie Tournier 
> ---
>  src/compiler/glsl/builtin_float64.h | 2558 
> +++
>  src/compiler/glsl/builtin_functions.cpp |4 +
>  src/compiler/glsl/builtin_functions.h   |3 +
>  src/compiler/glsl/float64.glsl  |  172 +++
>  4 files changed, 2737 insertions(+)
> 
> diff --git a/src/compiler/glsl/builtin_float64.h 
> b/src/compiler/glsl/builtin_float64.h
> index bf0953e5d6..0a363bd27a 100644
> --- a/src/compiler/glsl/builtin_float64.h
> +++ b/src/compiler/glsl/builtin_float64.h
[snip]
> +
> +/* THEN INSTRUCTIONS */
> +body.instructions = >then_instructions;
> +
> +ir_constant_data r0EFD_data;
> +memset(_data, 0, sizeof(ir_constant_data));
> +r0EFD_data.u[0] = 4294967295;
> +r0EFD_data.u[1] = 4294967295;

Looks like some debug/testing left-overs?

[snip]
> +
> +   /* THEN INSTRUCTIONS */
> +   body.instructions = >then_instructions;
> +
> +   ir_constant_data r0F26_data;
> +   memset(_data, 0, sizeof(ir_constant_data));
> +r0F26_data.u[0] = 4294967295;
> +r0F26_data.u[1] = 4294967295;

Ditto
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Any updates on threaded GL dispatch?

2017-03-03 Thread gregory hainaut
On Fri, 3 Mar 2017 16:46:24 +0100
Marek Olšák  wrote:

> On Fri, Mar 3, 2017 at 10:19 AM, Timothy Arceri  wrote:
> > On 02/03/17 22:18, Marek Olšák wrote:
> >>
> >> The bad news is my involvement is currently on hold due to other
> >> projects and responsibilities.
> >
> >
> > I can probably spend some time on this. Seems like Gregory has taken care of
> > most of the problems and it just needs someone to push it over the line.
> 
> There are also plenty of unresolved review comments from Emil and others.
> 
> Marek

Yes I only fixed the piglit bad/crash regression on Nouveau. I added 
also the perf optimization for PCSX2 ;) I did a basic test of EGL 
(i.e. PCSX2) and it seems to work.

By the way, I don't know how costly the remaining synchronization is,
but there are 2 potential optimizations:
* glUniform for double seems to sync whereas the standard float
  variants are asynchronous. Maybe it is missing the scale parameter in
  the XML. I guess double was added after glthread. It might be worth
  checking the behavior of glProgramUniform (introduced by SSO) too.
* GL3 glClearBuffer functions are synchronous due to the pointer to
  the single pixel value. However, there may be tricks to find the
  correct size of the pixel.

Cheers,
Gregory
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 02/11] glsl: Add "built-in" function to do neg(fp64)

2017-03-03 Thread Ilia Mirkin
On Fri, Mar 3, 2017 at 11:57 AM, tournier.elie  wrote:
> On 3 March 2017 at 16:29, Ilia Mirkin  wrote:
>> On Fri, Mar 3, 2017 at 11:22 AM, Elie Tournier  
>> wrote:
>>> Signed-off-by: Elie Tournier 
>>> ---
>>>  src/compiler/glsl/builtin_float64.h | 19 +++
>>>  src/compiler/glsl/builtin_functions.cpp |  4 
>>>  src/compiler/glsl/builtin_functions.h   |  3 +++
>>>  src/compiler/glsl/float64.glsl  | 10 ++
>>>  4 files changed, 36 insertions(+)
>>>
>>> diff --git a/src/compiler/glsl/builtin_float64.h 
>>> b/src/compiler/glsl/builtin_float64.h
>>> index c1ec89d210..6df91e10f5 100644
>>> --- a/src/compiler/glsl/builtin_float64.h
>>> +++ b/src/compiler/glsl/builtin_float64.h
>>> @@ -17,3 +17,22 @@ fabs64(void *mem_ctx, builtin_available_predicate avail)
>>> sig->replace_parameters(_parameters);
>>> return sig;
>>>  }
>>> +ir_function_signature *
>>> +fneg64(void *mem_ctx, builtin_available_predicate avail)
>>> +{
>>> +   ir_function_signature *const sig =
>>> +  new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail);
>>> +   ir_factory body(>body, mem_ctx);
>>> +   sig->is_defined = true;
>>> +
>>> +   exec_list sig_parameters;
>>> +
>>> +   ir_variable *const r000C = new(mem_ctx) 
>>> ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in);
>>> +   sig_parameters.push_tail(r000C);
>>> +   body.emit(assign(r000C, bit_xor(swizzle_x(r000C), 
>>> body.constant(2147483648u)), 0x01));
>>> +
>>> +   body.emit(ret(r000C));
>>> +
>>> +   sig->replace_parameters(_parameters);
>>> +   return sig;
>>> +}
>>> diff --git a/src/compiler/glsl/builtin_functions.cpp 
>>> b/src/compiler/glsl/builtin_functions.cpp
>>> index b0b1781725..a189b84190 100644
>>> --- a/src/compiler/glsl/builtin_functions.cpp
>>> +++ b/src/compiler/glsl/builtin_functions.cpp
>>> @@ -3133,6 +3133,10 @@ builtin_builder::create_builtins()
>>>  generate_ir::fabs64(mem_ctx, integer_functions_supported),
>>>  NULL);
>>>
>>> +   add_function("__builtin_fneg64",
>>> +generate_ir::fneg64(mem_ctx, integer_functions_supported),
>>> +NULL);
>>> +
>>>  #undef F
>>>  #undef FI
>>>  #undef FIUD_VEC
>>> diff --git a/src/compiler/glsl/builtin_functions.h 
>>> b/src/compiler/glsl/builtin_functions.h
>>> index abe02d97b6..37c6cc33c2 100644
>>> --- a/src/compiler/glsl/builtin_functions.h
>>> +++ b/src/compiler/glsl/builtin_functions.h
>>> @@ -66,6 +66,9 @@ sign64(void *mem_ctx, builtin_available_predicate avail);
>>>  ir_function_signature *
>>>  fabs64(void *mem_ctx, builtin_available_predicate avail);
>>>
>>> +ir_function_signature *
>>> +fneg64(void *mem_ctx, builtin_available_predicate avail);
>>> +
>>>  }
>>>
>>>  #endif /* BULITIN_FUNCTIONS_H */
>>> diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl
>>> index b8f0c2e444..82875e9407 100644
>>> --- a/src/compiler/glsl/float64.glsl
>>> +++ b/src/compiler/glsl/float64.glsl
>>> @@ -26,3 +26,13 @@ fabs64( uvec2 a )
>>>  a.x &= 0x7FFFu;
>>>  return a;
>>>  }
>>> +
>>> +/* Negate value of a Float64 :
>>> + * Toggle the sign bit
>>> + */
>>> +uvec2
>>> +fneg64( uvec2 a )
>>> +{
>>> +a.x ^= (1u<<31);
>>
>> Is this right for NaN? Presumably neg(NaN) should == NaN.
>
> The IEEE 754 standard say :
>
> " 6.3 The sign bit
>
> When either an input or result is NaN, this standard does not
> interpret the sign of a NaN. Note, however,
> that operations on bit strings copy, negate, abs, copySign specify the
> sign bit of a NaN result,
> sometimes based upon the sign bit of a NaN operand. The logical
> predicate totalOrder is also affected by
> the sign bit of a NaN operand. For all other operations, this standard
> does not specify the sign bit of a NaN
> result, even when there is only one input NaN, or when the NaN is
> produced from an invalid operation. "
>
> So neg(NaN) == NaN

Right ... I guess I meant that there's a certain amount of
normalization that's required to be applied to results of floating
point operations. I was wondering if the sign bit of a NaN had to be
cleared, so that it would be in canonical form. (Just like you'd set
all of the mantissa bits, even though setting any of them results in a
NaN when exp == 0x7ff.) You've been digging in the FP64 standard a lot
more than I have, so if you think it's acceptable to have a fp64
function return a NaN with the sign bit set, that's fine by me.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Moving i965 driver to genxml commands and structures

2017-03-03 Thread Jason Ekstrand

Louis-Francis,

Thanks for taking a crack at this!  There are a couple other people (I put 
them in the Cc) who have talked about doing this so thank you for sending 
the announcement!


My primary request as you dive into this would be that you do things in 
such a way that we can have a fairly tight feedback loop.  I think both Ken 
and myself have a pretty clear idea of what things should look like when 
you're done and I'd like to avoid any chance that you send a 50 patch 
series and then get told to go back through the whole thing and make some 
mechanical change.


One other comment: look at blorp.  It's already using genxml with the i965 
driver although there's a bit of an abstraction layer there.  It should 
give you a decent idea about how to make a brw_emit() macro by combining 
things and removing the abstraction.


--Jason

Sent with AquaMail for Android
http://www.aqua-mail.com


On March 3, 2017 6:41:59 AM Louis-Francis Ratté-Boulianne 
 wrote:



Hi,

To avoid any duplicate work, I want to inform everyone that I'm in
the process of modifying the i965 driver so that it uses the same
genxml infrastructure as the one used by the Intel Vulkan driver. The
task has been proposed by Jason Ekstrand. 

I will probably post my first patchset today that will contain the
basic call to replace the BEGIN_BATCH/ADVANCE_BATCH macros and the XML
specification files for generations 4, 4.5 and 5.

Let me know if you have any question or suggestion related to this
work.

--
Louis-Francis Ratté-Boulianne



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 02/11] glsl: Add "built-in" function to do neg(fp64)

2017-03-03 Thread tournier.elie
On 3 March 2017 at 16:29, Ilia Mirkin  wrote:
> On Fri, Mar 3, 2017 at 11:22 AM, Elie Tournier  
> wrote:
>> Signed-off-by: Elie Tournier 
>> ---
>>  src/compiler/glsl/builtin_float64.h | 19 +++
>>  src/compiler/glsl/builtin_functions.cpp |  4 
>>  src/compiler/glsl/builtin_functions.h   |  3 +++
>>  src/compiler/glsl/float64.glsl  | 10 ++
>>  4 files changed, 36 insertions(+)
>>
>> diff --git a/src/compiler/glsl/builtin_float64.h 
>> b/src/compiler/glsl/builtin_float64.h
>> index c1ec89d210..6df91e10f5 100644
>> --- a/src/compiler/glsl/builtin_float64.h
>> +++ b/src/compiler/glsl/builtin_float64.h
>> @@ -17,3 +17,22 @@ fabs64(void *mem_ctx, builtin_available_predicate avail)
>> sig->replace_parameters(_parameters);
>> return sig;
>>  }
>> +ir_function_signature *
>> +fneg64(void *mem_ctx, builtin_available_predicate avail)
>> +{
>> +   ir_function_signature *const sig =
>> +  new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail);
>> +   ir_factory body(>body, mem_ctx);
>> +   sig->is_defined = true;
>> +
>> +   exec_list sig_parameters;
>> +
>> +   ir_variable *const r000C = new(mem_ctx) 
>> ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in);
>> +   sig_parameters.push_tail(r000C);
>> +   body.emit(assign(r000C, bit_xor(swizzle_x(r000C), 
>> body.constant(2147483648u)), 0x01));
>> +
>> +   body.emit(ret(r000C));
>> +
>> +   sig->replace_parameters(_parameters);
>> +   return sig;
>> +}
>> diff --git a/src/compiler/glsl/builtin_functions.cpp 
>> b/src/compiler/glsl/builtin_functions.cpp
>> index b0b1781725..a189b84190 100644
>> --- a/src/compiler/glsl/builtin_functions.cpp
>> +++ b/src/compiler/glsl/builtin_functions.cpp
>> @@ -3133,6 +3133,10 @@ builtin_builder::create_builtins()
>>  generate_ir::fabs64(mem_ctx, integer_functions_supported),
>>  NULL);
>>
>> +   add_function("__builtin_fneg64",
>> +generate_ir::fneg64(mem_ctx, integer_functions_supported),
>> +NULL);
>> +
>>  #undef F
>>  #undef FI
>>  #undef FIUD_VEC
>> diff --git a/src/compiler/glsl/builtin_functions.h 
>> b/src/compiler/glsl/builtin_functions.h
>> index abe02d97b6..37c6cc33c2 100644
>> --- a/src/compiler/glsl/builtin_functions.h
>> +++ b/src/compiler/glsl/builtin_functions.h
>> @@ -66,6 +66,9 @@ sign64(void *mem_ctx, builtin_available_predicate avail);
>>  ir_function_signature *
>>  fabs64(void *mem_ctx, builtin_available_predicate avail);
>>
>> +ir_function_signature *
>> +fneg64(void *mem_ctx, builtin_available_predicate avail);
>> +
>>  }
>>
>>  #endif /* BULITIN_FUNCTIONS_H */
>> diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl
>> index b8f0c2e444..82875e9407 100644
>> --- a/src/compiler/glsl/float64.glsl
>> +++ b/src/compiler/glsl/float64.glsl
>> @@ -26,3 +26,13 @@ fabs64( uvec2 a )
>>  a.x &= 0x7FFFu;
>>  return a;
>>  }
>> +
>> +/* Negate value of a Float64 :
>> + * Toggle the sign bit
>> + */
>> +uvec2
>> +fneg64( uvec2 a )
>> +{
>> +a.x ^= (1u<<31);
>
> Is this right for NaN? Presumably neg(NaN) should == NaN.

The IEEE 754 standard says:

" 6.3 The sign bit

When either an input or result is NaN, this standard does not
interpret the sign of a NaN. Note, however,
that operations on bit strings copy, negate, abs, copySign specify the
sign bit of a NaN result,
sometimes based upon the sign bit of a NaN operand. The logical
predicate totalOrder is also affected by
the sign bit of a NaN operand. For all other operations, this standard
does not specify the sign bit of a NaN
result, even when there is only one input NaN, or when the NaN is
produced from an invalid operation. "

So neg(NaN) == NaN

>
>> +return a;
>> +}
>> --
>> 2.11.0
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/4] swr: implement geometry shaders

2017-03-03 Thread Cherniak, Bruce
Set looks great with a couple suggestions:

* In swr_update_derived() it seems like the GS should also depend on SWR_NEW_VS 
dirty since swr_generate_gs_key() references ctx->vs->info.base.

* In swr_context.h the SWR_NEW_ALL mask should be expanded by 2-bits to account 
for the new dirty flags.  Or, just remove SWR_NEW_ALL since it’s a construct we 
never used.

Bruce


> On Mar 2, 2017, at 7:17 PM, Tim Rowley  wrote:
> 
> ---
> src/gallium/drivers/swr/swr_context.cpp|   2 +-
> src/gallium/drivers/swr/swr_context.h  |  25 +-
> src/gallium/drivers/swr/swr_draw.cpp   |  50 +--
> src/gallium/drivers/swr/swr_fence_work.cpp |  21 ++
> src/gallium/drivers/swr/swr_fence_work.h   |   3 +
> src/gallium/drivers/swr/swr_scratch.cpp|   1 +
> src/gallium/drivers/swr/swr_scratch.h  |   1 +
> src/gallium/drivers/swr/swr_screen.cpp |   6 +-
> src/gallium/drivers/swr/swr_shader.cpp | 475 -
> src/gallium/drivers/swr/swr_shader.h   |  22 ++
> src/gallium/drivers/swr/swr_state.cpp  |  99 +-
> src/gallium/drivers/swr/swr_state.h|  50 +++
> src/gallium/drivers/swr/swr_tex_sample.cpp |   6 +
> 13 files changed, 699 insertions(+), 62 deletions(-)
> 
> diff --git a/src/gallium/drivers/swr/swr_context.cpp 
> b/src/gallium/drivers/swr/swr_context.cpp
> index b89ce1b..1c98ac2 100644
> --- a/src/gallium/drivers/swr/swr_context.cpp
> +++ b/src/gallium/drivers/swr/swr_context.cpp
> @@ -307,7 +307,7 @@ swr_blit(struct pipe_context *pipe, const struct 
> pipe_blit_info *blit_info)
>util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vertex_buffer);
>util_blitter_save_vertex_elements(ctx->blitter, (void *)ctx->velems);
>util_blitter_save_vertex_shader(ctx->blitter, (void *)ctx->vs);
> -   /*util_blitter_save_geometry_shader(ctx->blitter, (void*)ctx->gs);*/
> +   util_blitter_save_geometry_shader(ctx->blitter, (void*)ctx->gs);
>util_blitter_save_so_targets(
>   ctx->blitter,
>   ctx->num_so_targets,
> diff --git a/src/gallium/drivers/swr/swr_context.h 
> b/src/gallium/drivers/swr/swr_context.h
> index 04e11fe..624e069 100644
> --- a/src/gallium/drivers/swr/swr_context.h
> +++ b/src/gallium/drivers/swr/swr_context.h
> @@ -40,15 +40,17 @@
> #define SWR_NEW_SAMPLER_VIEW (1 << 4)
> #define SWR_NEW_VS (1 << 5)
> #define SWR_NEW_FS (1 << 6)
> -#define SWR_NEW_VSCONSTANTS (1 << 7)
> -#define SWR_NEW_FSCONSTANTS (1 << 8)
> -#define SWR_NEW_VERTEX (1 << 9)
> -#define SWR_NEW_STIPPLE (1 << 10)
> -#define SWR_NEW_SCISSOR (1 << 11)
> -#define SWR_NEW_VIEWPORT (1 << 12)
> -#define SWR_NEW_FRAMEBUFFER (1 << 13)
> -#define SWR_NEW_CLIP (1 << 14)
> -#define SWR_NEW_SO (1 << 15)
> +#define SWR_NEW_GS (1 << 7)
> +#define SWR_NEW_VSCONSTANTS (1 << 8)
> +#define SWR_NEW_FSCONSTANTS (1 << 9)
> +#define SWR_NEW_GSCONSTANTS (1 << 10)
> +#define SWR_NEW_VERTEX (1 << 11)
> +#define SWR_NEW_STIPPLE (1 << 12)
> +#define SWR_NEW_SCISSOR (1 << 13)
> +#define SWR_NEW_VIEWPORT (1 << 14)
> +#define SWR_NEW_FRAMEBUFFER (1 << 15)
> +#define SWR_NEW_CLIP (1 << 16)
> +#define SWR_NEW_SO (1 << 17)
> #define SWR_NEW_ALL 0x
> 
> namespace std
> @@ -85,11 +87,15 @@ struct swr_draw_context {
>uint32_t num_constantsVS[PIPE_MAX_CONSTANT_BUFFERS];
>const float *constantFS[PIPE_MAX_CONSTANT_BUFFERS];
>uint32_t num_constantsFS[PIPE_MAX_CONSTANT_BUFFERS];
> +   const float *constantGS[PIPE_MAX_CONSTANT_BUFFERS];
> +   uint32_t num_constantsGS[PIPE_MAX_CONSTANT_BUFFERS];
> 
>swr_jit_texture texturesVS[PIPE_MAX_SHADER_SAMPLER_VIEWS];
>swr_jit_sampler samplersVS[PIPE_MAX_SAMPLERS];
>swr_jit_texture texturesFS[PIPE_MAX_SHADER_SAMPLER_VIEWS];
>swr_jit_sampler samplersFS[PIPE_MAX_SAMPLERS];
> +   swr_jit_texture texturesGS[PIPE_MAX_SHADER_SAMPLER_VIEWS];
> +   swr_jit_sampler samplersGS[PIPE_MAX_SAMPLERS];
> 
>float userClipPlanes[PIPE_MAX_CLIP_PLANES][4];
> 
> @@ -112,6 +118,7 @@ struct swr_context {
> 
>struct swr_vertex_shader *vs;
>struct swr_fragment_shader *fs;
> +   struct swr_geometry_shader *gs;
>struct swr_vertex_element_state *velems;
> 
>/** Other rendering state */
> diff --git a/src/gallium/drivers/swr/swr_draw.cpp 
> b/src/gallium/drivers/swr/swr_draw.cpp
> index f764efe..c43f4a5 100644
> --- a/src/gallium/drivers/swr/swr_draw.cpp
> +++ b/src/gallium/drivers/swr/swr_draw.cpp
> @@ -32,48 +32,6 @@
> #include "util/u_prim.h"
> 
> /*
> - * Convert mesa PIPE_PRIM_X to SWR enum PRIMITIVE_TOPOLOGY
> - */
> -static INLINE enum PRIMITIVE_TOPOLOGY
> -swr_convert_prim_topology(const unsigned mode)
> -{
> -   switch (mode) {
> -   case PIPE_PRIM_POINTS:
> -  return TOP_POINT_LIST;
> -   case PIPE_PRIM_LINES:
> -  return TOP_LINE_LIST;
> -   case PIPE_PRIM_LINE_LOOP:
> -  return TOP_LINE_LOOP;
> -   case PIPE_PRIM_LINE_STRIP:
> -  return TOP_LINE_STRIP;
> -   case PIPE_PRIM_TRIANGLES:
> -  return TOP_TRIANGLE_LIST;
> -   case PIPE_PRIM_TRIANGLE_STRIP:
> -  return 

Re: [Mesa-dev] [RFC 02/11] glsl: Add "built-in" function to do neg(fp64)

2017-03-03 Thread Ilia Mirkin
On Fri, Mar 3, 2017 at 11:22 AM, Elie Tournier  wrote:
> Signed-off-by: Elie Tournier 
> ---
>  src/compiler/glsl/builtin_float64.h | 19 +++
>  src/compiler/glsl/builtin_functions.cpp |  4 
>  src/compiler/glsl/builtin_functions.h   |  3 +++
>  src/compiler/glsl/float64.glsl  | 10 ++
>  4 files changed, 36 insertions(+)
>
> diff --git a/src/compiler/glsl/builtin_float64.h 
> b/src/compiler/glsl/builtin_float64.h
> index c1ec89d210..6df91e10f5 100644
> --- a/src/compiler/glsl/builtin_float64.h
> +++ b/src/compiler/glsl/builtin_float64.h
> @@ -17,3 +17,22 @@ fabs64(void *mem_ctx, builtin_available_predicate avail)
> sig->replace_parameters(_parameters);
> return sig;
>  }
> +ir_function_signature *
> +fneg64(void *mem_ctx, builtin_available_predicate avail)
> +{
> +   ir_function_signature *const sig =
> +  new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail);
> +   ir_factory body(>body, mem_ctx);
> +   sig->is_defined = true;
> +
> +   exec_list sig_parameters;
> +
> +   ir_variable *const r000C = new(mem_ctx) 
> ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in);
> +   sig_parameters.push_tail(r000C);
> +   body.emit(assign(r000C, bit_xor(swizzle_x(r000C), 
> body.constant(2147483648u)), 0x01));
> +
> +   body.emit(ret(r000C));
> +
> +   sig->replace_parameters(_parameters);
> +   return sig;
> +}
> diff --git a/src/compiler/glsl/builtin_functions.cpp 
> b/src/compiler/glsl/builtin_functions.cpp
> index b0b1781725..a189b84190 100644
> --- a/src/compiler/glsl/builtin_functions.cpp
> +++ b/src/compiler/glsl/builtin_functions.cpp
> @@ -3133,6 +3133,10 @@ builtin_builder::create_builtins()
>  generate_ir::fabs64(mem_ctx, integer_functions_supported),
>  NULL);
>
> +   add_function("__builtin_fneg64",
> +generate_ir::fneg64(mem_ctx, integer_functions_supported),
> +NULL);
> +
>  #undef F
>  #undef FI
>  #undef FIUD_VEC
> diff --git a/src/compiler/glsl/builtin_functions.h 
> b/src/compiler/glsl/builtin_functions.h
> index abe02d97b6..37c6cc33c2 100644
> --- a/src/compiler/glsl/builtin_functions.h
> +++ b/src/compiler/glsl/builtin_functions.h
> @@ -66,6 +66,9 @@ sign64(void *mem_ctx, builtin_available_predicate avail);
>  ir_function_signature *
>  fabs64(void *mem_ctx, builtin_available_predicate avail);
>
> +ir_function_signature *
> +fneg64(void *mem_ctx, builtin_available_predicate avail);
> +
>  }
>
>  #endif /* BULITIN_FUNCTIONS_H */
> diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl
> index b8f0c2e444..82875e9407 100644
> --- a/src/compiler/glsl/float64.glsl
> +++ b/src/compiler/glsl/float64.glsl
> @@ -26,3 +26,13 @@ fabs64( uvec2 a )
>  a.x &= 0x7FFFu;
>  return a;
>  }
> +
> +/* Negate value of a Float64 :
> + * Toggle the sign bit
> + */
> +uvec2
> +fneg64( uvec2 a )
> +{
> +a.x ^= (1u<<31);

Is this right for NaN? Presumably neg(NaN) should == NaN.

> +return a;
> +}
> --
> 2.11.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 10/11] glsl: Add "built-in" functions to do fp64_to_fp32(fp64)

2017-03-03 Thread Elie Tournier
Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/builtin_float64.h | 589 
 src/compiler/glsl/builtin_functions.cpp |   4 +
 src/compiler/glsl/builtin_functions.h   |   3 +
 src/compiler/glsl/float64.glsl  | 133 
 4 files changed, 729 insertions(+)

diff --git a/src/compiler/glsl/builtin_float64.h 
b/src/compiler/glsl/builtin_float64.h
index b50ebc2dc2..dad5811289 100644
--- a/src/compiler/glsl/builtin_float64.h
+++ b/src/compiler/glsl/builtin_float64.h
@@ -23652,3 +23652,592 @@ fp32_to_fp64(void *mem_ctx, 
builtin_available_predicate avail)
sig->replace_parameters(_parameters);
return sig;
 }
+ir_function_signature *
+packFloat32(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::uint_type, avail);
+   ir_factory body(>body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r13A7 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"zSign", ir_var_function_in);
+   sig_parameters.push_tail(r13A7);
+   ir_variable *const r13A8 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"zExp", ir_var_function_in);
+   sig_parameters.push_tail(r13A8);
+   ir_variable *const r13A9 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"zFrac", ir_var_function_in);
+   sig_parameters.push_tail(r13A9);
+   ir_expression *const r13AA = lshift(r13A7, body.constant(int(31)));
+   ir_expression *const r13AB = lshift(r13A8, body.constant(int(23)));
+   ir_expression *const r13AC = add(r13AA, r13AB);
+   ir_expression *const r13AD = add(r13AC, r13A9);
+   body.emit(ret(r13AD));
+
+   sig->replace_parameters(_parameters);
+   return sig;
+}
+ir_function_signature *
+shift32RightJamming(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::void_type, avail);
+   ir_factory body(>body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r13AE = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r13AE);
+   ir_variable *const r13AF = new(mem_ctx) ir_variable(glsl_type::int_type, 
"count", ir_var_function_in);
+   sig_parameters.push_tail(r13AF);
+   ir_variable *const r13B0 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"zPtr", ir_var_function_inout);
+   sig_parameters.push_tail(r13B0);
+   ir_variable *const r13B1 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"z", ir_var_auto);
+   body.emit(r13B1);
+   /* IF CONDITION */
+   ir_expression *const r13B3 = equal(r13AF, body.constant(int(0)));
+   ir_if *f13B2 = new(mem_ctx) ir_if(operand(r13B3).val);
+   exec_list *const f13B2_parent_instructions = body.instructions;
+
+  /* THEN INSTRUCTIONS */
+  body.instructions = >then_instructions;
+
+  body.emit(assign(r13B1, r13AE, 0x01));
+
+
+  /* ELSE INSTRUCTIONS */
+  body.instructions = >else_instructions;
+
+  /* IF CONDITION */
+  ir_expression *const r13B5 = less(r13AF, body.constant(int(32)));
+  ir_if *f13B4 = new(mem_ctx) ir_if(operand(r13B5).val);
+  exec_list *const f13B4_parent_instructions = body.instructions;
+
+ /* THEN INSTRUCTIONS */
+ body.instructions = >then_instructions;
+
+ ir_expression *const r13B6 = rshift(r13AE, r13AF);
+ ir_expression *const r13B7 = neg(r13AF);
+ ir_expression *const r13B8 = bit_and(r13B7, body.constant(int(31)));
+ ir_expression *const r13B9 = lshift(r13AE, r13B8);
+ ir_expression *const r13BA = nequal(r13B9, body.constant(0u));
+ ir_expression *const r13BB = expr(ir_unop_b2i, r13BA);
+ ir_expression *const r13BC = expr(ir_unop_i2u, r13BB);
+ body.emit(assign(r13B1, bit_or(r13B6, r13BC), 0x01));
+
+
+ /* ELSE INSTRUCTIONS */
+ body.instructions = >else_instructions;
+
+ ir_expression *const r13BD = nequal(r13AE, body.constant(0u));
+ ir_expression *const r13BE = expr(ir_unop_b2i, r13BD);
+ body.emit(assign(r13B1, expr(ir_unop_i2u, r13BE), 0x01));
+
+
+  body.instructions = f13B4_parent_instructions;
+  body.emit(f13B4);
+
+  /* END IF */
+
+
+   body.instructions = f13B2_parent_instructions;
+   body.emit(f13B2);
+
+   /* END IF */
+
+   body.emit(assign(r13B0, r13B1, 0x01));
+
+   sig->replace_parameters(_parameters);
+   return sig;
+}
+ir_function_signature *
+roundAndPackFloat32(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::uint_type, avail);
+   ir_factory body(>body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r13BF = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"zSign", ir_var_function_in);
+   sig_parameters.push_tail(r13BF);
+   ir_variable *const r13C0 = 

[Mesa-dev] [RFC 11/11] glsl: Add fp64 functions to the parser.

2017-03-03 Thread Elie Tournier
Since we use two integers to store an fp64 value, these functions are made
available with MESA_shader_integer_functions.

Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/glcpp/glcpp-parse.y | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y 
b/src/compiler/glsl/glcpp/glcpp-parse.y
index e113253061..84dfc81c80 100644
--- a/src/compiler/glsl/glcpp/glcpp-parse.y
+++ b/src/compiler/glsl/glcpp/glcpp-parse.y
@@ -2349,6 +2349,16 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t 
*parser, intmax_t versio
  add_builtin_define(parser, "__have_builtin_builtin_umod64", 1);
  add_builtin_define(parser, "__have_builtin_builtin_idiv64", 1);
  add_builtin_define(parser, "__have_builtin_builtin_imod64", 1);
+ add_builtin_define(parser, "__have_builtin_builtin_fabs64", 1);
+ add_builtin_define(parser, "__have_builtin_builtin_fneg64", 1);
+ add_builtin_define(parser, "__have_builtin_builtin_feq64", 1);
+ add_builtin_define(parser, "__have_builtin_builtin_fle64", 1);
+ add_builtin_define(parser, "__have_builtin_builtin_flt64", 1);
+ add_builtin_define(parser, "__have_builtin_builtin_fadd64", 1);
+ add_builtin_define(parser, "__have_builtin_builtin_fmul64", 1);
+ add_builtin_define(parser, "__have_builtin_builtin_fdiv64", 1);
+ add_builtin_define(parser, "__have_builtin_builtin_fp32_to_fp64", 1);
+ add_builtin_define(parser, "__have_builtin_builtin_fp64_to_fp32", 1);
   }
}
 
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3] i965: Add script to gen code for OA counter queries

2017-03-03 Thread Emil Velikov
On 2 March 2017 at 21:25, Robert Bragg  wrote:
>
>
> On Mar 2, 2017 7:32 PM, "Emil Velikov"  wrote:
>
> On 2 March 2017 at 18:58, Robert Bragg  wrote:
>> Adds R/b from Dylan and Makefile fixups from Emil, including fixing race
>> with
>> parallel make builds (thanks). Just holding fast on the use of #pragma
>> once
>> though :-)
>>
> I think you want the "required=True" for all the fields, but feel free
> to do at a later stage.
>
>
> The header and code args are checked like:
>
>> if args.header:
>>header_file = open(args.header, 'w')
>>
>> if args.code:
>>c_file = open(args.code, 'w')
>
Mostly food for thought, feel free to _not_ reply.

- why would we want to generate only the header and not the source
file or vice versa?
- we can call the generator with no output files - i.e. both --header
and --code can be empty, right ?
- what is going to happen if we do not pass the xml filename as an arg ?

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 02/11] glsl: Add "built-in" function to do neg(fp64)

2017-03-03 Thread Elie Tournier
Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/builtin_float64.h | 19 +++
 src/compiler/glsl/builtin_functions.cpp |  4 
 src/compiler/glsl/builtin_functions.h   |  3 +++
 src/compiler/glsl/float64.glsl  | 10 ++
 4 files changed, 36 insertions(+)

diff --git a/src/compiler/glsl/builtin_float64.h 
b/src/compiler/glsl/builtin_float64.h
index c1ec89d210..6df91e10f5 100644
--- a/src/compiler/glsl/builtin_float64.h
+++ b/src/compiler/glsl/builtin_float64.h
@@ -17,3 +17,22 @@ fabs64(void *mem_ctx, builtin_available_predicate avail)
sig->replace_parameters(_parameters);
return sig;
 }
+ir_function_signature *
+fneg64(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail);
+   ir_factory body(>body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r000C = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r000C);
+   body.emit(assign(r000C, bit_xor(swizzle_x(r000C), 
body.constant(2147483648u)), 0x01));
+
+   body.emit(ret(r000C));
+
+   sig->replace_parameters(_parameters);
+   return sig;
+}
diff --git a/src/compiler/glsl/builtin_functions.cpp 
b/src/compiler/glsl/builtin_functions.cpp
index b0b1781725..a189b84190 100644
--- a/src/compiler/glsl/builtin_functions.cpp
+++ b/src/compiler/glsl/builtin_functions.cpp
@@ -3133,6 +3133,10 @@ builtin_builder::create_builtins()
 generate_ir::fabs64(mem_ctx, integer_functions_supported),
 NULL);
 
+   add_function("__builtin_fneg64",
+generate_ir::fneg64(mem_ctx, integer_functions_supported),
+NULL);
+
 #undef F
 #undef FI
 #undef FIUD_VEC
diff --git a/src/compiler/glsl/builtin_functions.h 
b/src/compiler/glsl/builtin_functions.h
index abe02d97b6..37c6cc33c2 100644
--- a/src/compiler/glsl/builtin_functions.h
+++ b/src/compiler/glsl/builtin_functions.h
@@ -66,6 +66,9 @@ sign64(void *mem_ctx, builtin_available_predicate avail);
 ir_function_signature *
 fabs64(void *mem_ctx, builtin_available_predicate avail);
 
+ir_function_signature *
+fneg64(void *mem_ctx, builtin_available_predicate avail);
+
 }
 
 #endif /* BULITIN_FUNCTIONS_H */
diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl
index b8f0c2e444..82875e9407 100644
--- a/src/compiler/glsl/float64.glsl
+++ b/src/compiler/glsl/float64.glsl
@@ -26,3 +26,13 @@ fabs64( uvec2 a )
 a.x &= 0x7FFFFFFFu;
 return a;
 }
+
+/* Negate value of a Float64 :
+ * Toggle the sign bit
+ */
+uvec2
+fneg64( uvec2 a )
+{
+a.x ^= (1u<<31);
+return a;
+}
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 04/11] glsl: Add "built-in" functions to do le(fp64, fp64)

2017-03-03 Thread Elie Tournier
Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/builtin_float64.h | 179 
 src/compiler/glsl/builtin_functions.cpp |   4 +
 src/compiler/glsl/builtin_functions.h   |   3 +
 src/compiler/glsl/float64.glsl  |  52 ++
 4 files changed, 238 insertions(+)

diff --git a/src/compiler/glsl/builtin_float64.h 
b/src/compiler/glsl/builtin_float64.h
index e614374d75..f8ceacdabf 100644
--- a/src/compiler/glsl/builtin_float64.h
+++ b/src/compiler/glsl/builtin_float64.h
@@ -155,3 +155,182 @@ feq64(void *mem_ctx, builtin_available_predicate avail)
sig->replace_parameters(_parameters);
return sig;
 }
+ir_function_signature *
+extractFloat64Sign(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::uint_type, avail);
+   ir_factory body(>body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r002D = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r002D);
+   ir_expression *const r002E = rshift(swizzle_x(r002D), 
body.constant(int(31)));
+   body.emit(ret(r002E));
+
+   sig->replace_parameters(_parameters);
+   return sig;
+}
+ir_function_signature *
+le64(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::bool_type, avail);
+   ir_factory body(>body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r002F = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"a0", ir_var_function_in);
+   sig_parameters.push_tail(r002F);
+   ir_variable *const r0030 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"a1", ir_var_function_in);
+   sig_parameters.push_tail(r0030);
+   ir_variable *const r0031 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"b0", ir_var_function_in);
+   sig_parameters.push_tail(r0031);
+   ir_variable *const r0032 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"b1", ir_var_function_in);
+   sig_parameters.push_tail(r0032);
+   ir_expression *const r0033 = less(r002F, r0031);
+   ir_expression *const r0034 = equal(r002F, r0031);
+   ir_expression *const r0035 = lequal(r0030, r0032);
+   ir_expression *const r0036 = logic_and(r0034, r0035);
+   ir_expression *const r0037 = logic_or(r0033, r0036);
+   body.emit(ret(r0037));
+
+   sig->replace_parameters(_parameters);
+   return sig;
+}
+ir_function_signature *
+fle64(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::bool_type, avail);
+   ir_factory body(>body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r0038 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r0038);
+   ir_variable *const r0039 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"b", ir_var_function_in);
+   sig_parameters.push_tail(r0039);
+   ir_variable *const r003A = body.make_temp(glsl_type::bool_type, 
"return_value");
+   ir_variable *const r003B = new(mem_ctx) ir_variable(glsl_type::bool_type, 
"isbNaN", ir_var_auto);
+   body.emit(r003B);
+   ir_variable *const r003C = new(mem_ctx) ir_variable(glsl_type::bool_type, 
"isaNaN", ir_var_auto);
+   body.emit(r003C);
+   ir_variable *const r003D = body.make_temp(glsl_type::uvec2_type, 
"vec_ctor");
+   body.emit(assign(r003D, bit_and(swizzle_x(r0038), body.constant(1048575u)), 
0x01));
+
+   body.emit(assign(r003D, swizzle_y(r0038), 0x02));
+
+   ir_variable *const r003E = body.make_temp(glsl_type::uvec2_type, 
"vec_ctor");
+   body.emit(assign(r003E, bit_and(swizzle_x(r0039), body.constant(1048575u)), 
0x01));
+
+   body.emit(assign(r003E, swizzle_y(r0039), 0x02));
+
+   ir_expression *const r003F = rshift(swizzle_x(r0038), 
body.constant(int(20)));
+   ir_expression *const r0040 = bit_and(r003F, body.constant(2047u));
+   ir_expression *const r0041 = equal(r0040, body.constant(2047u));
+   ir_expression *const r0042 = bit_or(swizzle_x(r003D), swizzle_y(r0038));
+   ir_expression *const r0043 = nequal(r0042, body.constant(0u));
+   body.emit(assign(r003C, logic_and(r0041, r0043), 0x01));
+
+   ir_expression *const r0044 = rshift(swizzle_x(r0039), 
body.constant(int(20)));
+   ir_expression *const r0045 = bit_and(r0044, body.constant(2047u));
+   ir_expression *const r0046 = equal(r0045, body.constant(2047u));
+   ir_expression *const r0047 = bit_or(swizzle_x(r003E), swizzle_y(r0039));
+   ir_expression *const r0048 = nequal(r0047, body.constant(0u));
+   body.emit(assign(r003B, logic_and(r0046, r0048), 0x01));
+
+   /* IF CONDITION */
+   ir_expression *const r004A = logic_or(r003C, r003B);
+   ir_if *f0049 = new(mem_ctx) ir_if(operand(r004A).val);
+   exec_list *const f0049_parent_instructions = body.instructions;
+
+  /* 

[Mesa-dev] [RFC 03/11] glsl: Add "built-in" functions to do eq(fp64, fp64)

2017-03-03 Thread Elie Tournier
Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/builtin_float64.h | 119 
 src/compiler/glsl/builtin_functions.cpp |   4 ++
 src/compiler/glsl/builtin_functions.h   |   3 +
 src/compiler/glsl/float64.glsl  |  41 +++
 4 files changed, 167 insertions(+)

diff --git a/src/compiler/glsl/builtin_float64.h 
b/src/compiler/glsl/builtin_float64.h
index 6df91e10f5..e614374d75 100644
--- a/src/compiler/glsl/builtin_float64.h
+++ b/src/compiler/glsl/builtin_float64.h
@@ -36,3 +36,122 @@ fneg64(void *mem_ctx, builtin_available_predicate avail)
sig->replace_parameters(_parameters);
return sig;
 }
+ir_function_signature *
+extractFloat64Frac(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail);
+   ir_factory body(>body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r000D = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r000D);
+   ir_variable *const r000E = body.make_temp(glsl_type::uvec2_type, 
"vec_ctor");
+   body.emit(assign(r000E, bit_and(swizzle_x(r000D), body.constant(1048575u)), 
0x01));
+
+   body.emit(assign(r000E, swizzle_y(r000D), 0x02));
+
+   body.emit(ret(r000E));
+
+   sig->replace_parameters(_parameters);
+   return sig;
+}
+ir_function_signature *
+extractFloat64Exp(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::uint_type, avail);
+   ir_factory body(>body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r000F = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r000F);
+   ir_expression *const r0010 = rshift(swizzle_x(r000F), 
body.constant(int(20)));
+   ir_expression *const r0011 = bit_and(r0010, body.constant(2047u));
+   body.emit(ret(r0011));
+
+   sig->replace_parameters(_parameters);
+   return sig;
+}
+ir_function_signature *
+feq64(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::bool_type, avail);
+   ir_factory body(>body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r0012 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r0012);
+   ir_variable *const r0013 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"b", ir_var_function_in);
+   sig_parameters.push_tail(r0013);
+   ir_variable *const r0014 = body.make_temp(glsl_type::bool_type, 
"return_value");
+   ir_variable *const r0015 = new(mem_ctx) ir_variable(glsl_type::bool_type, 
"isbNaN", ir_var_auto);
+   body.emit(r0015);
+   ir_variable *const r0016 = new(mem_ctx) ir_variable(glsl_type::bool_type, 
"isaNaN", ir_var_auto);
+   body.emit(r0016);
+   ir_variable *const r0017 = body.make_temp(glsl_type::uvec2_type, 
"vec_ctor");
+   body.emit(assign(r0017, bit_and(swizzle_x(r0012), body.constant(1048575u)), 
0x01));
+
+   body.emit(assign(r0017, swizzle_y(r0012), 0x02));
+
+   ir_variable *const r0018 = body.make_temp(glsl_type::uvec2_type, 
"vec_ctor");
+   body.emit(assign(r0018, bit_and(swizzle_x(r0013), body.constant(1048575u)), 
0x01));
+
+   body.emit(assign(r0018, swizzle_y(r0013), 0x02));
+
+   ir_expression *const r0019 = rshift(swizzle_x(r0012), 
body.constant(int(20)));
+   ir_expression *const r001A = bit_and(r0019, body.constant(2047u));
+   ir_expression *const r001B = equal(r001A, body.constant(2047u));
+   ir_expression *const r001C = bit_or(swizzle_x(r0017), swizzle_y(r0012));
+   ir_expression *const r001D = nequal(r001C, body.constant(0u));
+   body.emit(assign(r0016, logic_and(r001B, r001D), 0x01));
+
+   ir_expression *const r001E = rshift(swizzle_x(r0013), 
body.constant(int(20)));
+   ir_expression *const r001F = bit_and(r001E, body.constant(2047u));
+   ir_expression *const r0020 = equal(r001F, body.constant(2047u));
+   ir_expression *const r0021 = bit_or(swizzle_x(r0018), swizzle_y(r0013));
+   ir_expression *const r0022 = nequal(r0021, body.constant(0u));
+   body.emit(assign(r0015, logic_and(r0020, r0022), 0x01));
+
+   /* IF CONDITION */
+   ir_expression *const r0024 = logic_or(r0016, r0015);
+   ir_if *f0023 = new(mem_ctx) ir_if(operand(r0024).val);
+   exec_list *const f0023_parent_instructions = body.instructions;
+
+  /* THEN INSTRUCTIONS */
+  body.instructions = >then_instructions;
+
+  body.emit(assign(r0014, body.constant(false), 0x01));
+
+
+  /* ELSE INSTRUCTIONS */
+  body.instructions = >else_instructions;
+
+  ir_expression *const r0025 = equal(swizzle_y(r0012), swizzle_y(r0013));
+  ir_expression *const r0026 = equal(swizzle_x(r0012), swizzle_x(r0013));
+  

[Mesa-dev] [RFC 00/11] GL_ARB_gpu_shader_fp64

2017-03-03 Thread Elie Tournier
From: Elie Tournier 

This series is based on Ian's work on GL_ARB_gpu_shader_int64 [1].
The goal is to expose GL_ARB_gpu_shader_fp64 to OpenGL 3.0 GPUs.

Each function can be independently tested using shader_runner from piglit.
The piglit files are stored on github [2].

[1] https://lists.freedesktop.org/archives/mesa-dev/2016-November/136718.html
[2] https://github.com/Hopetech/libSoftFloat

Elie Tournier (11):
  glsl: Add "built-in" function to do abs(fp64)
  glsl: Add "built-in" function to do neg(fp64)
  glsl: Add "built-in" functions to do eq(fp64,fp64)
  glsl: Add "built-in" functions to do le(fp64,fp64)
  glsl: Add "built-in" functions to do lt(fp64,fp64)
  glsl: Add "built-in" functions to do add(fp64,fp64)
  glsl: Add "built-in" functions to do mul(fp64,fp64)
  glsl: Add "built-in" functions to do div(fp64,fp64)
  glsl: Add "built-in" functions to do fp32_to_fp64(fp32)
  glsl: Add "built-in" functions to do fp64_to_fp32(fp64)
  glsl: Add fp64 functions to the parser.

 src/compiler/Makefile.sources   | 1 +
 src/compiler/glsl/builtin_float64.h | 24243 ++
 src/compiler/glsl/builtin_functions.cpp |40 +
 src/compiler/glsl/builtin_functions.h   |30 +
 src/compiler/glsl/float64.glsl  |  1378 ++
 src/compiler/glsl/generate_ir.cpp   | 1 +
 src/compiler/glsl/glcpp/glcpp-parse.y   |10 +
 7 files changed, 25703 insertions(+)
 create mode 100644 src/compiler/glsl/builtin_float64.h
 create mode 100644 src/compiler/glsl/float64.glsl

-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 09/11] glsl: Add "built-in" functions to do fp32_to_fp64(fp32)

2017-03-03 Thread Elie Tournier
Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/builtin_float64.h | 490 
 src/compiler/glsl/builtin_functions.cpp |   4 +
 src/compiler/glsl/builtin_functions.h   |   3 +
 src/compiler/glsl/float64.glsl  |  77 +
 4 files changed, 574 insertions(+)

diff --git a/src/compiler/glsl/builtin_float64.h 
b/src/compiler/glsl/builtin_float64.h
index a795d404c1..b50ebc2dc2 100644
--- a/src/compiler/glsl/builtin_float64.h
+++ b/src/compiler/glsl/builtin_float64.h
@@ -23162,3 +23162,493 @@ r1189_data.u[1] = 4294967295;
sig->replace_parameters(_parameters);
return sig;
 }
+ir_function_signature *
+normalizeFloat32Subnormal(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::void_type, avail);
+   ir_factory body(>body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r1354 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"aFrac", ir_var_function_in);
+   sig_parameters.push_tail(r1354);
+   ir_variable *const r1355 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"zExpPtr", ir_var_function_inout);
+   sig_parameters.push_tail(r1355);
+   ir_variable *const r1356 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"zFracPtr", ir_var_function_inout);
+   sig_parameters.push_tail(r1356);
+   ir_variable *const r1357 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"shiftCount", ir_var_auto);
+   body.emit(r1357);
+   ir_variable *const r1358 = body.make_temp(glsl_type::uint_type, "a");
+   body.emit(assign(r1358, r1354, 0x01));
+
+   ir_variable *const r1359 = body.make_temp(glsl_type::uint_type, 
"return_value");
+   ir_variable *const r135A = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"shiftCount", ir_var_auto);
+   body.emit(r135A);
+   /* IF CONDITION */
+   ir_expression *const r135C = equal(r1354, body.constant(0u));
+   ir_if *f135B = new(mem_ctx) ir_if(operand(r135C).val);
+   exec_list *const f135B_parent_instructions = body.instructions;
+
+  /* THEN INSTRUCTIONS */
+  body.instructions = >then_instructions;
+
+  body.emit(assign(r1359, body.constant(32u), 0x01));
+
+
+  /* ELSE INSTRUCTIONS */
+  body.instructions = >else_instructions;
+
+  body.emit(assign(r135A, body.constant(0u), 0x01));
+
+  /* IF CONDITION */
+  ir_expression *const r135E = bit_and(r1354, body.constant(4294901760u));
+  ir_expression *const r135F = equal(r135E, body.constant(0u));
+  ir_if *f135D = new(mem_ctx) ir_if(operand(r135F).val);
+  exec_list *const f135D_parent_instructions = body.instructions;
+
+ /* THEN INSTRUCTIONS */
+ body.instructions = >then_instructions;
+
+ body.emit(assign(r135A, body.constant(16u), 0x01));
+
+ body.emit(assign(r1358, lshift(r1354, body.constant(int(16))), 0x01));
+
+
+  body.instructions = f135D_parent_instructions;
+  body.emit(f135D);
+
+  /* END IF */
+
+  /* IF CONDITION */
+  ir_expression *const r1361 = bit_and(r1358, body.constant(4278190080u));
+  ir_expression *const r1362 = equal(r1361, body.constant(0u));
+  ir_if *f1360 = new(mem_ctx) ir_if(operand(r1362).val);
+  exec_list *const f1360_parent_instructions = body.instructions;
+
+ /* THEN INSTRUCTIONS */
+ body.instructions = >then_instructions;
+
+ body.emit(assign(r135A, add(r135A, body.constant(8u)), 0x01));
+
+ body.emit(assign(r1358, lshift(r1358, body.constant(int(8))), 0x01));
+
+
+  body.instructions = f1360_parent_instructions;
+  body.emit(f1360);
+
+  /* END IF */
+
+  /* IF CONDITION */
+  ir_expression *const r1364 = bit_and(r1358, body.constant(4026531840u));
+  ir_expression *const r1365 = equal(r1364, body.constant(0u));
+  ir_if *f1363 = new(mem_ctx) ir_if(operand(r1365).val);
+  exec_list *const f1363_parent_instructions = body.instructions;
+
+ /* THEN INSTRUCTIONS */
+ body.instructions = >then_instructions;
+
+ body.emit(assign(r135A, add(r135A, body.constant(4u)), 0x01));
+
+ body.emit(assign(r1358, lshift(r1358, body.constant(int(4))), 0x01));
+
+
+  body.instructions = f1363_parent_instructions;
+  body.emit(f1363);
+
+  /* END IF */
+
+  /* IF CONDITION */
+  ir_expression *const r1367 = bit_and(r1358, body.constant(3221225472u));
+  ir_expression *const r1368 = equal(r1367, body.constant(0u));
+  ir_if *f1366 = new(mem_ctx) ir_if(operand(r1368).val);
+  exec_list *const f1366_parent_instructions = body.instructions;
+
+ /* THEN INSTRUCTIONS */
+ body.instructions = >then_instructions;
+
+ body.emit(assign(r135A, add(r135A, body.constant(2u)), 0x01));
+
+ body.emit(assign(r1358, lshift(r1358, body.constant(int(2))), 0x01));
+
+
+  body.instructions = f1366_parent_instructions;
+  body.emit(f1366);
+
+  /* END 

[Mesa-dev] [RFC 05/11] glsl: Add "built-in" functions to do lt(fp64, fp64)

2017-03-03 Thread Elie Tournier
Signed-off-by: Elie Tournier 
---
 src/compiler/glsl/builtin_float64.h | 161 
 src/compiler/glsl/builtin_functions.cpp |   4 +
 src/compiler/glsl/builtin_functions.h   |   3 +
 src/compiler/glsl/float64.glsl  |  45 +
 4 files changed, 213 insertions(+)

diff --git a/src/compiler/glsl/builtin_float64.h 
b/src/compiler/glsl/builtin_float64.h
index f8ceacdabf..e825536466 100644
--- a/src/compiler/glsl/builtin_float64.h
+++ b/src/compiler/glsl/builtin_float64.h
@@ -334,3 +334,164 @@ fle64(void *mem_ctx, builtin_available_predicate avail)
sig->replace_parameters(_parameters);
return sig;
 }
+ir_function_signature *
+lt64(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::bool_type, avail);
+   ir_factory body(>body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r0060 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"a0", ir_var_function_in);
+   sig_parameters.push_tail(r0060);
+   ir_variable *const r0061 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"a1", ir_var_function_in);
+   sig_parameters.push_tail(r0061);
+   ir_variable *const r0062 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"b0", ir_var_function_in);
+   sig_parameters.push_tail(r0062);
+   ir_variable *const r0063 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"b1", ir_var_function_in);
+   sig_parameters.push_tail(r0063);
+   ir_expression *const r0064 = less(r0060, r0062);
+   ir_expression *const r0065 = equal(r0060, r0062);
+   ir_expression *const r0066 = less(r0061, r0063);
+   ir_expression *const r0067 = logic_and(r0065, r0066);
+   ir_expression *const r0068 = logic_or(r0064, r0067);
+   body.emit(ret(r0068));
+
+   sig->replace_parameters(_parameters);
+   return sig;
+}
+ir_function_signature *
+flt64(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::bool_type, avail);
+   ir_factory body(>body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r0069 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r0069);
+   ir_variable *const r006A = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"b", ir_var_function_in);
+   sig_parameters.push_tail(r006A);
+   ir_variable *const r006B = body.make_temp(glsl_type::bool_type, 
"return_value");
+   ir_variable *const r006C = new(mem_ctx) ir_variable(glsl_type::bool_type, 
"isbNaN", ir_var_auto);
+   body.emit(r006C);
+   ir_variable *const r006D = new(mem_ctx) ir_variable(glsl_type::bool_type, 
"isaNaN", ir_var_auto);
+   body.emit(r006D);
+   ir_variable *const r006E = body.make_temp(glsl_type::uvec2_type, 
"vec_ctor");
+   body.emit(assign(r006E, bit_and(swizzle_x(r0069), body.constant(1048575u)), 
0x01));
+
+   body.emit(assign(r006E, swizzle_y(r0069), 0x02));
+
+   ir_variable *const r006F = body.make_temp(glsl_type::uvec2_type, 
"vec_ctor");
+   body.emit(assign(r006F, bit_and(swizzle_x(r006A), body.constant(1048575u)), 
0x01));
+
+   body.emit(assign(r006F, swizzle_y(r006A), 0x02));
+
+   ir_expression *const r0070 = rshift(swizzle_x(r0069), 
body.constant(int(20)));
+   ir_expression *const r0071 = bit_and(r0070, body.constant(2047u));
+   ir_expression *const r0072 = equal(r0071, body.constant(2047u));
+   ir_expression *const r0073 = bit_or(swizzle_x(r006E), swizzle_y(r0069));
+   ir_expression *const r0074 = nequal(r0073, body.constant(0u));
+   body.emit(assign(r006D, logic_and(r0072, r0074), 0x01));
+
+   ir_expression *const r0075 = rshift(swizzle_x(r006A), 
body.constant(int(20)));
+   ir_expression *const r0076 = bit_and(r0075, body.constant(2047u));
+   ir_expression *const r0077 = equal(r0076, body.constant(2047u));
+   ir_expression *const r0078 = bit_or(swizzle_x(r006F), swizzle_y(r006A));
+   ir_expression *const r0079 = nequal(r0078, body.constant(0u));
+   body.emit(assign(r006C, logic_and(r0077, r0079), 0x01));
+
+   /* IF CONDITION */
+   ir_expression *const r007B = logic_or(r006D, r006C);
+   ir_if *f007A = new(mem_ctx) ir_if(operand(r007B).val);
+   exec_list *const f007A_parent_instructions = body.instructions;
+
+  /* THEN INSTRUCTIONS */
+  body.instructions = >then_instructions;
+
+  body.emit(assign(r006B, body.constant(false), 0x01));
+
+
+  /* ELSE INSTRUCTIONS */
+  body.instructions = >else_instructions;
+
+  ir_variable *const r007C = body.make_temp(glsl_type::uint_type, 
"extractFloat64Sign_retval");
+  body.emit(assign(r007C, rshift(swizzle_x(r0069), 
body.constant(int(31))), 0x01));
+
+  ir_variable *const r007D = body.make_temp(glsl_type::uint_type, 
"extractFloat64Sign_retval");
+  body.emit(assign(r007D, rshift(swizzle_x(r006A), 
body.constant(int(31))), 0x01));
+
+  /* IF CONDITION 

[Mesa-dev] [RFC 01/11] glsl: Add "built-in" function to do abs(fp64)

2017-03-03 Thread Elie Tournier
Signed-off-by: Elie Tournier 
---
 src/compiler/Makefile.sources   |  1 +
 src/compiler/glsl/builtin_float64.h | 19 +++
 src/compiler/glsl/builtin_functions.cpp |  4 
 src/compiler/glsl/builtin_functions.h   |  3 +++
 src/compiler/glsl/float64.glsl  | 28 
 src/compiler/glsl/generate_ir.cpp   |  1 +
 6 files changed, 56 insertions(+)
 create mode 100644 src/compiler/glsl/builtin_float64.h
 create mode 100644 src/compiler/glsl/float64.glsl

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 643a0181d8..b67834246f 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -22,6 +22,7 @@ LIBGLSL_FILES = \
glsl/builtin_functions.cpp \
glsl/builtin_functions.h \
glsl/builtin_int64.h \
+   glsl/builtin_float64.h \
glsl/builtin_types.cpp \
glsl/builtin_variables.cpp \
glsl/generate_ir.cpp \
diff --git a/src/compiler/glsl/builtin_float64.h 
b/src/compiler/glsl/builtin_float64.h
new file mode 100644
index 0000000000..c1ec89d210
--- /dev/null
+++ b/src/compiler/glsl/builtin_float64.h
@@ -0,0 +1,19 @@
+ir_function_signature *
+fabs64(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+  new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail);
+   ir_factory body(&sig->body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r000B = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r000B);
+   body.emit(assign(r000B, bit_and(swizzle_x(r000B), 
body.constant(2147483647u)), 0x01));
+
+   body.emit(ret(r000B));
+
+   sig->replace_parameters(&sig_parameters);
+   return sig;
+}
diff --git a/src/compiler/glsl/builtin_functions.cpp 
b/src/compiler/glsl/builtin_functions.cpp
index e03a50c843..b0b1781725 100644
--- a/src/compiler/glsl/builtin_functions.cpp
+++ b/src/compiler/glsl/builtin_functions.cpp
@@ -3129,6 +3129,10 @@ builtin_builder::create_builtins()
 generate_ir::umul64(mem_ctx, integer_functions_supported),
 NULL);
 
+   add_function("__builtin_fabs64",
+generate_ir::fabs64(mem_ctx, integer_functions_supported),
+NULL);
+
 #undef F
 #undef FI
 #undef FIUD_VEC
diff --git a/src/compiler/glsl/builtin_functions.h 
b/src/compiler/glsl/builtin_functions.h
index 7ae211b48a..abe02d97b6 100644
--- a/src/compiler/glsl/builtin_functions.h
+++ b/src/compiler/glsl/builtin_functions.h
@@ -63,6 +63,9 @@ umul64(void *mem_ctx, builtin_available_predicate avail);
 ir_function_signature *
 sign64(void *mem_ctx, builtin_available_predicate avail);
 
+ir_function_signature *
+fabs64(void *mem_ctx, builtin_available_predicate avail);
+
 }
 
 #endif /* BULITIN_FUNCTIONS_H */
diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl
new file mode 100644
index 0000000000..b8f0c2e444
--- /dev/null
+++ b/src/compiler/glsl/float64.glsl
@@ -0,0 +1,28 @@
+/* Compile with:
+ *
+ * glsl_compiler --version 130 --dump-builder float64.glsl > builtin_float64.h
+ *
+ */
+
+#version 130
+
+/* Software IEEE floating-point rounding mode.
+ * GLSL spec section "4.7.1 Range and Precision":
+ * The rounding mode cannot be set and is undefined.
+ * But here, we are able to define the rounding mode at the compilation time.
+ */
+#define FLOAT_ROUND_NEAREST_EVEN    0
+#define FLOAT_ROUND_TO_ZERO         1
+#define FLOAT_ROUND_DOWN            2
+#define FLOAT_ROUND_UP              3
+#define FLOAT_ROUNDING_MODE FLOAT_ROUND_NEAREST_EVEN
+
+/* Absolute value of a Float64 :
+ * Clear the sign bit
+ */
+uvec2
+fabs64( uvec2 a )
+{
+a.x &= 0x7FFFFFFFu;
+return a;
+}
diff --git a/src/compiler/glsl/generate_ir.cpp 
b/src/compiler/glsl/generate_ir.cpp
index 255b0484f2..e6ece4860f 100644
--- a/src/compiler/glsl/generate_ir.cpp
+++ b/src/compiler/glsl/generate_ir.cpp
@@ -29,5 +29,6 @@ using namespace ir_builder;
 namespace generate_ir {
 
 #include "builtin_int64.h"
+#include "builtin_float64.h"
 
 }
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Any updates on threaded GL dispatch?

2017-03-03 Thread Marek Olšák
On Fri, Mar 3, 2017 at 10:19 AM, Timothy Arceri  wrote:
> On 02/03/17 22:18, Marek Olšák wrote:
>>
>> The bad news is my involvement is currently on hold due to other
>> projects and responsibilities.
>
>
> I can probably spend some time on this. Seems like Gregory has taken care of
> most of the problems and it just needs someone to push it over the line.

There are also plenty of unresolved review comments from Emil and others.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 0/7] gallium: Video postprocessor rework

2017-03-03 Thread Thomas Hellstrom
So after an off-line chat with Christian we decided to drop this series,
since I don't have the cycles to fix this up.

For completeness, if anybody decides at some point to (re)use some of
the code, some comments below.

On 03/03/2017 01:32 PM, Christian König wrote:
> Hi Thomas,
>
> Am 02.03.2017 um 21:00 schrieb Thomas Hellstrom:
>> This patch series introduces a postprocessor abstraction. It could be
>> promoted
>> to a gallium interface but for now it's implemented as a utility.
>
> Well, first of all use a gallium interface for this. Putting this into
> utility doesn't look like it makes much sense when it is supposed to
> be a driver interface.
>
>> The idea
>> is that a driver wanting to implement the whole or part of the
>> postprocessing
>> pipeline using video hardware can do that. It also significantly
>> simplifies
>> implementing a future full postprocessor pipeline also in the libVA
>> state tracker.
>>
>> Some highlights:
>> - The postprocessor abstraction is very similar to the vl compositor
>> API,
>> except it add filters.
>
> NAK on that, we already had that design and it was a pain to support.
>
> If you want to implement parts of the post processing pipeline in the
> driver make each pipeline step a separate object.

The compositor API suited our needs very well and was very easy and
straightforward to work with. I guess mostly because it resembles the
postprocessing pipeline of VAAPI, VDPAU and DXVA/D3D11.

>
>> - The bounce buffer cache. Caches buffers where we know more about
>> the usage.
>> This may duplicate driver functionality but for drivers with strongly
>> typed
>> buffers, it's a big win.
>
> That's similar to what I've done for the video buffers, but in general
> I think that this doesn't belong into the state tracker.
>
> The driver should implement buffer reuse if it can do this without
> problems, but how many buffers should be in there and when they are
> reclaimed is strongly hardware dependent.

True. Although for drivers with strongly typed buffers, that becomes
difficult since caches need to be quite large to facilitate hits. The
caching needs for this pipeline are also special in that the number of
cached buffers is small and very predictable, and there's no need to wait
for idle before reusing buffers...


>
>> - Moving the bicubic filter. Avoids duplicated code and makes things
>> like
>> rotation and blending straightforward.
>
> A BIG NAK on that as well. The bicubic filter doesn't work on all
> hardware, so creating the shaders all the time while it isn't use is
> clearly not an option.
>
> Additional to that I would rather remove functionality from the
> compositor than stuffing even more into it.

Compiling on-demand is an easy fix.

Having all scaling options in one place logically and physically seemed
attractive and helped avoid unwanted behaviour like filtering
overlaid or already-scaled surfaces and video-background borders, and
double scaling of overlay surfaces. It also enabled blending and rotation.


Thanks,
Thomas

>
>> - A g3dvl implementation. Mostly replicates the old implementation
>> using the
>> vl compositor, but with provisions to replace part of the postprocessing
>> pipeline.
>> - State tracker modifications for VDPAU and VA.
>>
>> - Tested with mplayer -vo vdpau software playback on Nouveau.
>
> Please test that with radeon hardware as well if possible. Nouveau
> doesn't fully support all the functionality and project like Kodi
> don't test with it.
>
> Regards,
> Christian.
>
>>
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddev=DwICaQ=uilaK90D4TOVoH58JNXRgQ=wnSlgOCqfpNS4d02vP68_E9q2BNMCwfD2OZ_6dCFVQQ=GDt3ZtrRjpa7Se4bsMYXF18gBs6IXAqtSW_3Y3df0Bs=-HnyAt6aoAvWpesLw7KAHyyQmiJVv43cJNL7VmIjOh0=
>
>
>


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Moving i965 driver to genxml commands and structures

2017-03-03 Thread Louis-Francis Ratté-Boulianne
Hi,

To avoid any duplicate work, I want to inform everyone that I'm in
the process of modifying the i965 driver so that it uses the same
genxml infrastructure as the one used by the Intel Vulkan driver. The
task has been proposed by Jason Ekstrand.

I will probably post my first patchset today that will contain the
basic call to replace the BEGIN_BATCH/ADVANCE_BATCH macros and the XML
specification files for generations 4, 4.5 and 5.

Let me know if you have any questions or suggestions related to this
work.

--
Louis-Francis Ratté-Boulianne
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] fixup! i965: Move the back-end compiler to src/intel/compiler

2017-03-03 Thread Emil Velikov
On 3 March 2017 at 11:50, Tapani Pälli  wrote:
>
>
> On 03/03/2017 01:30 PM, Tapani Pälli wrote:
>>
>>
>>
>> On 03/03/2017 01:21 PM, Tapani Pälli wrote:
>>>
>>>
>>>
>>> On 03/02/2017 03:41 PM, Emil Velikov wrote:

 Cc: Mauro Rossi 
 Cc: Tapani Pälli 
 Cc: Jason Ekstrand 
 ---
 All, here is a 5min attempt to fix the Android build. Tapani, Mauro do
 give it a test since I've done an educated guess here.

 Pull Jason's branch and apply on top.
 https://cgit.freedesktop.org/~jekstrand/mesa/log/?h=wip/move-compiler
>>>
>>>
>>> when compiling blorp and isl it says:
>>>
>>> fatal error: 'brw_compiler.h' file not found
>>>
>>> this can be fixed by putting 'src/intel/compiler' include path for blorp
>>> and isl libraries:
>>>
>>>
>>> https://github.com/tpalli/external-mesa/commit/4ffef80a45bd93731b2d2af0cb532687f11ae8d3
>>>
>>>
>>>
>>> but when linking i965_dri library then there is quite a big explosion of
>>> missing symbols, I can take a look at this but a bit later.
>>
>>
>> doh sorry, it looks like I did not apply your patch after all in that
>> tree ... ignore the noise, will try again
>
>
> ok .. now I got some fixes for this here:
>
> https://github.com/tpalli/external-mesa/commits/move_compiler
>
> but yeah .. more work required :/ I don't understand why adding
> MESA_GEN_GLSL_H to LOCAL_GENERATED_SOURCES does not help.
>
Thanks for the fixup, but I think my earlier suggestion is coming to bite us.

We really want to stop the copy/pasta of the existing rules -
just move them to a common place and use them.
See below for an explicit example. One at a time of course.

To resolve the dependency tracking one can use a) libmesa_genxml
approach or b) have a short parser in the top-level Android.mk that
goes through the BUILT_SOURCES and runs the generation rule(s).

-Emil


At the risk of sounding like a knob I'll repeat the plan, one more time.

 - move the Makefile.am rule, [as-is] to separate file
 - where applicable/needed add $srcdir and define that for Android
 - avoid adding path prefix for the generated files -
 - above might lead to files being generated in a non !$intermediates
path, if so
cd/update instructions to ensure they don't end all over the place.
 - update CleanSpec.mk if needed
 - throw the includes into Makefile.sources/other file
 - ...
 - profit


Here is roughly how things might look - do use better names as
you see fit.

$ cat src/util/Android.mk

LOCAL_PATH := $(call my-dir)

# We'll need to use srcdir/top_srcdir/others in the next file
# If doing that in MESA_COMMON_MK does not work, because it's too late
- use another file and include it here.

PYTHON_GEN = $(hide)$(MESA_PYTHON2)

include $(LOCAL_PATH)/Makefile.gen // has generation rule(s)
include $(LOCAL_PATH)/Makefile.sources // has sources list(s) and C/CPP/CXXFLAGS

include $(CLEAR_VARS)

LOCAL_SRC_FILES := $(MESA_UTIL_FILES)

LOCAL_CFLAGS := $(MESA_CPP_FLAGS)

LOCAL_MODULE := libmesa_util

LOCAL_MODULE_CLASS := STATIC_LIBRARIES

# Generated sources
# Files are implicitly added to LOCAL_SRC_FILES by the Android build system
# Generation rules are in Makefile.gen

LOCAL_GENERATED_SOURCES := $(MESA_UTIL_GENERATED_FILES)

include $(MESA_COMMON_MK)
include $(BUILD_STATIC_LIBRARY)

$ cat src/util/Makefile.am

PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)

include Makefile.gen
include Makefile.sources

noinst_LTLIBRARIES = libmesautil.la

libmesautil_la_CPPFLAGS =  $(MESA_CPP_FLAGS)

libmesautil_la_SOURCES = \
   $(MESA_UTIL_FILES) \
   $(MESA_UTIL_GENERATED_FILES)

# test/automake specific bits
...

$ cat src/util/Makefile.sources

MESA_CPP_FLAGS := \
   $(DEFINES) \
   -I$(top_srcdir)/include \
   -I$(top_srcdir)/src \
   -I$(top_srcdir)/src/mapi \
   -I$(top_srcdir)/src/mesa \
   -I$(top_srcdir)/src/gallium/include \
   -I$(top_srcdir)/src/gallium/auxiliary \
   $(VISIBILITY_CFLAGS) \
   $(MSVC2013_COMPAT_CFLAGS)

# existing SOURCES lists
...

$cat src/util/Makefile.gen

format_srgb.c: $(srcdir)/format_srgb.py
   $(PYTHON_GEN) $(srcdir)/format_srgb.py > $@
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 100049] "ralloc: Make sure ralloc() allocations match malloc()'s alignment." causes seg fault in 32bit build

2017-03-03 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=100049

Bug ID: 100049
   Summary: "ralloc: Make sure ralloc() allocations match
malloc()'s alignment." causes seg fault in 32bit build
   Product: Mesa
   Version: git
  Hardware: Other
OS: All
Status: NEW
  Severity: normal
  Priority: medium
 Component: Mesa core
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: raffa...@zoho.com
QA Contact: mesa-dev@lists.freedesktop.org

Created attachment 130046
  --> https://bugs.freedesktop.org/attachment.cgi?id=130046=edit
glxinfo 32bit backtrace

commit cd2b55e536dc806f9358f71db438dd9c246cdb14 "ralloc: Make sure ralloc()
allocations match malloc()'s alignment." causes segmentation fault in 32bit
build with -O3, -O2 works fine.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] android: fix outdir for gen_enum_to_str files

2017-03-03 Thread Eric Engestrom
On Friday, 2017-03-03 12:52:56 +0200, Tapani Pälli wrote:
> when files are being generated the value of $intermediates var content can be
> completely random, this makes sure that outdir is the wanted one.
> 
> Fixes: 3f2cb699 ("android: vulkan: add support for libmesa_vulkan_util")
> Signed-off-by: Tapani Pälli 

Reviewed-by: Eric Engestrom 

This can probably be applied to a lot of other places; even though it
might not be fixing bugs there, it's still cleaner to reuse the var than
copying the path.

> ---
>  src/vulkan/Android.mk | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/src/vulkan/Android.mk b/src/vulkan/Android.mk
> index 9f71d8f..7653f34 100644
> --- a/src/vulkan/Android.mk
> +++ b/src/vulkan/Android.mk
> @@ -1,4 +1,5 @@
>  # Copyright © 2017 Mauro Rossi 
> +# Copyright © 2017 Intel Corporation
>  #
>  # Permission is hereby granted, free of charge, to any person obtaining a
>  # copy of this software and associated documentation files (the "Software"),
> @@ -45,7 +46,7 @@ vulkan_api_xml = $(MESA_TOP)/src/vulkan/registry/vk.xml
>  $(LOCAL_GENERATED_SOURCES): $(MESA_TOP)/src/vulkan/util/gen_enum_to_str.py 
> $(vulkan_api_xml)
>   @echo "target Generated: $(PRIVATE_MODULE) <= $(notdir $(@))"
>   @mkdir -p $(dir $@)
> - $(hide) $(MESA_PYTHON2) $(MESA_TOP)/src/vulkan/util/gen_enum_to_str.py 
> --xml $(vulkan_api_xml) --outdir $(intermediates)/util
> + $(hide) $(MESA_PYTHON2) $(MESA_TOP)/src/vulkan/util/gen_enum_to_str.py 
> --xml $(vulkan_api_xml) --outdir $(dir $@)
>  
>  LOCAL_EXPORT_C_INCLUDE_DIRS := \
>  $(intermediates)
> -- 
> 2.9.3
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 0/7] gallium: Video postprocessor rework

2017-03-03 Thread Christian König

Hi Thomas,

Am 02.03.2017 um 21:00 schrieb Thomas Hellstrom:

This patch series introduces a postprocessor abstraction. It could be promoted
to a gallium interface but for now it's implemented as a utility.


Well, first of all use a gallium interface for this. Putting this into 
utility doesn't look like it makes much sense when it is supposed to be 
a driver interface.



The idea
is that a driver wanting to implement the whole or part of the postprocessing
pipeline using video hardware can do that. It also significantly simplifies
implementing a future full postprocessor pipeline also in the libVA
state tracker.

Some highlights:
- The postprocessor abstraction is very similar to the vl compositor API,
except it add filters.


NAK on that, we already had that design and it was a pain to support.

If you want to implement parts of the post processing pipeline in the 
driver make each pipeline step a separate object.



- The bounce buffer cache. Caches buffers where we know more about the usage.
This may duplicate driver functionality but for drivers with strongly typed
buffers, it's a big win.


That's similar to what I've done for the video buffers, but in general I 
think that this doesn't belong into the state tracker.


The driver should implement buffer reuse if it can do this without 
problems, but how many buffers should be in there and when they are 
reclaimed is strongly hardware dependent.



- Moving the bicubic filter. Avoids duplicated code and makes things like
rotation and blending straightforward.


A BIG NAK on that as well. The bicubic filter doesn't work on all
hardware, so creating the shaders all the time while it isn't used is
clearly not an option.


Additional to that I would rather remove functionality from the 
compositor than stuffing even more into it.



- A g3dvl implementation. Mostly replicates the old implementation using the
vl compositor, but with provisions to replace part of the postprocessing
pipeline.
- State tracker modifications for VDPAU and VA.

- Tested with mplayer -vo vdpau software playback on Nouveau.


Please test that with radeon hardware as well if possible. Nouveau
doesn't fully support all the functionality and projects like Kodi don't
test with it.


Regards,
Christian.




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2] util/disk_cache: support caches for multiple architectures

2017-03-03 Thread Grazvydas Ignotas
On Fri, Mar 3, 2017 at 5:27 AM, Timothy Arceri  wrote:
> Previously we were deleting the entire cache if a user switched
> between 32 and 64 bit applications.
>
> V2: make the check more generic, it should now work with any
> platform we are likely to support.
> ---
>  src/util/disk_cache.c | 19 +++
>  1 file changed, 19 insertions(+)
>
> diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
> index 3abdec4..92528a9 100644
> --- a/src/util/disk_cache.c
> +++ b/src/util/disk_cache.c
> @@ -40,20 +40,30 @@
>  #include "zlib.h"
>
>  #include "util/crc32.h"
>  #include "util/u_atomic.h"
>  #include "util/mesa-sha1.h"
>  #include "util/ralloc.h"
>  #include "main/errors.h"
>
>  #include "disk_cache.h"
>
> +#if defined(__ILP32__)
> +#if defined(__x86_64__) || defined(__arm__)
> +#define CACHE_ARCH "ilp-32"
> +#else
> +#define CACHE_ARCH "32"
> +#endif
> +#else
> +#define CACHE_ARCH "64"
> +#endif

That reports "64" for me on gcc -m32, I think only clang sets
__ILP32__ for non-x32 32bit build. I'd still suggest using sizeof(void
*) directly in the code, perhaps within some "const char
*get_arch_bitness_string()" helper, that should be more reliable.

Gražvydas
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] configure.ac: require pthread-stubs only where available

2017-03-03 Thread Emil Velikov
On 2 March 2017 at 19:34, Gary Wong  wrote:
> On Thu, Mar 02, 2017 at 07:02:44PM +, Emil Velikov wrote:
>> Jeremy, others,
>>
>> Afaict pthread-stubs expands to a simple .pc on your platforms, but a
>> confirmation will be greatly appreciated.
>
> Hurd is not really "my" platform, but yes, I agree that with its
> current libc, pthreads-stubs is merely a dummy .pc.  This was not
> always the case:
>
> https://www.gnu.org/software/hurd/open_issues/libpthread_dlopen.html
>
Right. My train of thought was that even if I got it wrong, people
will still have greater knowledge than me where to look ;-)

Thanks for the confirmation !
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] configure.ac: increase required swr llvm to 3.9.0

2017-03-03 Thread Emil Velikov
Hi Tim,

On 3 March 2017 at 01:16, Tim Rowley  wrote:
> GS implementation uses the masked.{gather,store} intrinsics,
> introduced in llvm-3.9.0.

Please mention in the commit message that the SCons build already
requires 3.9 or later.
Can you add a note about the LLVM requirement and GS support in
docs/relnotes/17.1.0.html, with a separate commit on top ?

With this we have some ~20 preprocessor conditionals which want to be
cleaned up. Look for
$ git grep  "LLVM_.*VERSION\|HAVE_LLVM" -- src/gallium/drivers/swr/

Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] fixup! i965: Move the back-end compiler to src/intel/compiler

2017-03-03 Thread Tapani Pälli



On 03/03/2017 01:30 PM, Tapani Pälli wrote:



On 03/03/2017 01:21 PM, Tapani Pälli wrote:



On 03/02/2017 03:41 PM, Emil Velikov wrote:

Cc: Mauro Rossi 
Cc: Tapani Pälli 
Cc: Jason Ekstrand 
---
All, here is a 5min attempt to fix the Android build. Tapani, Mauro do
give it a test since I've done an educated guess here.

Pull Jason's branch and apply on top.
https://cgit.freedesktop.org/~jekstrand/mesa/log/?h=wip/move-compiler


when compiling blorp and isl it says:

fatal error: 'brw_compiler.h' file not found

this can be fixed by putting 'src/intel/compiler' include path for blorp
and isl libraries:

https://github.com/tpalli/external-mesa/commit/4ffef80a45bd93731b2d2af0cb532687f11ae8d3



but when linking i965_dri library then there is quite a big explosion of
missing symbols, I can take a look at this but a bit later.


doh sorry, it looks like I did not apply your patch after all in that
tree ... ignore the noise, will try again


ok .. now I got some fixes for this here:

https://github.com/tpalli/external-mesa/commits/move_compiler

but yeah .. more work required :/ I don't understand why adding 
MESA_GEN_GLSL_H to LOCAL_GENERATED_SOURCES does not help.






---
 .../Android.gen.mk => intel/Android.compiler.mk}   | 34
++
 src/intel/Android.mk   |  1 +
 src/mesa/drivers/dri/i965/Android.mk   | 30
++-
 3 files changed, 31 insertions(+), 34 deletions(-)
 rename src/{mesa/drivers/dri/i965/Android.gen.mk =>
intel/Android.compiler.mk} (64%)

diff --git a/src/mesa/drivers/dri/i965/Android.gen.mk
b/src/intel/Android.compiler.mk
similarity index 64%
rename from src/mesa/drivers/dri/i965/Android.gen.mk
rename to src/intel/Android.compiler.mk
index c2b8dae339..2d4ba91396 100644
--- a/src/mesa/drivers/dri/i965/Android.gen.mk
+++ b/src/intel/Android.compiler.mk
@@ -1,4 +1,7 @@
 #
+# Copyright (C) 2011 Intel Corporation
+# Copyright (C) 2010-2011 Chia-I Wu 
+# Copyright (C) 2010-2011 LunarG
 # Copyright (C) 2016 Linaro, Ltd., Rob Herring 
 #
 # Permission is hereby granted, free of charge, to any person
obtaining a
@@ -20,21 +23,40 @@
 # DEALINGS IN THE SOFTWARE.
 #

-ifeq ($(LOCAL_MODULE_CLASS),)
-LOCAL_MODULE_CLASS := SHARED_LIBRARIES
-endif
+# ---
+# Build libmesa_intel_compiler
+# ---
+
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := libmesa_intel_compiler
+LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+
+LOCAL_SRC_FILES := \
+$(COMPILER_FILES)
+
+LOCAL_C_INCLUDES := \
+$(MESA_TOP)/src/intel \
+$(MESA_TOP)/src/compiler/nir \
+$(call
generated-sources-dir-for,STATIC_LIBRARIES,libmesa_nir,,)/nir
+
+LOCAL_SHARED_LIBRARIES := \
+libdrm_intel

 brw_nir_trig_workarounds_deps := \
-$(LOCAL_PATH)/brw_nir_trig_workarounds.py \
+$(LOCAL_PATH)/compiler/brw_nir_trig_workarounds.py \
 $(MESA_TOP)/src/compiler/nir/nir_algebraic.py

 intermediates := $(call local-generated-sources-dir)

-$(intermediates)/brw_nir_trig_workarounds.c:
$(brw_nir_trig_workarounds_deps)
+$(intermediates)/compiler/brw_nir_trig_workarounds.c:
$(brw_nir_trig_workarounds_deps)
 @mkdir -p $(dir $@)
 $(hide) PYTHONPATH=$(MESA_TOP)/src/compiler/nir $(MESA_PYTHON2)
$< > $@

 LOCAL_STATIC_LIBRARIES = libmesa_genxml

 LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \
-$(i965_compiler_GENERATED_FILES))
+$(COMPILER_GENERATED_FILES))
+
+include $(MESA_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/src/intel/Android.mk b/src/intel/Android.mk
index 7cb2bb9f8d..db6c770f35 100644
--- a/src/intel/Android.mk
+++ b/src/intel/Android.mk
@@ -27,6 +27,7 @@ include $(LOCAL_PATH)/Makefile.sources

 include $(LOCAL_PATH)/Android.blorp.mk
 include $(LOCAL_PATH)/Android.common.mk
+include $(LOCAL_PATH)/Android.compiler.mk
 include $(LOCAL_PATH)/Android.genxml.mk
 include $(LOCAL_PATH)/Android.isl.mk
 include $(LOCAL_PATH)/Android.vulkan.mk
diff --git a/src/mesa/drivers/dri/i965/Android.mk
b/src/mesa/drivers/dri/i965/Android.mk
index 7dea3c2507..708851c866 100644
--- a/src/mesa/drivers/dri/i965/Android.mk
+++ b/src/mesa/drivers/dri/i965/Android.mk
@@ -152,32 +152,6 @@ include $(MESA_COMMON_MK)
 include $(BUILD_STATIC_LIBRARY)

 # ---
-# Build libmesa_i965_compiler
-# ---
-
-include $(CLEAR_VARS)
-
-LOCAL_MODULE := libmesa_i965_compiler
-LOCAL_MODULE_CLASS := STATIC_LIBRARIES
-
-LOCAL_SRC_FILES := \
-$(i965_compiler_FILES)
-
-LOCAL_C_INCLUDES := \
-$(MESA_DRI_C_INCLUDES) \
-$(MESA_TOP)/src/intel \
-$(MESA_TOP)/src/compiler/nir \
-$(call
generated-sources-dir-for,STATIC_LIBRARIES,libmesa_nir,,)/nir \
-$(call
generated-sources-dir-for,STATIC_LIBRARIES,libmesa_glsl,,)/glsl
-
-LOCAL_SHARED_LIBRARIES := \
-libdrm_intel
-
-include $(LOCAL_PATH)/Android.gen.mk
-include 

Re: [Mesa-dev] [PATCH] fixup! i965: Move the back-end compiler to src/intel/compiler

2017-03-03 Thread Tapani Pälli



On 03/03/2017 01:21 PM, Tapani Pälli wrote:



On 03/02/2017 03:41 PM, Emil Velikov wrote:

Cc: Mauro Rossi 
Cc: Tapani Pälli 
Cc: Jason Ekstrand 
---
All, here is a 5min attempt to fix the Android build. Tapani, Mauro do
give it a test since I've done an educated guess here.

Pull Jason's branch and apply on top.
https://cgit.freedesktop.org/~jekstrand/mesa/log/?h=wip/move-compiler


when compiling blorp and isl it says:

fatal error: 'brw_compiler.h' file not found

this can be fixed by putting 'src/intel/compiler' include path for blorp
and isl libraries:

https://github.com/tpalli/external-mesa/commit/4ffef80a45bd93731b2d2af0cb532687f11ae8d3


but when linking i965_dri library then there is quite a big explosion of
missing symbols, I can take a look at this but a bit later.


doh sorry, it looks like I did not apply your patch after all in that 
tree ... ignore the noise, will try again






---
 .../Android.gen.mk => intel/Android.compiler.mk}   | 34
++
 src/intel/Android.mk   |  1 +
 src/mesa/drivers/dri/i965/Android.mk   | 30
++-
 3 files changed, 31 insertions(+), 34 deletions(-)
 rename src/{mesa/drivers/dri/i965/Android.gen.mk =>
intel/Android.compiler.mk} (64%)

diff --git a/src/mesa/drivers/dri/i965/Android.gen.mk
b/src/intel/Android.compiler.mk
similarity index 64%
rename from src/mesa/drivers/dri/i965/Android.gen.mk
rename to src/intel/Android.compiler.mk
index c2b8dae339..2d4ba91396 100644
--- a/src/mesa/drivers/dri/i965/Android.gen.mk
+++ b/src/intel/Android.compiler.mk
@@ -1,4 +1,7 @@
 #
+# Copyright (C) 2011 Intel Corporation
+# Copyright (C) 2010-2011 Chia-I Wu 
+# Copyright (C) 2010-2011 LunarG
 # Copyright (C) 2016 Linaro, Ltd., Rob Herring 
 #
 # Permission is hereby granted, free of charge, to any person
obtaining a
@@ -20,21 +23,40 @@
 # DEALINGS IN THE SOFTWARE.
 #

-ifeq ($(LOCAL_MODULE_CLASS),)
-LOCAL_MODULE_CLASS := SHARED_LIBRARIES
-endif
+# ---
+# Build libmesa_intel_compiler
+# ---
+
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := libmesa_intel_compiler
+LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+
+LOCAL_SRC_FILES := \
+$(COMPILER_FILES)
+
+LOCAL_C_INCLUDES := \
+$(MESA_TOP)/src/intel \
+$(MESA_TOP)/src/compiler/nir \
+$(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_nir,,)/nir
+
+LOCAL_SHARED_LIBRARIES := \
+libdrm_intel

 brw_nir_trig_workarounds_deps := \
-$(LOCAL_PATH)/brw_nir_trig_workarounds.py \
+$(LOCAL_PATH)/compiler/brw_nir_trig_workarounds.py \
 $(MESA_TOP)/src/compiler/nir/nir_algebraic.py

 intermediates := $(call local-generated-sources-dir)

-$(intermediates)/brw_nir_trig_workarounds.c:
$(brw_nir_trig_workarounds_deps)
+$(intermediates)/compiler/brw_nir_trig_workarounds.c:
$(brw_nir_trig_workarounds_deps)
 @mkdir -p $(dir $@)
 $(hide) PYTHONPATH=$(MESA_TOP)/src/compiler/nir $(MESA_PYTHON2)
$< > $@

 LOCAL_STATIC_LIBRARIES = libmesa_genxml

 LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \
-$(i965_compiler_GENERATED_FILES))
+$(COMPILER_GENERATED_FILES))
+
+include $(MESA_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/src/intel/Android.mk b/src/intel/Android.mk
index 7cb2bb9f8d..db6c770f35 100644
--- a/src/intel/Android.mk
+++ b/src/intel/Android.mk
@@ -27,6 +27,7 @@ include $(LOCAL_PATH)/Makefile.sources

 include $(LOCAL_PATH)/Android.blorp.mk
 include $(LOCAL_PATH)/Android.common.mk
+include $(LOCAL_PATH)/Android.compiler.mk
 include $(LOCAL_PATH)/Android.genxml.mk
 include $(LOCAL_PATH)/Android.isl.mk
 include $(LOCAL_PATH)/Android.vulkan.mk
diff --git a/src/mesa/drivers/dri/i965/Android.mk
b/src/mesa/drivers/dri/i965/Android.mk
index 7dea3c2507..708851c866 100644
--- a/src/mesa/drivers/dri/i965/Android.mk
+++ b/src/mesa/drivers/dri/i965/Android.mk
@@ -152,32 +152,6 @@ include $(MESA_COMMON_MK)
 include $(BUILD_STATIC_LIBRARY)

 # ---
-# Build libmesa_i965_compiler
-# ---
-
-include $(CLEAR_VARS)
-
-LOCAL_MODULE := libmesa_i965_compiler
-LOCAL_MODULE_CLASS := STATIC_LIBRARIES
-
-LOCAL_SRC_FILES := \
-$(i965_compiler_FILES)
-
-LOCAL_C_INCLUDES := \
-$(MESA_DRI_C_INCLUDES) \
-$(MESA_TOP)/src/intel \
-$(MESA_TOP)/src/compiler/nir \
-$(call
generated-sources-dir-for,STATIC_LIBRARIES,libmesa_nir,,)/nir \
-$(call
generated-sources-dir-for,STATIC_LIBRARIES,libmesa_glsl,,)/glsl
-
-LOCAL_SHARED_LIBRARIES := \
-libdrm_intel
-
-include $(LOCAL_PATH)/Android.gen.mk
-include $(MESA_COMMON_MK)
-include $(BUILD_STATIC_LIBRARY)
-
-# ---
 # Build i965_dri
 # ---

@@ -209,9 +183,9 @@ LOCAL_WHOLE_STATIC_LIBRARIES := \
 $(MESA_DRI_WHOLE_STATIC_LIBRARIES) \
 $(I965_PERGEN_LIBS) 

Re: [Mesa-dev] [PATCH] fixup! i965: Move the back-end compiler to src/intel/compiler

2017-03-03 Thread Tapani Pälli



On 03/02/2017 03:41 PM, Emil Velikov wrote:

Cc: Mauro Rossi 
Cc: Tapani Pälli 
Cc: Jason Ekstrand 
---
All, here is a 5min attempt to fix the Android build. Tapani, Mauro do
give it a test since I've done an educated guess here.

Pull Jason's branch and apply on top.
https://cgit.freedesktop.org/~jekstrand/mesa/log/?h=wip/move-compiler


when compiling blorp and isl it says:

fatal error: 'brw_compiler.h' file not found

this can be fixed by putting 'src/intel/compiler' include path for blorp 
and isl libraries:


https://github.com/tpalli/external-mesa/commit/4ffef80a45bd93731b2d2af0cb532687f11ae8d3

but when linking i965_dri library then there is quite a big explosion of 
missing symbols, I can take a look at this but a bit later.




---
 .../Android.gen.mk => intel/Android.compiler.mk}   | 34 ++
 src/intel/Android.mk   |  1 +
 src/mesa/drivers/dri/i965/Android.mk   | 30 ++-
 3 files changed, 31 insertions(+), 34 deletions(-)
 rename src/{mesa/drivers/dri/i965/Android.gen.mk => intel/Android.compiler.mk} 
(64%)

diff --git a/src/mesa/drivers/dri/i965/Android.gen.mk 
b/src/intel/Android.compiler.mk
similarity index 64%
rename from src/mesa/drivers/dri/i965/Android.gen.mk
rename to src/intel/Android.compiler.mk
index c2b8dae339..2d4ba91396 100644
--- a/src/mesa/drivers/dri/i965/Android.gen.mk
+++ b/src/intel/Android.compiler.mk
@@ -1,4 +1,7 @@
 #
+# Copyright (C) 2011 Intel Corporation
+# Copyright (C) 2010-2011 Chia-I Wu 
+# Copyright (C) 2010-2011 LunarG
 # Copyright (C) 2016 Linaro, Ltd., Rob Herring 
 #
 # Permission is hereby granted, free of charge, to any person obtaining a
@@ -20,21 +23,40 @@
 # DEALINGS IN THE SOFTWARE.
 #

-ifeq ($(LOCAL_MODULE_CLASS),)
-LOCAL_MODULE_CLASS := SHARED_LIBRARIES
-endif
+# ---
+# Build libmesa_intel_compiler
+# ---
+
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := libmesa_intel_compiler
+LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+
+LOCAL_SRC_FILES := \
+   $(COMPILER_FILES)
+
+LOCAL_C_INCLUDES := \
+   $(MESA_TOP)/src/intel \
+   $(MESA_TOP)/src/compiler/nir \
+   $(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_nir,,)/nir
+
+LOCAL_SHARED_LIBRARIES := \
+   libdrm_intel

 brw_nir_trig_workarounds_deps := \
-   $(LOCAL_PATH)/brw_nir_trig_workarounds.py \
+   $(LOCAL_PATH)/compiler/brw_nir_trig_workarounds.py \
$(MESA_TOP)/src/compiler/nir/nir_algebraic.py

 intermediates := $(call local-generated-sources-dir)

-$(intermediates)/brw_nir_trig_workarounds.c: $(brw_nir_trig_workarounds_deps)
+$(intermediates)/compiler/brw_nir_trig_workarounds.c: 
$(brw_nir_trig_workarounds_deps)
@mkdir -p $(dir $@)
$(hide) PYTHONPATH=$(MESA_TOP)/src/compiler/nir $(MESA_PYTHON2) $< > $@

 LOCAL_STATIC_LIBRARIES = libmesa_genxml

 LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \
-   $(i965_compiler_GENERATED_FILES))
+   $(COMPILER_GENERATED_FILES))
+
+include $(MESA_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/src/intel/Android.mk b/src/intel/Android.mk
index 7cb2bb9f8d..db6c770f35 100644
--- a/src/intel/Android.mk
+++ b/src/intel/Android.mk
@@ -27,6 +27,7 @@ include $(LOCAL_PATH)/Makefile.sources

 include $(LOCAL_PATH)/Android.blorp.mk
 include $(LOCAL_PATH)/Android.common.mk
+include $(LOCAL_PATH)/Android.compiler.mk
 include $(LOCAL_PATH)/Android.genxml.mk
 include $(LOCAL_PATH)/Android.isl.mk
 include $(LOCAL_PATH)/Android.vulkan.mk
diff --git a/src/mesa/drivers/dri/i965/Android.mk 
b/src/mesa/drivers/dri/i965/Android.mk
index 7dea3c2507..708851c866 100644
--- a/src/mesa/drivers/dri/i965/Android.mk
+++ b/src/mesa/drivers/dri/i965/Android.mk
@@ -152,32 +152,6 @@ include $(MESA_COMMON_MK)
 include $(BUILD_STATIC_LIBRARY)

 # ---
-# Build libmesa_i965_compiler
-# ---
-
-include $(CLEAR_VARS)
-
-LOCAL_MODULE := libmesa_i965_compiler
-LOCAL_MODULE_CLASS := STATIC_LIBRARIES
-
-LOCAL_SRC_FILES := \
-   $(i965_compiler_FILES)
-
-LOCAL_C_INCLUDES := \
-   $(MESA_DRI_C_INCLUDES) \
-   $(MESA_TOP)/src/intel \
-   $(MESA_TOP)/src/compiler/nir \
-   $(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_nir,,)/nir \
-   $(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_glsl,,)/glsl
-
-LOCAL_SHARED_LIBRARIES := \
-   libdrm_intel
-
-include $(LOCAL_PATH)/Android.gen.mk
-include $(MESA_COMMON_MK)
-include $(BUILD_STATIC_LIBRARY)
-
-# ---
 # Build i965_dri
 # ---

@@ -209,9 +183,9 @@ LOCAL_WHOLE_STATIC_LIBRARIES := \
$(MESA_DRI_WHOLE_STATIC_LIBRARIES) \
$(I965_PERGEN_LIBS) \
libmesa_intel_common \
-   libmesa_blorp \
libmesa_isl \
-   

[Mesa-dev] [PATCH] android: fix outdir for gen_enum_to_str files

2017-03-03 Thread Tapani Pälli
When the files are being generated, the value of the $intermediates variable can be
completely random; this makes sure that outdir is the wanted one.

Fixes: 3f2cb699 ("android: vulkan: add support for libmesa_vulkan_util")
Signed-off-by: Tapani Pälli 
---
 src/vulkan/Android.mk | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/vulkan/Android.mk b/src/vulkan/Android.mk
index 9f71d8f..7653f34 100644
--- a/src/vulkan/Android.mk
+++ b/src/vulkan/Android.mk
@@ -1,4 +1,5 @@
 # Copyright © 2017 Mauro Rossi 
+# Copyright © 2017 Intel Corporation
 #
 # Permission is hereby granted, free of charge, to any person obtaining a
 # copy of this software and associated documentation files (the "Software"),
@@ -45,7 +46,7 @@ vulkan_api_xml = $(MESA_TOP)/src/vulkan/registry/vk.xml
 $(LOCAL_GENERATED_SOURCES): $(MESA_TOP)/src/vulkan/util/gen_enum_to_str.py 
$(vulkan_api_xml)
@echo "target Generated: $(PRIVATE_MODULE) <= $(notdir $(@))"
@mkdir -p $(dir $@)
-   $(hide) $(MESA_PYTHON2) $(MESA_TOP)/src/vulkan/util/gen_enum_to_str.py 
--xml $(vulkan_api_xml) --outdir $(intermediates)/util
+   $(hide) $(MESA_PYTHON2) $(MESA_TOP)/src/vulkan/util/gen_enum_to_str.py 
--xml $(vulkan_api_xml) --outdir $(dir $@)
 
 LOCAL_EXPORT_C_INCLUDE_DIRS := \
 $(intermediates)
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] configure.ac: require pthread-stubs only where available

2017-03-03 Thread Eric Engestrom
On Thursday, 2017-03-02 19:02:44 +, Emil Velikov wrote:
> From: Emil Velikov 
> 
> The project is a thing only for BSD platforms. Or in other words - for
> any other platforms building/installing pthread-stubs results only in a
> pthread-stub.pc file.
> 
> And even where it provides a DSO, there's a fundamental design issue
> with it - see the pthread-stubs mailing list for the specifics.
> 
> Cc: Jeremy Huddleston Sequoia 
> CC: Gary Wong 
> Cc: Randy Fishel 
> Cc: Niveditha Rau 
> Signed-off-by: Emil Velikov 

This patch has been on ArchLinux for a while, and is therefore
Tested-by: Eric Engestrom 
(as well as thousands of other people)

> ---
> Jeremy, others,
> 
> Afaict pthread-stubs expands to a simple .pc on your platforms, but a
> confirmation will be greatly appreciated.
> ---
>  configure.ac | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/configure.ac b/configure.ac
> index a3d1a00bdd..e94e46a0b8 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -799,7 +799,7 @@ fi
>  
>  dnl pthread-stubs is mandatory on targets where it exists
>  case "$host_os" in
> -cygwin* )
> +linux* | cygwin* | darwin* | solaris* | gnu*)
>  pthread_stubs_possible="no"
>  ;;
>  * )
> -- 
> 2.11.1
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Any updates on threaded GL dispatch?

2017-03-03 Thread Timothy Arceri

On 02/03/17 22:18, Marek Olšák wrote:

The bad news is my involvement is currently on hold due to other
projects and responsibilities.


I can probably spend some time on this. Seems like Gregory has taken 
care of most of the problems and it just needs someone to push it over 
the line.


Tim


The good news is I'm testing some ideas
on my new threaded Gallium dispatch. If I can make that improve perf
on all CPU-bound apps, I can do the same with glthread.

Marek

On Thu, Mar 2, 2017 at 3:54 AM, Dieter Nützel  wrote:

Hello Gregory and Marek,

are there any updates on threaded GL dispatch?
I mean this:

[-]


As a quick summary:
* there are now only 2 minor failures on piglit with my latest patches (sent
to Marek)
* I have a pending patch to allow asynchronous PBO transfer
* Now that piglit is crash-free I will give both glxgear and
glmark a try. Hopefully they will both be good.

Gregory



And this:


The number of tests run doesn't necessarily correspond to the amount
of test coverage. 10 tests doing different things can be more useful
than 1 tests doing the same thing.

Marek



Fair point.

As a side note, I tested both glxgear and glmark2 which are now crash-free
:)



Where can I grep 'latest' code with these patches?
GREAT success!

-Dieter

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 99987] Mesa 13+ breaks Xvnc (and similar X servers)

2017-03-03 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=99987

--- Comment #5 from Pierre Ossman  ---
The analysis on bug 99027 seems to be only about why it is crashing, and not
why Mesa has changed its requirements on the X server. There is some talk about
8 bit depth, but the issue occurs on standard bit depths as well.

Digging further is also on my todo list, but unfortunately not near the top.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] EGL/Android: Add EGL_EXT_buffer_age extension

2017-03-03 Thread Tapani Pälli
As discussed offline, this passes all dEQP tests for the extension on 
Android-IA (let's add this to commit message when pushing).


Reviewed-by: Tapani Pälli 

On 02/08/2017 04:46 AM, Xiaosong Wei wrote:

This patch implements the EGL_EXT_buffer_age extension for Android.
https://www.khronos.org/registry/EGL/extensions/EXT/EGL_EXT_buffer_age.txt
---
 src/egl/drivers/dri2/egl_dri2.h |  9 ++
 src/egl/drivers/dri2/platform_android.c | 51 -
 2 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h
index f3d09dc..992e5b3 100644
--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -295,6 +295,15 @@ struct dri2_egl_surface

/* EGL-owned buffers */
__DRIbuffer   *local_buffers[__DRI_BUFFER_COUNT];
+
+   /* Used to record all the buffers created by ANativeWindow and their ages.
+* Usually Android uses at most triple buffers in ANativeWindow
+* so hardcode the number of color_buffers to 3.
+*/
+   struct {
+  struct ANativeWindowBuffer *buffer;
+  int age;
+   } color_buffers[3], *back;
 #endif

 #if defined(HAVE_SURFACELESS_PLATFORM)
diff --git a/src/egl/drivers/dri2/platform_android.c 
b/src/egl/drivers/dri2/platform_android.c
index 79fe81a..109b419 100644
--- a/src/egl/drivers/dri2/platform_android.c
+++ b/src/egl/drivers/dri2/platform_android.c
@@ -165,6 +165,33 @@ droid_window_dequeue_buffer(struct dri2_egl_surface 
*dri2_surf)
dri2_surf->window->lockBuffer(dri2_surf->window, dri2_surf->buffer);
 #endif

+   /* Record all the buffers created by ANativeWindow and update back buffer
+* for updating buffer's age in swap_buffers.
+*/
+   EGLBoolean updated = EGL_FALSE;
+   for (int i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++) {
+  if (!dri2_surf->color_buffers[i].buffer) {
+ dri2_surf->color_buffers[i].buffer = dri2_surf->buffer;
+  }
+  if (dri2_surf->color_buffers[i].buffer == dri2_surf->buffer) {
+ dri2_surf->back = &dri2_surf->color_buffers[i];
+ updated = EGL_TRUE;
+ break;
+  }
+   }
+
+   if (!updated) {
+  /* In case of all the buffers were recreated by ANativeWindow, reset
+   * the color_buffers
+   */
+  for (int i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++) {
+ dri2_surf->color_buffers[i].buffer = NULL;
+ dri2_surf->color_buffers[i].age = 0;
+  }
+  dri2_surf->color_buffers[0].buffer = dri2_surf->buffer;
+  dri2_surf->back = &dri2_surf->color_buffers[0];
+   }
+
return EGL_TRUE;
 }

@@ -201,6 +228,7 @@ droid_window_enqueue_buffer(_EGLDisplay *disp, struct 
dri2_egl_surface *dri2_sur

 dri2_surf->buffer->common.decRef(&dri2_surf->buffer->common);
dri2_surf->buffer = NULL;
+   dri2_surf->back = NULL;

 mtx_lock(&disp->Mutex);

@@ -547,6 +575,20 @@ droid_image_get_buffers(__DRIdrawable *driDrawable,
return 1;
 }

+static EGLint
+droid_query_buffer_age(_EGLDriver *drv,
+  _EGLDisplay *disp, _EGLSurface *surface)
+{
+   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surface);
+
+   if (update_buffers(dri2_surf) < 0) {
+  _eglError(EGL_BAD_ALLOC, "droid_query_buffer_age");
+  return 0;
+   }
+
+   return dri2_surf->back->age;
+}
+
 static EGLBoolean
 droid_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)
 {
@@ -556,6 +598,12 @@ droid_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, 
_EGLSurface *draw)
if (dri2_surf->base.Type != EGL_WINDOW_BIT)
   return EGL_TRUE;

+   for (int i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++) {
+  if (dri2_surf->color_buffers[i].age > 0)
+ dri2_surf->color_buffers[i].age++;
+   }
+   dri2_surf->back->age = 1;
+
dri2_flush_drawable_for_swapbuffers(disp, draw);

if (dri2_surf->buffer)
@@ -989,7 +1037,7 @@ static struct dri2_egl_display_vtbl droid_display_vtbl = {
.swap_buffers_region = dri2_fallback_swap_buffers_region,
.post_sub_buffer = dri2_fallback_post_sub_buffer,
.copy_buffers = dri2_fallback_copy_buffers,
-   .query_buffer_age = dri2_fallback_query_buffer_age,
+   .query_buffer_age = droid_query_buffer_age,
.query_surface = droid_query_surface,
.create_wayland_buffer_from_image = 
dri2_fallback_create_wayland_buffer_from_image,
.get_sync_values = dri2_fallback_get_sync_values,
@@ -1080,6 +1128,7 @@ dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *dpy)
dpy->Extensions.ANDROID_framebuffer_target = EGL_TRUE;
dpy->Extensions.ANDROID_image_native_buffer = EGL_TRUE;
dpy->Extensions.ANDROID_recordable = EGL_TRUE;
+   dpy->Extensions.EXT_buffer_age = EGL_TRUE;

/* Fill vtbl last to prevent accidentally calling virtual function during
 * initialization.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev