Re: [Mesa-dev] [PATCH] gbm: Fix the alpha masks in the GBM format table.

2018-02-23 Thread Daniel Stone
Reviewed-by: Daniel Stone You can probably tell I only tested XRGB. Sorry!___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 4/9] anv/blorp: Allow indirect clear colors on blorp sources on gen7

2018-02-23 Thread Jason Ekstrand
---
 src/intel/vulkan/genX_blorp_exec.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/intel/vulkan/genX_blorp_exec.c 
b/src/intel/vulkan/genX_blorp_exec.c
index f956715..ac6e736 100644
--- a/src/intel/vulkan/genX_blorp_exec.c
+++ b/src/intel/vulkan/genX_blorp_exec.c
@@ -205,8 +205,8 @@ genX(blorp_exec)(struct blorp_batch *batch,
 * indirect fast-clear colors can cause GPU hangs if we don't stall first.
 * See genX(cmd_buffer_mi_memcpy) for more details.
 */
-   assert(params->src.clear_color_addr.buffer == NULL);
-   if (params->dst.clear_color_addr.buffer)
+   if (params->src.clear_color_addr.buffer ||
+   params->dst.clear_color_addr.buffer)
   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
 #endif
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 5/9] anv/blorp: Pass the clear address to blorp for subpass MSAA resolves

2018-02-23 Thread Jason Ekstrand
---
 src/intel/vulkan/anv_blorp.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index e71d90a..8f29bc8 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1325,6 +1325,12 @@ anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer 
*cmd_buffer)
   VK_IMAGE_ASPECT_COLOR_BIT,
   ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
   src_aux_usage, &src_surf);
+ if (src_aux_usage == ISL_AUX_USAGE_MCS) {
+src_surf.clear_color_addr = anv_to_blorp_address(
+   anv_image_get_clear_color_addr(cmd_buffer->device,
+  src_iview->image,
+  VK_IMAGE_ASPECT_COLOR_BIT));
+ }
  get_blorp_surf_for_anv_image(cmd_buffer->device, dst_iview->image,
   VK_IMAGE_ASPECT_COLOR_BIT,
   ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 7/9] anv/cmd_buffer: Add helpers for computing resolve predicates

2018-02-23 Thread Jason Ekstrand
We'll want to re-use the complex resolve predicate computations for MCS
resolves so it's nice to have them as helper functions.
---
 src/intel/vulkan/genX_cmd_buffer.c | 74 --
 1 file changed, 64 insertions(+), 10 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 3d886b0..47542ea 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -534,19 +534,21 @@ mi_alu(uint32_t opcode, uint32_t operand1, uint32_t 
operand2)
 
 #define CS_GPR(n) (0x2600 + (n) * 8)
 
+/* This is only really practical on haswell and above because it requires
+ * MI math in order to get it correct.
+ */
+#if GEN_GEN >= 8 || GEN_IS_HASWELL
 static void
-anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer *cmd_buffer,
-   const struct anv_image *image,
-   VkImageAspectFlagBits aspect,
-   uint32_t level, uint32_t array_layer,
-   enum isl_aux_op resolve_op,
-   enum anv_fast_clear_type fast_clear_supported)
+anv_cmd_compute_resolve_predicate(struct anv_cmd_buffer *cmd_buffer,
+  const struct anv_image *image,
+  VkImageAspectFlagBits aspect,
+  uint32_t level, uint32_t array_layer,
+  enum isl_aux_op resolve_op,
+  enum anv_fast_clear_type 
fast_clear_supported)
 {
-   const uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
struct anv_address fast_clear_type_addr =
   anv_image_get_fast_clear_type_addr(cmd_buffer->device, image, aspect);
 
-#if GEN_GEN >= 9
/* Name some registers */
const int image_fc_reg = MI_ALU_REG0;
const int fc_imm_reg = MI_ALU_REG1;
@@ -653,7 +655,38 @@ anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer 
*cmd_buffer,
   return;
}
 
-#else /* GEN_GEN <= 8 */
+   /* We use the first half of src0 for the actual predicate.  Set the second
+* half of src0 and all of src1 to 0 as the predicate operation will be
+* doing an implicit src0 != src1.
+*/
+   emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, 0);
+   emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1, 0);
+   emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);
+
+   anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
+  mip.LoadOperation= LOAD_LOADINV;
+  mip.CombineOperation = COMBINE_SET;
+  mip.CompareOperation = COMPARE_SRCS_EQUAL;
+   }
+}
+#endif /* GEN_GEN >= 8 || GEN_IS_HASWELL */
+
+#if GEN_GEN <= 8
+static void
+anv_cmd_simple_resolve_predicate(struct anv_cmd_buffer *cmd_buffer,
+ const struct anv_image *image,
+ VkImageAspectFlagBits aspect,
+ uint32_t level, uint32_t array_layer,
+ enum isl_aux_op resolve_op,
+ enum anv_fast_clear_type fast_clear_supported)
+{
+   struct anv_address fast_clear_type_addr =
+  anv_image_get_fast_clear_type_addr(cmd_buffer->device, image, aspect);
+
+   /* This only works for partial resolves and only when the clear color is
+* all or nothing.  On the upside, this emits less command streamer code
+* and works on Ivybridge and Bay Trail.
+*/
assert(resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE);
assert(fast_clear_supported != ANV_FAST_CLEAR_ANY);
 
@@ -673,7 +706,6 @@ anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer 
*cmd_buffer,
   sdi.Address  = fast_clear_type_addr;
   sdi.ImmediateData= 0;
}
-#endif
 
/* We use the first half of src0 for the actual predicate.  Set the second
 * half of src0 and all of src1 to 0 as the predicate operation will be
@@ -688,6 +720,28 @@ anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer 
*cmd_buffer,
   mip.CombineOperation = COMBINE_SET;
   mip.CompareOperation = COMPARE_SRCS_EQUAL;
}
+}
+#endif /* GEN_GEN <= 8 */
+
+static void
+anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer *cmd_buffer,
+   const struct anv_image *image,
+   VkImageAspectFlagBits aspect,
+   uint32_t level, uint32_t array_layer,
+   enum isl_aux_op resolve_op,
+   enum anv_fast_clear_type fast_clear_supported)
+{
+   const uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
+
+#if GEN_GEN >= 9
+   anv_cmd_compute_resolve_predicate(cmd_buffer, image,
+ aspect, level, array_layer,
+ resolve_op, fast_clear_supported);
+#else /* GEN_GEN <= 8 */
+   anv_cmd_simple_resolve_predicate(cmd_buffer, image,
+aspect, level, arra

[Mesa-dev] [PATCH v2 6/9] anv/cmd_buffer: Handle MCS identical to CCS_E in compute_aux_usage

2018-02-23 Thread Jason Ekstrand
This doesn't actually do anything because att_state->fast_clear is
determined based on the return value of anv_layout_to_fast_clear_type
which currently returns NONE for multisampled images.
---
 src/intel/vulkan/genX_cmd_buffer.c | 14 +-
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index ce54624..3d886b0 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -235,14 +235,9 @@ color_attachment_compute_aux_usage(struct anv_device * 
device,
 */
assert(att_state->aux_usage != ISL_AUX_USAGE_NONE);
 
-   if (att_state->aux_usage == ISL_AUX_USAGE_MCS) {
-  att_state->input_aux_usage = ISL_AUX_USAGE_MCS;
-  att_state->fast_clear = false;
-  return;
-   }
-
-   if (att_state->aux_usage == ISL_AUX_USAGE_CCS_E) {
-  att_state->input_aux_usage = ISL_AUX_USAGE_CCS_E;
+   if (att_state->aux_usage == ISL_AUX_USAGE_CCS_E ||
+   att_state->aux_usage == ISL_AUX_USAGE_MCS) {
+  att_state->input_aux_usage = att_state->aux_usage;
} else {
   /* From the Sky Lake PRM, RENDER_SURFACE_STATE::AuxiliarySurfaceMode:
*
@@ -274,7 +269,8 @@ color_attachment_compute_aux_usage(struct anv_device * 
device,
   }
}
 
-   assert(iview->image->planes[0].aux_surface.isl.usage & 
ISL_SURF_USAGE_CCS_BIT);
+   assert(iview->image->planes[0].aux_surface.isl.usage &
+  (ISL_SURF_USAGE_CCS_BIT | ISL_SURF_USAGE_MCS_BIT));
 
const struct isl_format_layout *view_fmtl =
   isl_format_get_layout(iview->planes[0].isl.format);
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 9/9] anv: Enable MSAA fast-clears

2018-02-23 Thread Jason Ekstrand
This speeds up the Sascha Willems multisampling demo by around 25% when
using 8x or 16x MSAA.
---
 src/intel/vulkan/anv_image.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
index a2bae7b..922c469 100644
--- a/src/intel/vulkan/anv_image.c
+++ b/src/intel/vulkan/anv_image.c
@@ -952,8 +952,10 @@ anv_layout_to_fast_clear_type(const struct gen_device_info 
* const devinfo,
 
assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
 
-   /* Multisample fast-clear is not yet supported. */
-   if (image->samples > 1)
+   /* We don't support MSAA fast-clears on Ivybridge or Bay Trail because they
+* lack the MI ALU which we need to determine the predicates.
+*/
+   if (devinfo->gen == 7 && !devinfo->is_haswell && image->samples > 1)
   return ANV_FAST_CLEAR_NONE;
 
switch (layout) {
@@ -964,12 +966,13 @@ anv_layout_to_fast_clear_type(const struct 
gen_device_info * const devinfo,
   return ANV_FAST_CLEAR_NONE;
 
default:
-  /* If the image has CCS_E enabled all the time then we can use
+  /* If the image has MCS or CCS_E enabled all the time then we can use
* fast-clear as long as the clear color is the default value of zero
* since this is the default value we program into every surface state
* used for texturing.
*/
-  if (image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E)
+  if (image->planes[plane].aux_usage == ISL_AUX_USAGE_MCS ||
+  image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E)
  return ANV_FAST_CLEAR_DEFAULT_VALUE;
   else
  return ANV_FAST_CLEAR_NONE;
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/9] intel/blorp: Add indirect clear color support to mcs_partial_resolve

2018-02-23 Thread Jason Ekstrand
This is a bit complicated because we have to get the indirect clear
color in there somehow.  In order to not do any more work in the shader
than needed, we set it up as it's own vertex binding which points
directly at the clear color address specified by the client.
---
 src/intel/blorp/blorp_clear.c | 25 +-
 src/intel/blorp/blorp_genX_exec.h | 54 ---
 src/intel/blorp/blorp_priv.h  |  1 +
 3 files changed, 70 insertions(+), 10 deletions(-)

diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
index dde116f..832e8ee 100644
--- a/src/intel/blorp/blorp_clear.c
+++ b/src/intel/blorp/blorp_clear.c
@@ -833,9 +833,18 @@ blorp_ccs_resolve(struct blorp_batch *batch,
batch->blorp->exec(batch, ¶ms);
 }
 
+static nir_ssa_def *
+blorp_nir_bit(nir_builder *b, nir_ssa_def *src, unsigned bit)
+{
+   return nir_iand(b, nir_ushr(b, src, nir_imm_int(b, bit)),
+  nir_imm_int(b, 1));
+}
+
 struct blorp_mcs_partial_resolve_key
 {
enum blorp_shader_type shader_type;
+   bool indirect_clear_color;
+   bool int_format;
uint32_t num_samples;
 };
 
@@ -845,6 +854,8 @@ blorp_params_get_mcs_partial_resolve_kernel(struct 
blorp_context *blorp,
 {
const struct blorp_mcs_partial_resolve_key blorp_key = {
   .shader_type = BLORP_SHADER_TYPE_MCS_PARTIAL_RESOLVE,
+  .indirect_clear_color = params->dst.clear_color_addr.buffer != NULL,
+  .int_format = isl_format_has_int_channel(params->dst.view.format),
   .num_samples = params->num_samples,
};
 
@@ -879,7 +890,18 @@ blorp_params_get_mcs_partial_resolve_kernel(struct 
blorp_context *blorp,
discard->src[0] = nir_src_for_ssa(nir_inot(&b, is_clear));
nir_builder_instr_insert(&b, &discard->instr);
 
-   nir_copy_var(&b, frag_color, v_color);
+   nir_ssa_def *clear_color = nir_load_var(&b, v_color);
+   if (blorp_key.indirect_clear_color && blorp->isl_dev->info->gen <= 8) {
+  /* Gen7-8 clear colors are stored as single 0/1 bits */
+  clear_color = nir_vec4(&b, blorp_nir_bit(&b, clear_color, 31),
+ blorp_nir_bit(&b, clear_color, 30),
+ blorp_nir_bit(&b, clear_color, 29),
+ blorp_nir_bit(&b, clear_color, 28));
+
+  if (!blorp_key.int_format)
+ clear_color = nir_i2f32(&b, clear_color);
+   }
+   nir_store_var(&b, frag_color, clear_color, 0xf);
 
struct brw_wm_prog_key wm_key;
brw_blorp_init_wm_prog_key(&wm_key);
@@ -925,6 +947,7 @@ blorp_mcs_partial_resolve(struct blorp_batch *batch,
 
params.num_samples = params.dst.surf.samples;
params.num_layers = num_layers;
+   params.dst_clear_color_as_input = surf->clear_color_addr.buffer != NULL;
 
memcpy(¶ms.wm_inputs.clear_color,
   surf->clear_color.f32, sizeof(float) * 4);
diff --git a/src/intel/blorp/blorp_genX_exec.h 
b/src/intel/blorp/blorp_genX_exec.h
index cea514e..cc408ca 100644
--- a/src/intel/blorp/blorp_genX_exec.h
+++ b/src/intel/blorp/blorp_genX_exec.h
@@ -297,7 +297,7 @@ static void
 blorp_emit_vertex_buffers(struct blorp_batch *batch,
   const struct blorp_params *params)
 {
-   struct GENX(VERTEX_BUFFER_STATE) vb[2];
+   struct GENX(VERTEX_BUFFER_STATE) vb[3];
memset(vb, 0, sizeof(vb));
 
struct blorp_address addr;
@@ -308,12 +308,20 @@ blorp_emit_vertex_buffers(struct blorp_batch *batch,
blorp_emit_input_varying_data(batch, params, &addr, &size);
blorp_fill_vertex_buffer_state(batch, vb, 1, addr, size, 0);
 
-   const unsigned num_dwords = 1 + GENX(VERTEX_BUFFER_STATE_length) * 2;
+   uint32_t num_vbs = 2;
+   if (params->dst_clear_color_as_input) {
+  blorp_fill_vertex_buffer_state(batch, vb, num_vbs++,
+ params->dst.clear_color_addr,
+ 
batch->blorp->isl_dev->ss.clear_value_size,
+ 0);
+   }
+
+   const unsigned num_dwords = 1 + num_vbs * GENX(VERTEX_BUFFER_STATE_length);
uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_BUFFERS), num_dwords);
if (!dw)
   return;
 
-   for (unsigned i = 0; i < 2; i++) {
+   for (unsigned i = 0; i < num_vbs; i++) {
   GENX(VERTEX_BUFFER_STATE_pack)(batch, dw, &vb[i]);
   dw += GENX(VERTEX_BUFFER_STATE_length);
}
@@ -440,21 +448,49 @@ blorp_emit_vertex_elements(struct blorp_batch *batch,
};
slot++;
 
-   for (unsigned i = 0; i < num_varyings; ++i) {
+   if (params->dst_clear_color_as_input) {
+  /* If the caller wants the destination indirect clear color, redirect
+   * to vertex buffer 2 where we stored it earlier.  The only users of
+   * an indirect clear color source have that as their only vertex
+   * attribute.
+   */
+  assert(num_varyings == 1);
   ve[slot] = (struct GENX(VERTEX_ELEMENT_STATE)) {
- .VertexBufferIndex = 1,
+ .VertexBufferIndex = 2,
  .Valid = true,
- .S

[Mesa-dev] [PATCH v2 8/9] anv/cmd_buffer: Add support for MCS fast-clears and resolves

2018-02-23 Thread Jason Ekstrand
---
 src/intel/vulkan/genX_cmd_buffer.c | 44 +-
 1 file changed, 39 insertions(+), 5 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 47542ea..98e58ca 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -754,6 +754,29 @@ anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer 
*cmd_buffer,
 array_layer, 1, resolve_op, true);
 }
 
+static void
+anv_cmd_predicated_mcs_resolve(struct anv_cmd_buffer *cmd_buffer,
+   const struct anv_image *image,
+   VkImageAspectFlagBits aspect,
+   uint32_t array_layer,
+   enum isl_aux_op resolve_op,
+   enum anv_fast_clear_type fast_clear_supported)
+{
+   assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
+   assert(resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE);
+
+#if GEN_GEN >= 8 || GEN_IS_HASWELL
+   anv_cmd_compute_resolve_predicate(cmd_buffer, image,
+ aspect, 0, array_layer,
+ resolve_op, fast_clear_supported);
+
+   anv_image_mcs_op(cmd_buffer, image, aspect,
+array_layer, 1, resolve_op, true);
+#else
+   unreachable("MCS resolves are unsupported on Ivybridge and Bay Trail");
+#endif
+}
+
 void
 genX(cmd_buffer_mark_image_written)(struct anv_cmd_buffer *cmd_buffer,
 const struct anv_image *image,
@@ -1096,9 +1119,15 @@ transition_color_buffer(struct anv_cmd_buffer 
*cmd_buffer,
 
   for (uint32_t a = 0; a < level_layer_count; a++) {
  uint32_t array_layer = base_layer + a;
- anv_cmd_predicated_ccs_resolve(cmd_buffer, image, aspect,
-level, array_layer, resolve_op,
-final_fast_clear);
+ if (image->samples == 1) {
+anv_cmd_predicated_ccs_resolve(cmd_buffer, image, aspect,
+   level, array_layer, resolve_op,
+   final_fast_clear);
+ } else {
+anv_cmd_predicated_mcs_resolve(cmd_buffer, image, aspect,
+   array_layer, resolve_op,
+   final_fast_clear);
+ }
   }
}
 
@@ -3448,8 +3477,13 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer 
*cmd_buffer,
 assert(iview->planes[0].isl.base_level == 0);
 assert(iview->planes[0].isl.base_array_layer == 0);
 
-anv_image_ccs_op(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT,
- 0, 0, 1, ISL_AUX_OP_FAST_CLEAR, false);
+if (iview->image->samples == 1) {
+   anv_image_ccs_op(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT,
+0, 0, 1, ISL_AUX_OP_FAST_CLEAR, false);
+} else {
+   anv_image_mcs_op(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT,
+0, 1, ISL_AUX_OP_FAST_CLEAR, false);
+}
 base_clear_layer++;
 clear_layer_count--;
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 0/9] anv: Enable fast-clears for multisampled images

2018-02-23 Thread Jason Ekstrand
We've had multisample compression support for some time and we've had
single-sampled fast-clears but multisampled fast clears haven't been all
that high on anyone's priority list.  I did send out a patch series for it
back in November: https://patchwork.freedesktop.org/series/33727/
Unfortunately, shortly after that, I ended up doing significant reworks to
the way that CCS fast-clears and resolves work which made hash of the
original series.

This series is accomplishes the same thing as the last for patches of the
original series.  The last two patches of the original series have been
completely rewritten and exploded into 7 patches.  Much of that explosion
is because I've broken it up to be a bit more reviewable.

Jason Ekstrand (9):
  intel/blorp: Add a helper for filling out VERTEX_BUFFER_STATE
  intel/blorp: Add indirect clear color support to mcs_partial_resolve
  anv/blorp: Add partial clear support to anv_image_mcs_op
  anv/blorp: Allow indirect clear colors on blorp sources on gen7
  anv/blorp: Pass the clear address to blorp for subpass MSAA resolves
  anv/cmd_buffer: Handle MCS identical to CCS_E in compute_aux_usage
  anv/cmd_buffer: Add helpers for computing resolve predicates
  anv/cmd_buffer: Add support for MCS fast-clears and resolves
  anv: Enable MSAA fast-clears

 src/intel/blorp/blorp_clear.c  |  25 ++-
 src/intel/blorp/blorp_genX_exec.h  | 121 +-
 src/intel/blorp/blorp_priv.h   |   1 +
 src/intel/vulkan/anv_blorp.c   |  21 +-
 src/intel/vulkan/anv_image.c   |  11 ++--
 src/intel/vulkan/genX_blorp_exec.c |   4 +-
 src/intel/vulkan/genX_cmd_buffer.c | 132 ++---
 7 files changed, 239 insertions(+), 76 deletions(-)

-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 3/9] anv/blorp: Add partial clear support to anv_image_mcs_op

2018-02-23 Thread Jason Ekstrand
---
 src/intel/vulkan/anv_blorp.c | 15 ++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index efa2ced..e71d90a 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1606,6 +1606,16 @@ anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
 ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
 ISL_AUX_USAGE_MCS, &surf);
 
+   if (mcs_op == ISL_AUX_OP_PARTIAL_RESOLVE) {
+  /* If we're doing a partial resolve, then we need the indirect clear
+   * color.  The clear operation just stomps the CCS to a particular value
+   * and don't care about format or clear value.
+   */
+  const struct anv_address clear_color_addr =
+ anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect);
+  surf.clear_color_addr = anv_to_blorp_address(clear_color_addr);
+   }
+
/* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
 *
 *"After Render target fast clear, pipe-control with color cache
@@ -1630,8 +1640,11 @@ anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
0, base_layer, layer_count,
0, 0, image->extent.width, image->extent.height);
   break;
-   case ISL_AUX_OP_FULL_RESOLVE:
case ISL_AUX_OP_PARTIAL_RESOLVE:
+  blorp_mcs_partial_resolve(&batch, &surf, surf.surf->format,
+base_layer, layer_count);
+  break;
+   case ISL_AUX_OP_FULL_RESOLVE:
case ISL_AUX_OP_AMBIGUATE:
default:
   unreachable("Unsupported MCS operation");
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 1/9] intel/blorp: Add a helper for filling out VERTEX_BUFFER_STATE

2018-02-23 Thread Jason Ekstrand
There are enough #ifs in there that it's kind-of pointless to duplicate
it for each buffer.
---
 src/intel/blorp/blorp_genX_exec.h | 69 +++
 1 file changed, 33 insertions(+), 36 deletions(-)

diff --git a/src/intel/blorp/blorp_genX_exec.h 
b/src/intel/blorp/blorp_genX_exec.h
index 737720a..cea514e 100644
--- a/src/intel/blorp/blorp_genX_exec.h
+++ b/src/intel/blorp/blorp_genX_exec.h
@@ -263,53 +263,50 @@ blorp_emit_input_varying_data(struct blorp_batch *batch,
 }
 
 static void
-blorp_emit_vertex_buffers(struct blorp_batch *batch,
-  const struct blorp_params *params)
+blorp_fill_vertex_buffer_state(struct blorp_batch *batch,
+   struct GENX(VERTEX_BUFFER_STATE) *vb,
+   unsigned idx,
+   struct blorp_address addr, uint32_t size,
+   uint32_t stride)
 {
-   struct GENX(VERTEX_BUFFER_STATE) vb[2];
-   memset(vb, 0, sizeof(vb));
+   vb[idx].VertexBufferIndex = idx;
+   vb[idx].BufferStartingAddress = addr;
+   vb[idx].BufferPitch = stride;
 
-   uint32_t size;
-   blorp_emit_vertex_data(batch, params, &vb[0].BufferStartingAddress, &size);
-   vb[0].VertexBufferIndex = 0;
-   vb[0].BufferPitch = 3 * sizeof(float);
 #if GEN_GEN >= 6
-   vb[0].VertexBufferMOCS = vb[0].BufferStartingAddress.mocs;
-#endif
-#if GEN_GEN >= 7
-   vb[0].AddressModifyEnable = true;
-#endif
-#if GEN_GEN >= 8
-   vb[0].BufferSize = size;
-#elif GEN_GEN >= 5
-   vb[0].BufferAccessType = VERTEXDATA;
-   vb[0].EndAddress = vb[0].BufferStartingAddress;
-   vb[0].EndAddress.offset += size - 1;
-#elif GEN_GEN == 4
-   vb[0].BufferAccessType = VERTEXDATA;
-   vb[0].MaxIndex = 2;
+   vb[idx].VertexBufferMOCS = addr.mocs;
 #endif
 
-   blorp_emit_input_varying_data(batch, params,
- &vb[1].BufferStartingAddress, &size);
-   vb[1].VertexBufferIndex = 1;
-   vb[1].BufferPitch = 0;
-#if GEN_GEN >= 6
-   vb[1].VertexBufferMOCS = vb[1].BufferStartingAddress.mocs;
-#endif
 #if GEN_GEN >= 7
-   vb[1].AddressModifyEnable = true;
+   vb[idx].AddressModifyEnable = true;
 #endif
+
 #if GEN_GEN >= 8
-   vb[1].BufferSize = size;
+   vb[idx].BufferSize = size;
 #elif GEN_GEN >= 5
-   vb[1].BufferAccessType = INSTANCEDATA;
-   vb[1].EndAddress = vb[1].BufferStartingAddress;
-   vb[1].EndAddress.offset += size - 1;
+   vb[idx].BufferAccessType = stride > 0 ? VERTEXDATA : INSTANCEDATA;
+   vb[idx].EndAddress = vb[idx].BufferStartingAddress;
+   vb[idx].EndAddress.offset += size - 1;
 #elif GEN_GEN == 4
-   vb[1].BufferAccessType = INSTANCEDATA;
-   vb[1].MaxIndex = 0;
+   vb[idx].BufferAccessType = stride > 0 ? VERTEXDATA : INSTANCEDATA;
+   vb[idx].MaxIndex = stride > 0 ? size / stride : 0;
 #endif
+}
+
+static void
+blorp_emit_vertex_buffers(struct blorp_batch *batch,
+  const struct blorp_params *params)
+{
+   struct GENX(VERTEX_BUFFER_STATE) vb[2];
+   memset(vb, 0, sizeof(vb));
+
+   struct blorp_address addr;
+   uint32_t size;
+   blorp_emit_vertex_data(batch, params, &addr, &size);
+   blorp_fill_vertex_buffer_state(batch, vb, 0, addr, size, 3 * sizeof(float));
+
+   blorp_emit_input_varying_data(batch, params, &addr, &size);
+   blorp_fill_vertex_buffer_state(batch, vb, 1, addr, size, 0);
 
const unsigned num_dwords = 1 + GENX(VERTEX_BUFFER_STATE_length) * 2;
uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_BUFFERS), num_dwords);
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/14] intel/blorp: Add indirect clear color support to mcs_partial_resolve

2018-02-23 Thread Jason Ekstrand
On Tue, Nov 14, 2017 at 9:25 PM, Jason Ekstrand 
wrote:

> On Tue, Nov 14, 2017 at 3:28 PM, Lionel Landwerlin <
> lionel.g.landwer...@intel.com> wrote:
>
>> On 13/11/17 16:12, Jason Ekstrand wrote:
>>
>>> This is a bit complicated because we have to get the indirect clear
>>> color in there somehow.  In order to not do any more work in the shader
>>> than needed, we set it up as it's own vertex binding which points
>>> directly at the clear color address specified by the client.
>>> ---
>>>   src/intel/blorp/blorp_clear.c | 25 +-
>>>   src/intel/blorp/blorp_genX_exec.h | 54 ++
>>> ++---
>>>   src/intel/blorp/blorp_priv.h  |  1 +
>>>   3 files changed, 70 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/src/intel/blorp/blorp_clear.c
>>> b/src/intel/blorp/blorp_clear.c
>>> index 8e7bc9f..ac582e7 100644
>>> --- a/src/intel/blorp/blorp_clear.c
>>> +++ b/src/intel/blorp/blorp_clear.c
>>> @@ -780,9 +780,18 @@ blorp_ccs_resolve(struct blorp_batch *batch,
>>>  batch->blorp->exec(batch, ¶ms);
>>>   }
>>>   +static nir_ssa_def *
>>> +blorp_nir_bit(nir_builder *b, nir_ssa_def *src, unsigned bit)
>>> +{
>>> +   return nir_iand(b, nir_ushr(b, src, nir_imm_int(b, bit)),
>>> +  nir_imm_int(b, 1));
>>> +}
>>> +
>>>   struct blorp_mcs_partial_resolve_key
>>>   {
>>>  enum blorp_shader_type shader_type;
>>> +   bool indirect_clear_color;
>>> +   bool int_format;
>>>  uint32_t num_samples;
>>>   };
>>>   @@ -792,6 +801,8 @@ blorp_params_get_mcs_partial_resolve_kernel(struct
>>> blorp_context *blorp,
>>>   {
>>>  const struct blorp_mcs_partial_resolve_key blorp_key = {
>>> .shader_type = BLORP_SHADER_TYPE_MCS_PARTIAL_RESOLVE,
>>> +  .indirect_clear_color = params->dst.clear_color_addr.buffer !=
>>> NULL,
>>> +  .int_format = isl_format_has_int_channel(par
>>> ams->dst.view.format),
>>> .num_samples = params->num_samples,
>>>  };
>>>   @@ -826,7 +837,18 @@ blorp_params_get_mcs_partial_resolve_kernel(struct
>>> blorp_context *blorp,
>>>  discard->src[0] = nir_src_for_ssa(nir_inot(&b, is_clear));
>>>  nir_builder_instr_insert(&b, &discard->instr);
>>>   -   nir_copy_var(&b, frag_color, v_color);
>>> +   nir_ssa_def *clear_color = nir_load_var(&b, v_color);
>>> +   if (blorp_key.indirect_clear_color && blorp->isl_dev->info->gen <=
>>> 8) {
>>> +  /* Gen7-8 clear colors are stored as single 0/1 bits */
>>> +  clear_color = nir_vec4(&b, blorp_nir_bit(&b, clear_color, 31),
>>> + blorp_nir_bit(&b, clear_color, 30),
>>> + blorp_nir_bit(&b, clear_color, 29),
>>> + blorp_nir_bit(&b, clear_color, 28));
>>>
>>
>> Won't this need some swizzling somewhere?
>> Either when storing the color in the fast clear colors (behind the aux
>> surface) or here in the shader?
>>
>
> Good question...  I'll have to think on it more but we probably do.
>

No, we don't.  The clear color is, I believe, simply storred in the
format-specified channel order.  We're just taking that clear color and
committing it to the surface so we don't care about any swizzling.


> --Jason
>
>
>> +
>>> +  if (!blorp_key.int_format)
>>> + clear_color = nir_i2f32(&b, clear_color);
>>> +   }
>>> +   nir_store_var(&b, frag_color, clear_color, 0xf);
>>>struct brw_wm_prog_key wm_key;
>>>  brw_blorp_init_wm_prog_key(&wm_key);
>>> @@ -872,6 +894,7 @@ blorp_mcs_partial_resolve(struct blorp_batch *batch,
>>>params.num_samples = params.dst.surf.samples;
>>>  params.num_layers = num_layers;
>>> +   params.dst_clear_color_as_input = surf->clear_color_addr.buffer !=
>>> NULL;
>>>memcpy(¶ms.wm_inputs.clear_color,
>>> surf->clear_color.f32, sizeof(float) * 4);
>>> diff --git a/src/intel/blorp/blorp_genX_exec.h
>>> b/src/intel/blorp/blorp_genX_exec.h
>>> index 7548392..c3df2d5 100644
>>> --- a/src/intel/blorp/blorp_genX_exec.h
>>> +++ b/src/intel/blorp/blorp_genX_exec.h
>>> @@ -297,7 +297,7 @@ static void
>>>   blorp_emit_vertex_buffers(struct blorp_batch *batch,
>>> const struct blorp_params *params)
>>>   {
>>> -   struct GENX(VERTEX_BUFFER_STATE) vb[2];
>>> +   struct GENX(VERTEX_BUFFER_STATE) vb[3];
>>>  memset(vb, 0, sizeof(vb));
>>>struct blorp_address addr;
>>> @@ -308,12 +308,20 @@ blorp_emit_vertex_buffers(struct blorp_batch
>>> *batch,
>>>  blorp_emit_input_varying_data(batch, params, &addr, &size);
>>>  blorp_fill_vertex_buffer_state(batch, vb, 1, addr, size, 0);
>>>   -   const unsigned num_dwords = 1 + GENX(VERTEX_BUFFER_STATE_length)
>>> * 2;
>>> +   uint32_t num_vbs = 2;
>>> +   if (params->dst_clear_color_as_input) {
>>> +  blorp_fill_vertex_buffer_state(batch, vb, num_vbs++,
>>> + params->dst.clear_color_addr,
>>> + batch->blorp->isl_dev->ss.cle
>>> ar_value_size,
>>> 

Re: [Mesa-dev] [PATCH 1/1] ac/nir: use ordered float comparisons except for not equal

2018-02-23 Thread Timothy Arceri
Using this on its own I believe will cause CTS regressions, which is 
what the other patches were about. Feel free to take on the feedback and 
come up with a proper solution. I'm not really sure how to progress this.



On 24/02/18 00:21, Samuel Pitoiset wrote:

Original patch from Timothy Arceri, I have just fixed the
not equal case locally.

This fixes one important rendering issue in Wolfenstein 2
(the cutscene transition issue).

RadeonSI uses the same ordered comparisons, so I guess that
what we should do as well.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104302
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104905
Cc: 
Signed-off-by: Samuel Pitoiset 
---
  src/amd/common/ac_nir_to_llvm.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 687157..bc1d16d2a4 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1801,16 +1801,16 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]);
break;
case nir_op_feq:
-   result = emit_float_cmp(&ctx->ac, LLVMRealUEQ, src[0], src[1]);
+   result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]);
break;
case nir_op_fne:
result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]);
break;
case nir_op_flt:
-   result = emit_float_cmp(&ctx->ac, LLVMRealULT, src[0], src[1]);
+   result = emit_float_cmp(&ctx->ac, LLVMRealOLT, src[0], src[1]);
break;
case nir_op_fge:
-   result = emit_float_cmp(&ctx->ac, LLVMRealUGE, src[0], src[1]);
+   result = emit_float_cmp(&ctx->ac, LLVMRealOGE, src[0], src[1]);
break;
case nir_op_fabs:
result = emit_intrin_1f_param(&ctx->ac, "llvm.fabs",


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH crucible 3/3] Increase descriptor pool maxSets for miptree tests.

2018-02-23 Thread Jason Ekstrand
All three are

Reviewed-by: Jason Ekstrand 

On Fri, Feb 23, 2018 at 5:29 PM, Bas Nieuwenhuizen 
wrote:

> From: Bas Nieuwenhuizen 
>
> Some tests, e.g. func.miptree.r8g8b8a8-unorm.
> aspect-color.view-2d.levels01.array02.extent-512x512.upload-
> copy-with-draw.download-copy-with-draw
> use 2 sets on each phase for 2 phases without deallocating in between,
> so we need 4 sets.
> ---
>  src/framework/test/t_phase_setup.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/framework/test/t_phase_setup.c
> b/src/framework/test/t_phase_setup.c
> index 4a99d1e..199fea1 100644
> --- a/src/framework/test/t_phase_setup.c
> +++ b/src/framework/test/t_phase_setup.c
> @@ -288,7 +288,7 @@ t_setup_descriptor_pool(void)
>  .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
>  .pNext = NULL,
>  .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
> -.maxSets = 2,
> +.maxSets = 4,
>  .poolSizeCount = VK_DESCRIPTOR_TYPE_RANGE_SIZE,
>  .pPoolSizes = pool_sizes
>  };
> --
> 2.16.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH crucible 1/3] Allow multiple devices.

2018-02-23 Thread Bas Nieuwenhuizen
From: Bas Nieuwenhuizen 

Ideally we would be able to choose the device of course, but at least
allowing tests to run is a good start.
---
 src/qonos/qonos.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/qonos/qonos.c b/src/qonos/qonos.c
index a1c7307..4f0236c 100644
--- a/src/qonos/qonos.c
+++ b/src/qonos/qonos.c
@@ -31,7 +31,7 @@ qoEnumeratePhysicalDevices(VkInstance instance, uint32_t 
*count,
 VkResult result;
 
 result = vkEnumeratePhysicalDevices(instance, count, physical_devices);
-t_assert(result == VK_SUCCESS);
+t_assert(result == VK_SUCCESS || result == VK_INCOMPLETE);
 }
 
 void
-- 
2.16.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH crucible 3/3] Increase descriptor pool maxSets for miptree tests.

2018-02-23 Thread Bas Nieuwenhuizen
From: Bas Nieuwenhuizen 

Some tests, e.g. 
func.miptree.r8g8b8a8-unorm.aspect-color.view-2d.levels01.array02.extent-512x512.upload-copy-with-draw.download-copy-with-draw
use 2 sets on each phase for 2 phases without deallocating in between,
so we need 4 sets.
---
 src/framework/test/t_phase_setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/framework/test/t_phase_setup.c 
b/src/framework/test/t_phase_setup.c
index 4a99d1e..199fea1 100644
--- a/src/framework/test/t_phase_setup.c
+++ b/src/framework/test/t_phase_setup.c
@@ -288,7 +288,7 @@ t_setup_descriptor_pool(void)
 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
 .pNext = NULL,
 .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
-.maxSets = 2,
+.maxSets = 4,
 .poolSizeCount = VK_DESCRIPTOR_TYPE_RANGE_SIZE,
 .pPoolSizes = pool_sizes
 };
-- 
2.16.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH crucible 2/3] Fix timestamp frequency.

2018-02-23 Thread Bas Nieuwenhuizen
From: Bas Nieuwenhuizen 

Per spec:

"timestampPeriod is the number of nanoseconds required for a timestamp query to 
be
incremented by 1."

So:

period = nanoseconds/cycle and difference = cycles
difference * period = nanoseconds
difference * period / 100 = milliseconds
difference / (100 / period) = milliseconds

therefore freq = 100 / period, since we compute elapsed ms from this.

Fixes func.query.timestamp on radv.
---

This likely breaks the test for anv, so I would appreciate it if someone would 
either
look at anv or prove my commit wrong.

 src/tests/func/query/timestamp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tests/func/query/timestamp.c b/src/tests/func/query/timestamp.c
index ea7f683..aef007d 100644
--- a/src/tests/func/query/timestamp.c
+++ b/src/tests/func/query/timestamp.c
@@ -66,7 +66,7 @@ test_timestamp(void)
 t_assert(poll(NULL, 0, 100) == 0);
 b = get_timestamp();
 
-freq = 1 / (t_physical_dev_props->limits.timestampPeriod * 1000);
+freq = 100 / t_physical_dev_props->limits.timestampPeriod;
 elapsed_ms = (b - a) / freq;
 printf("difference: %" PRIu64 " - %" PRIu64 " = %" PRIu64 "\n", b / freq, 
a / freq, elapsed_ms);
 if (elapsed_ms < 90 || elapsed_ms > 110)
-- 
2.16.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv: Really use correct HTILE expanded words.

2018-02-23 Thread Bas Nieuwenhuizen
On Fri, Feb 23, 2018 at 11:52 AM, James Legg  wrote:
> On Thu, 2018-02-22 at 22:48 +0100, Bas Nieuwenhuizen wrote:
>> since IIRC the last change was also done due to Feral noticing and we
>> are clearly lacking testcases in this area, can you check that that
>> case still works for you? Thanks a lot!
>
> I looked at an issue that was fixed with 5158603182fe7435 (and still
> occurs if I change the clear word back to 0x) and I can confirm
> this patch does not reintroduce it.

Nice, I fixed the Fixes tag and pushed it. Thanks a lot!

>
>> On Thu, Feb 22, 2018 at 5:57 PM, James Legg  
>> wrote:
>> > When transitioning to an htile compressed depth format, Set the full
>> > depth range, so later rasterization can pass HiZ. Previously, for depth
>> > only formats, the depth range was set to 0 to 0. This caused unwanted
>> > HiZ rejections with a VK_FORMAT_D16_UNORM depth buffer
>> > (VK_FORMAT_D32_SFLOAT was not affected somehow).
>> >
>> > These values are derived from PAL [0], since I can't find the
>> > specification describing the htile values.
>> >
>> > Fixes 5158603182fe7435: radv: Use correct HTILE expanded words.
>> >
>> > [0] 
>> > https://github.com/GPUOpen-Drivers/pal/blob/5cba4ecbda9452773f59692f5915301e7db4a183/src/core/hw/gfxip/gfx9/gfx9MaskRam.cpp#L1500
>> >
>> > CC: Dave Airlie 
>> > CC: Bas Nieuwenhuizen 
>> > Cc: mesa-sta...@lists.freedesktop.org
>> > ---
>> >  src/amd/vulkan/radv_cmd_buffer.c | 6 +++---
>> >  1 file changed, 3 insertions(+), 3 deletions(-)
>> >
>> > diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
>> > b/src/amd/vulkan/radv_cmd_buffer.c
>> > index 8a384b114c..2b41baea3d 100644
>> > --- a/src/amd/vulkan/radv_cmd_buffer.c
>> > +++ b/src/amd/vulkan/radv_cmd_buffer.c
>> > @@ -3440,8 +3440,8 @@ void radv_CmdEndRenderPass(
>> >
>> >  /*
>> >   * For HTILE we have the following interesting clear words:
>> > - *   0x030f: Uncompressed for depth+stencil HTILE.
>> > - *   0x000f: Uncompressed for depth only HTILE.
>> > + *   0xf30f: Uncompressed, full depth range, for depth+stencil HTILE
>> > + *   0xfffc000f: Uncompressed, full depth range, for depth only HTILE.
>> >   *   0xfff0: Clear depth to 1.0
>> >   *   0x: Clear depth to 0.0
>> >   */
>> > @@ -3489,7 +3489,7 @@ static void 
>> > radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffe
>> > radv_initialize_htile(cmd_buffer, image, range, 0);
>> > } else if (!radv_layout_is_htile_compressed(image, src_layout, 
>> > src_queue_mask) &&
>> >radv_layout_is_htile_compressed(image, dst_layout, 
>> > dst_queue_mask)) {
>> > -   uint32_t clear_value = 
>> > vk_format_is_stencil(image->vk_format) ? 0x30f : 0xf;
>> > +   uint32_t clear_value = 
>> > vk_format_is_stencil(image->vk_format) ? 0xf30f : 0xfffc000f;
>> > radv_initialize_htile(cmd_buffer, image, range, 
>> > clear_value);
>> > } else if (radv_layout_is_htile_compressed(image, src_layout, 
>> > src_queue_mask) &&
>> >!radv_layout_is_htile_compressed(image, dst_layout, 
>> > dst_queue_mask)) {
>> > --
>> > 2.14.3
>> >
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/22] nir: Recognize some more open-coded fmin / fmax

2018-02-23 Thread Jason Ekstrand
On Fri, Feb 23, 2018 at 3:55 PM, Ian Romanick  wrote:

> From: Ian Romanick 
>
> shader-db results:
>
> Haswell, Broadwell, and Skylake had similar results. (Skylake shown)
> total instructions in shared programs: 14514817 -> 14514808 (<.01%)
> instructions in affected programs: 229 -> 220 (-3.93%)
> helped: 3
> HURT: 0
> helped stats (abs) min: 1 max: 4 x̄: 3.00 x̃: 4
> helped stats (rel) min: 2.86% max: 4.12% x̄: 3.70% x̃: 4.12%
>
> total cycles in shared programs: 533145211 -> 533144939 (<.01%)
> cycles in affected programs: 37268 -> 36996 (-0.73%)
> helped: 8
> HURT: 0
> helped stats (abs) min: 2 max: 134 x̄: 34.00 x̃: 2
> helped stats (rel) min: 0.02% max: 14.22% x̄: 3.53% x̃: 0.05%
>
> Sandy Bridge and Ivy Bridge had similar results. (Ivy Bridge shown)
> total cycles in shared programs: 257618409 -> 257618403 (<.01%)
> cycles in affected programs: 12582 -> 12576 (-0.05%)
> helped: 3
> HURT: 0
> helped stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2
> helped stats (rel) min: 0.05% max: 0.05% x̄: 0.05% x̃: 0.05%
>
> No changes on Iron Lake or GM45.
>
> Signed-off-by: Ian Romanick 
> ---
>  src/compiler/nir/nir_opt_algebraic.py | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/src/compiler/nir/nir_opt_algebraic.py
> b/src/compiler/nir/nir_opt_algebraic.py
> index d40d59b..f5f9e94 100644
> --- a/src/compiler/nir/nir_opt_algebraic.py
> +++ b/src/compiler/nir/nir_opt_algebraic.py
> @@ -170,6 +170,8 @@ optimizations = [
> (('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
> (('bcsel', ('flt', b, a), b, a), ('fmin', a, b)),
> (('bcsel', ('flt', a, b), b, a), ('fmax', a, b)),
> +   (('bcsel', ('fge', b, a), a, b), ('fmin', a, b)),
> +   (('bcsel', ('fge', a, b), a, b), ('fmax', a, b)),
>

Please flag as inexact.  As per the stupid GLSL definition, these are not
the same as fmin/fmax when you throw in a NaN.


> (('bcsel', ('inot', a), b, c), ('bcsel', a, c, b)),
> (('bcsel', a, ('bcsel', a, b, c), d), ('bcsel', a, b, d)),
> (('bcsel', a, True, 'b@bool'), ('ior', a, b)),
> --
> 2.9.5
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 104654] r600/sb: Alien Isolation GPU lock

2018-02-23 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104654

Gert Wollny  changed:

   What|Removed |Added

 Attachment #136842|0   |1
is obsolete||

--- Comment #11 from Gert Wollny  ---
Created attachment 137570
  --> https://bugs.freedesktop.org/attachment.cgi?id=137570&action=edit
Prelimiary fix, needs testing

The problem is actually not directly with sb: in the translation from TGSI the
jump offsets are not correctly evaluated when the last CF slot is ALU_EXT. I
have a patch ready, only need to test whether there are any piglit regressions.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 04/22] i965: Silence unused parameter warnings in generated OA code

2018-02-23 Thread Lionel Landwerlin

Rb!

On 23/02/18 23:55, Ian Romanick wrote:

From: Ian Romanick 

Reduces my build from 6301 warnings to 2075 warnings by silencing 4226
instances of things like

src/mesa/drivers/dri/i965/i965@sta/brw_oa_hsw.c: In function 
‘hsw__render_basic__gpu_core_clocks__read’:
src/mesa/drivers/dri/i965/i965@sta/brw_oa_hsw.c:41:62: warning: unused 
parameter ‘brw’ [-Wunused-parameter]
  hsw__render_basic__gpu_core_clocks__read(struct brw_context *brw,
   ^~~

Signed-off-by: Ian Romanick 
---
  src/mesa/drivers/dri/i965/brw_oa.py | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_oa.py 
b/src/mesa/drivers/dri/i965/brw_oa.py
index 576ea66..7931c82 100644
--- a/src/mesa/drivers/dri/i965/brw_oa.py
+++ b/src/mesa/drivers/dri/i965/brw_oa.py
@@ -260,7 +260,7 @@ def output_counter_read(set, counter, counter_vars):
  
  c("static " + ret_type)

  read_sym = "{0}__{1}__{2}__read".format(set.get('chipset').lower(), 
set.get('underscore_name'), counter.get('underscore_name'))
-c(read_sym + "(struct brw_context *brw,\n")
+c(read_sym + "(MAYBE_UNUSED struct brw_context *brw,\n")
  c_indent(len(read_sym) + 1)
  c("const struct brw_perf_query_info *query,\n")
  c("uint64_t *accumulator)\n")



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/7] vulkan: Add KHR_display extension to anv and radv using DRM

2018-02-23 Thread Jason Ekstrand
On Fri, Feb 23, 2018 at 3:43 PM, Keith Packard  wrote:

> Jason Ekstrand  writes:
>
> > I think I like option 1 (KEITHP_kms_display).  If the client knows the
> > difference between render and primary for 2, then they are most likely
> > already opening the master node themselves or at least have access to
> > the FD.
>
> Ok, I'll work on cleaning up that extension and renaming it. I have no
> idea how to get new words into the Vulkan spec, but it would be good to
> have that done too.
>

Once we're sure that's what we want, create an MR against the spec that
just adds enough to the XML to reserve your extension number.  That will
get merged almost immediately.  Then make a second one with the actual
extension text and we'll iterate on that either in Khronos gitlab or, if
you prefer, you can send it as a patch to mesa-dev and then make a Khrons
MR once it's baked.


> I guess the question is whether I should bother to leave the current
> try-to-open-primary kludge in place. In good news, under X, both radv
> and anv "fail" to use the primary device as another master is already
> running, so at least we aren't running with a primary FD open?
>

See also my comments about GEM handle ownership.


> > Sorry, I'm just not feeling all that opinionated about this at the
> moment.
> > :-)
>
> No more than I; either way is fine with me, if you're happy to use
> something like the existing code I've got, that's even nicer.
>
> > Clearly, we need systemd-edidd. :-)
>
> A library would be nice; we have the edid data everywhere, we just don't
> have it in a useful form except inside the X server and kernel.
>

Yeah, in the scary new world of Wayland compositors, having an edid library
would be a very good thing.  No sense in having everyone fail to handle it
properly themselves.


> > Yes, *if* it has a preferred resolution, we should return that one.  If
> it
> > doesn't, we should walk the list and return the largest instead of just
> > defaulting to 1024x768.  At least that's what the spec seems to say to
> > me.
>
> Oh. I totally missed that part. Yeah, that's just wrong. I've just
> pushed a patch that finds the largest mode when there isn't a preferred
> one. Oddly, I have no devices here which don't specify a preferred mode,
> so it will be somewhat difficult to test...
>
> > Yup.  Let's just drop INHERIT and only advertise OPAQUE.
>
> Done.
>
> I've updated my drm-lease branch with all of these changes merged into
> the existing patches (and so noted), plus the new patch described above
> which looks for the largest mode when no preferred mode is specified.
>
> Thanks again for all of your careful review; while we haven't changed
> any significant semantics, we have found a bunch of outright bugs in the
> code.
>

Glad to help. :-)  I figure I should learn something about KMS one day and
reviewing this is as good an opportunity as any.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH mesa 5/5] WIP - meson: add a message block at the end of the configuration stage

2018-02-23 Thread Dylan Baker
Quoting Eric Engestrom (2018-02-23 10:08:48)
> The messages are basically the same as the ones in configure.ac
> 
> Signed-off-by: Eric Engestrom 
> ---
> Sent out because it's as much as I could do before the weekend, and
> before I try to figure out the last bits I'd love some confirmation that
> this is what we want :)
> ---
>  meson.build | 128 
> 
>  src/gallium/meson.build |  13 +
>  2 files changed, 141 insertions(+)
> 
> diff --git a/meson.build b/meson.build
> index 770fdc7e50653bcfa7c2..0c84d09c02322ed7a80b 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -1263,3 +1263,131 @@ env_test.set('NM', find_program('nm').path())
>  subdir('include')
>  subdir('bin')
>  subdir('src')
> +
> +
> +#
> +# Output some configuration info for the user
> +#
> +message('')
> +message('prefix:  ' + get_option('prefix'))
> +message('libdir:  ' + join_paths(get_option('prefix'), 
> get_option('libdir')))
> +message('includedir:  ' + join_paths(get_option('prefix'), 
> get_option('includedir')))

I'd really like to avoid having millions of message() calls, what about:
messsage('''

prefix: @0@
libdir: @1@
includedir: @2@

...

'''.format(
 ...
))

Dylan

> +
> +# API info
> +message('')
> +message('OpenGL:  @0@ (ES1: @1@ ES2: 
> @2@)'.format(with_opengl, with_gles1, with_gles2))
> +
> +# Driver info
> +message('')
> +if with_osmesa == 'gallium'
> +  message('OSMesa:  lib@0@ 
> (Gallium)'.format(osmesa_lib_name))
> +elif with_osmesa == 'classic'
> +  message('OSMesa:  lib@0@'.format(osmesa_lib_name))
> +elif with_osmesa == 'none'
> +  message('OSMesa:  no')
> +endif
> +
> +message('')
> +if with_dri
> +  message('DRI platform:' + with_dri_platform)
> +  if false #DRI_DIRS
> +message('DRI drivers: no')
> +  else
> +message('DRI drivers: ' + _dri_drivers)
> +  endif
> +  message('DRI driver dir:  ' + join_paths(get_option('prefix'), 
> dri_drivers_path))
> +endif
> +
> +if with_glx == 'dri'
> +message('GLX: DRI-based')
> +elif with_glx == 'xlib'
> +message('GLX: Xlib-based')
> +elif with_glx == 'gallium-xlib'
> +message('GLX: Xlib-based (Gallium)')
> +else
> +message('GLX: @0@'.format(with_glx))
> +endif
> +
> +# EGL
> +message('')
> +message('EGL: @0@'.format(with_egl))
> +if with_egl
> +  #message('EGL drivers: @0@ @1@'.format(
> +#with_dri  ? 'builtin:egl_dri2' : '',
> +#with_dri3 ? 'builtin:egl_dri3' : ''))
> +endif
> +message('GBM: @0@'.format(with_gbm))
> +
> +message('EGL/Vulkan/VL platforms:   @0@'.format(_platforms))
> +
> +# Vulkan
> +message('')
> +if with_any_vk
> +  message('Vulkan drivers:  ' + _vulkan_drivers)
> +  message('Vulkan ICD dir:  ' + join_paths(get_option('prefix'), 
> with_vulkan_icd_dir))
> +else
> +  message('Vulkan drivers:  no')
> +endif
> +
> +message('')
> +if with_llvm
> +  message('llvm:yes')
> +# message('llvm-config: ' + _llvm_config) #TODO
> +  message('llvm-version:' + '.'.join(_llvm_version))
> +else
> +  message('llvm:no')
> +endif
> +
> +message('')
> +if with_gallium
> +  message('Gallium drivers: ' + _gallium_drivers)
> +  message('Gallium st:  ' + ','.join(with_st))
> +else
> +  message('Gallium: no')
> +endif
> +
> +message('')
> +message('HUD extra stats: @0@'.format(with_gallium_extra_hud))
> +message('HUD lmsensors:   @0@'.format(with_lmsensors))
> +
> +if with_gallium_swr
> +  message('')
> +  if false # TODO: with_swr_builtin
> +message('SWR archs:   @0@ 
> (builtin)'.format(get_option('swr-arches')))
> +  else
> +message('SWR archs:   @0@'.format(get_option('swr-arches')))
> +  endif
> +endif
> +
> +# Libraries
> +message('')
> +#message('Shared libs: @0@'.format(with_shared)) #TODO
> +#message('Static libs: @0@'.format(with_static)) #TODO
> +message('Shared-glapi:@0@'.format(with_shared_glapi))
> +
> +# Compiler options
> +message('')
> +message('CFLAGS:  ' + ' '.join(c_args))
> +message('CXXFLAGS:' + ' '.join(cpp_args))
> +#message('LDFLAGS: ' + ' '.join(c_link_args)) #XXX: doesn't 
> really make sense in meson
> +message('Macros:  ' + ' '.join(pre_args))
> +message('')
> +
> +if with_llvm and false #TODO
> +  message('LLVM_CFLAGS: $LLVM_CFLAGS')
> +  message('LLVM_CXXFLAGS:   $LLVM_CXXFLAGS')
> +  message('LLVM_CPPFLAGS:   $LLVM_CPPFLAGS')
> +  message('LL

Re: [Mesa-dev] [RFC PATCH mesa 4/5] meson: store the result of whether we have gallium-extra-hud and lmsensors

2018-02-23 Thread Dylan Baker
Quoting Eric Engestrom (2018-02-23 10:08:47)
> Signed-off-by: Eric Engestrom 
> ---
>  meson.build | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/meson.build b/meson.build
> index 6c22601f9e8864f08e08..770fdc7e50653bcfa7c2 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -1187,15 +1187,19 @@ if with_platform_x11
>endif
>  endif
>  
> +with_gallium_extra_hud = false
>  if get_option('gallium-extra-hud')
>pre_args += '-DHAVE_GALLIUM_EXTRA_HUD=1'
> +  with_gallium_extra_hud = true
>  endif

I'm not a huge fan of flags like this, we can always call get_option() again in
this case.

>  
>  _sensors = get_option('lmsensors')
> +with_lmsensors = false
>  if _sensors != 'false'
>dep_lmsensors = cc.find_library('libsensors', required : _sensors == 
> 'true')
>if dep_lmsensors.found()
>  pre_args += '-DHAVE_LIBSENSORS=1'
> +with_lmsensors = true
>endif
>  else
>dep_lmsensors = []

In this case the flag is probably the right thing to do, we could always do
`dep_lmsensors != [] and dep_lmsensors.found()` but that's kinda ugly.

Dylan


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] glxinfo/wglinfo: print list of 4.3 shading language versions

2018-02-23 Thread Mike Lothian
The OCD side of me says it doesn't look right :P The lazy side of me says
it's fine

Either way it doesn't make much difference, it's only exposed with the
extended output anyway

On Fri, 23 Feb 2018 at 16:32 Brian Paul  wrote:

> On 02/23/2018 03:06 AM, Mike Lothian wrote:
> > This still looks wrong, there's now two 4.3 sections:
>
> Yeah, but I guess I don't see it as a big deal.  Fixing it would involve
> a bit of a hack.  While processing the big list of queries, I'd have to
> check for extension=="4.3" and call a function to query/print the GLSL
> versions.
>
> What do you think?
>
> -Brian
>
>
>
> >
> >4.3:
> >  GL_MAX_ELEMENT_INDEX = 2147483647
> >  GL_MAX_COMPUTE_UNIFORM_BLOCKS = 14
> >  GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS = 32
> >  GL_MAX_COMPUTE_IMAGE_UNIFORMS = 32
> >  GL_MAX_COMPUTE_SHARED_MEMORY_SIZE = 65536
> >  GL_MAX_COMPUTE_UNIFORM_COMPONENTS = 16384
> >  GL_MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS = 16
> >  GL_MAX_COMPUTE_ATOMIC_COUNTERS = 4096
> >  GL_MAX_COMBINED_COMPUTE_UNIFORM_COMPONENTS = 245760
> >  GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS = 1792
> >  GL_MAX_DEBUG_MESSAGE_LENGTH = 4096
> >  GL_MAX_DEBUG_LOGGED_MESSAGES = 10
> >  GL_MAX_DEBUG_GROUP_STACK_DEPTH = 64
> >  GL_MAX_LABEL_LENGTH = 256
> >  GL_MAX_UNIFORM_LOCATIONS = 98304
> >  GL_MAX_FRAMEBUFFER_WIDTH = 16384
> >  GL_MAX_FRAMEBUFFER_HEIGHT = 16384
> >  GL_MAX_FRAMEBUFFER_LAYERS = 2048
> >  GL_MAX_FRAMEBUFFER_SAMPLES = 16
> >  GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS = 12
> >  GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS = 12
> >  GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS = 12
> >  GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS = 12
> >  GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS = 12
> >  GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS = 12
> >  GL_MAX_COMBINED_SHADER_STORAGE_BLOCKS = 72
> >  GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS = 72
> >  GL_MAX_SHADER_STORAGE_BLOCK_SIZE = 134217728
> >  GL_MAX_COMBINED_SHADER_OUTPUT_RESOURCES = 200
> >  GL_MAX_VERTEX_ATTRIB_RELATIVE_OFFSET = 2047
> >  GL_MAX_VERTEX_ATTRIB_BINDINGS = 16
> >4.4:
> >  GL_MAX_VERTEX_ATTRIB_STRIDE = 2048
> >4.5:
> >  GL_MAX_CULL_DISTANCES = 8
> >  GL_MAX_COMBINED_CLIP_AND_CULL_DISTANCES = 8
> >GL_ARB_transform_feedback3:
> >  GL_MAX_TRANSFORM_FEEDBACK_BUFFERS = 4
> >  GL_MAX_VERTEX_STREAMS = 4
> >4.3:
> >  GL_NUM_SHADING_LANGUAGE_VERSIONS = 16
> >450
> >440
> >430
> >420
> >410
> >400
> >330
> >150
> >140
> >130
> >120
> >
> >320 es
> >310 es
> >300 es
> >100
> >
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/21] vulkan: Add KHR_display extension using DRM

2018-02-23 Thread Jason Ekstrand
Continuing on the new version of the patch...

On Tue, Feb 13, 2018 at 4:31 PM, Keith Packard  wrote:

> +enum wsi_image_state {
> +   wsi_image_idle,
> +   wsi_image_drawing,
> +   wsi_image_queued,
> +   wsi_image_flipping,
> +   wsi_image_displaying
> +};
>

With the exception of NIR (which is a bit odd), we usually use ALL_CAPS for
enum values.


> +
> +static const VkPresentModeKHR available_present_modes[] = {
> +   VK_PRESENT_MODE_FIFO_KHR,
>

Yes, this does seem like the only reasonable mode for now.  I suppose you
could check to see if the platform supports ASYNC_FLIP and advertise
IMMEDIATE in that case.  I know intel doesn't advertise it but I thought
amdgpu does.


> +};
> +
> +static VkResult
> +wsi_display_surface_get_present_modes(VkIcdSurfaceBase  *surface,
> +  uint32_t
> *present_mode_count,
> +  VkPresentModeKHR  *present_modes)
> +{
> +   if (present_modes == NULL) {
> +  *present_mode_count = ARRAY_SIZE(available_present_modes);
> +  return VK_SUCCESS;
> +   }
> +
> +   *present_mode_count = MIN2(*present_mode_count,
> ARRAY_SIZE(available_present_modes));
> +   typed_memcpy(present_modes, available_present_modes,
> *present_mode_count);
> +
> +   if (*present_mode_count < ARRAY_SIZE(available_present_modes))
> +  return VK_INCOMPLETE;
> +   return VK_SUCCESS;
>

vk_outarray, though I suppose you've probably already made that change.


> +}
> +
> +static VkResult
> +wsi_display_image_init(VkDevice device_h,
> +   struct wsi_swapchain *drv_chain,
> +   const VkSwapchainCreateInfoKHR   *create_info,
> +   const VkAllocationCallbacks  *allocator,
> +   struct wsi_display_image *image)
> +{
> +   struct wsi_display_swapchain *chain = (struct wsi_display_swapchain *)
> drv_chain;
> +   struct wsi_display   *wsi = chain->wsi;
> +   VkResult result;
> +   int  ret;
> +   uint32_t image_handle;
> +
> +   if (chain->base.use_prime_blit)
> +  result = wsi_create_prime_image(&chain->base, create_info,
> &image->base);
>

I don't see use_prime_blit being set anywhere.  Perhaps that comes in a
later patch?  The line is obviously correct, so I'm fine with leaving it.


> +   else
> +  result = wsi_create_native_image(&chain->base, create_info,
> &image->base);
>

This will have to be updated for modifiers.  I'm reasonably happy for it to
be the no-op update for now though KHR_display supporting real modifiers
would be nice. :-)


> +   if (result != VK_SUCCESS)
> +  return result;
> +
> +   ret = drmPrimeFDToHandle(wsi->master_fd, image->base.fd,
> &image_handle);
>

This opens up some interesting questions.  GEM handles are not reference
counted by the kernel.  If the driver that we're running on is using
master_fd, then we shouldn't close our handle because that would close the
handle for the driver as well.  If it's not using master_fd, we should
close the handle to avoid leaking it.  The later is only a worry in the
prime case but given that I saw a line above about use_prime_blit, you have
me scared. :-)

One solution to this connundrum would be to simply never use the master FD
for the Vulkan driver itself and always open a render node.  If handed a
master FD, you could check if it matches your render node and set
use_prime_blit accordingly.  Then the driver and WSI would have separate
handle domains and this problem would be solved.  You could also just open
the master FD twice, once for WSI and once for the driver.


> +
> +   close(image->base.fd);
> +   image->base.fd = -1;
> +
> +   if (ret < 0)
> +  goto fail_handle;
> +
> +   image->chain = chain;
> +   image->state = wsi_image_idle;
> +   image->fb_id = 0;
> +
> +   /* XXX extract depth and bpp from image somehow */
>

You have the format in create_info.  We could add some generic VkFormat
introspection or we can just handle the 2-4 cases we care about directly.


> +   ret = drmModeAddFB(wsi->master_fd, create_info->imageExtent.width,
> create_info->imageExtent.height,
> +  24, 32, image->base.row_pitch, image_handle,
> &image->fb_id);
>

What happens if we close the handle immediately after calling
drmModeAddFB?  Does the FB become invalid?  If so, then we probably want to
stash the handle so we can clean it up when we destroy the swapchain.
Unless, of course, we're willing to make the assumption (as stated above)
that the driver and WSI are always using the same FD.


> +
> +   if (ret)
> +  goto fail_fb;
> +
> +   return VK_SUCCESS;
> +
> +fail_fb:
> +   /* fall through */
> +
> +fail_handle:
> +   wsi_destroy_image(&chain->base, &image->base);
> +
> +   return VK_ERROR_OUT_OF_HOST_MEMORY;
> +}
> +
> +static void
> +wsi_display_image_finish(struct wsi_swapchain   *drv_chain,
> + con

Re: [Mesa-dev] [RFC PATCH mesa 2/5] meson: simplify the gbm option code, and avoid changing types

2018-02-23 Thread Dylan Baker
Quoting Eric Engestrom (2018-02-23 10:08:45)
> Signed-off-by: Eric Engestrom 
> ---
>  meson.build | 16 +++-
>  1 file changed, 7 insertions(+), 9 deletions(-)
> 
> diff --git a/meson.build b/meson.build
> index 2d474b140373292e49e7..28d068742ff914a623f6 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -303,16 +303,14 @@ if not (with_dri or with_gallium or with_glx == 'xlib' 
> or with_glx == 'gallium-x
>with_shared_glapi = false
>  endif
>  
> -with_gbm = get_option('gbm')
> -if with_gbm == 'auto' and with_dri  # TODO: or gallium
> -  with_gbm = system_has_kms_drm
> -elif with_gbm == 'true'
> -  if not system_has_kms_drm
> -error('GBM only supports DRM/KMS platforms')
> -  endif
> -  with_gbm = true
> +_gbm = get_option('gbm')
> +if _gbm == 'auto'
> +  with_gbm = system_has_kms_drm and with_dri  # TODO: or gallium

The gallium comment can be dropped, it's wrong.

Reviewed-by: Dylan Baker 

>  else
> -  with_gbm = false
> +  with_gbm = _gbm == 'true'
> +endif
> +if with_gbm and not system_has_kms_drm
> +  error('GBM only supports DRM/KMS platforms')
>  endif

>  
>  _egl = get_option('egl')
> -- 
> Cheers,
>   Eric
> 


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH mesa 3/5] meson: avoid changing types for the dri3 option

2018-02-23 Thread Dylan Baker
Reviewed-by: Dylan Baker 

Quoting Eric Engestrom (2018-02-23 10:08:46)
> Signed-off-by: Eric Engestrom 
> ---
>  meson.build | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/meson.build b/meson.build
> index 28d068742ff914a623f6..6c22601f9e8864f08e08 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -380,11 +380,11 @@ if with_vulkan_icd_dir == ''
>  endif
>  
>  with_dri2 = (with_dri or with_any_vk) and with_dri_platform == 'drm'
> -with_dri3 = get_option('dri3')
> -if with_dri3 == 'auto'
> +_dri3 = get_option('dri3')
> +if _dri3 == 'auto'
>with_dri3 = system_has_kms_drm and with_dri2
>  else
> -  with_dri3 = with_dri3 == 'true'
> +  with_dri3 = _dri3 == 'true'
>  endif
>  
>  if with_any_vk and (with_platform_x11 and not with_dri3)
> -- 
> Cheers,
>   Eric
> 


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH mesa 1/5] meson: give different names to the vars containing dri drivers vs gallium drivers

2018-02-23 Thread Dylan Baker
Quoting Eric Engestrom (2018-02-23 10:08:44)
> Signed-off-by: Eric Engestrom 
> ---
>  meson.build | 26 +-
>  1 file changed, 13 insertions(+), 13 deletions(-)
> 
> diff --git a/meson.build b/meson.build
> index e470e62074da9a46767c..2d474b140373292e49e7 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -108,24 +108,24 @@ with_dri_r100 = false
>  with_dri_r200 = false
>  with_dri_nouveau = false
>  with_dri_swrast = false
> -_drivers = get_option('dri-drivers')
> -if _drivers == 'auto'
> +_dri_drivers = get_option('dri-drivers')

If we're going to keep these around I'd rather drop the leading _, that tells me
that this is a temporary variable and not something to be relied on.

> +if _dri_drivers == 'auto'
>if host_machine.system() == 'linux'
>  # TODO: PPC, Sparc
>  if ['x86', 'x86_64'].contains(host_machine.cpu_family())
> -  _drivers = 'i915,i965,r100,r200,nouveau'
> +  _dri_drivers = 'i915,i965,r100,r200,nouveau'
>  else
>error('Unknown architecture. Please pass -Ddri-drivers to set driver 
> options. Patches gladly accepted to fix this.')
>  endif
>elif ['darwin', 'windows', 'cygwin', 
> 'haiku'].contains(host_machine.system())
>  # only swrast would make sense here, but gallium swrast is a much better 
> default
> -_drivers = ''
> +_dri_drivers = ''
>else
>  error('Unknown OS. Please pass -Ddri-drivers to set driver options. 
> Patches gladly accepted to fix this.')
>endif
>  endif
> -if _drivers != ''
> -  _split = _drivers.split(',')
> +if _dri_drivers != ''
> +  _split = _dri_drivers.split(',')
>with_dri_i915 = _split.contains('i915')
>with_dri_i965 = _split.contains('i965')
>with_dri_r100 = _split.contains('r100')
> @@ -151,25 +151,25 @@ with_gallium_i915 = false
>  with_gallium_svga = false
>  with_gallium_virgl = false
>  with_gallium_swr = false
> -_drivers = get_option('gallium-drivers')
> -if _drivers == 'auto'
> +_gallium_drivers = get_option('gallium-drivers')
> +if _gallium_drivers == 'auto'
>if host_machine.system() == 'linux'
>  # TODO: PPC, Sparc
>  if ['x86', 'x86_64'].contains(host_machine.cpu_family())
> -  _drivers = 'r300,r600,radeonsi,nouveau,virgl,svga,swrast'
> +  _gallium_drivers = 'r300,r600,radeonsi,nouveau,virgl,svga,swrast'
>  elif ['arm', 'aarch64'].contains(host_machine.cpu_family())
> -  _drivers = 'pl111,vc4,vc5,freedreno,etnaviv,imx,virgl,svga,swrast'
> +  _gallium_drivers = 
> 'pl111,vc4,vc5,freedreno,etnaviv,imx,virgl,svga,swrast'
>  else
>error('Unknown architecture. Please pass -Dgallium-drivers to set 
> driver options. Patches gladly accepted to fix this.')
>  endif
>elif ['darwin', 'windows', 'cygwin', 
> 'haiku'].contains(host_machine.system())
> -_drivers = 'swrast'
> +_gallium_drivers = 'swrast'
>else
>  error('Unknown OS. Please pass -Dgallium-drivers to set driver options. 
> Patches gladly accepted to fix this.')
>endif
>  endif
> -if _drivers != ''
> -  _split = _drivers.split(',')
> +if _gallium_drivers != ''
> +  _split = _gallium_drivers.split(',')
>with_gallium_pl111 = _split.contains('pl111')
>with_gallium_radeonsi = _split.contains('radeonsi')
>with_gallium_r300 = _split.contains('r300')
> -- 
> Cheers,
>   Eric
> 


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/17] intel/compiler/fs: Implement ddy without using align16 for Gen11+

2018-02-23 Thread Kenneth Graunke
On Tuesday, February 20, 2018 9:15:19 PM PST Matt Turner wrote:
> Align16 is no more. We previously generated an align16 ADD instruction
> to calculate DDY:
> 
>add(8) g11<1>F  -g10<4>.xyxyF  g10<4>.zwzwF  { align16 1Q };
> 
> Without align16, we now implement it as two align1 instructions:
> 
>add(4) g11<2>F   -g10<4,2,0>Fg10.2<4,2,0>F  { align1 1N };
>add(4) g11.1<2>F -g10.1<4,2,0>F  g10.3<4,2,0>F  { align1 1N };
> ---
>  src/intel/compiler/brw_fs_generator.cpp | 70 
> ++---
>  1 file changed, 56 insertions(+), 14 deletions(-)
> 
> diff --git a/src/intel/compiler/brw_fs_generator.cpp 
> b/src/intel/compiler/brw_fs_generator.cpp
> index 013d2c820a0..ffc46972420 100644
> --- a/src/intel/compiler/brw_fs_generator.cpp
> +++ b/src/intel/compiler/brw_fs_generator.cpp
> @@ -1192,23 +1192,65 @@ fs_generator::generate_ddy(const fs_inst *inst,
>  {
> if (inst->opcode == FS_OPCODE_DDY_FINE) {
>/* produce accurate derivatives */
> -  struct brw_reg src0 = src;
> -  struct brw_reg src1 = src;
> +  if (devinfo->gen >= 11) {
> + struct brw_reg x = src;
> + struct brw_reg y = src;
> + struct brw_reg z = src;
> + struct brw_reg w = src;
> + struct brw_reg dst_e = dst;
> + struct brw_reg dst_o = dst;

Maybe call these dst_even and dst_odd?
Or perhaps dst_e = dst /* even channels */?

> +
> + x.vstride = BRW_VERTICAL_STRIDE_4;
> + y.vstride = BRW_VERTICAL_STRIDE_4;
> + z.vstride = BRW_VERTICAL_STRIDE_4;
> + w.vstride = BRW_VERTICAL_STRIDE_4;
> +
> + x.width = BRW_WIDTH_2;
> + y.width = BRW_WIDTH_2;
> + z.width = BRW_WIDTH_2;
> + w.width = BRW_WIDTH_2;
> +
> + x.hstride = BRW_HORIZONTAL_STRIDE_0;
> + y.hstride = BRW_HORIZONTAL_STRIDE_0;
> + z.hstride = BRW_HORIZONTAL_STRIDE_0;
> + w.hstride = BRW_HORIZONTAL_STRIDE_0;

If you like, you could drop some wordiness by doing:

  struct brw_reg x = stride(src, 4, 2, 0);
  struct brw_reg y = stride(src, 4, 2, 0);
  struct brw_reg z = stride(src, 4, 2, 0);
  struct brw_reg w = stride(src, 4, 2, 0);

> +
> + x.subnr = 0 * sizeof(float);
> + y.subnr = 1 * sizeof(float);
> + z.subnr = 2 * sizeof(float);
> + w.subnr = 3 * sizeof(float);
> +

With or without any suggestions, this patch is:
Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/9] meson: recommend building the surfaceless platform

2018-02-23 Thread Dylan Baker
Quoting Emil Velikov (2018-02-23 11:32:01)
> From: Emil Velikov 
> 
> It has no special requirements, size and build-time is effectively zero.
> 
> Signed-off-by: Emil Velikov 
> ---
>  meson.build | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/meson.build b/meson.build
> index 8cf67b81715..3329eb9f7be 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -315,7 +315,7 @@ elif _egl == 'true'
>elif not with_shared_glapi
>  error('EGL requires shared-glapi')
>elif egl_native_platform == ''
> -error('No platforms specified, consider -Dplatforms=drm,x11 at least')
> +error('No platforms specified, consider -Dplatforms=drm,x11,surfaceless 
> at least')
>endif
>with_egl = true
>  else
> -- 
> 2.16.0

I don't have a strong opinion either way. You're absolutely right that it
basically is free to build, it's also pretty much useless except for ChromeOS
and a few test platforms.

Acked-by: Dylan Baker 


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/9] autotools: error out when building with mangling and glvnd

2018-02-23 Thread Dylan Baker
Quoting Emil Velikov (2018-02-23 11:32:05)
> From: Emil Velikov 
> 
> It's not a thing that can work, nor is a wise idea to attempt.
> 
> Signed-off-by: Emil Velikov 
> ---
>  configure.ac | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/configure.ac b/configure.ac
> index d56d771dcd6..1e794d3f16d 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -1501,6 +1501,9 @@ AC_ARG_ENABLE([mangling],
>[enable_mangling=no]
>  )
>  if test "x${enable_mangling}" = "xyes" ; then
> +  if test "x$enable_libglvnd" = xyes; then
> +AC_MSG_ERROR([Building --enable-mangling with --enable-libglvnd no 
> allowed.])

The grammar is a bit off in this "no" should be "is not",
but how about:
`Conflicting options --enable-mangling and --enable-libglvnd`?

Dylan

> +  fi
>DEFINES="${DEFINES} -DUSE_MGL_NAMESPACE"
>GL_LIB="Mangled${GL_LIB}"
>OSMESA_LIB="Mangled${OSMESA_LIB}"
> -- 
> 2.16.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/9] meson: require shared glapi when using DRI based libGL

2018-02-23 Thread Dylan Baker
Quoting Emil Velikov (2018-02-23 11:32:03)
> From: Emil Velikov 
> 
> Just like we do in the autotools build.
> 
> Signed-off-by: Emil Velikov 
> ---
>  meson.build | 8 ++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/meson.build b/meson.build
> index 417b9b4a79f..f97a3c56b09 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -347,8 +347,12 @@ if with_glx != 'disabled'
>  if with_dri
>error('xlib conflicts with any dri driver')
>  endif
> -  elif with_glx == 'dri' and not with_dri
> -error('dri based GLX requires at least one DRI driver')
> +  elif with_glx == 'dri'
> +if not with_dri
> +  error('dri based GLX requires at least one DRI driver')
> +elif not with_shared_glapi
> +  error('dri based GLX requires shared-glapi')
> +endif
>endif
>  endif
>  
> -- 
> 2.16.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reviewed-by: Dylan Baker 


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/17] intel/compiler/fs: Simplify ddx/ddy code generation

2018-02-23 Thread Kenneth Graunke
On Tuesday, February 20, 2018 9:15:18 PM PST Matt Turner wrote:
> The brw_reg() constructor just obfuscates things here, in my opinion.
> ---
>  src/intel/compiler/brw_fs_generator.cpp | 77 
> +++--
>  1 file changed, 35 insertions(+), 42 deletions(-)
> 
> diff --git a/src/intel/compiler/brw_fs_generator.cpp 
> b/src/intel/compiler/brw_fs_generator.cpp
> index e5a5a76a932..013d2c820a0 100644
> --- a/src/intel/compiler/brw_fs_generator.cpp
> +++ b/src/intel/compiler/brw_fs_generator.cpp
> @@ -1168,20 +1168,17 @@ fs_generator::generate_ddx(const fs_inst *inst,
>width = BRW_WIDTH_4;
> }
>  
> -   struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
> - src.negate, src.abs,
> -  BRW_REGISTER_TYPE_F,
> -  vstride,
> -  width,
> -  BRW_HORIZONTAL_STRIDE_0,
> -  BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
> -   struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
> - src.negate, src.abs,
> -  BRW_REGISTER_TYPE_F,
> -  vstride,
> -  width,
> -  BRW_HORIZONTAL_STRIDE_0,
> -  BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
> +   struct brw_reg src0 = src;
> +   struct brw_reg src1 = src;
> +
> +   src0.subnr   = sizeof(float);
> +   src0.vstride = vstride;
> +   src0.width   = width;
> +   src0.hstride = BRW_HORIZONTAL_STRIDE_0;
> +   src1.vstride = vstride;
> +   src1.width   = width;
> +   src1.hstride = BRW_HORIZONTAL_STRIDE_0;
> +

I would like to see an explicit src1.subnr = 0 * sizeof(float) here.

> brw_ADD(p, dst, src0, negate(src1));
>  }
>  
> @@ -1195,40 +1192,36 @@ fs_generator::generate_ddy(const fs_inst *inst,
>  {
> if (inst->opcode == FS_OPCODE_DDY_FINE) {
>/* produce accurate derivatives */
> -  struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
> -src.negate, src.abs,
> -BRW_REGISTER_TYPE_F,
> -BRW_VERTICAL_STRIDE_4,
> -BRW_WIDTH_4,
> -BRW_HORIZONTAL_STRIDE_1,
> -BRW_SWIZZLE_XYXY, WRITEMASK_XYZW);
> -  struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
> -src.negate, src.abs,
> -BRW_REGISTER_TYPE_F,
> -BRW_VERTICAL_STRIDE_4,
> -BRW_WIDTH_4,
> -BRW_HORIZONTAL_STRIDE_1,
> -BRW_SWIZZLE_ZWZW, WRITEMASK_XYZW);
> +  struct brw_reg src0 = src;
> +  struct brw_reg src1 = src;
> +
> +  src0.swizzle = BRW_SWIZZLE_XYXY;
> +  src0.vstride = BRW_VERTICAL_STRIDE_4;
> +  src0.width   = BRW_WIDTH_4;
> +  src0.hstride = BRW_HORIZONTAL_STRIDE_1;
> +
> +  src1.swizzle = BRW_SWIZZLE_ZWZW;
> +  src1.vstride = BRW_VERTICAL_STRIDE_4;
> +  src1.width   = BRW_WIDTH_4;
> +  src1.hstride = BRW_HORIZONTAL_STRIDE_1;
> +

I agree that the full brw_reg constructor is clunky here, but it might
look a bit nicer still as:

  struct brw_reg src0 = stride(src, 4, 4, 1);
  struct brw_reg src1 = stride(src, 4, 4, 1);
  src0.swizzle = BRW_SWIZZLE_XYXY;
  src1.swizzle = BRW_SWIZZLE_ZWZW;

>brw_push_insn_state(p);
>brw_set_default_access_mode(p, BRW_ALIGN_16);
>brw_ADD(p, dst, negate(src0), src1);
>brw_pop_insn_state(p);
> } else {
>/* replicate the derivative at the top-left pixel to other pixels */
> -  struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
> -src.negate, src.abs,
> -BRW_REGISTER_TYPE_F,
> -BRW_VERTICAL_STRIDE_4,
> -BRW_WIDTH_4,
> -BRW_HORIZONTAL_STRIDE_0,
> -BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
> -  struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
> -src.negate, src.abs,
> -BRW_REGISTER_TYPE_F,
> -BRW_VERTICAL_STRIDE_4,
> -BRW_WIDTH_4,
> -BRW_HORIZONTAL_STRIDE_0,
> -BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
> +  struct brw_reg src0 = src;
> +  struct brw_reg src1 = src;
> +
> +  src0.vstride = BRW_VERTICAL_STRIDE_4;
> +  src0.width   = BRW_WIDTH_4;
> +  src0.hstride = BRW_HORIZONTAL_STRIDE_0;
> +  src1.vstride = BRW_VERTICAL_STRIDE_4;
> +  src1.width   = BRW_WIDTH_4;
> +

Re: [Mesa-dev] [PATCH 3/9] meson: remove unreachable with_glx == 'auto' check

2018-02-23 Thread Dylan Baker
Quoting Emil Velikov (2018-02-23 11:32:02)
> From: Emil Velikov 
> 
> Cannot happen since, props to the autodetection further up.
> 
> Signed-off-by: Emil Velikov 
> ---
>  meson.build | 6 +-
>  1 file changed, 1 insertion(+), 5 deletions(-)
> 
> diff --git a/meson.build b/meson.build
> index 3329eb9f7be..417b9b4a79f 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -334,11 +334,7 @@ endif
>  pre_args += '-DGLX_USE_TLS'
>  if with_glx != 'disabled'
>if not (with_platform_x11 and with_any_opengl)
> -if with_glx == 'auto'
> -  with_glx = 'disabled'
> -else
> -  error('Cannot build GLX support without X11 platform support and at 
> least one OpenGL API')
> -endif
> +error('Cannot build GLX support without X11 platform support and at 
> least one OpenGL API')
>elif with_glx == 'gallium-xlib' 
>  if not with_gallium
>error('Gallium-xlib based GLX requires at least one gallium driver')
> -- 
> 2.16.0

This patch is applicable now,
Reviewed-by: Dylan Baker 


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 15/17] intel/compiler: Add instruction compaction support on Gen11

2018-02-23 Thread Kenneth Graunke
On Tuesday, February 20, 2018 9:15:22 PM PST Matt Turner wrote:
> Gen11 only differs from SKL+ in that it uses a new datatype index table.
> ---
>  src/intel/compiler/brw_eu_compact.c | 42 
> +
>  1 file changed, 42 insertions(+)
> 
> diff --git a/src/intel/compiler/brw_eu_compact.c 
> b/src/intel/compiler/brw_eu_compact.c
> index 8d33e2adffc..ae14ef10ec0 100644
> --- a/src/intel/compiler/brw_eu_compact.c
> +++ b/src/intel/compiler/brw_eu_compact.c
> @@ -637,6 +637,41 @@ static const uint16_t gen8_src_index_table[32] = {
> 0b010110001000
>  };
>  
> +static const uint32_t gen11_datatype_table[32] = {
> +   0b00101,
> +   0b001000100,
> +   0b001000101,
> +   0b001001101,
> +   0b0010101100101,
> +   0b0010010100101,
> +   0b0010010010101,
> +   0b00100100101000101,
> +   0b00100100101100101,
> +   0b001010101,
> +   0b001110100,
> +   0b001110101,
> +   0b001000101000101000101,
> +   0b001000111000101000100,
> +   0b001000111000101000101,
> +   0b001100100100101100101,
> +   0b001100101100100100101,
> +   0b001100101100101100100,
> +   0b001100101100101100101,
> +   0b00110000101100100,
> +   0b001001100,
> +   0b0010001100101,
> +   0b0010101000101,
> +   0b001010100,
> +   0b001000101000101000100,
> +   0b00100011100010100,
> +   0b00100100100101001,
> +   0b00110100101100101,
> +   0b00110000101100101,
> +   0b00100001101001100,
> +   0b001001001001001001000,
> +   0b001001011001001001000,
> +};
> +
>  /* This is actually the control index table for Cherryview (26 bits), but the
>   * only difference from Broadwell (24 bits) is that it has two extra 0-bits 
> at
>   * the start.
> @@ -1450,8 +1485,15 @@ brw_init_compaction_tables(const struct 
> gen_device_info *devinfo)
> assert(gen8_datatype_table[ARRAY_SIZE(gen8_datatype_table) - 1] != 0);
> assert(gen8_subreg_table[ARRAY_SIZE(gen8_subreg_table) - 1] != 0);
> assert(gen8_src_index_table[ARRAY_SIZE(gen8_src_index_table) - 1] != 0);
> +   assert(gen11_datatype_table[ARRAY_SIZE(gen11_datatype_table) - 1] != 0);
>  
> switch (devinfo->gen) {
> +   case 11:
> +  control_index_table = gen8_control_index_table;
> +  datatype_table = gen11_datatype_table;
> +  subreg_table = gen8_subreg_table;
> +  src_index_table = gen8_src_index_table;
> +  break;
> case 10:
> case 9:
> case 8:
> 

This looks right to me.

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 16/17] intel/compiler: Disable Align16 tests on Gen11+

2018-02-23 Thread Kenneth Graunke
On Tuesday, February 20, 2018 9:15:23 PM PST Matt Turner wrote:
> Align16 is no more.

Patches 16-17 are:
Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 14/17] intel/compiler: Mark line, pln, and lrp as removed on Gen11+

2018-02-23 Thread Kenneth Graunke
On Tuesday, February 20, 2018 9:15:21 PM PST Matt Turner wrote:
> ---
>  src/intel/compiler/brw_eu.c | 10 ++
>  1 file changed, 6 insertions(+), 4 deletions(-)
> 
> diff --git a/src/intel/compiler/brw_eu.c b/src/intel/compiler/brw_eu.c
> index bc297a21b32..3646076a8e8 100644
> --- a/src/intel/compiler/brw_eu.c
> +++ b/src/intel/compiler/brw_eu.c
> @@ -384,7 +384,8 @@ enum gen {
> GEN75 = (1 << 5),
> GEN8  = (1 << 6),
> GEN9  = (1 << 7),
> -   GEN10  = (1 << 8),
> +   GEN10 = (1 << 8),
> +   GEN11 = (1 << 9),
> GEN_ALL = ~0
>  };
>  
> @@ -628,16 +629,16 @@ static const struct opcode_desc opcode_descs[128] = {
> },
> /* Reserved 88 */
> [BRW_OPCODE_LINE] = {
> -  .name = "line",.nsrc = 2, .ndst = 1, .gens = GEN_ALL,
> +  .name = "line",.nsrc = 2, .ndst = 1, .gens = GEN_LE(GEN10),
> },
> [BRW_OPCODE_PLN] = {
> -  .name = "pln", .nsrc = 2, .ndst = 1, .gens = GEN_GE(GEN45),
> +  .name = "pln", .nsrc = 2, .ndst = 1, .gens = GEN_GE(GEN45) & 
> GEN_LE(GEN10),
> },
> [BRW_OPCODE_MAD] = {
>.name = "mad", .nsrc = 3, .ndst = 1, .gens = GEN_GE(GEN6),
> },
> [BRW_OPCODE_LRP] = {
> -  .name = "lrp", .nsrc = 3, .ndst = 1, .gens = GEN_GE(GEN6),
> +  .name = "lrp", .nsrc = 3, .ndst = 1, .gens = GEN_GE(GEN6) & 
> GEN_LE(GEN10),
> },
> [93] = {
>.name = "madm",.nsrc = 3, .ndst = 1, .gens = GEN_GE(GEN8),
> @@ -662,6 +663,7 @@ gen_from_devinfo(const struct gen_device_info *devinfo)
> case 8: return GEN8;
> case 9: return GEN9;
> case 10: return GEN10;
> +   case 11: return GEN11;
> default:
>unreachable("not reached");
> }
> 

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 09/17] intel/compiler/fs: Don't generate integer DWord multiply on Gen11

2018-02-23 Thread Kenneth Graunke
On Tuesday, February 20, 2018 9:15:16 PM PST Matt Turner wrote:
> Like CHV et al., Gen11 does not support 32x32 -> 32/64-bit integer
> multiplies.
> ---
>  src/intel/common/gen_device_info.c | 4 
>  src/intel/common/gen_device_info.h | 1 +
>  src/intel/compiler/brw_fs.cpp  | 6 +-
>  3 files changed, 6 insertions(+), 5 deletions(-)

That's too bad.  Correct, though.

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/17] intel/compiler/fs: Fix application of cmod and saturate to LINE/MAC pair

2018-02-23 Thread Kenneth Graunke
On Tuesday, February 20, 2018 9:15:15 PM PST Matt Turner wrote:
> ---
>  src/intel/compiler/brw_fs_generator.cpp | 17 +
>  1 file changed, 13 insertions(+), 4 deletions(-)
> 
> diff --git a/src/intel/compiler/brw_fs_generator.cpp 
> b/src/intel/compiler/brw_fs_generator.cpp
> index 0854709b272..f2bdac7d731 100644
> --- a/src/intel/compiler/brw_fs_generator.cpp
> +++ b/src/intel/compiler/brw_fs_generator.cpp
> @@ -673,6 +673,7 @@ fs_generator::generate_linterp(fs_inst *inst,
> struct brw_reg delta_x = src[0];
> struct brw_reg delta_y = offset(src[0], inst->exec_size / 8);
> struct brw_reg interp = src[1];
> +   brw_inst *i[2];
>  
> if (devinfo->gen >= 11) {
>struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_NF);
> @@ -727,11 +728,19 @@ fs_generator::generate_linterp(fs_inst *inst,
>  
>return false;
> } else {
> -  brw_LINE(p, brw_null_reg(), interp, delta_x);
> -  brw_MAC(p, dst, suboffset(interp, 1), delta_y);
> -
> -  return true;
> +  i[0] = brw_LINE(p, brw_null_reg(), interp, delta_x);
> +  i[1] = brw_MAC(p, dst, suboffset(interp, 1), delta_y);
> }
> +
> +   brw_inst_set_cond_modifier(p->devinfo, i[1], inst->conditional_mod);
> +
> +   /* brw_set_default_saturate() is called before emitting instructions, so 
> the
> +* saturate bit is set in each instruction, so we need to unset it on the
> +* first instruction.
> +*/
> +   brw_inst_set_saturate(p->devinfo, i[0], false);
> +
> +   return true;
>  }
>  
>  void
> 

I really need to keep reading.  The next patch fixes the thing I was
complaining about.  Sorry about that.  With this ordering, patch 7
will break LINE/MAC and patch 8 will fix it...so at least that much
needs changing.

I'm fine with handling this explicitly, so it's:
Reviewed-by: Kenneth Graunke 

but we could also just...not...if we drop the
multiple_instructions_emitted stuff.  Not sure which is better.


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 13/17] intel/compiler: Lower flrp32 on Gen11+

2018-02-23 Thread Kenneth Graunke
On Tuesday, February 20, 2018 9:15:20 PM PST Matt Turner wrote:
> The LRP instruction is no more.
> ---
>  src/intel/compiler/brw_compiler.c   | 35 
> +
>  src/intel/compiler/brw_fs_builder.h |  2 +-
>  src/intel/compiler/brw_fs_generator.cpp |  2 +-
>  src/intel/compiler/brw_vec4_builder.h   |  2 +-
>  src/intel/compiler/brw_vec4_visitor.cpp |  2 +-
>  5 files changed, 26 insertions(+), 17 deletions(-)

The documentation suggests emulating it with MAD and NF, similar to PLN,
but I think that's only necessary if we need additional precision, which
GL doesn't require.  It looks like that would be 4 MADs in SIMD16 vs.
add/mul/neg, so I think lowering is the right call here.

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/17] intel/compiler/fs: Implement FS_OPCODE_LINTERP with MADs on Gen11+

2018-02-23 Thread Kenneth Graunke
On Tuesday, February 20, 2018 9:15:13 PM PST Matt Turner wrote:
> The PLN instruction is no more. Its functionality is now implemented
> using two MAD instructions with the new native-float type. Instead of
> 
>pln(16) r20.0<1>:F r10.4<0;1,0>:F r4.0<8;8,1>:F
> 
> we now have
> 
>mad(8) acc0<1>:NF r10.7<0;1,0>:F r4.0<8;8,1>:F r10.4<0;1,0>:F
>mad(8) r20.0<1>:F acc0<8;8,1>:NF r5.0<8;8,1>:F r10.5<0;1,0>:F
>mad(8) acc0<1>:NF r10.7<0;1,0>:F r6.0<8;8,1>:F r10.4<0;1,0>:F
>mad(8) r21.0<1>:F acc0<8;8,1>:NF r7.0<8;8,1>:F r10.5<0;1,0>:F
> 
> ... and in the case of SIMD8 only the first pair of MAD instructions is
> used.
> ---
>  src/intel/compiler/brw_eu_emit.c|  2 +-
>  src/intel/compiler/brw_fs_generator.cpp | 49 
> +++--
>  2 files changed, 48 insertions(+), 3 deletions(-)
> 
> diff --git a/src/intel/compiler/brw_eu_emit.c 
> b/src/intel/compiler/brw_eu_emit.c
> index ec871e5aa75..a96fe43556e 100644
> --- a/src/intel/compiler/brw_eu_emit.c
> +++ b/src/intel/compiler/brw_eu_emit.c
> @@ -968,7 +968,7 @@ ALU2(DP4)
>  ALU2(DPH)
>  ALU2(DP3)
>  ALU2(DP2)
> -ALU3F(MAD)
> +ALU3(MAD)
>  ALU3F(LRP)
>  ALU1(BFREV)
>  ALU3(BFE)
> diff --git a/src/intel/compiler/brw_fs_generator.cpp 
> b/src/intel/compiler/brw_fs_generator.cpp
> index cd5be054f69..54869bc3ebc 100644
> --- a/src/intel/compiler/brw_fs_generator.cpp
> +++ b/src/intel/compiler/brw_fs_generator.cpp
> @@ -674,8 +674,53 @@ fs_generator::generate_linterp(fs_inst *inst,
> struct brw_reg delta_y = offset(src[0], inst->exec_size / 8);
> struct brw_reg interp = src[1];
>  
> -   if (devinfo->has_pln &&
> -   (devinfo->gen >= 7 || (delta_x.nr & 1) == 0)) {
> +   if (devinfo->gen >= 11) {
> +  struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_NF);
> +  struct brw_reg dwP = suboffset(interp, 0);
> +  struct brw_reg dwQ = suboffset(interp, 1);
> +  struct brw_reg dwR = suboffset(interp, 3);
> +
> +  brw_set_default_access_mode(p, BRW_ALIGN_1);
> +  brw_set_default_exec_size(p, BRW_EXECUTE_8);
> +
> +  if (inst->exec_size == 8) {
> + brw_inst *i[2];
> +
> + i[0] = brw_MAD(p,acc, dwR, offset(delta_x, 0), dwP);
> + i[1] = brw_MAD(p, offset(dst, 0), acc, offset(delta_y, 0), dwQ);
> +
> + brw_inst_set_cond_modifier(p->devinfo, i[1], inst->conditional_mod);
> +
> + /* brw_set_default_saturate() is called before emitting 
> instructions,
> +  * so the saturate bit is set in each instruction, so we need to 
> unset
> +  * it on the first instruction of each pair.
> +  */
> + brw_inst_set_saturate(p->devinfo, i[0], false);
> +  } else {
> + brw_inst *i[4];
> +
> + brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
> + i[0] = brw_MAD(p,acc, dwR, offset(delta_x, 0), dwP);
> + i[1] = brw_MAD(p, offset(dst, 0), acc, offset(delta_x, 1), dwQ);
> +
> + brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
> + i[2] = brw_MAD(p,acc, dwR, offset(delta_y, 0), dwP);
> + i[3] = brw_MAD(p, offset(dst, 1), acc, offset(delta_y, 1), dwQ);
> +
> + brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
> +
> + brw_inst_set_cond_modifier(p->devinfo, i[1], inst->conditional_mod);
> + brw_inst_set_cond_modifier(p->devinfo, i[3], inst->conditional_mod);
> +
> + /* brw_set_default_saturate() is called before emitting 
> instructions,
> +  * so the saturate bit is set in each instruction, so we need to 
> unset
> +  * it on the first instruction of each pair.
> +  */
> + brw_inst_set_saturate(p->devinfo, i[0], false);
> + brw_inst_set_saturate(p->devinfo, i[2], false);
> +  }
> +   } else if (devinfo->has_pln &&
> +  (devinfo->gen >= 7 || (delta_x.nr & 1) == 0)) {
>brw_PLN(p, dst, interp, delta_x);
> } else {
>brw_LINE(p, brw_null_reg(), interp, delta_x);

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/17] intel/compiler/fs: Return multiple_instructions_emitted from generate_linterp

2018-02-23 Thread Kenneth Graunke
On Friday, February 23, 2018 3:51:37 PM PST Kenneth Graunke wrote:
> On Tuesday, February 20, 2018 9:15:14 PM PST Matt Turner wrote:
> > If multiple instructions are emitted, special handling of things like
> > conditional mod, saturate, and NoDDClr/NoDDChk need to be performed.
> > 
> > I noticed that conditional mods were misapplied when adding support for
> > Gen11 (in the previous patch). The next patch fixes the same bug in the
> > Gen4 LINE/MAC case, though I was not able to trigger it.
> > ---
> >  src/intel/compiler/brw_fs.h |  2 +-
> >  src/intel/compiler/brw_fs_generator.cpp | 12 +---
> >  2 files changed, 10 insertions(+), 4 deletions(-)
> 
> Ugh...I noticed a couple things:
> 
> 1. Nothing has set multiple_instructions_emitted since May 2016
>(commit 95272f5c7e6914fe8a85a4e37e07f1e8e3634446), which means
>that block of code has been dead for a long time.
> 
> 2. Nothing in the FS backend sets inst->no_dd_* either.  It looks like
>it was used for general interpolation until July 2016 (commit
>1eef0b73aa323d94d5a080cd1efa81ccacdbd0d2) and for the unlit centroid
>workaround until August 2016 (commit 875341c69b99dea7942a68c9060aa3).
>So, that's also basically dead.
> 
> 3. You mention saturate, but we don't handle that specially for multiple
>instructions.
> 
> 4. You didn't handle conditional modifiers in generate_linterp, so...
>if conditional LINTERP is a thing, it's going to be broken.  That
>said, I'm pretty sure it isn't a thing.

Sorry, this isn't quite true.  For Gen11+, you handle conditional mod
specially, because you need to put it on multiple instructions - one of
which is the last.  So, without multiple_instructions_emitted flagged,
your code would set it...and this code would set it again.  Harmless.

However, it looks like you haven't extended generate_linterp() to handle
conditional mod in the older LINE/MAC case.  So, with this patch, those
won't get conditions at all.  Without this patch, we would set it on the
final MAC...which I think is what we want.

So, I think this is unnecessary for Gen11+ and harmful for old HW.


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/22] nir: Replace fmin(b2f(a), b) with a bcsel

2018-02-23 Thread Ian Romanick
From: Ian Romanick 

All of the affected shaders are HDR mappers from Serious Sam 3.

All Gen7+ platforms had similar results. (Skylake shown)
total instructions in shared programs: 14516285 -> 14516273 (<.01%)
instructions in affected programs: 348 -> 336 (-3.45%)
helped: 12
HURT: 0
helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
helped stats (rel) min: 2.08% max: 6.67% x̄: 4.31% x̃: 4.17%
95% mean confidence interval for instructions value: -1.00 -1.00
95% mean confidence interval for instructions %-change: -5.55% -3.06%
Instructions are helped.

total cycles in shared programs: 533163876 -> 533163808 (<.01%)
cycles in affected programs: 1144 -> 1076 (-5.94%)
helped: 4
HURT: 0
helped stats (abs) min: 16 max: 18 x̄: 17.00 x̃: 17
helped stats (rel) min: 5.80% max: 6.08% x̄: 5.94% x̃: 5.94%
95% mean confidence interval for cycles value: -18.84 -15.16
95% mean confidence interval for cycles %-change: -6.20% -5.68%
Cycles are helped.

Sandy Bridge
total instructions in shared programs: 10533321 -> 10533309 (<.01%)
instructions in affected programs: 372 -> 360 (-3.23%)
helped: 12
HURT: 0
helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
helped stats (rel) min: 2.00% max: 5.88% x̄: 3.91% x̃: 3.85%
95% mean confidence interval for instructions value: -1.00 -1.00
95% mean confidence interval for instructions %-change: -4.96% -2.86%
Instructions are helped.

total cycles in shared programs: 146136632 -> 146136428 (<.01%)
cycles in affected programs: 11668 -> 11464 (-1.75%)
helped: 12
HURT: 0
helped stats (abs) min: 16 max: 18 x̄: 17.00 x̃: 17
helped stats (rel) min: 0.99% max: 3.44% x̄: 2.20% x̃: 2.29%
95% mean confidence interval for cycles value: -17.66 -16.34
95% mean confidence interval for cycles %-change: -2.82% -1.58%
Cycles are helped.

Iron Lake
total instructions in shared programs: 7886301 -> 7886277 (<.01%)
instructions in affected programs: 576 -> 552 (-4.17%)
helped: 12
HURT: 0
helped stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2
helped stats (rel) min: 2.94% max: 6.06% x̄: 4.51% x̃: 4.65%
95% mean confidence interval for instructions value: -2.00 -2.00
95% mean confidence interval for instructions %-change: -5.30% -3.72%
Instructions are helped.

total cycles in shared programs: 178113176 -> 178113176 (0.00%)
cycles in affected programs: 2116 -> 2116 (0.00%)
helped: 2
HURT: 4
helped stats (abs) min: 4 max: 4 x̄: 4.00 x̃: 4
helped stats (rel) min: 1.14% max: 1.14% x̄: 1.14% x̃: 1.14%
HURT stats (abs)   min: 2 max: 2 x̄: 2.00 x̃: 2
HURT stats (rel)   min: 0.50% max: 0.65% x̄: 0.58% x̃: 0.58%
95% mean confidence interval for cycles value: -3.25 3.25
95% mean confidence interval for cycles %-change: -0.93% 0.94%
Inconclusive result (value mean confidence interval includes 0).

GM45
total instructions in shared programs: 4857756 -> 4857744 (<.01%)
instructions in affected programs: 294 -> 282 (-4.08%)
helped: 6
HURT: 0
helped stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2
helped stats (rel) min: 2.94% max: 5.71% x̄: 4.40% x̃: 4.55%
95% mean confidence interval for instructions value: -2.00 -2.00
95% mean confidence interval for instructions %-change: -5.71% -3.09%
Instructions are helped.

total cycles in shared programs: 122178730 -> 122178722 (<.01%)
cycles in affected programs: 700 -> 692 (-1.14%)
helped: 2
HURT: 0

Signed-off-by: Ian Romanick 
---
 src/compiler/nir/nir_opt_algebraic.py | 9 +
 1 file changed, 9 insertions(+)

diff --git a/src/compiler/nir/nir_opt_algebraic.py 
b/src/compiler/nir/nir_opt_algebraic.py
index 65211e6..314b064 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -176,6 +176,15 @@ optimizations = [
(('fmin',('b2f(is_used_once)', a),   
('b2f', b)),   ('b2f', ('iand', a, b))),
(('fmin', ('fneg(is_used_once)', ('b2f(is_used_once)', a)), ('fneg', 
('b2f', b))), ('fneg', ('b2f', ('iand', a, b,
 
+   # fmin(b2f(a), b)
+   # bcsel(a, fmin(b2f(a), b), fmin(b2f(a), b))
+   # bcsel(a, fmin(b2f(True), b), fmin(b2f(False), b))
+   # bcsel(a, fmin(1.0, b), fmin(0.0, b))
+   #
+   # Since b is a constant, constant folding will eliminate the fmin and the
+   # fmax.  If b is > 1.0, the bcsel will be replaced with a b2f.
+   (('fmin', ('b2f', a), '#b'), ('bcsel', a, ('fmin', b, 1.0), ('fmin', b, 
0.0))),
+
# ignore this opt when the result is used by a bcsel or if so we can make
# use of conditional modifiers on supported hardware.
(('flt(is_not_used_by_conditional)', ('fadd(is_used_once)', a, ('fneg', 
b)), 0.0), ('flt', a, b)),
-- 
2.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 20/22] i965/vec4: Relax writemask condition in CSE

2018-02-23 Thread Ian Romanick
From: Ian Romanick 

If the previously seen instruction generates more fields than the new
instruction, still allow CSE to happen.  This doesn't do much, but it
also enables a couple more shaders in the next patch.  It helped quite a
bit in another change series that I have (at least for now) abandoned.

No changes on Skylake, Broadwell, Iron Lake or GM45.

Ivy Bridge and Haswell had similar results. (Ivy Bridge shown)
total instructions in shared programs: 11780295 -> 11780294 (<.01%)
instructions in affected programs: 302 -> 301 (-0.33%)
helped: 1
HURT: 0

total cycles in shared programs: 257308315 -> 257308313 (<.01%)
cycles in affected programs: 2074 -> 2072 (-0.10%)
helped: 1
HURT: 0

Sandy Bridge
total instructions in shared programs: 10506687 -> 10506686 (<.01%)
instructions in affected programs: 335 -> 334 (-0.30%)
helped: 1
HURT: 0

Signed-off-by: Ian Romanick 
---
 src/intel/compiler/brw_vec4_cse.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_vec4_cse.cpp 
b/src/intel/compiler/brw_vec4_cse.cpp
index 2e65ef7..aa2f127 100644
--- a/src/intel/compiler/brw_vec4_cse.cpp
+++ b/src/intel/compiler/brw_vec4_cse.cpp
@@ -127,7 +127,7 @@ instructions_match(vec4_instruction *a, vec4_instruction *b)
   a->base_mrf == b->base_mrf &&
   a->header_size == b->header_size &&
   a->shadow_compare == b->shadow_compare &&
-  a->dst.writemask == b->dst.writemask &&
+  ((a->dst.writemask & b->dst.writemask) == a->dst.writemask) &&
   a->force_writemask_all == b->force_writemask_all &&
   a->size_written == b->size_written &&
   a->exec_size == b->exec_size &&
-- 
2.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 19/22] i965/fs: Merge CMP and SEL into CSEL on Gen8+

2018-02-23 Thread Ian Romanick
From: Ian Romanick 

Skylake
total instructions in shared programs: 14514547 -> 14503025 (-0.08%)
instructions in affected programs: 2008312 -> 1996790 (-0.57%)
helped: 5816
HURT: 0
helped stats (abs) min: 1 max: 27 x̄: 1.98 x̃: 1
helped stats (rel) min: 0.03% max: 6.34% x̄: 0.90% x̃: 0.69%
95% mean confidence interval for instructions value: -2.04 -1.92
95% mean confidence interval for instructions %-change: -0.92% -0.88%
Instructions are helped.

total cycles in shared programs: 533136780 -> 532150352 (-0.19%)
cycles in affected programs: 347942196 -> 346955768 (-0.28%)
helped: 4796
HURT: 767
helped stats (abs) min: 1 max: 32000 x̄: 223.04 x̃: 10
helped stats (rel) min: <.01% max: 24.02% x̄: 2.72% x̃: 0.74%
HURT stats (abs)   min: 1 max: 5252 x̄: 108.59 x̃: 21
HURT stats (rel)   min: 0.01% max: 89.43% x̄: 2.50% x̃: 0.55%
95% mean confidence interval for cycles value: -233.64 -120.99
95% mean confidence interval for cycles %-change: -2.13% -1.87%
Cycles are helped.

LOST:   0
GAINED: 1

Broadwell
total instructions in shared programs: 14808028 -> 14796582 (-0.08%)
instructions in affected programs: 2341840 -> 2330394 (-0.49%)
helped: 5802
HURT: 0
helped stats (abs) min: 1 max: 27 x̄: 1.97 x̃: 1
helped stats (rel) min: 0.03% max: 6.25% x̄: 0.90% x̃: 0.69%
95% mean confidence interval for instructions value: -2.03 -1.92
95% mean confidence interval for instructions %-change: -0.92% -0.88%
Instructions are helped.

total cycles in shared programs: 559431004 -> 558393842 (-0.19%)
cycles in affected programs: 368801897 -> 367764735 (-0.28%)
helped: 4695
HURT: 828
helped stats (abs) min: 1 max: 32000 x̄: 230.83 x̃: 10
helped stats (rel) min: <.01% max: 23.60% x̄: 2.61% x̃: 0.62%
HURT stats (abs)   min: 1 max: 2538 x̄: 56.25 x̃: 14
HURT stats (rel)   min: <.01% max: 96.72% x̄: 2.88% x̃: 0.85%
95% mean confidence interval for cycles value: -244.48 -131.10
95% mean confidence interval for cycles %-change: -1.92% -1.65%
Cycles are helped.

No changes on earlier platforms.

Signed-off-by: Ian Romanick 
---
 src/intel/compiler/brw_fs.cpp | 85 +++
 src/intel/compiler/brw_fs.h   |  1 +
 2 files changed, 86 insertions(+)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index accae1b..1192436 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -2813,6 +2813,85 @@ mask_relative_to(const fs_reg &r, const fs_reg &s, 
unsigned ds)
 }
 
 bool
+fs_visitor::opt_peephole_csel()
+{
+   if (devinfo->gen < 8)
+  return false;
+
+   bool progress = false;
+
+   foreach_block_reverse(block, cfg) {
+  int ip = block->end_ip + 1;
+
+  foreach_inst_in_block_reverse_safe(fs_inst, inst, block) {
+ ip--;
+
+ if (inst->opcode != BRW_OPCODE_SEL ||
+ inst->predicate == BRW_PREDICATE_NONE ||
+ (inst->dst.type != BRW_REGISTER_TYPE_F &&
+  inst->dst.type != BRW_REGISTER_TYPE_D &&
+  inst->dst.type != BRW_REGISTER_TYPE_UD))
+continue;
+
+ /* Because it is a 3-src instruction, CSEL cannot have any immediate
+  * values as sources.
+  */
+ if (inst->src[0].file != VGRF ||
+ (inst->src[1].file != VGRF && !inst->src[1].is_zero()))
+continue;
+
+ foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) 
{
+if (!scan_inst->flags_written())
+   continue;
+
+if ((scan_inst->opcode != BRW_OPCODE_CMP &&
+ scan_inst->opcode != BRW_OPCODE_MOV) ||
+scan_inst->predicate != BRW_PREDICATE_NONE ||
+scan_inst->src[0].file != VGRF ||
+scan_inst->src[0].type != BRW_REGISTER_TYPE_F)
+   break;
+
+if (scan_inst->opcode == BRW_OPCODE_CMP && 
!scan_inst->src[1].is_zero())
+   break;
+
+const brw::fs_builder ibld(this, block, inst);
+
+const enum brw_conditional_mod cond =
+   inst->predicate == BRW_PREDICATE_NORMAL
+   ? scan_inst->conditional_mod
+   : brw_swap_cmod(scan_inst->conditional_mod);
+
+fs_inst *csel_inst = NULL;
+
+if (inst->src[1].file == VGRF) {
+   csel_inst = ibld.CSEL(inst->dst,
+ inst->src[0],
+ inst->src[1],
+ scan_inst->src[0],
+ cond);
+} else if (cond == BRW_CONDITIONAL_NZ) {
+   csel_inst = ibld.CSEL(inst->dst,
+ inst->src[0],
+ scan_inst->src[0],
+ scan_inst->src[0],
+ cond);
+
+   /* This ensures that we get 0.0 and not -0.0. */
+   csel_inst->src[1].abs = true;
+}
+
+if (csel_inst != NULL)
+   

[Mesa-dev] [PATCH 08/22] isl: Silence unused parameter warnings in __gen_combine_address implementations

2018-02-23 Thread Ian Romanick
From: Ian Romanick 

Reduces my build from 1808 warnings to 1772 warnings by silencing 36
instances of things like

../../SOURCE/master/src/intel/isl/isl_emit_depth_stencil.c: In function 
‘__gen_combine_address’:
../../SOURCE/master/src/intel/isl/isl_emit_depth_stencil.c:30:29: warning: 
unused parameter ‘data’ [-Wunused-parameter]
 __gen_combine_address(void *data, void *loc, uint64_t addr, uint32_t delta)
 ^~~~
../../SOURCE/master/src/intel/isl/isl_emit_depth_stencil.c:30:41: warning: 
unused parameter ‘loc’ [-Wunused-parameter]
 __gen_combine_address(void *data, void *loc, uint64_t addr, uint32_t delta)
 ^~~

Signed-off-by: Ian Romanick 
---
 src/intel/isl/isl_emit_depth_stencil.c | 4 +++-
 src/intel/isl/isl_surface_state.c  | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/intel/isl/isl_emit_depth_stencil.c 
b/src/intel/isl/isl_emit_depth_stencil.c
index 90ce889..51b3f00 100644
--- a/src/intel/isl/isl_emit_depth_stencil.c
+++ b/src/intel/isl/isl_emit_depth_stencil.c
@@ -27,7 +27,9 @@
 #define __gen_user_data void
 
 static uint64_t
-__gen_combine_address(void *data, void *loc, uint64_t addr, uint32_t delta)
+__gen_combine_address(__attribute__((unused)) void *data,
+  __attribute__((unused)) void *loc, uint64_t addr,
+  uint32_t delta)
 {
return addr + delta;
 }
diff --git a/src/intel/isl/isl_surface_state.c 
b/src/intel/isl/isl_surface_state.c
index bfb27fa..b93f428 100644
--- a/src/intel/isl/isl_surface_state.c
+++ b/src/intel/isl/isl_surface_state.c
@@ -27,7 +27,9 @@
 #define __gen_user_data void
 
 static uint64_t
-__gen_combine_address(void *data, void *loc, uint64_t addr, uint32_t delta)
+__gen_combine_address(__attribute__((unused)) void *data,
+  __attribute__((unused)) void *loc, uint64_t addr,
+  uint32_t delta)
 {
return addr + delta;
 }
-- 
2.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/22] nir: Replace an odd comparison involving fmin of -b2f

2018-02-23 Thread Ian Romanick
From: Ian Romanick 

I noticed the fge version while looking at a shader for an unrelated
reason.  The feq version prevents a regression in a later change that
performs strength reduction of some compares.

Broadwell and Skylake had similar results. (Skylake shown)
total instructions in shared programs: 14514808 -> 14514796 (<.01%)
instructions in affected programs: 750 -> 738 (-1.60%)
helped: 4
HURT: 0
helped stats (abs) min: 1 max: 5 x̄: 3.00 x̃: 3
helped stats (rel) min: 0.83% max: 1.96% x̄: 1.40% x̃: 1.40%
95% mean confidence interval for instructions value: -6.67 0.67
95% mean confidence interval for instructions %-change: -2.43% -0.36%
Inconclusive result (value mean confidence interval includes 0).

total cycles in shared programs: 533144939 -> 533144853 (<.01%)
cycles in affected programs: 8911 -> 8825 (-0.97%)
helped: 4
HURT: 0
helped stats (abs) min: 16 max: 32 x̄: 21.50 x̃: 19
helped stats (rel) min: 0.60% max: 1.89% x̄: 1.28% x̃: 1.31%
95% mean confidence interval for cycles value: -32.94 -10.06
95% mean confidence interval for cycles %-change: -2.30% -0.26%
Cycles are helped.

Haswell
total instructions in shared programs: 13093785 -> 13093775 (<.01%)
instructions in affected programs: 924 -> 914 (-1.08%)
helped: 4
HURT: 2
helped stats (abs) min: 1 max: 5 x̄: 3.00 x̃: 3
helped stats (rel) min: 0.82% max: 1.95% x̄: 1.39% x̃: 1.39%
HURT stats (abs)   min: 1 max: 1 x̄: 1.00 x̃: 1
HURT stats (rel)   min: 1.19% max: 1.19% x̄: 1.19% x̃: 1.19%
95% mean confidence interval for instructions value: -4.53 1.20
95% mean confidence interval for instructions %-change: -2.02% 0.97%
Inconclusive result (value mean confidence interval includes 0).

total cycles in shared programs: 409580553 -> 409580118 (<.01%)
cycles in affected programs: 10909 -> 10474 (-3.99%)
helped: 5
HURT: 1
helped stats (abs) min: 6 max: 222 x̄: 89.60 x̃: 18
helped stats (rel) min: 0.16% max: 24.72% x̄: 9.54% x̃: 1.78%
HURT stats (abs)   min: 13 max: 13 x̄: 13.00 x̃: 13
HURT stats (rel)   min: 0.39% max: 0.39% x̄: 0.39% x̃: 0.39%
95% mean confidence interval for cycles value: -180.68 35.68
95% mean confidence interval for cycles %-change: -19.55% 3.79%
Inconclusive result (value mean confidence interval includes 0).

Ivy Bridge
total instructions in shared programs: 11811851 -> 11811840 (<.01%)
instructions in affected programs: 1032 -> 1021 (-1.07%)
helped: 5
HURT: 1
helped stats (abs) min: 1 max: 5 x̄: 2.40 x̃: 1
helped stats (rel) min: 0.63% max: 1.95% x̄: 1.13% x̃: 0.97%
HURT stats (abs)   min: 1 max: 1 x̄: 1.00 x̃: 1
HURT stats (rel)   min: 1.19% max: 1.19% x̄: 1.19% x̃: 1.19%
95% mean confidence interval for instructions value: -4.17 0.51
95% mean confidence interval for instructions %-change: -1.86% 0.36%
Inconclusive result (value mean confidence interval includes 0).

total cycles in shared programs: 257618403 -> 257618168 (<.01%)
cycles in affected programs: 10784 -> 10549 (-2.18%)
helped: 4
HURT: 2
helped stats (abs) min: 4 max: 220 x̄: 64.50 x̃: 17
helped stats (rel) min: 0.50% max: 24.34% x̄: 7.07% x̃: 1.72%
HURT stats (abs)   min: 9 max: 14 x̄: 11.50 x̃: 11
HURT stats (rel)   min: 0.24% max: 0.42% x̄: 0.33% x̃: 0.33%
95% mean confidence interval for cycles value: -133.11 54.78
95% mean confidence interval for cycles %-change: -14.79% 5.59%
Inconclusive result (value mean confidence interval includes 0).

GM45, Iron Lake, and Sandy Bridge had similar results. (Sandy Bridge shown)
total instructions in shared programs: 10533871 -> 10533859 (<.01%)
instructions in affected programs: 865 -> 853 (-1.39%)
helped: 4
HURT: 0
helped stats (abs) min: 1 max: 5 x̄: 3.00 x̃: 3
helped stats (rel) min: 0.63% max: 1.83% x̄: 1.22% x̃: 1.21%
95% mean confidence interval for instructions value: -6.67 0.67
95% mean confidence interval for instructions %-change: -2.16% -0.29%
Inconclusive result (value mean confidence interval includes 0).

total cycles in shared programs: 146139904 -> 146139852 (<.01%)
cycles in affected programs: 15213 -> 15161 (-0.34%)
helped: 4
HURT: 0
helped stats (abs) min: 3 max: 18 x̄: 13.00 x̃: 15
helped stats (rel) min: 0.15% max: 0.84% x̄: 0.39% x̃: 0.29%
95% mean confidence interval for cycles value: -23.79 -2.21
95% mean confidence interval for cycles %-change: -0.88% 0.09%
Inconclusive result (%-change mean confidence interval includes 0).

Signed-off-by: Ian Romanick 
---
 src/compiler/nir/nir_opt_algebraic.py | 13 +
 1 file changed, 13 insertions(+)

diff --git a/src/compiler/nir/nir_opt_algebraic.py 
b/src/compiler/nir/nir_opt_algebraic.py
index f5f9e94..0069516 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -152,6 +152,19 @@ optimizations = [
 
(('fge', ('fneg', ('b2f', a)), 0.0), ('inot', a)),
 
+   # fmin(-b2f(a), b) >= 0.0
+   # -b2f(a) >= 0.0 && b >= 0.0
+   # -b2f(a) == 0.0 && b >= 0.0-b2f can only be 0 or -1, never >0
+   # b2f(a) == 0.0 && b >= 0.0
+   # a == False && b >= 0.0
+   # !a && b >= 0.0
+   #
+   # The fge in the second r

[Mesa-dev] [PATCH 21/22] i965/vec4: Allow CSE on subset VF constant loads

2018-02-23 Thread Ian Romanick
From: Ian Romanick 

No changes on other platforms.

Haswell, Ivy Bridge, and Sandy Bridge had similar results. (Haswell shown)
total instructions in shared programs: 13059891 -> 13059884 (<.01%)
instructions in affected programs: 431 -> 424 (-1.62%)
helped: 7
HURT: 0
helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
helped stats (rel) min: 1.19% max: 5.26% x̄: 2.05% x̃: 1.49%
95% mean confidence interval for instructions value: -1.00 -1.00
95% mean confidence interval for instructions %-change: -3.39% -0.71%
Instructions are helped.

total cycles in shared programs: 409260032 -> 409260018 (<.01%)
cycles in affected programs: 4228 -> 4214 (-0.33%)
helped: 7
HURT: 0
helped stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2
helped stats (rel) min: 0.28% max: 2.04% x̄: 0.54% x̃: 0.28%
95% mean confidence interval for cycles value: -2.00 -2.00
95% mean confidence interval for cycles %-change: -1.15% 0.07%

Inconclusive result (%-change mean confidence interval includes 0).

Signed-off-by: Ian Romanick 
---
 src/intel/compiler/brw_vec4_cse.cpp | 21 +
 1 file changed, 21 insertions(+)

diff --git a/src/intel/compiler/brw_vec4_cse.cpp 
b/src/intel/compiler/brw_vec4_cse.cpp
index aa2f127..3302939 100644
--- a/src/intel/compiler/brw_vec4_cse.cpp
+++ b/src/intel/compiler/brw_vec4_cse.cpp
@@ -104,6 +104,27 @@ operands_match(const vec4_instruction *a, const 
vec4_instruction *b)
   return xs[0].equals(ys[0]) &&
  ((xs[1].equals(ys[1]) && xs[2].equals(ys[2])) ||
   (xs[2].equals(ys[1]) && xs[1].equals(ys[2])));
+   } else if (a->opcode == BRW_OPCODE_MOV &&
+  xs[0].file == IMM &&
+  xs[0].type == BRW_REGISTER_TYPE_VF) {
+  src_reg tmp_x = xs[0];
+  src_reg tmp_y = ys[0];
+
+  /* Smash out the values that are not part of the writemask.  Otherwise
+   * the equals and negative_equals operators will fail due to mismatches
+   * in unused components.
+   */
+  static const uint32_t mask[] = {
+ 0x, 0x00ff, 0xff00, 0x,
+ 0x00ff, 0x00ff00ff, 0x0000, 0x00ff,
+ 0xff00, 0xffff, 0xff00ff00, 0xff00,
+ 0x, 0x00ff, 0xff00, 0x
+  };
+
+  tmp_x.ud &= mask[a->dst.writemask & b->dst.writemask];
+  tmp_y.ud &= mask[a->dst.writemask & b->dst.writemask];
+
+  return tmp_x.equals(tmp_y);
} else if (!a->is_commutative()) {
   return xs[0].equals(ys[0]) && xs[1].equals(ys[1]) && xs[2].equals(ys[2]);
} else {
-- 
2.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/22] nir: Pull b2f out of bcsel

2018-02-23 Thread Ian Romanick
From: Ian Romanick 

All platforms had similar results. (Skylake shown)
total instructions in shared programs: 14516592 -> 14516586 (<.01%)
instructions in affected programs: 500 -> 494 (-1.20%)
helped: 2
HURT: 0

total cycles in shared programs: 533167044 -> 533166998 (<.01%)
cycles in affected programs: 6988 -> 6942 (-0.66%)
helped: 2
HURT: 0

Signed-off-by: Ian Romanick 
---
 src/compiler/nir/nir_opt_algebraic.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/compiler/nir/nir_opt_algebraic.py 
b/src/compiler/nir/nir_opt_algebraic.py
index 0069516..65211e6 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -372,6 +372,7 @@ optimizations = [
(('bcsel@32', a, -0.0, -1.0), ('fneg', ('b2f', ('inot', a,
(('bcsel', True, b, c), b),
(('bcsel', False, b, c), c),
+   (('bcsel', a, ('b2f(is_used_once)', b), ('b2f', c)), ('b2f', ('bcsel', a, 
b, c))),
# The result of this should be hit by constant propagation and, in the
# next round of opt_algebraic, get picked up by one of the above two.
(('bcsel', '#a', b, c), ('bcsel', ('ine', 'a', 0), b, c)),
-- 
2.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 18/22] i965/fs: Add infrastructure for generating CSEL instructions.

2018-02-23 Thread Ian Romanick
From: Kenneth Graunke 

v2 (idr): Don't allow CSEL with a non-float src2.

v3 (idr): Add CSEL to fs_inst::flags_written.  Suggested by Matt.

Signed-off-by: Kenneth Graunke 
Signed-off-by: Ian Romanick 
---
 src/intel/compiler/brw_eu.h |  1 +
 src/intel/compiler/brw_eu_emit.c|  1 +
 src/intel/compiler/brw_fs.cpp   |  1 +
 src/intel/compiler/brw_fs_builder.h | 22 +-
 src/intel/compiler/brw_fs_generator.cpp |  5 +
 5 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
index 2d0f56f..3201624 100644
--- a/src/intel/compiler/brw_eu.h
+++ b/src/intel/compiler/brw_eu.h
@@ -171,6 +171,7 @@ ALU2(SHR)
 ALU2(SHL)
 ALU1(DIM)
 ALU2(ASR)
+ALU3(CSEL)
 ALU1(F32TO16)
 ALU1(F16TO32)
 ALU2(ADD)
diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index c25d8d6..305a759 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -953,6 +953,7 @@ ALU2(SHR)
 ALU2(SHL)
 ALU1(DIM)
 ALU2(ASR)
+ALU3(CSEL)
 ALU1(FRC)
 ALU1(RNDD)
 ALU2(MAC)
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index bed632d..accae1b 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -929,6 +929,7 @@ unsigned
 fs_inst::flags_written() const
 {
if ((conditional_mod && (opcode != BRW_OPCODE_SEL &&
+opcode != BRW_OPCODE_CSEL &&
 opcode != BRW_OPCODE_IF &&
 opcode != BRW_OPCODE_WHILE)) ||
opcode == FS_OPCODE_MOV_DISPATCH_TO_FLAGS) {
diff --git a/src/intel/compiler/brw_fs_builder.h 
b/src/intel/compiler/brw_fs_builder.h
index 87394bc..47eac42 100644
--- a/src/intel/compiler/brw_fs_builder.h
+++ b/src/intel/compiler/brw_fs_builder.h
@@ -458,7 +458,6 @@ namespace brw {
   ALU1(BFREV)
   ALU1(CBIT)
   ALU2(CMPN)
-  ALU3(CSEL)
   ALU1(DIM)
   ALU2(DP2)
   ALU2(DP3)
@@ -534,6 +533,27 @@ namespace brw {
   }
 
   /**
+   * CSEL: dst = src2  0.0f ? src0 : src1
+   */
+  instruction *
+  CSEL(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
+   const src_reg &src2, brw_conditional_mod condition) const
+  {
+ /* CSEL only operates on floats, so we can't do integer =/>
+  * comparisons.  Zero/non-zero (== and !=) comparisons almost work.
+  * 0x8000 fails because it is -0.0, and -0.0 == 0.0.
+  */
+ assert(src2.type == BRW_REGISTER_TYPE_F);
+
+ return set_condmod(condition,
+emit(BRW_OPCODE_CSEL,
+ retype(dst, BRW_REGISTER_TYPE_F),
+ retype(src0, BRW_REGISTER_TYPE_F),
+ retype(src1, BRW_REGISTER_TYPE_F),
+ src2));
+  }
+
+  /**
* Emit a linear interpolation instruction.
*/
   instruction *
diff --git a/src/intel/compiler/brw_fs_generator.cpp 
b/src/intel/compiler/brw_fs_generator.cpp
index cd5be05..9157367 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -1826,6 +1826,11 @@ fs_generator::generate_code(const cfg_t *cfg, int 
dispatch_width)
   case BRW_OPCODE_SEL:
 brw_SEL(p, dst, src[0], src[1]);
 break;
+  case BRW_OPCODE_CSEL:
+ brw_set_default_access_mode(p, BRW_ALIGN_16);
+ brw_CSEL(p, dst, src[0], src[1], src[2]);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ break;
   case BRW_OPCODE_BFREV:
  assert(devinfo->gen >= 7);
  brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD),
-- 
2.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/22] genxml: Silence unused parameter warnings in generated pack code

2018-02-23 Thread Ian Romanick
From: Ian Romanick 

Reduces my build from 1960 warnings to 1808 warnings by silencing 152
instances of things like

In file included from ../../SOURCE/master/src/intel/genxml/genX_pack.h:32:0,
 from 
../../SOURCE/master/src/intel/isl/isl_emit_depth_stencil.c:36:
src/intel/genxml/gen4_pack.h: In function ‘__gen_uint’:
src/intel/genxml/gen4_pack.h:58:49: warning: unused parameter ‘end’ 
[-Wunused-parameter]
 __gen_uint(uint64_t v, uint32_t start, uint32_t end)
 ^~~
src/intel/genxml/gen4_pack.h: In function ‘__gen_offset’:
src/intel/genxml/gen4_pack.h:94:35: warning: unused parameter ‘start’ 
[-Wunused-parameter]
 __gen_offset(uint64_t v, uint32_t start, uint32_t end)
   ^
src/intel/genxml/gen4_pack.h:94:51: warning: unused parameter ‘end’ 
[-Wunused-parameter]
 __gen_offset(uint64_t v, uint32_t start, uint32_t end)
   ^~~
src/intel/genxml/gen4_pack.h: In function ‘__gen_ufixed’:
src/intel/genxml/gen4_pack.h:133:48: warning: unused parameter ‘end’ 
[-Wunused-parameter]
 __gen_ufixed(float v, uint32_t start, uint32_t end, uint32_t fract_bits)
^~~

Signed-off-by: Ian Romanick 
---
 src/intel/genxml/gen_pack_header.py | 14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/intel/genxml/gen_pack_header.py 
b/src/intel/genxml/gen_pack_header.py
index e6cea86..7dcada8 100644
--- a/src/intel/genxml/gen_pack_header.py
+++ b/src/intel/genxml/gen_pack_header.py
@@ -57,6 +57,12 @@ pack_header = """%(license)s
 #ifndef __gen_field_functions
 #define __gen_field_functions
 
+#ifdef NDEBUG
+#define NDEBUG_UNUSED __attribute__((unused))
+#else
+#define NDEBUG_UNUSED
+#endif
+
 union __gen_value {
float f;
uint32_t dw;
@@ -69,7 +75,7 @@ __gen_mbo(uint32_t start, uint32_t end)
 }
 
 static inline uint64_t
-__gen_uint(uint64_t v, uint32_t start, uint32_t end)
+__gen_uint(uint64_t v, uint32_t start, NDEBUG_UNUSED uint32_t end)
 {
__gen_validate_value(v);
 
@@ -105,7 +111,7 @@ __gen_sint(int64_t v, uint32_t start, uint32_t end)
 }
 
 static inline uint64_t
-__gen_offset(uint64_t v, uint32_t start, uint32_t end)
+__gen_offset(uint64_t v, NDEBUG_UNUSED uint32_t start, NDEBUG_UNUSED uint32_t 
end)
 {
__gen_validate_value(v);
 #ifndef NDEBUG
@@ -144,7 +150,7 @@ __gen_sfixed(float v, uint32_t start, uint32_t end, 
uint32_t fract_bits)
 }
 
 static inline uint64_t
-__gen_ufixed(float v, uint32_t start, uint32_t end, uint32_t fract_bits)
+__gen_ufixed(float v, uint32_t start, NDEBUG_UNUSED uint32_t end, uint32_t 
fract_bits)
 {
__gen_validate_value(v);
 
@@ -169,6 +175,8 @@ __gen_ufixed(float v, uint32_t start, uint32_t end, 
uint32_t fract_bits)
 #error #define __gen_combine_address before including this file
 #endif
 
+#undef NDEBUG_UNUSED
+
 #endif
 
 """
-- 
2.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/22] nir: Silence unused parameter warnings in generated nir_constant_expressions code

2018-02-23 Thread Ian Romanick
From: Ian Romanick 

Reduces my build from 2075 warnings to 2023 warnings by silencing 52
instances of things like

src/compiler/nir/nir_constant_expressions.c: In function ‘evaluate_bfi’:
src/compiler/nir/nir_constant_expressions.c:1812:61: warning: unused parameter 
‘bit_size’ [-Wunused-parameter]
 evaluate_bfi(MAYBE_UNUSED unsigned num_components, unsigned bit_size,
 ^~~~

Signed-off-by: Ian Romanick 
---
 src/compiler/nir/nir_constant_expressions.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_constant_expressions.py 
b/src/compiler/nir/nir_constant_expressions.py
index 6571d3b..ee92be5 100644
--- a/src/compiler/nir/nir_constant_expressions.py
+++ b/src/compiler/nir/nir_constant_expressions.py
@@ -389,7 +389,8 @@ struct bool32_vec {
 
 % for name, op in sorted(opcodes.iteritems()):
 static nir_const_value
-evaluate_${name}(MAYBE_UNUSED unsigned num_components, unsigned bit_size,
+evaluate_${name}(MAYBE_UNUSED unsigned num_components,
+ ${"UNUSED" if op_bit_sizes(op) is None else ""} unsigned 
bit_size,
  MAYBE_UNUSED nir_const_value *_src)
 {
nir_const_value _dst_val = { {0, } };
-- 
2.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 17/22] nir: Narrow some dot product operations

2018-02-23 Thread Ian Romanick
From: Ian Romanick 

On vector platforms, this helps elide some constant loads.

No changes on Broadwell or Skylake.

Haswell
total instructions in shared programs: 13093793 -> 13060163 (-0.26%)
instructions in affected programs: 1277532 -> 1243902 (-2.63%)
helped: 13216
HURT: 95
helped stats (abs) min: 1 max: 18 x̄: 2.56 x̃: 2
helped stats (rel) min: 0.21% max: 20.00% x̄: 3.63% x̃: 2.78%
HURT stats (abs)   min: 1 max: 6 x̄: 1.77 x̃: 1
HURT stats (rel)   min: 0.09% max: 5.56% x̄: 1.25% x̃: 1.19%
95% mean confidence interval for instructions value: -2.57 -2.49
95% mean confidence interval for instructions %-change: -3.65% -3.54%
Instructions are helped.

total cycles in shared programs: 409580819 -> 409268463 (-0.08%)
cycles in affected programs: 71730652 -> 71418296 (-0.44%)
helped: 9898
HURT: 2352
helped stats (abs) min: 2 max: 16014 x̄: 37.08 x̃: 16
helped stats (rel) min: <.01% max: 35.55% x̄: 6.26% x̃: 4.50%
HURT stats (abs)   min: 2 max: 276 x̄: 23.25 x̃: 6
HURT stats (rel)   min: <.01% max: 40.00% x̄: 3.54% x̃: 1.97%
95% mean confidence interval for cycles value: -33.19 -17.80
95% mean confidence interval for cycles %-change: -4.50% -4.26%
Cycles are helped.

total fills in shared programs: 82059 -> 82052 (<.01%)
fills in affected programs: 21 -> 14 (-33.33%)
helped: 7
HURT: 0

Sandy Bridge and Ivy Bridge had similar results (Ivy Bridge shown)
total instructions in shared programs: 11811851 -> 11780605 (-0.26%)
instructions in affected programs: 1155007 -> 1123761 (-2.71%)
helped: 12304
HURT: 95
helped stats (abs) min: 1 max: 18 x̄: 2.55 x̃: 2
helped stats (rel) min: 0.21% max: 20.00% x̄: 3.69% x̃: 2.86%
HURT stats (abs)   min: 1 max: 6 x̄: 1.77 x̃: 1
HURT stats (rel)   min: 0.09% max: 5.56% x̄: 1.25% x̃: 1.19%
95% mean confidence interval for instructions value: -2.56 -2.48
95% mean confidence interval for instructions %-change: -3.71% -3.59%
Instructions are helped.

total cycles in shared programs: 257618409 -> 257316805 (-0.12%)
cycles in affected programs: 71999580 -> 71697976 (-0.42%)
helped: 9155
HURT: 2380
helped stats (abs) min: 2 max: 16014 x̄: 38.44 x̃: 16
helped stats (rel) min: <.01% max: 35.75% x̄: 6.39% x̃: 4.62%
HURT stats (abs)   min: 2 max: 290 x̄: 21.14 x̃: 4
HURT stats (rel)   min: <.01% max: 41.55% x̄: 3.14% x̃: 1.33%
95% mean confidence interval for cycles value: -34.32 -17.97
95% mean confidence interval for cycles %-change: -4.55% -4.29%
Cycles are helped.

GM45 and Iron Lake had nearly identical results (Iron Lake shown)
total instructions in shared programs: 7886750 -> 7879944 (-0.09%)
instructions in affected programs: 373781 -> 366975 (-1.82%)
helped: 3715
HURT: 47
helped stats (abs) min: 1 max: 8 x̄: 1.86 x̃: 1
helped stats (rel) min: 0.22% max: 16.67% x̄: 2.88% x̃: 2.06%
HURT stats (abs)   min: 1 max: 6 x̄: 2.55 x̃: 2
HURT stats (rel)   min: 1.09% max: 5.00% x̄: 1.93% x̃: 2.35%
95% mean confidence interval for instructions value: -1.85 -1.77
95% mean confidence interval for instructions %-change: -2.91% -2.73%
Instructions are helped.

total cycles in shared programs: 178114636 -> 178095452 (-0.01%)
cycles in affected programs: 7227666 -> 7208482 (-0.27%)
helped: 3349
HURT: 301
helped stats (abs) min: 2 max: 90 x̄: 6.55 x̃: 4
helped stats (rel) min: <.01% max: 14.18% x̄: 0.95% x̃: 0.63%
HURT stats (abs)   min: 2 max: 42 x̄: 9.13 x̃: 10
HURT stats (rel)   min: 0.01% max: 11.19% x̄: 1.22% x̃: 1.50%
95% mean confidence interval for cycles value: -5.52 -4.99
95% mean confidence interval for cycles %-change: -0.81% -0.73%
Cycles are helped.

Signed-off-by: Ian Romanick 
---
 src/compiler/nir/nir_opt_algebraic.py | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/compiler/nir/nir_opt_algebraic.py 
b/src/compiler/nir/nir_opt_algebraic.py
index 26ddf10..3366a43 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -125,6 +125,14 @@ optimizations = [
(('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
(('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma'),
 
+   (('fdot4', ('vec4', a, b,   c,   1.0), d), ('fdph',  ('vec3', a, b, c), d)),
+   (('fdot4', ('vec4', a, b,   c,   0.0), d), ('fdot3', ('vec3', a, b, c), d)),
+   (('fdot4', ('vec4', a, b,   0.0, 0.0), c), ('fdot2', ('vec2', a, b), c)),
+   (('fdot4', ('vec4', a, 0.0, 0.0, 0.0), b), ('fmul', a, b)),
+
+   (('fdot3', ('vec3', a, b,   0.0), c), ('fdot2', ('vec2', a, b), c)),
+   (('fdot3', ('vec3', a, 0.0, 0.0), b), ('fmul', a, b)),
+
# (a * #b + #c) << #d
# ((a * #b) << #d) + (#c << #d)
# (a * (#b << #d)) + (#c << #d)
-- 
2.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/22] i965: Silence unused parameter warnings in genX_state_upload

2018-02-23 Thread Ian Romanick
From: Ian Romanick 

Reduces my build from 1772 warnings to 1717 warnings by silencing 55
instances of things like

../../SOURCE/master/src/mesa/drivers/dri/i965/genX_state_upload.c: In function 
‘gen4_emit_vertex_buffer_state’:
../../SOURCE/master/src/mesa/drivers/dri/i965/genX_state_upload.c:313:41: 
warning: unused parameter ‘end_offset’ [-Wunused-parameter]
unsigned end_offset,
 ^~
../../SOURCE/master/src/mesa/drivers/dri/i965/genX_state_upload.c: In function 
‘gen4_emit_sampler_state_pointers_xs’:
../../SOURCE/master/src/mesa/drivers/dri/i965/genX_state_upload.c:4689:58: 
warning: unused parameter ‘brw’ [-Wunused-parameter]
 genX(emit_sampler_state_pointers_xs)(struct brw_context *brw,
  ^~~
../../SOURCE/master/src/mesa/drivers/dri/i965/genX_state_upload.c:4690:62: 
warning: unused parameter ‘stage_state’ [-Wunused-parameter]
  struct brw_stage_state *stage_state)
  ^~~
../../SOURCE/master/src/mesa/drivers/dri/i965/genX_state_upload.c: In function 
‘gen4_upload_default_color’:
../../SOURCE/master/src/mesa/drivers/dri/i965/genX_state_upload.c:4730:40: 
warning: unused parameter ‘format’ [-Wunused-parameter]
mesa_format format, GLenum base_format,
^~
../../SOURCE/master/src/mesa/drivers/dri/i965/genX_state_upload.c: In function 
‘translate_wrap_mode’:
../../SOURCE/master/src/mesa/drivers/dri/i965/genX_state_upload.c:4906:41: 
warning: unused parameter ‘brw’ [-Wunused-parameter]
 translate_wrap_mode(struct brw_context *brw, GLenum wrap, bool using_nearest)
 ^~~
../../SOURCE/master/src/mesa/drivers/dri/i965/genX_state_upload.c: In function 
‘gen4_update_sampler_state’:
../../SOURCE/master/src/mesa/drivers/dri/i965/genX_state_upload.c:4972:37: 
warning: unused parameter ‘batch_offset_for_sampler_state’ [-Wunused-parameter]
uint32_t batch_offset_for_sampler_state)
 ^~

Signed-off-by: Ian Romanick 
---
 src/mesa/drivers/dri/i965/genX_state_upload.c | 34 +++
 1 file changed, 14 insertions(+), 20 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 8668abd..c597b08 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -138,7 +138,7 @@ KSP(struct brw_context *brw, uint32_t offset)
 }
 #else
 static uint32_t
-KSP(struct brw_context *brw, uint32_t offset)
+KSP(UNUSED struct brw_context *brw, uint32_t offset)
 {
return offset;
 }
@@ -310,9 +310,9 @@ genX(emit_vertex_buffer_state)(struct brw_context *brw,
unsigned buffer_nr,
struct brw_bo *bo,
unsigned start_offset,
-   unsigned end_offset,
+   MAYBE_UNUSED unsigned end_offset,
unsigned stride,
-   unsigned step_rate)
+   MAYBE_UNUSED unsigned step_rate)
 {
struct GENX(VERTEX_BUFFER_STATE) buf_state = {
   .VertexBufferIndex = buffer_nr,
@@ -4686,8 +4686,8 @@ genX(emit_mi_report_perf_count)(struct brw_context *brw,
  * Emit a 3DSTATE_SAMPLER_STATE_POINTERS_{VS,HS,GS,DS,PS} packet.
  */
 static void
-genX(emit_sampler_state_pointers_xs)(struct brw_context *brw,
- struct brw_stage_state *stage_state)
+genX(emit_sampler_state_pointers_xs)(MAYBE_UNUSED struct brw_context *brw,
+ MAYBE_UNUSED struct brw_stage_state 
*stage_state)
 {
 #if GEN_GEN >= 7
static const uint16_t packet_headers[] = {
@@ -4727,7 +4727,7 @@ has_component(mesa_format format, int i)
 static void
 genX(upload_default_color)(struct brw_context *brw,
const struct gl_sampler_object *sampler,
-   mesa_format format, GLenum base_format,
+   MAYBE_UNUSED mesa_format format, GLenum base_format,
bool is_integer_format, bool is_stencil_sampling,
uint32_t *sdc_offset)
 {
@@ -4903,7 +4903,7 @@ genX(upload_default_color)(struct brw_context *brw,
 }
 
 static uint32_t
-translate_wrap_mode(struct brw_context *brw, GLenum wrap, bool using_nearest)
+translate_wrap_mode(GLenum wrap, MAYBE_UNUSED bool using_nearest)
 {
switch (wrap) {
case GL_REPEAT:
@@ -4968,8 +4968,7 @@ genX(update_sampler_state)(struct brw_context *brw,
mesa_format format, GLenum base_format,
const struct gl_texture_object *texObj,
  

[Mesa-dev] [PATCH 16/22] nir: Simplify some comparisons like a+b < a

2018-02-23 Thread Ian Romanick
From: Ian Romanick 

All Gen7+ platforms had similar results. (Skylake shown)
total instructions in shared programs: 14514555 -> 14514547 (<.01%)
instructions in affected programs: 1972 -> 1964 (-0.41%)
helped: 8
HURT: 0
helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
helped stats (rel) min: 0.39% max: 0.42% x̄: 0.41% x̃: 0.41%
95% mean confidence interval for instructions value: -1.00 -1.00
95% mean confidence interval for instructions %-change: -0.41% -0.40%
Instructions are helped.

total cycles in shared programs: 533141444 -> 533136780 (<.01%)
cycles in affected programs: 164728 -> 160064 (-2.83%)
helped: 181
HURT: 3
helped stats (abs) min: 2 max: 94 x̄: 26.17 x̃: 30
helped stats (rel) min: 0.12% max: 5.33% x̄: 3.42% x̃: 3.80%
HURT stats (abs)   min: 4 max: 54 x̄: 24.00 x̃: 14
HURT stats (rel)   min: 0.20% max: 2.39% x̄: 1.09% x̃: 0.68%
95% mean confidence interval for cycles value: -27.12 -23.58
95% mean confidence interval for cycles %-change: -3.54% -3.16%
Cycles are helped.

Sandy Bridge
total instructions in shared programs: 10533667 -> 10533539 (<.01%)
instructions in affected programs: 10148 -> 10020 (-1.26%)
helped: 124
HURT: 0
helped stats (abs) min: 1 max: 2 x̄: 1.03 x̃: 1
helped stats (rel) min: 0.39% max: 4.35% x̄: 2.20% x̃: 2.04%
95% mean confidence interval for instructions value: -1.06 -1.00
95% mean confidence interval for instructions %-change: -2.46% -1.95%
Instructions are helped.

total cycles in shared programs: 146136887 -> 146132122 (<.01%)
cycles in affected programs: 206382 -> 201617 (-2.31%)
helped: 171
HURT: 0
helped stats (abs) min: 2 max: 40 x̄: 27.87 x̃: 30
helped stats (rel) min: 0.08% max: 5.73% x̄: 2.98% x̃: 2.67%
95% mean confidence interval for cycles value: -29.19 -26.54
95% mean confidence interval for cycles %-change: -3.20% -2.76%
Cycles are helped.

Iron Lake
total instructions in shared programs: 7886515 -> 7886507 (<.01%)
instructions in affected programs: 3016 -> 3008 (-0.27%)
helped: 8
HURT: 0
helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
helped stats (rel) min: 0.25% max: 0.28% x̄: 0.27% x̃: 0.27%
95% mean confidence interval for instructions value: -1.00 -1.00
95% mean confidence interval for instructions %-change: -0.27% -0.26%
Instructions are helped.

total cycles in shared programs: 178100396 -> 178100388 (<.01%)
cycles in affected programs: 156128 -> 156120 (<.01%)
helped: 4
HURT: 4
helped stats (abs) min: 4 max: 4 x̄: 4.00 x̃: 4
helped stats (rel) min: 0.02% max: 0.04% x̄: 0.03% x̃: 0.03%
HURT stats (abs)   min: 2 max: 2 x̄: 2.00 x̃: 2
HURT stats (rel)   min: <.01% max: 0.01% x̄: <.01% x̃: <.01%
95% mean confidence interval for cycles value: -3.68 1.68
95% mean confidence interval for cycles %-change: -0.03% <.01%
Inconclusive result (value mean confidence interval includes 0).

GM45
total instructions in shared programs: 4857872 -> 4857868 (<.01%)
instructions in affected programs: 1544 -> 1540 (-0.26%)
helped: 4
HURT: 0
helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
helped stats (rel) min: 0.25% max: 0.27% x̄: 0.26% x̃: 0.26%
95% mean confidence interval for instructions value: -1.00 -1.00
95% mean confidence interval for instructions %-change: -0.28% -0.24%
Instructions are helped.

total cycles in shared programs: 122167654 -> 122167662 (<.01%)
cycles in affected programs: 96248 -> 96256 (<.01%)
helped: 0
HURT: 4
HURT stats (abs)   min: 2 max: 2 x̄: 2.00 x̃: 2
HURT stats (rel)   min: <.01% max: 0.01% x̄: <.01% x̃: <.01%
95% mean confidence interval for cycles value: 2.00 2.00
95% mean confidence interval for cycles %-change: <.01% 0.02%
Cycles are HURT.

Signed-off-by: Ian Romanick 
---
 src/compiler/nir/nir_opt_algebraic.py | 9 +
 1 file changed, 9 insertions(+)

diff --git a/src/compiler/nir/nir_opt_algebraic.py 
b/src/compiler/nir/nir_opt_algebraic.py
index af3ad2c..26ddf10 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -153,6 +153,15 @@ optimizations = [
 
(('fge', ('fneg', ('b2f', a)), 0.0), ('inot', a)),
 
+   (('~flt', ('fadd', a, b), a), ('flt', b, 0.0)),
+   (('~fge', ('fadd', a, b), a), ('fge', b, 0.0)),
+   (('~feq', ('fadd', a, b), a), ('feq', b, 0.0)),
+   (('~fne', ('fadd', a, b), a), ('fne', b, 0.0)),
+
+   # Cannot remove the addition from ilt or ige due to overflow.
+   (('ieq', ('iadd', a, b), a), ('ieq', b, 0)),
+   (('ine', ('iadd', a, b), a), ('ine', b, 0)),
+
# fmin(-b2f(a), b) >= 0.0
# -b2f(a) >= 0.0 && b >= 0.0
# -b2f(a) == 0.0 && b >= 0.0-b2f can only be 0 or -1, never >0
-- 
2.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/22] mesa: Silence unused parameter warnings from TEXSTORE_PARAMS

2018-02-23 Thread Ian Romanick
From: Ian Romanick 

Reduces my build from 1717 warnings to 1547 warnings by silencing 170
instances of things like

In file included from ../../SOURCE/master/src/mesa/main/texcompress_bptc.h:30:0,
 from ../../SOURCE/master/src/mesa/main/texcompress_bptc.c:31:
../../SOURCE/master/src/mesa/main/texcompress_bptc.c: In function 
‘_mesa_texstore_bptc_rgba_unorm’:
../../SOURCE/master/src/mesa/main/texstore.h:60:14: warning: unused parameter 
‘dstFormat’ [-Wunused-parameter]
  mesa_format dstFormat, \
  ^
../../SOURCE/master/src/mesa/main/texcompress_bptc.c:1276:32: note: in 
expansion of macro ‘TEXSTORE_PARAMS’
 _mesa_texstore_bptc_rgba_unorm(TEXSTORE_PARAMS)
^~~

Signed-off-by: Ian Romanick 
---
 src/mesa/main/texcompress_etc.c | 22 +++---
 src/mesa/main/texstore.h| 18 --
 2 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/src/mesa/main/texcompress_etc.c b/src/mesa/main/texcompress_etc.c
index d465010..faeaae9 100644
--- a/src/mesa/main/texcompress_etc.c
+++ b/src/mesa/main/texcompress_etc.c
@@ -105,7 +105,7 @@ static const int etc2_modifier_tables_non_opaque[8][4] = {
 #undef UINT8_TYPE
 
 GLboolean
-_mesa_texstore_etc1_rgb8(TEXSTORE_PARAMS)
+_mesa_texstore_etc1_rgb8(UNUSED_TEXSTORE_PARAMS)
 {
/* GL_ETC1_RGB8_OES is only valid in glCompressedTexImage2D */
assert(0);
@@ -1097,7 +1097,7 @@ etc2_unpack_srgb8_punchthrough_alpha1(uint8_t *dst_row,
 /* ETC2 texture formats are valid in glCompressedTexImage2D and
  * glCompressedTexSubImage2D functions */
 GLboolean
-_mesa_texstore_etc2_rgb8(TEXSTORE_PARAMS)
+_mesa_texstore_etc2_rgb8(UNUSED_TEXSTORE_PARAMS)
 {
assert(0);
 
@@ -1105,7 +1105,7 @@ _mesa_texstore_etc2_rgb8(TEXSTORE_PARAMS)
 }
 
 GLboolean
-_mesa_texstore_etc2_srgb8(TEXSTORE_PARAMS)
+_mesa_texstore_etc2_srgb8(UNUSED_TEXSTORE_PARAMS)
 {
assert(0);
 
@@ -1113,7 +1113,7 @@ _mesa_texstore_etc2_srgb8(TEXSTORE_PARAMS)
 }
 
 GLboolean
-_mesa_texstore_etc2_rgba8_eac(TEXSTORE_PARAMS)
+_mesa_texstore_etc2_rgba8_eac(UNUSED_TEXSTORE_PARAMS)
 {
assert(0);
 
@@ -1121,7 +1121,7 @@ _mesa_texstore_etc2_rgba8_eac(TEXSTORE_PARAMS)
 }
 
 GLboolean
-_mesa_texstore_etc2_srgb8_alpha8_eac(TEXSTORE_PARAMS)
+_mesa_texstore_etc2_srgb8_alpha8_eac(UNUSED_TEXSTORE_PARAMS)
 {
assert(0);
 
@@ -1129,7 +1129,7 @@ _mesa_texstore_etc2_srgb8_alpha8_eac(TEXSTORE_PARAMS)
 }
 
 GLboolean
-_mesa_texstore_etc2_r11_eac(TEXSTORE_PARAMS)
+_mesa_texstore_etc2_r11_eac(UNUSED_TEXSTORE_PARAMS)
 {
assert(0);
 
@@ -1137,7 +1137,7 @@ _mesa_texstore_etc2_r11_eac(TEXSTORE_PARAMS)
 }
 
 GLboolean
-_mesa_texstore_etc2_signed_r11_eac(TEXSTORE_PARAMS)
+_mesa_texstore_etc2_signed_r11_eac(UNUSED_TEXSTORE_PARAMS)
 {
assert(0);
 
@@ -1145,7 +1145,7 @@ _mesa_texstore_etc2_signed_r11_eac(TEXSTORE_PARAMS)
 }
 
 GLboolean
-_mesa_texstore_etc2_rg11_eac(TEXSTORE_PARAMS)
+_mesa_texstore_etc2_rg11_eac(UNUSED_TEXSTORE_PARAMS)
 {
assert(0);
 
@@ -1153,7 +1153,7 @@ _mesa_texstore_etc2_rg11_eac(TEXSTORE_PARAMS)
 }
 
 GLboolean
-_mesa_texstore_etc2_signed_rg11_eac(TEXSTORE_PARAMS)
+_mesa_texstore_etc2_signed_rg11_eac(UNUSED_TEXSTORE_PARAMS)
 {
assert(0);
 
@@ -1161,7 +1161,7 @@ _mesa_texstore_etc2_signed_rg11_eac(TEXSTORE_PARAMS)
 }
 
 GLboolean
-_mesa_texstore_etc2_rgb8_punchthrough_alpha1(TEXSTORE_PARAMS)
+_mesa_texstore_etc2_rgb8_punchthrough_alpha1(UNUSED_TEXSTORE_PARAMS)
 {
assert(0);
 
@@ -1169,7 +1169,7 @@ 
_mesa_texstore_etc2_rgb8_punchthrough_alpha1(TEXSTORE_PARAMS)
 }
 
 GLboolean
-_mesa_texstore_etc2_srgb8_punchthrough_alpha1(TEXSTORE_PARAMS)
+_mesa_texstore_etc2_srgb8_punchthrough_alpha1(UNUSED_TEXSTORE_PARAMS)
 {
assert(0);
 
diff --git a/src/mesa/main/texstore.h b/src/mesa/main/texstore.h
index f08dc08..b8281c5 100644
--- a/src/mesa/main/texstore.h
+++ b/src/mesa/main/texstore.h
@@ -56,8 +56,8 @@
  */
 #define TEXSTORE_PARAMS \
struct gl_context *ctx, GLuint dims, \
-   GLenum baseInternalFormat, \
-   mesa_format dstFormat, \
+MAYBE_UNUSED GLenum baseInternalFormat, \
+MAYBE_UNUSED mesa_format dstFormat, \
 GLint dstRowStride, \
 GLubyte **dstSlices, \
GLint srcWidth, GLint srcHeight, GLint srcDepth, \
@@ -65,6 +65,20 @@
const GLvoid *srcAddr, \
const struct gl_pixelstore_attrib *srcPacking
 
+/* This macro must be kept in sync with TEXSTORE_PARAMS.  It is used in the
+ * few places where none of the parameters are used (i.e., the ETC texstore
+ * functions).
+ */
+#define UNUSED_TEXSTORE_PARAMS  \
+UNUSED struct gl_context *ctx, UNUSED GLuint dims,  \
+UNUSED GLenum baseInternalFormat,   \
+UNUSED mesa_format dstFormat,   \
+UNUSED GLint dstRowStride,  \
+UNUSED GLubyte **dstSlices, \
+UNUSED 

[Mesa-dev] [PATCH 22/22] nir: Don't i2b a value that is already Boolean

2018-02-23 Thread Ian Romanick
From: Ian Romanick 

A bunch of shaders have sequences like:

i2b(u2i(floatBitsToUint(intBitsToFloat(x == y ? -1 : 0

Other optimizations (and NIR's typeless nature) reduce this to

i2b(x == y)

which is silly.

Skylake
total instructions in shared programs: 14498698 -> 14497948 (<.01%)
instructions in affected programs: 74480 -> 73730 (-1.01%)
helped: 277
HURT: 0
helped stats (abs) min: 1 max: 32 x̄: 2.71 x̃: 2
helped stats (rel) min: 0.04% max: 13.79% x̄: 1.45% x̃: 0.68%
95% mean confidence interval for instructions value: -3.35 -2.06
95% mean confidence interval for instructions %-change: -1.74% -1.16%
Instructions are helped.

total cycles in shared programs: 532015500 -> 531999238 (<.01%)
cycles in affected programs: 5943878 -> 5927616 (-0.27%)
helped: 251
HURT: 74
helped stats (abs) min: 1 max: 13149 x̄: 127.89 x̃: 14
helped stats (rel) min: 0.01% max: 17.31% x̄: 1.55% x̃: 0.53%
HURT stats (abs)   min: 1 max: 4550 x̄: 214.04 x̃: 15
HURT stats (rel)   min: <.01% max: 44.43% x̄: 2.81% x̃: 0.33%
95% mean confidence interval for cycles value: -158.51 58.43
95% mean confidence interval for cycles %-change: -1.07% -0.04%
Inconclusive result (value mean confidence interval includes 0).

total loops in shared programs: 4753 -> 4735 (-0.38%)
loops in affected programs: 18 -> 0
helped: 18
HURT: 0
helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
helped stats (rel) min: 100.00% max: 100.00% x̄: 100.00% x̃: 100.00%
95% mean confidence interval for loops value: -1.00 -1.00
95% mean confidence interval for loops %-change: -100.00% -100.00%
Loops are helped.

Haswell and Broadwell had simliar results. (Broadwell shown)
total instructions in shared programs: 14791877 -> 14791127 (<.01%)
instructions in affected programs: 77326 -> 76576 (-0.97%)
helped: 278
HURT: 1
helped stats (abs) min: 1 max: 32 x̄: 2.70 x̃: 2
helped stats (rel) min: 0.04% max: 13.79% x̄: 1.42% x̃: 0.68%
HURT stats (abs)   min: 1 max: 1 x̄: 1.00 x̃: 1
HURT stats (rel)   min: 0.49% max: 0.49% x̄: 0.49% x̃: 0.49%
95% mean confidence interval for instructions value: -3.33 -2.05
95% mean confidence interval for instructions %-change: -1.70% -1.13%
Instructions are helped.

total cycles in shared programs: 558250067 -> 558252872 (<.01%)
cycles in affected programs: 5806328 -> 5809133 (0.05%)
helped: 235
HURT: 83
helped stats (abs) min: 1 max: 10630 x̄: 81.73 x̃: 16
helped stats (rel) min: 0.03% max: 18.58% x̄: 1.60% x̃: 0.51%
HURT stats (abs)   min: 1 max: 10590 x̄: 265.19 x̃: 20
HURT stats (rel)   min: <.01% max: 15.28% x̄: 1.89% x̃: 0.54%
95% mean confidence interval for cycles value: -89.87 107.51
95% mean confidence interval for cycles %-change: -1.06% -0.32%
Inconclusive result (value mean confidence interval includes 0).

total loops in shared programs: 4735 -> 4717 (-0.38%)
loops in affected programs: 18 -> 0
helped: 18
HURT: 0
helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
helped stats (rel) min: 100.00% max: 100.00% x̄: 100.00% x̃: 100.00%
95% mean confidence interval for loops value: -1.00 -1.00
95% mean confidence interval for loops %-change: -100.00% -100.00%
Loops are helped.

total fills in shared programs: 83111 -> 83110 (<.01%)
fills in affected programs: 28 -> 27 (-3.57%)
helped: 1
HURT: 0

Ivy Bridge
total instructions in shared programs: 11774173 -> 11773436 (<.01%)
instructions in affected programs: 70819 -> 70082 (-1.04%)
helped: 267
HURT: 0
helped stats (abs) min: 1 max: 48 x̄: 2.76 x̃: 2
helped stats (rel) min: 0.21% max: 19.51% x̄: 1.57% x̃: 0.63%
95% mean confidence interval for instructions value: -3.51 -2.01
95% mean confidence interval for instructions %-change: -1.94% -1.21%
Instructions are helped.

total cycles in shared programs: 257153833 -> 257148932 (<.01%)
cycles in affected programs: 585341 -> 580440 (-0.84%)
helped: 167
HURT: 100
helped stats (abs) min: 1 max: 1327 x̄: 44.89 x̃: 16
helped stats (rel) min: 0.04% max: 26.54% x̄: 2.41% x̃: 0.88%
HURT stats (abs)   min: 1 max: 200 x̄: 25.95 x̃: 16
HURT stats (rel)   min: 0.04% max: 9.81% x̄: 1.34% x̃: 0.65%
95% mean confidence interval for cycles value: -33.25 -3.46
95% mean confidence interval for cycles %-change: -1.47% -0.54%
Cycles are helped.

total loops in shared programs: 3416 -> 3398 (-0.53%)
loops in affected programs: 18 -> 0
helped: 18
HURT: 0
helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
helped stats (rel) min: 100.00% max: 100.00% x̄: 100.00% x̃: 100.00%
95% mean confidence interval for loops value: -1.00 -1.00
95% mean confidence interval for loops %-change: -100.00% -100.00%
Loops are helped.

LOST:   2
GAINED: 0

Sandy Bridge
total instructions in shared programs: 10499306 -> 10499094 (<.01%)
instructions in affected programs: 6051 -> 5839 (-3.50%)
helped: 43
HURT: 0
helped stats (abs) min: 1 max: 32 x̄: 4.93 x̃: 2
helped stats (rel) min: 0.39% max: 12.90% x̄: 4.29% x̃: 2.45%
95% mean confidence interval for instructions value: -7.66 -2.20
95% mean confidence interval for instructions %-change: -5.47% -3.12%
Instructions are helped.

total

[Mesa-dev] [PATCH 03/22] i965: Silence warnings about mixing enum and non-enum in conditional

2018-02-23 Thread Ian Romanick
From: Ian Romanick 

Reduces my build from 6451 warnings to 6301 warnings by silencing 150
instances of

../../SOURCE/master/src/intel/compiler/brw_inst.h: In function ‘brw_reg_type 
brw_inst_src1_type(const gen_device_info*, const brw_inst*)’:
../../SOURCE/master/src/intel/compiler/brw_inst.h:802:55: warning: enumeral and 
non-enumeral type in conditional expression [-Wextra]
unsigned file = __builtin_strcmp("dst", #reg) == 0 ?   \
~~~^
BRW_GENERAL_REGISTER_FILE :\

brw_inst_##reg##_reg_file(devinfo, inst);  \

../../SOURCE/master/src/intel/compiler/brw_inst.h:811:1: note: in expansion of 
macro ‘REG_TYPE’
 REG_TYPE(src1)
 ^~~~

Signed-off-by: Ian Romanick 
---
 src/intel/compiler/brw_inst.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_inst.h b/src/intel/compiler/brw_inst.h
index 0f5e4ac..a67225c 100644
--- a/src/intel/compiler/brw_inst.h
+++ b/src/intel/compiler/brw_inst.h
@@ -800,7 +800,7 @@ brw_inst_##reg##_type(const struct gen_device_info 
*devinfo,  \
   const brw_inst *inst)   \
 { \
unsigned file = __builtin_strcmp("dst", #reg) == 0 ?   \
-   BRW_GENERAL_REGISTER_FILE :\
+   (unsigned) BRW_GENERAL_REGISTER_FILE : \
brw_inst_##reg##_reg_file(devinfo, inst);  \
unsigned hw_type = brw_inst_##reg##_reg_hw_type(devinfo, inst);\
return brw_hw_type_to_reg_type(devinfo, (enum brw_reg_file)file, hw_type); \
-- 
2.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/22] i965: Silence unused parameter warnings in blorp

2018-02-23 Thread Ian Romanick
From: Ian Romanick 

Reduces my build from 2023 warnings to 1960 warnings by silencing 63
instances of things like

In file included from 
../../SOURCE/master/src/mesa/drivers/dri/i965/genX_blorp_exec.c:33:0:
../../SOURCE/master/src/intel/blorp/blorp_genX_exec.h: In function 
‘blorp_emit_cc_viewport’:
../../SOURCE/master/src/intel/blorp/blorp_genX_exec.h:500:51: warning: unused 
parameter ‘params’ [-Wunused-parameter]
const struct blorp_params *params)
   ^~
../../SOURCE/master/src/intel/blorp/blorp_genX_exec.h: In function 
‘blorp_emit_sampler_state’:
../../SOURCE/master/src/intel/blorp/blorp_genX_exec.h:524:53: warning: unused 
parameter ‘params’ [-Wunused-parameter]
  const struct blorp_params *params)
 ^~
In file included from 
../../SOURCE/master/src/mesa/drivers/dri/i965/genX_blorp_exec.c:36:0:
../../SOURCE/master/src/mesa/drivers/dri/i965/gen4_blorp_exec.h: In function 
‘blorp_emit_vs_state’:
../../SOURCE/master/src/mesa/drivers/dri/i965/gen4_blorp_exec.h:50:48: warning: 
unused parameter ‘params’ [-Wunused-parameter]
 const struct blorp_params *params)
^~
../../SOURCE/master/src/mesa/drivers/dri/i965/genX_blorp_exec.c: In function 
‘blorp_flush_range’:
../../SOURCE/master/src/mesa/drivers/dri/i965/genX_blorp_exec.c:197:39: 
warning: unused parameter ‘batch’ [-Wunused-parameter]
 blorp_flush_range(struct blorp_batch *batch, void *start, size_t size)
   ^
../../SOURCE/master/src/mesa/drivers/dri/i965/genX_blorp_exec.c:197:52: 
warning: unused parameter ‘start’ [-Wunused-parameter]
 blorp_flush_range(struct blorp_batch *batch, void *start, size_t size)
^
../../SOURCE/master/src/mesa/drivers/dri/i965/genX_blorp_exec.c:197:66: 
warning: unused parameter ‘size’ [-Wunused-parameter]
 blorp_flush_range(struct blorp_batch *batch, void *start, size_t size)
  ^~~~

Signed-off-by: Ian Romanick 
---
 src/intel/blorp/blorp_genX_exec.h   | 12 +---
 src/mesa/drivers/dri/i965/gen4_blorp_exec.h | 14 ++
 src/mesa/drivers/dri/i965/genX_blorp_exec.c |  6 --
 3 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/src/intel/blorp/blorp_genX_exec.h 
b/src/intel/blorp/blorp_genX_exec.h
index 737720a..ac70bb9 100644
--- a/src/intel/blorp/blorp_genX_exec.h
+++ b/src/intel/blorp/blorp_genX_exec.h
@@ -496,8 +496,7 @@ blorp_emit_vertex_elements(struct blorp_batch *batch,
 
 /* 3DSTATE_VIEWPORT_STATE_POINTERS */
 static uint32_t
-blorp_emit_cc_viewport(struct blorp_batch *batch,
-   const struct blorp_params *params)
+blorp_emit_cc_viewport(struct blorp_batch *batch)
 {
uint32_t cc_vp_offset;
blorp_emit_dynamic(batch, GENX(CC_VIEWPORT), vp, 32, &cc_vp_offset) {
@@ -520,8 +519,7 @@ blorp_emit_cc_viewport(struct blorp_batch *batch,
 }
 
 static uint32_t
-blorp_emit_sampler_state(struct blorp_batch *batch,
- const struct blorp_params *params)
+blorp_emit_sampler_state(struct blorp_batch *batch)
 {
uint32_t offset;
blorp_emit_dynamic(batch, GENX(SAMPLER_STATE), sampler, 32, &offset) {
@@ -980,7 +978,7 @@ blorp_emit_blend_state(struct blorp_batch *batch,
 
 static uint32_t
 blorp_emit_color_calc_state(struct blorp_batch *batch,
-const struct blorp_params *params)
+MAYBE_UNUSED const struct blorp_params *params)
 {
uint32_t offset;
blorp_emit_dynamic(batch, GENX(COLOR_CALC_STATE), cc, 64, &offset) {
@@ -1171,7 +1169,7 @@ blorp_emit_pipeline(struct blorp_batch *batch,
blorp_emit(batch, GENX(3DSTATE_CONSTANT_PS), ps);
 
if (params->src.enabled)
-  blorp_emit_sampler_state(batch, params);
+  blorp_emit_sampler_state(batch);
 
blorp_emit_3dstate_multisample(batch, params);
 
@@ -1205,7 +1203,7 @@ blorp_emit_pipeline(struct blorp_batch *batch,
blorp_emit_sf_config(batch, params);
blorp_emit_ps_config(batch, params);
 
-   blorp_emit_cc_viewport(batch, params);
+   blorp_emit_cc_viewport(batch);
 }
 
 / This is the end of the pipeline setup code /
diff --git a/src/mesa/drivers/dri/i965/gen4_blorp_exec.h 
b/src/mesa/drivers/dri/i965/gen4_blorp_exec.h
index 7e5199c..e59bc9f 100644
--- a/src/mesa/drivers/dri/i965/gen4_blorp_exec.h
+++ b/src/mesa/drivers/dri/i965/gen4_blorp_exec.h
@@ -46,8 +46,7 @@ instruction_state_address(struct blorp_batch *batch, uint32_t 
offset)
 }
 
 static struct blorp_address
-blorp_emit_vs_state(struct blorp_batch *batch,
-const struct blorp_params *params)
+blorp_emit_vs_state(struct blorp_batch *batch)
 {
assert(batch->blorp->driver_ctx == batch->driver_batch);
struct brw_context *brw = batch->driver_b

[Mesa-dev] [PATCH 15/22] nir: Use De Morgan's Law on logic compounded comparisons

2018-02-23 Thread Ian Romanick
From: Ian Romanick 

The replacement of the comparison operators must happen during this
step.  If it does not, the next pass of nir_opt_algebraic will reapply
De Morgan's Law in the "opposite direction" before performing dead code
elimination.  The resulting infinite loop will eventually get OOM
killed.

Haswell, Broadwell, and Skylake had similar results. (Broadwell shown)
total instructions in shared programs: 14808185 -> 14808036 (<.01%)
instructions in affected programs: 13758 -> 13609 (-1.08%)
helped: 39
HURT: 0
helped stats (abs) min: 1 max: 10 x̄: 3.82 x̃: 3
helped stats (rel) min: 0.44% max: 1.55% x̄: 0.98% x̃: 1.01%
95% mean confidence interval for instructions value: -4.67 -2.97
95% mean confidence interval for instructions %-change: -1.09% -0.88%
Instructions are helped.

total cycles in shared programs: 559438333 -> 559435832 (<.01%)
cycles in affected programs: 199160 -> 196659 (-1.26%)
helped: 42
HURT: 3
helped stats (abs) min: 2 max: 184 x̄: 61.50 x̃: 51
helped stats (rel) min: 0.02% max: 6.94% x̄: 1.41% x̃: 1.40%
HURT stats (abs)   min: 2 max: 40 x̄: 27.33 x̃: 40
HURT stats (rel)   min: 0.05% max: 0.74% x̄: 0.51% x̃: 0.74%
95% mean confidence interval for cycles value: -71.47 -39.69
95% mean confidence interval for cycles %-change: -1.64% -0.93%
Cycles are helped.

Sandy Bridge and Ivy Bridge had similar results. (Ivy Bridge shown)
total instructions in shared programs: 11811776 -> 11811553 (<.01%)
instructions in affected programs: 15201 -> 14978 (-1.47%)
helped: 39
HURT: 0
helped stats (abs) min: 1 max: 20 x̄: 5.72 x̃: 6
helped stats (rel) min: 0.44% max: 2.53% x̄: 1.30% x̃: 1.26%
95% mean confidence interval for instructions value: -7.21 -4.23
95% mean confidence interval for instructions %-change: -1.48% -1.12%
Instructions are helped.

total cycles in shared programs: 257617270 -> 257614589 (<.01%)
cycles in affected programs: 212107 -> 209426 (-1.26%)
helped: 45
HURT: 0
helped stats (abs) min: 2 max: 180 x̄: 59.58 x̃: 54
helped stats (rel) min: 0.02% max: 6.02% x̄: 1.30% x̃: 1.32%
95% mean confidence interval for cycles value: -74.02 -45.14
95% mean confidence interval for cycles %-change: -1.59% -1.01%
Cycles are helped.

Iron Lake
total instructions in shared programs: 7886648 -> 7886515 (<.01%)
instructions in affected programs: 14106 -> 13973 (-0.94%)
helped: 29
HURT: 0
helped stats (abs) min: 1 max: 10 x̄: 4.59 x̃: 4
helped stats (rel) min: 0.35% max: 1.83% x̄: 0.90% x̃: 0.81%
95% mean confidence interval for instructions value: -5.65 -3.52
95% mean confidence interval for instructions %-change: -1.03% -0.76%
Instructions are helped.

total cycles in shared programs: 178100812 -> 178100396 (<.01%)
cycles in affected programs: 67970 -> 67554 (-0.61%)
helped: 29
HURT: 0
helped stats (abs) min: 2 max: 40 x̄: 14.34 x̃: 12
helped stats (rel) min: 0.15% max: 1.69% x̄: 0.58% x̃: 0.54%
95% mean confidence interval for cycles value: -18.30 -10.39
95% mean confidence interval for cycles %-change: -0.71% -0.45%
Cycles are helped.

GM45
total instructions in shared programs: 4857939 -> 4857872 (<.01%)
instructions in affected programs: 7426 -> 7359 (-0.90%)
helped: 15
HURT: 0
helped stats (abs) min: 1 max: 10 x̄: 4.47 x̃: 4
helped stats (rel) min: 0.33% max: 1.80% x̄: 0.87% x̃: 0.77%
95% mean confidence interval for instructions value: -6.06 -2.87
95% mean confidence interval for instructions %-change: -1.06% -0.67%
Instructions are helped.

total cycles in shared programs: 122167930 -> 122167654 (<.01%)
cycles in affected programs: 43118 -> 42842 (-0.64%)
helped: 15
HURT: 0
helped stats (abs) min: 4 max: 40 x̄: 18.40 x̃: 16
helped stats (rel) min: 0.15% max: 1.69% x̄: 0.62% x̃: 0.54%
95% mean confidence interval for cycles value: -25.03 -11.77
95% mean confidence interval for cycles %-change: -0.82% -0.41%
Cycles are helped.

Signed-off-by: Ian Romanick 
---
 src/compiler/nir/nir_opt_algebraic.py | 9 +
 1 file changed, 9 insertions(+)

diff --git a/src/compiler/nir/nir_opt_algebraic.py 
b/src/compiler/nir/nir_opt_algebraic.py
index 314b064..af3ad2c 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -24,6 +24,7 @@
 #Jason Ekstrand (ja...@jlekstrand.net)
 
 import nir_algebraic
+import itertools
 
 # Convenience variables
 a = 'a'
@@ -545,6 +546,14 @@ optimizations = [
  'options->lower_unpack_snorm_4x8'),
 ]
 
+invert = {'feq': 'fne', 'fne': 'feq', 'fge': 'flt', 'flt': 'fge' }
+
+for left, right in list(itertools.combinations(invert.keys(), 2)) + 
zip(invert.keys(), invert.keys()):
+   optimizations.append((('inot', ('ior(is_used_once)', (left, a, b), (right, 
c, d))),
+ ('iand', (invert[left], a, b), (invert[right], c, 
d
+   optimizations.append((('inot', ('iand(is_used_once)', (left, a, b), (right, 
c, d))),
+ ('ior', (invert[left], a, b), (invert[right], c, d
+
 def fexp2i(exp, bits):
# We assume that exp is already in the right range.
if bits == 32:
-- 
2.9.5


[Mesa-dev] [PATCH 11/22] nir: Recognize some more open-coded fmin / fmax

2018-02-23 Thread Ian Romanick
From: Ian Romanick 

shader-db results:

Haswell, Broadwell, and Skylake had similar results. (Skylake shown)
total instructions in shared programs: 14514817 -> 14514808 (<.01%)
instructions in affected programs: 229 -> 220 (-3.93%)
helped: 3
HURT: 0
helped stats (abs) min: 1 max: 4 x̄: 3.00 x̃: 4
helped stats (rel) min: 2.86% max: 4.12% x̄: 3.70% x̃: 4.12%

total cycles in shared programs: 533145211 -> 533144939 (<.01%)
cycles in affected programs: 37268 -> 36996 (-0.73%)
helped: 8
HURT: 0
helped stats (abs) min: 2 max: 134 x̄: 34.00 x̃: 2
helped stats (rel) min: 0.02% max: 14.22% x̄: 3.53% x̃: 0.05%

Sandy Bridge and Ivy Bridge had similar results. (Ivy Bridge shown)
total cycles in shared programs: 257618409 -> 257618403 (<.01%)
cycles in affected programs: 12582 -> 12576 (-0.05%)
helped: 3
HURT: 0
helped stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2
helped stats (rel) min: 0.05% max: 0.05% x̄: 0.05% x̃: 0.05%

No changes on Iron Lake or GM45.

Signed-off-by: Ian Romanick 
---
 src/compiler/nir/nir_opt_algebraic.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/compiler/nir/nir_opt_algebraic.py 
b/src/compiler/nir/nir_opt_algebraic.py
index d40d59b..f5f9e94 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -170,6 +170,8 @@ optimizations = [
(('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
(('bcsel', ('flt', b, a), b, a), ('fmin', a, b)),
(('bcsel', ('flt', a, b), b, a), ('fmax', a, b)),
+   (('bcsel', ('fge', b, a), a, b), ('fmin', a, b)),
+   (('bcsel', ('fge', a, b), a, b), ('fmax', a, b)),
(('bcsel', ('inot', a), b, c), ('bcsel', a, c, b)),
(('bcsel', a, ('bcsel', a, b, c), d), ('bcsel', a, b, d)),
(('bcsel', a, True, 'b@bool'), ('ior', a, b)),
-- 
2.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/22] i965: Silence unused parameter warnings in generated OA code

2018-02-23 Thread Ian Romanick
From: Ian Romanick 

Reduces my build from 6301 warnings to 2075 warnings by silencing 4226
instances of things like

src/mesa/drivers/dri/i965/i965@sta/brw_oa_hsw.c: In function 
‘hsw__render_basic__gpu_core_clocks__read’:
src/mesa/drivers/dri/i965/i965@sta/brw_oa_hsw.c:41:62: warning: unused 
parameter ‘brw’ [-Wunused-parameter]
 hsw__render_basic__gpu_core_clocks__read(struct brw_context *brw,
  ^~~

Signed-off-by: Ian Romanick 
---
 src/mesa/drivers/dri/i965/brw_oa.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_oa.py 
b/src/mesa/drivers/dri/i965/brw_oa.py
index 576ea66..7931c82 100644
--- a/src/mesa/drivers/dri/i965/brw_oa.py
+++ b/src/mesa/drivers/dri/i965/brw_oa.py
@@ -260,7 +260,7 @@ def output_counter_read(set, counter, counter_vars):
 
 c("static " + ret_type)
 read_sym = "{0}__{1}__{2}__read".format(set.get('chipset').lower(), 
set.get('underscore_name'), counter.get('underscore_name'))
-c(read_sym + "(struct brw_context *brw,\n")
+c(read_sym + "(MAYBE_UNUSED struct brw_context *brw,\n")
 c_indent(len(read_sym) + 1)
 c("const struct brw_perf_query_info *query,\n")
 c("uint64_t *accumulator)\n")
-- 
2.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/22] intel/compiler: Silence unused parameter warnings in release builds

2018-02-23 Thread Ian Romanick
From: Ian Romanick 

Reduces my build from 7005 warnings to 6451 warnings by silencing 554
instances of

In file included from ../../SOURCE/master/src/intel/compiler/brw_disasm.c:28:0:
../../SOURCE/master/src/intel/compiler/brw_inst.h: In function 
‘brw_inst_3src_a1_src0_imm’:
../../SOURCE/master/src/intel/compiler/brw_inst.h:346:57: warning: unused 
parameter ‘devinfo’ [-Wunused-parameter]
 brw_inst_3src_a1_src0_imm(const struct gen_device_info *devinfo,
 ^~~
../../SOURCE/master/src/intel/compiler/brw_inst.h: In function 
‘brw_inst_3src_a1_src2_imm’:
../../SOURCE/master/src/intel/compiler/brw_inst.h:354:57: warning: unused 
parameter ‘devinfo’ [-Wunused-parameter]
 brw_inst_3src_a1_src2_imm(const struct gen_device_info *devinfo,
 ^~~
../../SOURCE/master/src/intel/compiler/brw_inst.h: In function 
‘brw_inst_set_3src_a1_src0_imm’:
../../SOURCE/master/src/intel/compiler/brw_inst.h:362:61: warning: unused 
parameter ‘devinfo’ [-Wunused-parameter]
 brw_inst_set_3src_a1_src0_imm(const struct gen_device_info *devinfo,
 ^~~
../../SOURCE/master/src/intel/compiler/brw_inst.h: In function 
‘brw_inst_set_3src_a1_src2_imm’:
../../SOURCE/master/src/intel/compiler/brw_inst.h:370:61: warning: unused 
parameter ‘devinfo’ [-Wunused-parameter]
 brw_inst_set_3src_a1_src2_imm(const struct gen_device_info *devinfo,
 ^~~
../../SOURCE/master/src/intel/compiler/brw_inst.h: In function 
‘brw_inst_imm_uq’:
../../SOURCE/master/src/intel/compiler/brw_inst.h:703:47: warning: unused 
parameter ‘devinfo’ [-Wunused-parameter]
 brw_inst_imm_uq(const struct gen_device_info *devinfo, const brw_inst *insn)
   ^~~
In file included from ../../SOURCE/master/src/intel/compiler/brw_shader.h:29:0,
 from ../../SOURCE/master/src/intel/compiler/brw_disasm.c:29:
../../SOURCE/master/src/intel/compiler/brw_compiler.h: In function 
‘brw_stage_has_packed_dispatch’:
../../SOURCE/master/src/intel/compiler/brw_compiler.h:1277:61: warning: unused 
parameter ‘devinfo’ [-Wunused-parameter]
 brw_stage_has_packed_dispatch(const struct gen_device_info *devinfo,
 ^~~
../../SOURCE/master/src/intel/compiler/brw_disasm.c: In function ‘src_ia1’:
../../SOURCE/master/src/intel/compiler/brw_disasm.c:849:18: warning: unused 
parameter ‘_reg_file’ [-Wunused-parameter]
 unsigned _reg_file,
  ^

Signed-off-by: Ian Romanick 
---
 src/intel/compiler/brw_compiler.h |  2 +-
 src/intel/compiler/brw_inst.h | 11 ++-
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/intel/compiler/brw_compiler.h 
b/src/intel/compiler/brw_compiler.h
index b1086bb..fb68a34 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -1274,7 +1274,7 @@ encode_slm_size(unsigned gen, uint32_t bytes)
  * '2^n - 1' for some n.
  */
 static inline bool
-brw_stage_has_packed_dispatch(const struct gen_device_info *devinfo,
+brw_stage_has_packed_dispatch(MAYBE_UNUSED const struct gen_device_info 
*devinfo,
   gl_shader_stage stage,
   const struct brw_stage_prog_data *prog_data)
 {
diff --git a/src/intel/compiler/brw_inst.h b/src/intel/compiler/brw_inst.h
index 99e637e..0f5e4ac 100644
--- a/src/intel/compiler/brw_inst.h
+++ b/src/intel/compiler/brw_inst.h
@@ -343,7 +343,7 @@ REG_TYPE(src2)
  *  @{
  */
 static inline uint16_t
-brw_inst_3src_a1_src0_imm(const struct gen_device_info *devinfo,
+brw_inst_3src_a1_src0_imm(MAYBE_UNUSED const struct gen_device_info *devinfo,
   const brw_inst *insn)
 {
assert(devinfo->gen >= 10);
@@ -351,7 +351,7 @@ brw_inst_3src_a1_src0_imm(const struct gen_device_info 
*devinfo,
 }
 
 static inline uint16_t
-brw_inst_3src_a1_src2_imm(const struct gen_device_info *devinfo,
+brw_inst_3src_a1_src2_imm(MAYBE_UNUSED const struct gen_device_info *devinfo,
   const brw_inst *insn)
 {
assert(devinfo->gen >= 10);
@@ -359,7 +359,7 @@ brw_inst_3src_a1_src2_imm(const struct gen_device_info 
*devinfo,
 }
 
 static inline void
-brw_inst_set_3src_a1_src0_imm(const struct gen_device_info *devinfo,
+brw_inst_set_3src_a1_src0_imm(MAYBE_UNUSED const struct gen_device_info 
*devinfo,
   brw_inst *insn, uint16_t value)
 {
assert(devinfo->gen >= 10);
@@ -367,7 +367,7 @@ brw_inst_set_3src_a1_src0_imm(const struct gen_device_info 
*devinfo,
 }
 
 static inline void
-brw_inst_set_3src_a1_src2_imm(const struct gen_device_info *devinfo,
+brw_inst_set_3src_a1_src2_imm(MAYBE_UNUSED const struct gen_device_info 
*devinfo,
   brw_inst *insn, uint16_t value)
 {
assert(devinfo->gen >=

[Mesa-dev] [PATCH 01/22] i965: Silence unused parameter warnings

2018-02-23 Thread Ian Romanick
From: Ian Romanick 

Reduces my build from 7119 warnings to 7005 warnings by silencing 114
instances of

In file included from 
../../SOURCE/master/src/mesa/drivers/dri/i965/brw_context.h:46:0,
 from 
../../SOURCE/master/src/mesa/drivers/dri/i965/intel_pixel_read.c:38:
../../SOURCE/master/src/mesa/drivers/dri/i965/brw_bufmgr.h: In function 
‘brw_bo_unmap’:
../../SOURCE/master/src/mesa/drivers/dri/i965/brw_bufmgr.h:258:47: warning: 
unused parameter ‘bo’ [-Wunused-parameter]
 static inline int brw_bo_unmap(struct brw_bo *bo) { return 0; }
   ^~

Signed-off-by: Ian Romanick 
---
 src/mesa/drivers/dri/i965/brw_bufmgr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h 
b/src/mesa/drivers/dri/i965/brw_bufmgr.h
index 005ff19..851f466 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
@@ -252,7 +252,7 @@ MUST_CHECK void *brw_bo_map(struct brw_context *brw, struct 
brw_bo *bo, unsigned
  * Reduces the refcount on the userspace mapping of the buffer
  * object.
  */
-static inline int brw_bo_unmap(struct brw_bo *bo) { return 0; }
+static inline int brw_bo_unmap(UNUSED struct brw_bo *bo) { return 0; }
 
 /** Write data into an object. */
 int brw_bo_subdata(struct brw_bo *bo, uint64_t offset,
-- 
2.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/17] intel/compiler/fs: Return multiple_instructions_emitted from generate_linterp

2018-02-23 Thread Kenneth Graunke
On Tuesday, February 20, 2018 9:15:14 PM PST Matt Turner wrote:
> If multiple instructions are emitted, special handling of things like
> conditional mod, saturate, and NoDDClr/NoDDChk need to be performed.
> 
> I noticed that conditional mods were misapplied when adding support for
> Gen11 (in the previous patch). The next patch fixes the same bug in the
> Gen4 LINE/MAC case, though I was not able to trigger it.
> ---
>  src/intel/compiler/brw_fs.h |  2 +-
>  src/intel/compiler/brw_fs_generator.cpp | 12 +---
>  2 files changed, 10 insertions(+), 4 deletions(-)

Ugh...I noticed a couple things:

1. Nothing has set multiple_instructions_emitted since May 2016
   (commit 95272f5c7e6914fe8a85a4e37e07f1e8e3634446), which means
   that block of code has been dead for a long time.

2. Nothing in the FS backend sets inst->no_dd_* either.  It looks like
   it was used for general interpolation until July 2016 (commit
   1eef0b73aa323d94d5a080cd1efa81ccacdbd0d2) and for the unlit centroid
   workaround until August 2016 (commit 875341c69b99dea7942a68c9060aa3).
   So, that's also basically dead.

3. You mention saturate, but we don't handle that specially for multiple
   instructions.

4. You didn't handle conditional modifiers in generate_linterp, so...
   if conditional LINTERP is a thing, it's going to be broken.  That
   said, I'm pretty sure it isn't a thing.

Maybe just drop this patch, and instead delete the
multiple_instructions_emitted stuff entirely?  Maybe move no_dd_* back
to the vec4 backend as well?


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Patch submit question

2018-02-23 Thread Jordan Justen
On 2018-02-23 13:20:21, Kyriazis, George wrote:
> Hello mesa-dev,
> 
> I have a quick patch submission question:
> 
> I have a patch that I’d like to also nominate to mesa-stable
> (specifically 18.0.1 or 18.0.2), however the same patch is different
> between master and stable branches, due to neighboring code having
> changed in the meantime.
> 
> What’s the best process of submitting / pushing this sort of change?

719f2c934030f74ce0a4892233f494f168852698 was an 18.0 specific patch. I
Cc'd 18.0 in the commit message:

Cc: "18.0" 

And, I used:

git format-patch --subject-prefix=PATCH\ 18.0

You might also add "Fixes" to the commit message if applicable, even
though in this case it wouldn't be used to trigger cherry-picking to
the stable branch.

-Jordan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/7] vulkan: Add KHR_display extension to anv and radv using DRM

2018-02-23 Thread Keith Packard
Jason Ekstrand  writes:

> I think I like option 1 (KEITHP_kms_display).  If the client knows the
> difference between render and primary for 2, then they are most likely
> already opening the master node themselves or at least have access to
> the FD.

Ok, I'll work on cleaning up that extension and renaming it. I have no
idea how to get new words into the Vulkan spec, but it would be good to
have that done too.

I guess the question is whether I should bother to leave the current
try-to-open-primary kludge in place. In good news, under X, both radv
and anv "fail" to use the primary device as another master is already
running, so at least we aren't running with a primary FD open?

> Sorry, I'm just not feeling all that opinionated about this at the moment.
> :-)

No more than I; either way is fine with me, if you're happy to use
something like the existing code I've got, that's even nicer.

> Clearly, we need systemd-edidd. :-)

A library would be nice; we have the edid data everywhere, we just don't
have it in a useful form except inside the X server and kernel.

> Yes, *if* it has a preferred resolution, we should return that one.  If it
> doesn't, we should walk the list and return the largest instead of just
> defaulting to 1024x768.  At least that's what the spec seems to say to
> me.

Oh. I totally missed that part. Yeah, that's just wrong. I've just
pushed a patch that finds the largest mode when there isn't a preferred
one. Oddly, I have no devices here which don't specify a preferred mode,
so it will be somewhat difficult to test...

> Yup.  Let's just drop INHERIT and only advertise OPAQUE.

Done.

I've updated my drm-lease branch with all of these changes merged into
the existing patches (and so noted), plus the new patch described above
which looks for the largest mode when no preferred mode is specified.

Thanks again for all of your careful review; while we haven't changed
any significant semantics, we have found a bunch of outright bugs in the
code.

-- 
-keith


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH crucible 0/3] amd: gcn_shader and shader_trinary_minmax tests

2018-02-23 Thread Daniel Schürmann
These Patches contain crucible tests for the two extensions:
VK_AMD_gcn_shader
VK_AMD_shader_trinary_minmax

The tests cover basic functionality, including some boundaries and constant 
folding.
amd_common contains functions to run the tests.

Bas Nieuwenhuizen (2):
  amd: common functions for amd extension tests
  amd: shader_trinary_minmax extension tests

Daniel Schürmann (1):
  amd: gcn_shader extension tests

 Makefile.am|   5 +
 src/tests/func/amd/amd_common.c| 115 ++
 src/tests/func/amd/amd_common.h|  53 +++
 src/tests/func/amd/gcn_shader.c| 252 +
 src/tests/func/amd/shader_trinary_minmax.c | 576 +
 5 files changed, 1001 insertions(+)
 create mode 100644 src/tests/func/amd/amd_common.c
 create mode 100644 src/tests/func/amd/amd_common.h
 create mode 100644 src/tests/func/amd/gcn_shader.c
 create mode 100644 src/tests/func/amd/shader_trinary_minmax.c

-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH crucible 1/3] amd: common functions for amd extension tests

2018-02-23 Thread Daniel Schürmann
From: Bas Nieuwenhuizen 

Co-authored-by: Daniel Schürmann 
Signed-off-by: Daniel Schürmann 
---
 Makefile.am |   3 ++
 src/tests/func/amd/amd_common.c | 115 
 src/tests/func/amd/amd_common.h |  53 ++
 3 files changed, 171 insertions(+)
 create mode 100644 src/tests/func/amd/amd_common.c
 create mode 100644 src/tests/func/amd/amd_common.h

diff --git a/Makefile.am b/Makefile.am
index 6c144dd..515bedf 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -75,6 +75,7 @@ bin_crucible_SOURCES = \
src/tests/example/basic.c \
src/tests/example/images.c \
src/tests/example/messages.c \
+   src/tests/func/amd/amd_common.c \
src/tests/func/cmd-buffer/secondary.c \
src/tests/func/copy/copy-buffer.c \
src/tests/func/4-vertex-buffers.c \
@@ -139,6 +140,8 @@ BUILT_SOURCES = \
src/tests/func/miptree/miptree-spirv.h \
src/tests/func/miptree/miptree_gen.c \
src/tests/func/push-constants/basic-spirv.h \
+   src/tests/func/amd/gcn_shader-spirv.h \
+   src/tests/func/amd/shader_trinary_minmax-spirv.h \
src/tests/func/shader/fragcoord-spirv.h \
src/tests/func/shader/pack_unpack-spirv.h \
src/tests/func/shader_ballot/ext_shader_ballot-spirv.h \
diff --git a/src/tests/func/amd/amd_common.c b/src/tests/func/amd/amd_common.c
new file mode 100644
index 000..aea9502
--- /dev/null
+++ b/src/tests/func/amd/amd_common.c
@@ -0,0 +1,115 @@
+// Copyright 2018 Google LLC
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice (including the next
+// paragraph) shall be included in all copies or substantial portions of the
+// Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+
+#include "tapi/t.h"
+#include "amd_common.h"
+
+VkDeviceMemory
+common_init(VkShaderModule cs, const uint32_t ssbo_size)
+{
+VkDescriptorSetLayout set_layout;
+
+set_layout = qoCreateDescriptorSetLayout(t_device,
+.bindingCount = 1,
+.pBindings = (VkDescriptorSetLayoutBinding[]) {
+{
+.binding = 0,
+.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+.descriptorCount = 1,
+.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+.pImmutableSamplers = NULL,
+},
+});
+
+VkPipelineLayout pipeline_layout = qoCreatePipelineLayout(t_device,
+.setLayoutCount = 1,
+.pSetLayouts = &set_layout,
+.pushConstantRangeCount = 0);
+
+VkPipeline pipeline;
+vkCreateComputePipelines(t_device, t_pipeline_cache, 1,
+&(VkComputePipelineCreateInfo) {
+.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+.pNext = NULL,
+.stage = {
+.stage = VK_SHADER_STAGE_COMPUTE_BIT,
+.module = cs,
+.pName = "main",
+},
+.flags = 0,
+.layout = pipeline_layout
+}, NULL, &pipeline);
+
+VkDescriptorSet set =
+qoAllocateDescriptorSet(t_device,
+.descriptorPool = t_descriptor_pool,
+.pSetLayouts = &set_layout);
+
+VkBuffer buffer_out = qoCreateBuffer(t_device, .size = ssbo_size);
+VkDeviceMemory mem_out = qoAllocBufferMemory(t_device, buffer_out,
+.properties = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
+qoBindBufferMemory(t_device, buffer_out, mem_out, 0);
+
+vkUpdateDescriptorSets(t_device,
+/*writeCount*/ 1,
+(VkWriteDescriptorSet[]) {
+{
+.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+.dstSet = set,
+.dstBinding = 0,
+.dstArrayElement = 0,
+.descriptorCount = 1,
+.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+.pBufferInfo = &(VkDescriptorBufferInfo) {
+.buffer = buffer_out,
+.offset = 0,
+

[Mesa-dev] [PATCH crucible 2/3] amd: gcn_shader extension tests

2018-02-23 Thread Daniel Schürmann
Signed-off-by: Daniel Schürmann 
---
 Makefile.am |   1 +
 src/tests/func/amd/gcn_shader.c | 252 
 2 files changed, 253 insertions(+)
 create mode 100644 src/tests/func/amd/gcn_shader.c

diff --git a/Makefile.am b/Makefile.am
index 515bedf..2c65424 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -76,6 +76,7 @@ bin_crucible_SOURCES = \
src/tests/example/images.c \
src/tests/example/messages.c \
src/tests/func/amd/amd_common.c \
+   src/tests/func/amd/gcn_shader.c \
src/tests/func/cmd-buffer/secondary.c \
src/tests/func/copy/copy-buffer.c \
src/tests/func/4-vertex-buffers.c \
diff --git a/src/tests/func/amd/gcn_shader.c b/src/tests/func/amd/gcn_shader.c
new file mode 100644
index 000..7d63d7a
--- /dev/null
+++ b/src/tests/func/amd/gcn_shader.c
@@ -0,0 +1,252 @@
+// Copyright 2018 Valve Corporation
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice (including the next
+// paragraph) shall be included in all copies or substantial portions of the
+// Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+
+#include "util/simple_pipeline.h"
+#include "tapi/t.h"
+#include "amd_common.h"
+
+#include "gcn_shader-spirv.h"
+
+static void
+time(void)
+{
+t_require_ext("VK_AMD_gcn_shader");
+VkShaderModule fs = qoCreateShaderModuleGLSL(
+t_device, FRAGMENT,
+QO_EXTENSION GL_ARB_gpu_shader_int64 : enable
+QO_EXTENSION GL_AMD_gcn_shader : enable
+layout(location = 0) out vec4 f_color;
+layout(push_constant) uniform push_consts {
+int nothing;
+};
+/* We cannot really test the time output.
+ * Thus, we only check for monotony. */
+void main() {
+float res = 0.0;
+uint64_t time1 = timeAMD();
+if (time1 == 0) res = 1.0;
+uint64_t time2 = timeAMD();
+if (time2 < time1) res = 1.0;
+
+f_color = vec4(res, 1.0 - res, 0.0, 1.0);
+}
+);
+struct {
+uint32_t nothing;
+} push;
+run_simple_pipeline(fs, &push, sizeof(push));
+}
+test_define {
+.name = "func.amd.gcn-shader.time",
+.start = time,
+.image_filename = "32x32-green.ref.png",
+};
+
+static void
+cubeFaceCoordTC(void)
+{
+t_require_ext("VK_AMD_gcn_shader");
+VkShaderModule cs = qoCreateShaderModuleGLSL(
+t_device, COMPUTE,
+QO_EXTENSION GL_AMD_gcn_shader : enable
+layout(set = 0, binding = 0, std430) buffer Storage {
+vec4 v[];
+} ssbo;
+
+layout (local_size_x = 1) in;
+
+void main()
+{
+uint idx = gl_GlobalInvocationID.x;
+ssbo.v[idx].w = cubeFaceCoordAMD(ssbo.v[idx].xyz).x;
+}
+);
+
+const struct {
+float params[3];
+float result;
+} cases[] = {
+{{1.0, -0.1, 0.8,},  0.1},  // -y
+{{-1.0, 0.5, 0.3,}, -0.5},  // -y
+{{-0.2, 1.0, 0.7,},  0.7},  // +z
+{{0.9, -1.0, 0.4,}, -0.4},  // -z
+{{-0.1, 0.0, 1.0,}, -0.0},  // -y
+{{0.3, 0.6, -1.0,}, -0.6},  // -y
+/* corner cases */
+{{1.0, 1.0, -0.0,}, -0.0},  // +z -> posY
+{{1.0, -1.0, 0.0,}, -0.0},  // -z -> negY
+{{0.0, 0.0,  0.0,}, -0.0},  // -y -> posZ
+{{1.0, 1.0, -1.0,}, -1.0},  // -y -> negZ
+};
+
+RUN_CASES(float, "%f");
+}
+test_define {
+.name = "func.amd.gcn-shader.cube-face-coord-tc",
+.start = cubeFaceCoordTC,
+.no_image = true,
+};
+
+static void
+cubeFaceCoordSC(void)
+{
+t_require_ext("VK_AMD_gcn_shader");
+VkShaderModule cs = qoCreateShaderModuleGLSL(
+t_device, COMPUTE,
+QO_EXTENSION GL_AMD_gcn_shader : enable
+layout(set = 0, binding = 0, std430) buffer Storage {
+vec4 v[];
+} ssbo;
+
+layout (local_size_x = 1) in;
+
+void main()
+{
+uint idx = gl_GlobalInvocationID.x;
+ssbo.v[idx].w = cubeFaceCoordAMD(ssbo.v[idx].xyz).y;
+}
+);
+
+const stru

[Mesa-dev] [PATCH crucible 3/3] amd: shader_trinary_minmax extension tests

2018-02-23 Thread Daniel Schürmann
From: Bas Nieuwenhuizen 

Co-authored-by: Daniel Schürmann 
Signed-off-by: Daniel Schürmann 
---
 Makefile.am|   1 +
 src/tests/func/amd/shader_trinary_minmax.c | 576 +
 2 files changed, 577 insertions(+)
 create mode 100644 src/tests/func/amd/shader_trinary_minmax.c

diff --git a/Makefile.am b/Makefile.am
index 2c65424..7c4a52b 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -77,6 +77,7 @@ bin_crucible_SOURCES = \
src/tests/example/messages.c \
src/tests/func/amd/amd_common.c \
src/tests/func/amd/gcn_shader.c \
+   src/tests/func/amd/shader_trinary_minmax.c \
src/tests/func/cmd-buffer/secondary.c \
src/tests/func/copy/copy-buffer.c \
src/tests/func/4-vertex-buffers.c \
diff --git a/src/tests/func/amd/shader_trinary_minmax.c 
b/src/tests/func/amd/shader_trinary_minmax.c
new file mode 100644
index 000..6e5fe4a
--- /dev/null
+++ b/src/tests/func/amd/shader_trinary_minmax.c
@@ -0,0 +1,576 @@
+// Copyright 2018 Google LLC
+// Copyright 2018 Valve Corporation
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice (including the next
+// paragraph) shall be included in all copies or substantial portions of the
+// Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+
+
+#include "util/simple_pipeline.h"
+#include "tapi/t.h"
+#include 
+#include "amd_common.h"
+
+#include "shader_trinary_minmax-spirv.h"
+
+static void
+umin3_compute(void)
+{
+t_require_ext("VK_AMD_shader_trinary_minmax");
+
+VkShaderModule cs = qoCreateShaderModuleGLSL(
+t_device, COMPUTE,
+QO_EXTENSION GL_AMD_shader_trinary_minmax: enable
+layout(set = 0, binding = 0, std430) buffer Storage {
+uvec4 v[];
+} ssbo;
+
+layout (local_size_x = 1) in;
+
+void main()
+{
+ssbo.v[gl_GlobalInvocationID.x].w = 
min3(ssbo.v[gl_GlobalInvocationID.x].x,
+ 
ssbo.v[gl_GlobalInvocationID.x].y,
+ 
ssbo.v[gl_GlobalInvocationID.x].z);
+}
+);
+
+const struct {
+uint32_t params[3];
+uint32_t result;
+} cases[] = {
+{{0, 0, 0,}, 0},
+{{UINT32_MAX, UINT32_MAX, UINT32_MAX}, UINT32_MAX},
+{{1, 5, 7}, 1},
+{{UINT32_MAX, 5, 5}, 5},
+{{5, 0, UINT32_MAX}, 0},
+{{UINT32_MAX/2, UINT32_MAX/3, UINT32_MAX/4}, UINT32_MAX/4},
+};
+
+RUN_CASES(uint32_t, "%u");
+}
+
+test_define {
+.name = "func.amd.shader-trinary-minmax.umin3",
+.start = umin3_compute,
+.no_image = true,
+};
+
+
+static void
+smin3_compute(void)
+{
+t_require_ext("VK_AMD_shader_trinary_minmax");
+
+VkShaderModule cs = qoCreateShaderModuleGLSL(
+t_device, COMPUTE,
+QO_EXTENSION GL_AMD_shader_trinary_minmax: enable
+layout(set = 0, binding = 0, std430) buffer Storage {
+ivec4 v[];
+} ssbo;
+
+layout (local_size_x = 1) in;
+
+void main()
+{
+ssbo.v[gl_GlobalInvocationID.x].w = 
min3(ssbo.v[gl_GlobalInvocationID.x].x,
+ 
ssbo.v[gl_GlobalInvocationID.x].y,
+ 
ssbo.v[gl_GlobalInvocationID.x].z);
+}
+);
+
+const struct {
+int32_t params[3];
+int32_t result;
+} cases[] = {
+{{0, 0, 0,}, 0},
+{{INT32_MAX, INT32_MAX, INT32_MAX}, INT32_MAX},
+{{INT32_MIN, INT32_MIN, INT32_MIN}, INT32_MIN},
+{{1, 5, 7}, 1},
+{{INT32_MAX, 5, 5}, 5},
+{{5, 0, INT32_MAX}, 0},
+{{INT32_MAX/2, INT32_MAX/3, INT32_MAX/4}, INT32_MAX/4},
+{{INT32_MIN, INT32_MAX, 0}, INT32_MIN},
+};
+
+RUN_CASES(int32_t, "%d");
+}
+
+test_define {
+.name = "func.amd.shader-trinary-minmax.smin3",
+.start = smin3_compute,
+.no_image = true,
+};
+
+static void
+fmin3_compute(void)
+{
+t_require_ext("VK_AMD_shader_trinary_minmax");
+
+VkShaderModule cs = qo

Re: [Mesa-dev] [PATCH] gbm: Fix the alpha masks in the GBM format table.

2018-02-23 Thread Ilia Mirkin
Reviewed-by: Ilia Mirkin 

On Fri, Feb 23, 2018 at 5:57 PM, Eric Anholt  wrote:
> Once GBM started looking at the values of the alpha masks, ARGB/ABGR
> wouldn't match any more because we had both A and R in the low bits.
>
> Fixes: 2ed344645d65 ("gbm/dri: Add RGBA masks to GBM format table")
> ---
>  src/gbm/backends/dri/gbm_dri.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
> index 0d088bd62b62..f987c04c1b7d 100644
> --- a/src/gbm/backends/dri/gbm_dri.c
> +++ b/src/gbm/backends/dri/gbm_dri.c
> @@ -562,7 +562,7 @@ static const struct gbm_dri_visual 
> gbm_dri_visuals_table[] = {
> },
> {
>   GBM_FORMAT_ARGB, __DRI_IMAGE_FORMAT_ARGB,
> - { 0x00ff, 0xff00, 0x00ff, 0x00ff },
> + { 0x00ff, 0xff00, 0x00ff, 0xff00 },
> },
> {
>   GBM_FORMAT_XBGR, __DRI_IMAGE_FORMAT_XBGR,
> @@ -570,7 +570,7 @@ static const struct gbm_dri_visual 
> gbm_dri_visuals_table[] = {
> },
> {
>   GBM_FORMAT_ABGR, __DRI_IMAGE_FORMAT_ABGR,
> - { 0x00ff, 0xff00, 0x00ff, 0x00ff },
> + { 0x00ff, 0xff00, 0x00ff, 0xff00 },
> },
> {
>   GBM_FORMAT_XRGB2101010, __DRI_IMAGE_FORMAT_XRGB2101010,
> --
> 2.16.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/7] vulkan: Add KHR_display extension to anv and radv using DRM

2018-02-23 Thread Jason Ekstrand
On Thu, Feb 15, 2018 at 9:46 AM, Keith Packard  wrote:

> Jason Ekstrand  writes:
>
> > It seems a little odd to me to default to opening the master node and
> then
> > fall back to the render node if it doesn't work.  I suppose that's
> probably
> > ok so long as we ensure that vkGetPhysicalDeviceDisplayPropertiesKHR
> > returns no displays if we're on the render node.
> >
> > We could always go back to the DRM fd extension idea but I'm not coming
> up
> > with something nice and clean in the 60 seconds I've thought about it.
>
> As I said in the last comments about this section, Dave Airlie and I
> added this code only recently so that we could test this extension
> without also needing the kernel and X leasing changes.
>
> I think we should decide how to enable this functionality "for real",
> and I have two easy options:
>
>  1) Use my KEITHP_kms_display extension (presumably renamed MESA), which
> exposes a way to pass the DRM fd from the application into the
> driver. This makes it possible for the application to get the FD
> through any mechanism at all (including RandR or the new Wayland
> extension) and leave that out of the Vulkan code entirely.
>
>  2) Add a new extension which passes a new data structure that directs
> the driver to open either the Render or Primary nodes.
>
> When this is done, we can switch from the current code which tries to
> open the Primary node whenever the KHR_display extension is requested.
>

I think I like option 1.  If the client knows the difference between render
and primary for 2, then they are most likely already opening the master
node themselves or at least have access to the FD.

For an application that just wants to draw some stuff on the screen and is
ok with letting Vulkan fully handle KMS for it, the current code may be a
fine solution.

Sorry, I'm just not feeling all that opinionated about this at the moment.
:-)


> > Would it make anything easier if we just storred the DRM struct here?
> "No"
> > is a perfectly valid answer.
>
> Nope -- once we add the acquire_xlib extension, we get modes through
> either X or DRM, depending on whether we're pre-lease or post-lease.
>

Sounds good.


> > Any particular reason why the list of modes is global and not in the
> > connector?  It seems like it would be a tiny bit more efficient and
> > convenient to put the list in the connector.
>
> I think you're right. I have some vague memory of a lifetime issue with
> connectors, but can't think of what it might be, even after reviewing
> the relevant parts of the Vulkan spec. I've gone ahead and changed it;
> seems to work fine.
>
> >> +   LIST_FOR_EACH_ENTRY(display_mode, &wsi->display_modes, list)
> >> +  if (display_mode->connector == connector)
> >> + display_mode->valid = false;
> >>
> >
> > Please use braces for loops containing more than one line.
>
> Well, that was easy to fix -- the condition is now gone :-)
>

Even better!


> > Since we're allocating these in a physical device query, we need to use
> the
> > INSTANCE scope.  the OBJECT scope is intended for vkCreate functions to
> > allocated data that will live no longer than the associated vkDestroy
> > function.
>
> Thanks! The whole Vulkan memory model remains a mystery to me.
>
> I've changed allocation of wsi_display_mode and wsi_display_connector to
> use SCOPE_INSTANCE. VkIceSurfaceDisplay, wsi_display_fence and
> wsi_display_swapchain remain using SCOPE_OBJECT.
>

That sounds correct to me.


> I've also changed *all* instances of vk_alloc to use
> vk_zalloc. These are all small data structures allocated only during
> application startup, so I think the benefit of known memory contents is
> worth the cost of memset.
>
> > Hooray for obviously false fixed constants!
> >
> > I know the answer to this will be "EDIDs lie, never trust them!" but can
> we
> > get the real value somehow?  As someone who has a 13" laptop with a
> > 3200x1800 display, I know that number isn't always right. :-)
>
> Yes, we could dig the real value out of the EDID, but we'd have to parse
> the entire EDID to manage that. I don't want to stick an EDID parser
> directly in Mesa, so I'm kinda waiting for someone to create a separate
> EDID parsing library that the X server, Mesa and others can share. Until
> then, I'd prefer to just lie here.
>

Clearly, we need systemd-edidd. :-)


> > double-;;
>
> Thx. I remember seeing this while reviewing patches and forgot all about
> it...
>
> > From the Vulkan spec:
> >
> > Note:
> > For devices which have no natural value to return here,
> implementations
> > *should* return the maximum resolution supported.
> >
> > We should walk the list and pick the biggest one.
>
> I did this intentionally -- most monitors have a preferred resolution,
> which is their native pixel size. And, we want to tell applications to
> use that size, even if the monitor offers a larger (presumabl scaled)
> resolution in their mode list.
>

Yes, *if* it has a preferred r

[Mesa-dev] [PATCH] gbm: Fix the alpha masks in the GBM format table.

2018-02-23 Thread Eric Anholt
Once GBM started looking at the values of the alpha masks, ARGB/ABGR
wouldn't match any more because we had both A and R in the low bits.

Fixes: 2ed344645d65 ("gbm/dri: Add RGBA masks to GBM format table")
---
 src/gbm/backends/dri/gbm_dri.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
index 0d088bd62b62..f987c04c1b7d 100644
--- a/src/gbm/backends/dri/gbm_dri.c
+++ b/src/gbm/backends/dri/gbm_dri.c
@@ -562,7 +562,7 @@ static const struct gbm_dri_visual gbm_dri_visuals_table[] 
= {
},
{
  GBM_FORMAT_ARGB, __DRI_IMAGE_FORMAT_ARGB,
- { 0x00ff, 0xff00, 0x00ff, 0x00ff },
+ { 0x00ff, 0xff00, 0x00ff, 0xff00 },
},
{
  GBM_FORMAT_XBGR, __DRI_IMAGE_FORMAT_XBGR,
@@ -570,7 +570,7 @@ static const struct gbm_dri_visual gbm_dri_visuals_table[] 
= {
},
{
  GBM_FORMAT_ABGR, __DRI_IMAGE_FORMAT_ABGR,
- { 0x00ff, 0xff00, 0x00ff, 0x00ff },
+ { 0x00ff, 0xff00, 0x00ff, 0xff00 },
},
{
  GBM_FORMAT_XRGB2101010, __DRI_IMAGE_FORMAT_XRGB2101010,
-- 
2.16.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv/extensions: fix c_vk_version for patch == None

2018-02-23 Thread Bas Nieuwenhuizen
Huh, can confirm that it generates None for me too, no clue what
defines that symbol.

Reviewed-by: Bas Nieuwenhuizen 

Did you have push access? Otherwise I can push it.

On Fri, Feb 23, 2018 at 11:33 PM, Mauro Rossi  wrote:
> Similar to cb0d1ba156 ("anv/extensions: Fix VkVersion::c_vk_version for patch 
> == None")
> fixes the following building errors:
>
> out/target/product/x86_64/obj_x86/STATIC_LIBRARIES/libmesa_radv_common_intermediates/radv_entrypoints.c:1161:48:
> error: use of undeclared identifier 'None'; did you mean 'long'?
>   return instance && VK_MAKE_VERSION(1, 0, None) <= core_version;
>^~~~
>long
> external/mesa/include/vulkan/vulkan.h:34:43: note: expanded from macro 
> 'VK_MAKE_VERSION'
> (((major) << 22) | ((minor) << 12) | (patch))
>   ^
> ...
> fatal error: too many errors emitted, stopping now [-ferror-limit=]
> 20 errors generated.
>
> Fixes: e72ad05c1d ("radv: Return NULL for entrypoints when not supported.")
> ---
>  src/amd/vulkan/radv_extensions.py | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/vulkan/radv_extensions.py 
> b/src/amd/vulkan/radv_extensions.py
> index ac6ec8744e..92b1ea3e14 100644
> --- a/src/amd/vulkan/radv_extensions.py
> +++ b/src/amd/vulkan/radv_extensions.py
> @@ -116,7 +116,8 @@ class VkVersion:
>  return '.'.join(ver_list)
>
>  def c_vk_version(self):
> -ver_list = [str(self.major), str(self.minor), str(self.patch)]
> +patch = self.patch if self.patch is not None else 0
> +ver_list = [str(self.major), str(self.minor), str(patch)]
>  return 'VK_MAKE_VERSION(' + ', '.join(ver_list) + ')'
>
>  def __int_ver(self):
> --
> 2.14.1
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radv/extensions: fix c_vk_version for patch == None

2018-02-23 Thread Mauro Rossi
Similar to cb0d1ba156 ("anv/extensions: Fix VkVersion::c_vk_version for patch 
== None")
fixes the following building errors:

out/target/product/x86_64/obj_x86/STATIC_LIBRARIES/libmesa_radv_common_intermediates/radv_entrypoints.c:1161:48:
error: use of undeclared identifier 'None'; did you mean 'long'?
  return instance && VK_MAKE_VERSION(1, 0, None) <= core_version;
   ^~~~
   long
external/mesa/include/vulkan/vulkan.h:34:43: note: expanded from macro 
'VK_MAKE_VERSION'
(((major) << 22) | ((minor) << 12) | (patch))
  ^
...
fatal error: too many errors emitted, stopping now [-ferror-limit=]
20 errors generated.

Fixes: e72ad05c1d ("radv: Return NULL for entrypoints when not supported.")
---
 src/amd/vulkan/radv_extensions.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_extensions.py 
b/src/amd/vulkan/radv_extensions.py
index ac6ec8744e..92b1ea3e14 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -116,7 +116,8 @@ class VkVersion:
 return '.'.join(ver_list)
 
 def c_vk_version(self):
-ver_list = [str(self.major), str(self.minor), str(self.patch)]
+patch = self.patch if self.patch is not None else 0
+ver_list = [str(self.major), str(self.minor), str(patch)]
 return 'VK_MAKE_VERSION(' + ', '.join(ver_list) + ')'
 
 def __int_ver(self):
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 104681] Einstein@Home BOINC FGRPB1G GPU app crash

2018-02-23 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104681

Germano Massullo  changed:

   What|Removed |Added

 CC||germano.massu...@gmail.com

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 104182] Seti@Home OpenCL program starts then returns: "CL file build failure"

2018-02-23 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104182

Germano Massullo  changed:

   What|Removed |Added

 CC||germano.massu...@gmail.com

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 2/2] anv: enable VK_EXT_shader_viewport_index_layer

2018-02-23 Thread Caio Marcelo de Oliveira Filho
---
 src/intel/vulkan/anv_extensions.py | 1 +
 src/intel/vulkan/anv_pipeline.c| 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/intel/vulkan/anv_extensions.py 
b/src/intel/vulkan/anv_extensions.py
index 581921e62a..00760fdd4e 100644
--- a/src/intel/vulkan/anv_extensions.py
+++ b/src/intel/vulkan/anv_extensions.py
@@ -86,6 +86,7 @@ EXTENSIONS = [
 Extension('VK_KHX_multiview', 1, True),
 Extension('VK_EXT_debug_report',  8, True),
 Extension('VK_EXT_external_memory_dma_buf',   1, True),
+Extension('VK_EXT_shader_viewport_index_layer',   1, True),
 ]
 
 class VkVersion:
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index e16a7a1994..115e1374d3 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -143,6 +143,7 @@ anv_shader_compile_to_nir(struct anv_pipeline *pipeline,
  .multiview = true,
  .variable_pointers = true,
  .storage_16bit = device->instance->physicalDevice.info.gen >= 8,
+ .shader_viewport_index_layer = true,
   },
};
 
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 1/2] spirv: Add SpvCapabilityShaderViewportIndexLayerEXT

2018-02-23 Thread Caio Marcelo de Oliveira Filho
This capability allows gl_ViewportIndex and gl_Layer to also be used
as outputs in Vertex and Tesselation shaders.

v2: Make conditional to the capability, add gl_Layer, add tesselation
shaders. (Iago)

v3: Don't export to tesselation control shader.
---
 src/compiler/shader_info.h | 1 +
 src/compiler/spirv/spirv_to_nir.c  | 4 
 src/compiler/spirv/vtn_variables.c | 8 
 3 files changed, 13 insertions(+)

diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h
index e7fd7dbe62..2fcbde74be 100644
--- a/src/compiler/shader_info.h
+++ b/src/compiler/shader_info.h
@@ -43,6 +43,7 @@ struct spirv_supported_capabilities {
bool multiview;
bool variable_pointers;
bool storage_16bit;
+   bool shader_viewport_index_layer;
 };
 
 typedef struct shader_info {
diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index c6df764682..fdb2993db5 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -3203,6 +3203,10 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, 
SpvOp opcode,
  spv_check_supported(storage_16bit, cap);
  break;
 
+  case SpvCapabilityShaderViewportIndexLayerEXT:
+ spv_check_supported(shader_viewport_index_layer, cap);
+ break;
+
   default:
  vtn_fail("Unhandled capability");
   }
diff --git a/src/compiler/spirv/vtn_variables.c 
b/src/compiler/spirv/vtn_variables.c
index 9eb85c24e9..75d8aec5da 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -1192,6 +1192,10 @@ vtn_get_builtin_location(struct vtn_builder *b,
  *mode = nir_var_shader_in;
   else if (b->shader->info.stage == MESA_SHADER_GEOMETRY)
  *mode = nir_var_shader_out;
+  else if (b->options && b->options->caps.shader_viewport_index_layer &&
+   (b->shader->info.stage == MESA_SHADER_VERTEX ||
+b->shader->info.stage == MESA_SHADER_TESS_EVAL))
+ *mode = nir_var_shader_out;
   else
  vtn_fail("invalid stage for SpvBuiltInLayer");
   break;
@@ -1199,6 +1203,10 @@ vtn_get_builtin_location(struct vtn_builder *b,
   *location = VARYING_SLOT_VIEWPORT;
   if (b->shader->info.stage == MESA_SHADER_GEOMETRY)
  *mode = nir_var_shader_out;
+  else if (b->options && b->options->caps.shader_viewport_index_layer &&
+   (b->shader->info.stage == MESA_SHADER_VERTEX ||
+b->shader->info.stage == MESA_SHADER_TESS_EVAL))
+ *mode = nir_var_shader_out;
   else if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
  *mode = nir_var_shader_in;
   else
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 1/2] spirv: Add SpvCapabilityShaderViewportIndexLayerEXT

2018-02-23 Thread Caio Marcelo de Oliveira Filho
On Fri, Feb 23, 2018 at 11:34:31AM -0800, Caio Marcelo de Oliveira Filho wrote:
> Hi,
> 
> > diff --git a/src/compiler/spirv/vtn_variables.c 
> > b/src/compiler/spirv/vtn_variables.c
> > index 9eb85c24e9..ba2b74c2c2 100644
> > --- a/src/compiler/spirv/vtn_variables.c
> > +++ b/src/compiler/spirv/vtn_variables.c
> > @@ -1192,6 +1192,11 @@ vtn_get_builtin_location(struct vtn_builder *b,
> >   *mode = nir_var_shader_in;
> >else if (b->shader->info.stage == MESA_SHADER_GEOMETRY)
> >   *mode = nir_var_shader_out;
> > +  else if (b->options && b->options->caps.shader_viewport_index_layer 
> > &&
> > +   (b->shader->info.stage == MESA_SHADER_VERTEX ||
> > +b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
> 
> I'm a bit unsure about this being exported to Tesselation Control
> Shader, but the extension spec does refer to Tesselation shaders (and
> not explicitly the evaluation, like in other places).

The version of the spec that includes the extensions says explicitly
in section 14.6.

The ViewportIndex decoration must be used only within vertex,
tessellation evaluation, geometry, and fragment shaders.

So I'll send a v3 of this patch. I wonder if the extension text (both
for the Vulkan and SPIR-V) should be more explicit about this too.


Thanks,
Caio
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: Update vertex processing mode on _mesa_UseProgram.

2018-02-23 Thread Clayton Craft

On Fri, Feb 23, 2018 at 09:01:17PM +0100, mathias.froehl...@gmx.net wrote:

From: Mathias Fröhlich 

Hi Clayton,

The following change fixes the reported problem on my site.
Please test/review!!

Just ran this through the CI and the previously failing test is now passing.

I can also set up the CI to automatically build a branch of your choice (e.g. in
your personal mesa repo) when you push to it, to help with testing your
commits before they are pushed to the master branch. Would you like for me to
do this?
If so, I just need to know the location of your repo and the branch you would
like pushes to trigger a CI build on.



best

Mathias


The change is a bug fix for 92d76a169:
 mesa: Provide an alternative to get_vp_mode()
that actually got exposed through 4562a7b0:
 vbo: Make use of _DrawVAO from the dlist code.

Fixes: KHR-GLES31.core.shader_image_load_store.advanced-sso-simple
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105229
Signed-off-by: Mathias Fröhlich 
---
src/mesa/main/shaderapi.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index d8a3031a38..b7bd7167e1 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -2069,6 +2069,8 @@ use_program(GLuint program, bool no_error)
_mesa_BindProgramPipeline(ctx->Pipeline.Current->Name);
  }
   }
+
+   _mesa_update_vertex_processing_mode(ctx);
}


--
2.14.3



signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/7] spirv/i965/anv: Relax push constant offset assertions being 32-bit aligned

2018-02-23 Thread Jason Ekstrand
Assuming the CTS is still happy with it after those changes,

Reviewed-by: Jason Ekstrand 

On Fri, Feb 23, 2018 at 1:16 PM, Chema Casanova 
wrote:

> On 23/02/18 20:09, Jason Ekstrand wrote:
> > On Fri, Feb 23, 2018 at 1:26 AM, Jose Maria Casanova Crespo
> > mailto:jmcasan...@igalia.com>> wrote:
> >
> > The introduction of 16-bit types with VK_KHR_16bit_storages implies
> that
> > push constant offsets could be multiple of 2-bytes. Some assertions
> are
> > relaxed so offsets can be multiple of 4-bytes or multiple of size of
> the
> > base type.
> >
> > For 16-bit types, the push constant offset takes into account the
> > internal offset in the 32-bit uniform bucket adding 2-bytes when we
> > access
> > not 32-bit aligned elements. In all 32-bit aligned cases it just
> > becomes 0.
> > ---
> >  src/compiler/spirv/vtn_variables.c  |  1 -
> >  src/intel/compiler/brw_fs_nir.cpp   | 16
> +++-
> >  src/intel/vulkan/anv_nir_lower_push_constants.c |  2 --
> >  3 files changed, 11 insertions(+), 8 deletions(-)
> >
> > diff --git a/src/compiler/spirv/vtn_variables.c
> > b/src/compiler/spirv/vtn_variables.c
> > index 81658afbd9..87236d0abd 100644
> > --- a/src/compiler/spirv/vtn_variables.c
> > +++ b/src/compiler/spirv/vtn_variables.c
> > @@ -760,7 +760,6 @@ _vtn_load_store_tail(struct vtn_builder *b,
> > nir_intrinsic_op op, bool load,
> > }
> >
> > if (op == nir_intrinsic_load_push_constant) {
> > -  vtn_assert(access_offset % 4 == 0);
> >
> >nir_intrinsic_set_base(instr, access_offset);
> >nir_intrinsic_set_range(instr, access_size);
> > diff --git a/src/intel/compiler/brw_fs_nir.cpp
> > b/src/intel/compiler/brw_fs_nir.cpp
> > index abf9098252..27611a21d0 100644
> > --- a/src/intel/compiler/brw_fs_nir.cpp
> > +++ b/src/intel/compiler/brw_fs_nir.cpp
> > @@ -3887,16 +3887,22 @@ fs_visitor::nir_emit_intrinsic(const
> > fs_builder &bld, nir_intrinsic_instr *instr
> >break;
> >
> > case nir_intrinsic_load_uniform: {
> > -  /* Offsets are in bytes but they should always be multiples
> > of 4 */
> > -  assert(instr->const_index[0] % 4 == 0);
> > +  /* Offsets are in bytes but they should always be multiple of
> 4
> > +   * or multiple of the size of the destination type. 2 for
> 16-bits
> > +   * types.
> >
> > +   */
> > +  assert(instr->const_index[0] % 4 == 0 ||
> > + instr->const_index[0] % type_sz(dest.type) == 0);
> >
> >
> > Doubles are required to be 8-byte aligned so we can just have the dest
> > type size check.
>
> Changed locally.
>
> >
> >
> >fs_reg src(UNIFORM, instr->const_index[0] / 4, dest.type);
> >
> >nir_const_value *const_offset =
> > nir_src_as_const_value(instr->src[0]);
> >if (const_offset) {
> > - /* Offsets are in bytes but they should always be
> > multiples of 4 */
> > - assert(const_offset->u32[0] % 4 == 0);
> > - src.offset = const_offset->u32[0];
> > + assert(const_offset->u32[0] % 4 == 0 ||
> > +const_offset->u32[0] % type_sz(dest.type) == 0);
> >
> >
> > Same here.
>
> Changed locally.
>
> > + /* For 16-bit types we add the module of the const_index[0]
> > +  * offset to access to not 32-bit aligned element */
> > + src.offset = const_offset->u32[0] + instr->const_index[0]
> % 4;
> >
> >   for (unsigned j = 0; j < instr->num_components; j++) {
> >  bld.MOV(offset(dest, bld, j), offset(src, bld, j));
> > diff --git a/src/intel/vulkan/anv_nir_lower_push_constants.c
> > b/src/intel/vulkan/anv_nir_lower_push_constants.c
> > index b66552825b..ad60d0c824 100644
> > --- a/src/intel/vulkan/anv_nir_lower_push_constants.c
> > +++ b/src/intel/vulkan/anv_nir_lower_push_constants.c
> > @@ -41,8 +41,6 @@ anv_nir_lower_push_constants(nir_shader *shader)
> >  if (intrin->intrinsic != nir_intrinsic_load_push_
> constant)
> > continue;
> >
> > -assert(intrin->const_index[0] % 4 == 0);
> > -
> >  /* We just turn them into uniform loads */
> >  intrin->intrinsic = nir_intrinsic_load_uniform;
> >   }
> > --
> > 2.14.3
> >
> >
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/7] spirv: Calculate properly 16-bit vector sizes (v2)

2018-02-23 Thread Jason Ekstrand
Reviewed-by: Jason Ekstrand 

On Fri, Feb 23, 2018 at 1:30 PM, Jose Maria Casanova Crespo <
jmcasan...@igalia.com> wrote:

> Range in 16-bit push constants load was being calculated
> wrongly using 4-bytes per element instead of 2-bytes as it
> should be.
>
> v2: Use glsl_get_bit_size instead of if statement
> (Jason Ekstrand)
> ---
>  src/compiler/spirv/vtn_variables.c | 7 ++-
>  1 file changed, 2 insertions(+), 5 deletions(-)
>
> diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_
> variables.c
> index 78adab3ed2..e68551a336 100644
> --- a/src/compiler/spirv/vtn_variables.c
> +++ b/src/compiler/spirv/vtn_variables.c
> @@ -683,12 +683,9 @@ vtn_type_block_size(struct vtn_builder *b, struct
> vtn_type *type)
>if (cols > 1) {
>   vtn_assert(type->stride > 0);
>   return type->stride * cols;
> -  } else if (base_type == GLSL_TYPE_DOUBLE ||
> -base_type == GLSL_TYPE_UINT64 ||
> -base_type == GLSL_TYPE_INT64) {
> - return glsl_get_vector_elements(type->type) * 8;
>} else {
> - return glsl_get_vector_elements(type->type) * 4;
> + unsigned type_size = glsl_get_bit_size(type->type) / 8;
> + return glsl_get_vector_elements(type->type) * type_size;
>}
> }
>
> --
> 2.16.1
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/7] anv/spirv: SSBO/UBO buffers needs padding size is not multiple of 32-bits

2018-02-23 Thread Jason Ekstrand
On Fri, Feb 23, 2018 at 12:28 PM, Chema Casanova 
wrote:

>
>
> El 23/02/18 a las 17:26, Jason Ekstrand escribió:
> > On Fri, Feb 23, 2018 at 1:26 AM, Jose Maria Casanova Crespo
> > mailto:jmcasan...@igalia.com>> wrote:
> >
> > The surfaces that backup the GPU buffers have a boundary check that
> > considers that access to partial dwords are considered out-of-bounds.
> > For example is basic 16-bit cases of buffers with size 2 or 6 where
> the
> > last two bytes will always be read as 0 or its writting ignored.
> >
> > The introduction of 16-bit types implies that we need to align the
> size
> > to 4-bytes multiples so that partial dwords could be read/written.
> > Adding an inconditional +2 size to buffers not being multiple of 2
> > solves this issue for the general cases of UBO or SSBO.
> >
> > But, when unsized_arrays of 16-bit elements are used it is not
> possible
> > to know if the size was padded or not. To solve this issue the
> > implementation of SSBO calculates the needed size of the surface,
> > as suggested by Jason:
> >
> > surface_size = 2 * aling_u64(buffer_size, 4)  - buffer_size
> >
> > So when we calculate backwards the SpvOpArrayLenght with a nir
> expresion
> > when the array stride is not multiple of 4.
> >
> > array_size = (surface_size & ~3) - (surface_size & 3)
> >
> > It is also exposed this buffer requirements when robust buffer access
> > is enabled so these buffer sizes recommend being multiple of 4.
> > ---
> >
> > I have some doubts if vtn_variables.c is the best place to include
> > this specific to calculate the real buffer size as this is new
> > calculus seems to be quite HW dependent and maybe other drivers
> > different
> > to ANV don't need this kind of solution.
> >
> >  src/compiler/spirv/vtn_variables.c| 14 ++
> >  src/intel/vulkan/anv_descriptor_set.c | 16 
> >  src/intel/vulkan/anv_device.c | 11 +++
> >  3 files changed, 41 insertions(+)
> >
> > diff --git a/src/compiler/spirv/vtn_variables.c
> > b/src/compiler/spirv/vtn_variables.c
> > index 9eb85c24e9..78adab3ed2 100644
> > --- a/src/compiler/spirv/vtn_variables.c
> > +++ b/src/compiler/spirv/vtn_variables.c
> > @@ -2113,6 +2113,20 @@ vtn_handle_variables(struct vtn_builder *b,
> > SpvOp opcode,
> >nir_builder_instr_insert(&b->nb, &instr->instr);
> >nir_ssa_def *buf_size = &instr->dest.ssa;
> >
> > +  /* Calculate real length if padding was done to align the
> buffer
> > +   * to 32-bits. This only could happen is stride is not
> multiple
> > +   * of 4. Introduced to support 16-bit type unsized arrays in
> anv.
> > +   */
> > +  if (stride % 4) {
> > + buf_size = nir_isub(&b->nb,
> > + nir_iand(&b->nb,
> > +  buf_size,
> > +  nir_imm_int(&b->nb, ~3)),
> > + nir_iand (&b->nb,
> > +   buf_size,
> > +   nir_imm_int(&b->nb, 3)));
> >
> >
> > We can't do this in spirv_to_nir as it's also used by radv and they may
> > not have the same issue.  Instead, we need to handle it either in
> > anv_nir_apply_pipeline_layout or in the back-end compiler.  Doing it
> > here has the advantage that we can only do it in the "stride % 4 != 0"
> > case but I don't think the three instructions are all that big of a deal
> > given that we just did a send and are about to do an integer divide.  My
> > preference would be to put most of it in ISL and the back-end compiler
> > if we can.
>
> I've already had my doubts in my commit comment. So I'll implement it
> properly in the backend implementation of nir_intrinsic_get_buffer_size.
> I should have a look to that code before.
>
> >
> > +  }
> > +
> >/* array_length = max(buffer_size - offset, 0) / stride */
> >nir_ssa_def *array_length =
> >   nir_idiv(&b->nb,
> > diff --git a/src/intel/vulkan/anv_descriptor_set.c
> > b/src/intel/vulkan/anv_descriptor_set.c
> > index edb829601e..a97f2f37dc 100644
> > --- a/src/intel/vulkan/anv_descriptor_set.c
> > +++ b/src/intel/vulkan/anv_descriptor_set.c
> > @@ -704,6 +704,22 @@ anv_descriptor_set_write_buffer(struct
> > anv_descriptor_set *set,
> >bview->offset = buffer->offset + offset;
> >bview->range = anv_buffer_get_range(buffer, offset, range);
> >
> > +  /* Uniform and Storage buffers need to have surface size
> > +   * not less that the aligned 32-bit size of the buffer.
> > +   * To calculate the array lenght on unsized arrays
> > +   * in StorageBuffer the last 2 bits store the padding size
> > +   * added to the surface, s

[Mesa-dev] [PATCH 5/7] spirv: Calculate properly 16-bit vector sizes (v2)

2018-02-23 Thread Jose Maria Casanova Crespo
Range in 16-bit push constants load was being calculated
wrongly using 4-bytes per element instead of 2-bytes as it
should be.

v2: Use glsl_get_bit_size instead of if statement
(Jason Ekstrand)
---
 src/compiler/spirv/vtn_variables.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/compiler/spirv/vtn_variables.c 
b/src/compiler/spirv/vtn_variables.c
index 78adab3ed2..e68551a336 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -683,12 +683,9 @@ vtn_type_block_size(struct vtn_builder *b, struct vtn_type 
*type)
   if (cols > 1) {
  vtn_assert(type->stride > 0);
  return type->stride * cols;
-  } else if (base_type == GLSL_TYPE_DOUBLE ||
-base_type == GLSL_TYPE_UINT64 ||
-base_type == GLSL_TYPE_INT64) {
- return glsl_get_vector_elements(type->type) * 8;
   } else {
- return glsl_get_vector_elements(type->type) * 4;
+ unsigned type_size = glsl_get_bit_size(type->type) / 8;
+ return glsl_get_vector_elements(type->type) * type_size;
   }
}
 
-- 
2.16.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: Update vertex processing mode on _mesa_UseProgram.

2018-02-23 Thread Ilia Mirkin
On Fri, Feb 23, 2018 at 4:24 PM, Mark Janes  wrote:
>  writes:
>
>> From: Mathias Fröhlich 
>>
>> Hi Clayton,
>>
>> The following change fixes the reported problem on my site.
>> Please test/review!!
>>
>> best
>>
>> Mathias
>>
>>
>> The change is a bug fix for 92d76a169:
>>   mesa: Provide an alternative to get_vp_mode()
>> that actually got exposed through 4562a7b0:
>>   vbo: Make use of _DrawVAO from the dlist code.
>>
>> Fixes: KHR-GLES31.core.shader_image_load_store.advanced-sso-simple
>
> The "Fixes:" tag is supposed to indicate the git commit which regresses
> Mesa.  Release automation processes it when preparing branches for the
> stable release.
>
> https://www.mesa3d.org/submittingpatches.html#formatting
>
> In this case, the patch doesn't need to be applied to a stable branch.
> However, it may confuse the automation.

It's pretty common practice to do this... should be easy enough to
teach automation to skip if it's not a hash-looking thing.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: Update vertex processing mode on _mesa_UseProgram.

2018-02-23 Thread Mark Janes
 writes:

> From: Mathias Fröhlich 
>
> Hi Clayton,
>
> The following change fixes the reported problem on my site.
> Please test/review!!
>
> best
>
> Mathias
>
>
> The change is a bug fix for 92d76a169:
>   mesa: Provide an alternative to get_vp_mode()
> that actually got exposed through 4562a7b0:
>   vbo: Make use of _DrawVAO from the dlist code.
>
> Fixes: KHR-GLES31.core.shader_image_load_store.advanced-sso-simple

The "Fixes:" tag is supposed to indicate the git commit which regresses
Mesa.  Release automation processes it when preparing branches for the
stable release.

https://www.mesa3d.org/submittingpatches.html#formatting

In this case, the patch doesn't need to be applied to a stable branch.
However, it may confuse the automation.

> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105229
> Signed-off-by: Mathias Fröhlich 
> ---
>  src/mesa/main/shaderapi.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
> index d8a3031a38..b7bd7167e1 100644
> --- a/src/mesa/main/shaderapi.c
> +++ b/src/mesa/main/shaderapi.c
> @@ -2069,6 +2069,8 @@ use_program(GLuint program, bool no_error)
>  _mesa_BindProgramPipeline(ctx->Pipeline.Current->Name);
>}
> }
> +
> +   _mesa_update_vertex_processing_mode(ctx);
>  }
>  
>  
> -- 
> 2.14.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Patch submit question

2018-02-23 Thread Kyriazis, George
Hello mesa-dev,

I have a quick patch submission question:

I have a patch that I’d like to also nominate to mesa-stable (specifically 
18.0.1 or 18.0.2), however the same patch is different between master and 
stable branches, due to neighboring code having changed in the meantime.

What’s the best process of submitting / pushing this sort of change?

Thank you!

George

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/7] spirv/i965/anv: Relax push constant offset assertions being 32-bit aligned

2018-02-23 Thread Chema Casanova
On 23/02/18 20:09, Jason Ekstrand wrote:
> On Fri, Feb 23, 2018 at 1:26 AM, Jose Maria Casanova Crespo
> mailto:jmcasan...@igalia.com>> wrote:
> 
> The introduction of 16-bit types with VK_KHR_16bit_storages implies that
> push constant offsets could be multiple of 2-bytes. Some assertions are
> relaxed so offsets can be multiple of 4-bytes or multiple of size of the
> base type.
> 
> For 16-bit types, the push constant offset takes into account the
> internal offset in the 32-bit uniform bucket adding 2-bytes when we
> access
> not 32-bit aligned elements. In all 32-bit aligned cases it just
> becomes 0.
> ---
>  src/compiler/spirv/vtn_variables.c              |  1 -
>  src/intel/compiler/brw_fs_nir.cpp               | 16 +++-
>  src/intel/vulkan/anv_nir_lower_push_constants.c |  2 --
>  3 files changed, 11 insertions(+), 8 deletions(-)
> 
> diff --git a/src/compiler/spirv/vtn_variables.c
> b/src/compiler/spirv/vtn_variables.c
> index 81658afbd9..87236d0abd 100644
> --- a/src/compiler/spirv/vtn_variables.c
> +++ b/src/compiler/spirv/vtn_variables.c
> @@ -760,7 +760,6 @@ _vtn_load_store_tail(struct vtn_builder *b,
> nir_intrinsic_op op, bool load,
>     }
> 
>     if (op == nir_intrinsic_load_push_constant) {
> -      vtn_assert(access_offset % 4 == 0);
> 
>        nir_intrinsic_set_base(instr, access_offset);
>        nir_intrinsic_set_range(instr, access_size);
> diff --git a/src/intel/compiler/brw_fs_nir.cpp
> b/src/intel/compiler/brw_fs_nir.cpp
> index abf9098252..27611a21d0 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -3887,16 +3887,22 @@ fs_visitor::nir_emit_intrinsic(const
> fs_builder &bld, nir_intrinsic_instr *instr
>        break;
> 
>     case nir_intrinsic_load_uniform: {
> -      /* Offsets are in bytes but they should always be multiples
> of 4 */
> -      assert(instr->const_index[0] % 4 == 0);
> +      /* Offsets are in bytes but they should always be multiple of 4
> +       * or multiple of the size of the destination type. 2 for 16-bits
> +       * types.
> 
> +       */
> +      assert(instr->const_index[0] % 4 == 0 ||
> +             instr->const_index[0] % type_sz(dest.type) == 0);
> 
> 
> Doubles are required to be 8-byte aligned so we can just have the dest
> type size check.

Changed locally.

>  
> 
>        fs_reg src(UNIFORM, instr->const_index[0] / 4, dest.type);
> 
>        nir_const_value *const_offset =
> nir_src_as_const_value(instr->src[0]);
>        if (const_offset) {
> -         /* Offsets are in bytes but they should always be
> multiples of 4 */
> -         assert(const_offset->u32[0] % 4 == 0);
> -         src.offset = const_offset->u32[0];
> +         assert(const_offset->u32[0] % 4 == 0 ||
> +                const_offset->u32[0] % type_sz(dest.type) == 0);
> 
> 
> Same here.

Changed locally.

> +         /* For 16-bit types we add the module of the const_index[0]
> +          * offset to access to not 32-bit aligned element */
> +         src.offset = const_offset->u32[0] + instr->const_index[0] % 4;
> 
>           for (unsigned j = 0; j < instr->num_components; j++) {
>              bld.MOV(offset(dest, bld, j), offset(src, bld, j));
> diff --git a/src/intel/vulkan/anv_nir_lower_push_constants.c
> b/src/intel/vulkan/anv_nir_lower_push_constants.c
> index b66552825b..ad60d0c824 100644
> --- a/src/intel/vulkan/anv_nir_lower_push_constants.c
> +++ b/src/intel/vulkan/anv_nir_lower_push_constants.c
> @@ -41,8 +41,6 @@ anv_nir_lower_push_constants(nir_shader *shader)
>              if (intrin->intrinsic != nir_intrinsic_load_push_constant)
>                 continue;
> 
> -            assert(intrin->const_index[0] % 4 == 0);
> -
>              /* We just turn them into uniform loads */
>              intrin->intrinsic = nir_intrinsic_load_uniform;
>           }
> --
> 2.14.3
> 
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: Update vertex processing mode on _mesa_UseProgram.

2018-02-23 Thread Mathias Fröhlich
Hi Brian,

On Friday, 23 February 2018 21:04:54 CET Brian Paul wrote:
> LGTM.
> 
> Reviewed-by: Brian Paul 

Thanks!!
And pushed.

Mathias




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/7] i965/fs: Support 16-bit do_read_vector with VK_KHR_relaxed_block_layout

2018-02-23 Thread Jason Ekstrand
On Fri, Feb 23, 2018 at 1:26 AM, Jose Maria Casanova Crespo <
jmcasan...@igalia.com> wrote:

> 16-bit load_ubo/ssbo operations that call do_untyped_read_vector doesn't
> guarantee that offsets are multiple of 4-bytes as required by untyped_read
> message. This happens for example on 16-bit scalar arrays and in the case
> of f16vec3 when then VK_KHR_relaxed_block_layoud is enabled.
>
> Vectors reads when we have non-constant offsets are implemented with
> multiple byte_scattered_read messages that not require 32-bit aligned
> offsets.
> The same applies for constant offsets not aligned to 32-bits.
>
> Untyped_read_surface is used message when there is a constant offset
> 32-bit aligned and we have more than 1 component to read.
> ---
>  src/intel/compiler/brw_fs_nir.cpp | 60 --
> -
>  1 file changed, 44 insertions(+), 16 deletions(-)
>
> diff --git a/src/intel/compiler/brw_fs_nir.cpp
> b/src/intel/compiler/brw_fs_nir.cpp
> index 8efec34cc9..45b8e8b637 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -2305,27 +2305,55 @@ do_untyped_vector_read(const fs_builder &bld,
> if (type_sz(dest.type) <= 2) {
>assert(dest.stride == 1);
>
> -  if (num_components > 1) {
> - /* Pairs of 16-bit components can be read with untyped read, for
> 16-bit
> -  * vec3 4th component is ignored.
> +  unsigned pending_components = num_components;
> +  unsigned first_component = 0;
> +  boolean is_const_offset = offset_reg.file == BRW_IMMEDIATE_VALUE;
> +  fs_reg read_offset;
> +  if (is_const_offset)
> + read_offset = offset_reg;
> +  else {
> + read_offset = bld.vgrf(BRW_REGISTER_TYPE_UD);
> + bld.MOV(read_offset, offset_reg);
> +  }
> +  while (pending_components > 0 &&
> + (pending_components == 1 ||
> +  !is_const_offset ||
> +  (offset_reg.ud + first_component * 2) % 4)) {
>

I think this would be easier to read as

while (pending_components > 0) {
   if (!is_const_offset || (offset_reg.ud + first_component *
type_sz(dst.type)) % 4) {
  /* Do a single-component load with a byte message */
  first_compoent++;
  pending_components--;
   } else {
  /* Do a load with an untyped read */
  pending_components = 0;
  break;
   }
}

Or, alternatively,

if (is_const_offset) {
   uint32_t start = offset_reg.ud & 3;
   uint32_t end = offset_reg + num_components * type_sz(dst.type);
   end = ALIGN(end, 4);
   assert(end - start <= 16);
   /* Do a load with an untyped read and then throw away unneeded data at
the beginning and the end */
} else {
   /* Do the load using a loop and byte scattered reads */
}

This second one would require a small extension to
shuffle_32bit_load_result_to_16bit_data to handle throwing away data at the
start.


> + /* Non constant offsets, 16-bit scalars and constant offsets not
> +  * aligned 32-bits are read using one byte_scattered_read message
> +  * for eache  component untyped_read requires 32-bit aligned
> offsets.
>*/
>   fs_reg read_result =
> -emit_untyped_read(bld, surf_index, offset_reg,
> -  1 /* dims */, DIV_ROUND_UP(num_components,
> 2),
> -  BRW_PREDICATE_NONE);
> - shuffle_32bit_load_result_to_16bit_data(bld,
> -   retype(dest, BRW_REGISTER_TYPE_W),
> -   retype(read_result, BRW_REGISTER_TYPE_D),
> -   num_components);
> -  } else {
> - assert(num_components == 1);
> - /* scalar 16-bit are read using one byte_scattered_read message
> */
> - fs_reg read_result =
> -emit_byte_scattered_read(bld, surf_index, offset_reg,
> +emit_byte_scattered_read(bld, surf_index, read_offset,
>   1 /* dims */, 1,
>   type_sz(dest.type) * 8 /* bit_size
> */,
>   BRW_PREDICATE_NONE);
> - bld.MOV(dest, subscript(read_result, dest.type, 0));
> + shuffle_32bit_load_result_to_16bit_data(bld,
> +   retype(offset(dest, bld, first_component),
> BRW_REGISTER_TYPE_W),
> +   retype(read_result, BRW_REGISTER_TYPE_D),
> +   1);
> + pending_components--;
> + first_component ++;
> + if (is_const_offset)
> +read_offset.ud += 2;
> + else
> +bld.ADD(read_offset, offset_reg, brw_imm_ud(2 *
> first_component));
> +  }
> +  assert(pending_components != 1);
> +  if (pending_components > 1) {
> + assert (is_const_offset &&
> + (offset_reg.ud + first_component * 2) % 4 == 0);
> + /* At this point we have multiple 16-bit components that have
> constant
> +  * offset multiple of 4-bytes that can be read with
> untyped_reads.
> +  */
> +

Re: [Mesa-dev] [PATCH 1/7] anv/spirv: SSBO/UBO buffers needs padding size is not multiple of 32-bits

2018-02-23 Thread Chema Casanova


El 23/02/18 a las 17:26, Jason Ekstrand escribió:
> On Fri, Feb 23, 2018 at 1:26 AM, Jose Maria Casanova Crespo
> mailto:jmcasan...@igalia.com>> wrote:
> 
> The surfaces that backup the GPU buffers have a boundary check that
> considers that access to partial dwords are considered out-of-bounds.
> For example is basic 16-bit cases of buffers with size 2 or 6 where the
> last two bytes will always be read as 0 or its writting ignored.
> 
> The introduction of 16-bit types implies that we need to align the size
> to 4-bytes multiples so that partial dwords could be read/written.
> Adding an inconditional +2 size to buffers not being multiple of 2
> solves this issue for the general cases of UBO or SSBO.
> 
> But, when unsized_arrays of 16-bit elements are used it is not possible
> to know if the size was padded or not. To solve this issue the
> implementation of SSBO calculates the needed size of the surface,
> as suggested by Jason:
> 
> surface_size = 2 * aling_u64(buffer_size, 4)  - buffer_size
> 
> So when we calculate backwards the SpvOpArrayLenght with a nir expresion
> when the array stride is not multiple of 4.
> 
> array_size = (surface_size & ~3) - (surface_size & 3)
> 
> It is also exposed this buffer requirements when robust buffer access
> is enabled so these buffer sizes recommend being multiple of 4.
> ---
> 
> I have some doubts if vtn_variables.c is the best place to include
> this specific to calculate the real buffer size as this is new
> calculus seems to be quite HW dependent and maybe other drivers
> different
> to ANV don't need this kind of solution.
> 
>  src/compiler/spirv/vtn_variables.c    | 14 ++
>  src/intel/vulkan/anv_descriptor_set.c | 16 
>  src/intel/vulkan/anv_device.c         | 11 +++
>  3 files changed, 41 insertions(+)
> 
> diff --git a/src/compiler/spirv/vtn_variables.c
> b/src/compiler/spirv/vtn_variables.c
> index 9eb85c24e9..78adab3ed2 100644
> --- a/src/compiler/spirv/vtn_variables.c
> +++ b/src/compiler/spirv/vtn_variables.c
> @@ -2113,6 +2113,20 @@ vtn_handle_variables(struct vtn_builder *b,
> SpvOp opcode,
>        nir_builder_instr_insert(&b->nb, &instr->instr);
>        nir_ssa_def *buf_size = &instr->dest.ssa;
> 
> +      /* Calculate real length if padding was done to align the buffer
> +       * to 32-bits. This only could happen is stride is not multiple
> +       * of 4. Introduced to support 16-bit type unsized arrays in anv.
> +       */
> +      if (stride % 4) {
> +         buf_size = nir_isub(&b->nb,
> +                             nir_iand(&b->nb,
> +                                      buf_size,
> +                                      nir_imm_int(&b->nb, ~3)),
> +                             nir_iand (&b->nb,
> +                                       buf_size,
> +                                       nir_imm_int(&b->nb, 3)));
> 
> 
> We can't do this in spirv_to_nir as it's also used by radv and they may
> not have the same issue.  Instead, we need to handle it either in
> anv_nir_apply_pipeline_layout or in the back-end compiler.  Doing it
> here has the advantage that we can only do it in the "stride % 4 != 0"
> case but I don't think the three instructions are all that big of a deal
> given that we just did a send and are about to do an integer divide.  My
> preference would be to put most of it in ISL and the back-end compiler
> if we can.

I've already had my doubts in my commit comment. So I'll implement it
properly in the backend implementation of nir_intrinsic_get_buffer_size.
I should have a look to that code before.

> 
> +      }
> +
>        /* array_length = max(buffer_size - offset, 0) / stride */
>        nir_ssa_def *array_length =
>           nir_idiv(&b->nb,
> diff --git a/src/intel/vulkan/anv_descriptor_set.c
> b/src/intel/vulkan/anv_descriptor_set.c
> index edb829601e..a97f2f37dc 100644
> --- a/src/intel/vulkan/anv_descriptor_set.c
> +++ b/src/intel/vulkan/anv_descriptor_set.c
> @@ -704,6 +704,22 @@ anv_descriptor_set_write_buffer(struct
> anv_descriptor_set *set,
>        bview->offset = buffer->offset + offset;
>        bview->range = anv_buffer_get_range(buffer, offset, range);
> 
> +      /* Uniform and Storage buffers need to have surface size
> +       * not less that the aligned 32-bit size of the buffer.
> +       * To calculate the array lenght on unsized arrays
> +       * in StorageBuffer the last 2 bits store the padding size
> +       * added to the surface, so we can calculate latter the original
> +       * buffer size to know the number of elements.
> +       *
> +       *  surface_size = 2 * aling_u64(buffer_size, 4)  - buffer_size
> +       *
> +       *  array_size = (surface_size & ~

Re: [Mesa-dev] [PATCH 3/7] i965/fs: Support 16-bit store_ssbo with VK_KHR_relaxed_block_layout

2018-02-23 Thread Jason Ekstrand
On Fri, Feb 23, 2018 at 1:26 AM, Jose Maria Casanova Crespo <
jmcasan...@igalia.com> wrote:

> Restrict the use of untyped_surface_write with 16-bit pairs in
> ssbo to the cases where we can guarantee that offset is multiple
> of 4.
>
> Taking into account that VK_KHR_relaxed_block_layout is available
> in ANV we can only guarantee that when we have a constant offset
> that is multiple of 4. For non constant offsets we will always use
> byte_scattered_write.
>

I double-checked the rules and we can't even guarantee that a f16vec2 is
dword-aligned.


> ---
>  src/intel/compiler/brw_fs_nir.cpp | 22 +++---
>  1 file changed, 15 insertions(+), 7 deletions(-)
>
> diff --git a/src/intel/compiler/brw_fs_nir.cpp
> b/src/intel/compiler/brw_fs_nir.cpp
> index 45b8e8b637..abf9098252 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -4135,6 +4135,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder
> &bld, nir_intrinsic_instr *instr
>   unsigned num_components = ffs(~(writemask >> first_component)) -
> 1;
>   fs_reg write_src = offset(val_reg, bld, first_component);
>
> + nir_const_value *const_offset = nir_src_as_const_value(instr->
> src[2]);
> +
>   if (type_size > 4) {
>  /* We can't write more than 2 64-bit components at once. Limit
>   * the num_components of the write to what we can do and let
> the next
> @@ -4150,14 +4152,19 @@ fs_visitor::nir_emit_intrinsic(const fs_builder
> &bld, nir_intrinsic_instr *instr
>   * 32-bit-aligned we need to use byte-scattered writes because
>   * untyped writes works with 32-bit components with 32-bit
>   * alignment. byte_scattered_write messages only support one
> - * 16-bit component at a time.
> + * 16-bit component at a time. As VK_KHR_relaxed_block_layout
> + * could be enabled we can not guarantee that not constant
> offsets
> + * to be 32-bit aligned for 16-bit types. For example an
> array, of
> + * 16-bit vec3 with array element stride of 6.
>   *
> - * For example, if there is a 3-components vector we submit
> one
> - * untyped-write message of 32-bit (first two components),
> and one
> - * byte-scattered write message (the last component).
> + * In the case of 32-bit aligned constant offsets if there is
> + * a 3-components vector we submit one untyped-write message
> + * of 32-bit (first two components), and one byte-scattered
> + * write message (the last component).
>   */
>
> -if (first_component % 2) {
> +if ( !const_offset || ((const_offset->u32[0] +
> +   type_size * first_component) % 4)) {
> /* If we use a .yz writemask we also need to emit 2
>  * byte-scattered write messages because of y-component not
>  * being aligned to 32-bit.
> @@ -4183,7 +4190,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder
> &bld, nir_intrinsic_instr *instr
>   }
>
>   fs_reg offset_reg;
> - nir_const_value *const_offset = nir_src_as_const_value(instr->
> src[2]);
> +
>   if (const_offset) {
>  offset_reg = brw_imm_ud(const_offset->u32[0] +
>  type_size * first_component);
> @@ -4222,7 +4229,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder
> &bld, nir_intrinsic_instr *instr
>   } else {
>  assert(num_components * type_size <= 16);
>  assert((num_components * type_size) % 4 == 0);
> -assert((first_component * type_size) % 4 == 0);
> +assert(!const_offset ||
> +   (const_offset->u32[0] + type_size * first_component) %
> 4 == 0);
>

How about

assert(offset_reg.file != BRW_IMMEDIATE_VALUE || offset_reg.ud % 4 == 0);

We've already done the above calculation and stored it in offset_reg.

Reviewed-by: Jason Ekstrand 


>  unsigned num_slots = (num_components * type_size) / 4;
>
>  emit_untyped_write(bld, surf_index, offset_reg,
> --
> 2.14.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: Update vertex processing mode on _mesa_UseProgram.

2018-02-23 Thread Brian Paul

On 02/23/2018 01:01 PM, mathias.froehl...@gmx.net wrote:

From: Mathias Fröhlich 

Hi Clayton,

The following change fixes the reported problem on my site.
Please test/review!!

best

Mathias


The change is a bug fix for 92d76a169:
   mesa: Provide an alternative to get_vp_mode()
that actually got exposed through 4562a7b0:
   vbo: Make use of _DrawVAO from the dlist code.

Fixes: KHR-GLES31.core.shader_image_load_store.advanced-sso-simple
Bugzilla: 
https://urldefense.proofpoint.com/v2/url?u=https-3A__bugs.freedesktop.org_show-5Fbug.cgi-3Fid-3D105229&d=DwIDaQ&c=uilaK90D4TOVoH58JNXRgQ&r=Ie7_encNUsqxbSRbqbNgofw0ITcfE8JKfaUjIQhncGA&m=LBvaMymXijZu_rp52IqZ2veWUoxN9q6Gqh2JNWfIPc4&s=61pPMG-_G1myc2dJVu_9Ar9ghESnHBQE61Z9O4TQrAo&e=
Signed-off-by: Mathias Fröhlich 
---
  src/mesa/main/shaderapi.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index d8a3031a38..b7bd7167e1 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -2069,6 +2069,8 @@ use_program(GLuint program, bool no_error)
  _mesa_BindProgramPipeline(ctx->Pipeline.Current->Name);
}
 }
+
+   _mesa_update_vertex_processing_mode(ctx);
  }
  
  



LGTM.

Reviewed-by: Brian Paul 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: Update vertex processing mode on _mesa_UseProgram.

2018-02-23 Thread Mathias . Froehlich
From: Mathias Fröhlich 

Hi Clayton,

The following change fixes the reported problem on my site.
Please test/review!!

best

Mathias


The change is a bug fix for 92d76a169:
  mesa: Provide an alternative to get_vp_mode()
that actually got exposed through 4562a7b0:
  vbo: Make use of _DrawVAO from the dlist code.

Fixes: KHR-GLES31.core.shader_image_load_store.advanced-sso-simple
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105229
Signed-off-by: Mathias Fröhlich 
---
 src/mesa/main/shaderapi.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index d8a3031a38..b7bd7167e1 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -2069,6 +2069,8 @@ use_program(GLuint program, bool no_error)
 _mesa_BindProgramPipeline(ctx->Pipeline.Current->Name);
   }
}
+
+   _mesa_update_vertex_processing_mode(ctx);
 }
 
 
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/5] ac: use correct LLVM opcodes for ordered comparisons

2018-02-23 Thread Marek Olšák
On Fri, Feb 23, 2018 at 8:18 PM, Bas Nieuwenhuizen
 wrote:
> On Fri, Feb 23, 2018 at 6:39 PM, Connor Abbott  wrote:
>> On Fri, Feb 23, 2018 at 8:30 AM, Bas Nieuwenhuizen  
>> wrote:
>>>
>>>
>>> On Thu, Feb 15, 2018 at 8:54 AM, Connor Abbott  wrote:

 On Wed, Feb 14, 2018 at 11:53 PM, Timothy Arceri 
 wrote:
 >
 >
 > On 15/02/18 04:39, Marek Olšák wrote:
 >>
 >> Reviewed-by: Marek Olšák 
 >>
 >> Marek
 >>
 >> On Wed, Feb 14, 2018 at 7:29 AM, Timothy Arceri 
 >> wrote:
 >>>
 >>> Fixes glsl-1.30/execution/isinf-and-isnan* piglit tests for
 >>> radeonsi and should fix SPIRV errors when LLVM optimises away
 >>> the workarounds in vtn_handle_alu() for handling ordered
 >>> comparisons.
 >>>
 >>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104905
 >>> ---
 >>>   src/amd/common/ac_nir_to_llvm.c | 8 
 >>>   1 file changed, 4 insertions(+), 4 deletions(-)
 >>>
 >>> diff --git a/src/amd/common/ac_nir_to_llvm.c
 >>> b/src/amd/common/ac_nir_to_llvm.c
 >>> index a0c5680205..e81f86bb08 100644
 >>> --- a/src/amd/common/ac_nir_to_llvm.c
 >>> +++ b/src/amd/common/ac_nir_to_llvm.c
 >>> @@ -1792,16 +1792,16 @@ static void visit_alu(struct ac_nir_context
 >>> *ctx,
 >>> const nir_alu_instr *instr)
 >>>  result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0],
 >>> src[1]);
 >>>  break;
 >>>  case nir_op_feq:
 >>> -   result = emit_float_cmp(&ctx->ac, LLVMRealUEQ, src[0],
 >>> src[1]);
 >>> +   result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0],
 >>> src[1]);
 >>>  break;
 >>>  case nir_op_fne:
 >>> -   result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0],
 >>> src[1]);
 >>> +   result = emit_float_cmp(&ctx->ac, LLVMRealONE, src[0],
 >>> src[1]);
 >
 >
 > It seems we need to leave this one as is to avoid regressions. This is
 > also
 > what radeonsi does.

 So, the thing you have to understand is that in LLVM unordered
 comparisons are precisely the inverse of the ordered comparisons. That
 is, (a <=(ordered) b) == !(a >(unordered b), (a ==(ordered) b) == !(a
 !=(unordered) b), and  so on. C defines that all comparsions are
 ordered except !=, so that (a == b) == !(a != b) always holds true.
 Most hardware follows this convention -- offhand, x86 SSE is the only
 ISA I know of with separate ordered and unordered comparisons, and
 LLVM appears to have copied the distinction from them, but no one else
 has both. I'm not even sure if it's in the IEEE spec. GLSL follows the
 C convention, so glsl_to_nir just uses nir_op_fne to mean unordered
 not-equal. spirv_to_nir generates some extra instructions, which then
 get stripped away later... sigh.

 I think the right way to untangle this mess is to define that the NIR
 opcodes should always match the C convention. The separate ordered and
 unordered opcodes are unnecesary, since one is just the logical
 negation of the other, and LLVM was a little overzealous -- I'm sure
 they would get rid of the distinction if they had the chance -- and
 then they were blindly copied to SPIR-V. spirv_to_nir should just
 negate the result if necessary rather than emitting the extra code to
 handle NaN, and ac should use ordered except for not-equals.
>>>
>>>
>>> I think we should also use ordered for not-equal. Otherwise we have no way
>>> to contruct an unordered equal  or ordered not-equal using the not-trick. I
>>> think that would be more important than trying to keep it in sync with C?
>>
>> I was thinking about that too... but all the backends (except for
>> radv), frontends, opt_algebraic patterns, etc. currently assume fne
>> means unordered not-equals. We'd have to rewrite a lot of stuff to
>> flip the meaning. But if you're willing to do all the mechanical
>> rewriting, sure :).
>
> Well, nir_opt_algebraic completely disregards whether it is ordered
> and will happily flip it the wrong way
> (https://cgit.freedesktop.org/mesa/mesa/tree/src/compiler/nir/nir_opt_algebraic.py#n137)
> and it seems like spirv does try to be extra careful by doing the NaN
> checks manually for all the comparisons (but in the process still
> relies on feq being ordered). So I'd argue that we need some
> non-backend work either way.
>
> I agree that switching over all backends is annoying though,
> especially because not all of them have a great instruction selector
> that we can trust to optimize out any of the not's if we don't lower
> them in nir_opt_algebraic to instructions with explicit orderedness.
> So radv switching over to what intel uses seems OK for now.
>
> btw should we document this somewhere? Either in the instruction name,
> or at least in the definition in nir_opcodes.py?

I guess we 

[Mesa-dev] [PATCH] st/mesa: expose 0 shader binary formats for compat profiles for Qt

2018-02-23 Thread Marek Olšák
From: Marek Olšák 

Bugzilla: https://bugreports.qt.io/browse/QTBUG-66420
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105065
Cc: "18.0" 
---
 src/mesa/state_tracker/st_context.c|  2 +-
 src/mesa/state_tracker/st_extensions.c | 14 +++---
 src/mesa/state_tracker/st_extensions.h |  3 ++-
 src/mesa/state_tracker/st_manager.c|  2 +-
 4 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/src/mesa/state_tracker/st_context.c 
b/src/mesa/state_tracker/st_context.c
index e23f9fd..dfdd92f 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -461,21 +461,21 @@ st_create_context_priv(struct gl_context *ctx, struct 
pipe_context *pipe,
   screen->get_param(screen, PIPE_CAP_TGSI_PACK_HALF_FLOAT);
st->has_multi_draw_indirect =
   screen->get_param(screen, PIPE_CAP_MULTI_DRAW_INDIRECT);
 
st->has_hw_atomics =
   screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS)
   ? true : false;
 
/* GL limits and extensions */
-   st_init_limits(pipe->screen, &ctx->Const, &ctx->Extensions);
+   st_init_limits(pipe->screen, &ctx->Const, &ctx->Extensions, ctx->API);
st_init_extensions(pipe->screen, &ctx->Const,
   &ctx->Extensions, &st->options, ctx->API);
 
if (st_have_perfmon(st)) {
   ctx->Extensions.AMD_performance_monitor = GL_TRUE;
}
 
/* Enable shader-based fallbacks for ARB_color_buffer_float if needed. */
if (screen->get_param(screen, PIPE_CAP_VERTEX_COLOR_UNCLAMPED)) {
   if (!screen->get_param(screen, PIPE_CAP_VERTEX_COLOR_CLAMPED)) {
diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index 42d53cb..4bd5404 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -68,21 +68,22 @@ static int _clamp(int a, int min, int max)
else
   return a;
 }
 
 
 /**
  * Query driver to get implementation limits.
  * Note that we have to limit/clamp against Mesa's internal limits too.
  */
 void st_init_limits(struct pipe_screen *screen,
-struct gl_constants *c, struct gl_extensions *extensions)
+struct gl_constants *c, struct gl_extensions *extensions,
+gl_api api)
 {
int supported_irs;
unsigned sh;
boolean can_ubo = TRUE;
int temp;
bool ssbo_atomic = true;
 
c->MaxTextureLevels
   = _min(screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS),
 MAX_TEXTURE_LEVELS);
@@ -430,22 +431,29 @@ void st_init_limits(struct pipe_screen *screen,
  c->Program[MESA_SHADER_FRAGMENT].MaxUniformBlocks +
  c->Program[MESA_SHADER_COMPUTE].MaxUniformBlocks;
   assert(c->MaxCombinedUniformBlocks <= MAX_COMBINED_UNIFORM_BUFFERS);
}
 
c->GLSLFragCoordIsSysVal =
   screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL);
c->GLSLFrontFacingIsSysVal =
   screen->get_param(screen, PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL);
 
-   /* GL_ARB_get_program_binary */
-   if (screen->get_disk_shader_cache && screen->get_disk_shader_cache(screen))
+   /* GL_ARB_get_program_binary
+*
+* The QT framework has a bug in their shader program cache, which is built
+* on GL_ARB_get_program_binary. In an effort to allow them to fix the bug
+* we don't enable more than 1 binary format for compatibility profiles.
+* This is only being done on the 18.0 release branch.
+*/
+   if (api != API_OPENGL_COMPAT &&
+   screen->get_disk_shader_cache && screen->get_disk_shader_cache(screen))
   c->NumProgramBinaryFormats = 1;
 
c->MaxAtomicBufferBindings =
   c->Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers;
 
if (!ssbo_atomic) {
   /* for separate atomic buffers - there atomic buffer size will be
  limited */
   c->MaxAtomicBufferSize = 
c->Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters * ATOMIC_COUNTER_SIZE;
   /* on all HW with separate atomic (evergreen) the following
diff --git a/src/mesa/state_tracker/st_extensions.h 
b/src/mesa/state_tracker/st_extensions.h
index 7bf1aa8..fdfac7e 100644
--- a/src/mesa/state_tracker/st_extensions.h
+++ b/src/mesa/state_tracker/st_extensions.h
@@ -28,20 +28,21 @@
 
 #ifndef ST_EXTENSIONS_H
 #define ST_EXTENSIONS_H
 
 
 struct st_context;
 struct pipe_screen;
 
 extern void st_init_limits(struct pipe_screen *screen,
struct gl_constants *c,
-   struct gl_extensions *extensions);
+   struct gl_extensions *extensions,
+   gl_api api);
 
 extern void st_init_extensions(struct pipe_screen *screen,
struct gl_constants *consts,
struct gl_extensions *extensions,
struct st_config_options *options,
gl_api api);
 
 

Re: [Mesa-dev] Killing broken configure toggles and fixing autotools gl.pc

2018-02-23 Thread Brian Paul

On 02/23/2018 12:31 PM, Emil Velikov wrote:

Hi all,

While fixing a gl.pc + glvnd bug (well sort of), I noticed the
following:

With GLVND, we started abusing GL_LIB a lot more. Currently autotools
can do a) custom libGL name, b) mangled GL and
c) GLVND.

Turns out that a + b was broken, since b was introduced. There is very
limited reason why a would exist and since it's broken I'm just nuking
it.

OSMesa was in the same a/b shape so it gets the same axe.

Note: now configure will error out if anyone explicitly touches the
toggles.
Note: the whole series should be applicable for stable - I will check
the exact commits/branches and annotate accordingly

WARNINING: The series isn't even build tested, but I'm sending it out
for feedback since the topic be a bit ...

Any feedback, even a general ACK will be appreciated.
Emil

Cc: Brian Paul 
Cc: Dylan Baker 



Sounds OK to me.  Maybe you could add a note about this in the 
appropriate release notes file.


Acked-by: Brian Paul 


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   3   >