[Mesa-dev] [PATCH] nir: fix clip cull lowering to not assert if GLSL already lowered.

2018-10-14 Thread Kenneth Graunke
From: Dave Airlie 

If GLSL has already done the lowering, we'd rather not crash in this pass.

Reviewed-by: Kenneth Graunke 
---
 src/compiler/nir/nir_lower_clip_cull_distance_arrays.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c 
b/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c
index 86ce5fb1f86..2afbf9285c0 100644
--- a/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c
+++ b/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c
@@ -144,6 +144,12 @@ combine_clip_cull(nir_shader *nir,
  cull = var;
}
 
+   /* if the GLSL lowering pass has already run, don't bother repeating */
+   if (!cull && clip) {
+  if (!glsl_type_is_array(clip->type))
+ return false;
+   }
+
const unsigned clip_array_size = get_unwrapped_array_length(nir, clip);
const unsigned cull_array_size = get_unwrapped_array_length(nir, cull);
 
-- 
2.19.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] st/mesa: Pass index to pipe->create_query() for statistics queries.

2018-10-14 Thread Kenneth Graunke
GL exposes separate queries for each pipeline statistics counter.
For some reason, Gallium chose to map them all to a single target,
PIPE_QUERY_PIPELINE_STATISTICS.  Radeon hardware appears to query
them all as a group.  pipe->get_query_result_resource() takes an
index, indicating which to write to the buffer.  The CPU-side hook,
pipe->get_query_result(), simply writes them all, and st/mesa returns
the one that was actually desired.

On Intel hardware, each individual pipeline statistics value is handled
as a separate counter and query.  We can query each individually, and
that is more efficient than querying all 11 counters each time.  But,
we need pipe->get_query_result() to know which one to return.

To handle this, we pass the index into pipe->create_query(), which
was previously always 0 for these queries.  Drivers which return all
of the counters as a group can simply ignore it; drivers querying one
at a time can use it to distinguish between the counters.

This is the least invasive fix, but it is kind of ugly, and I wonder
whether we'd be better off just adding PIPE_QUERY_IA_VERTICES (etc.)
targets...
---
 src/mesa/state_tracker/st_cb_queryobj.c | 76 -
 1 file changed, 36 insertions(+), 40 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_queryobj.c 
b/src/mesa/state_tracker/st_cb_queryobj.c
index 69e6004c3f1..0dc06ceb574 100644
--- a/src/mesa/state_tracker/st_cb_queryobj.c
+++ b/src/mesa/state_tracker/st_cb_queryobj.c
@@ -88,6 +88,40 @@ st_DeleteQuery(struct gl_context *ctx, struct 
gl_query_object *q)
free(stq);
 }
 
+static int
+target_to_index(const struct gl_query_object *q)
+{
+   switch (q->Target) {
+   case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
+   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
+   case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
+  return q->Stream;
+   case GL_VERTICES_SUBMITTED_ARB:
+  return 0;
+   case GL_PRIMITIVES_SUBMITTED_ARB:
+  return 1;
+   case GL_VERTEX_SHADER_INVOCATIONS_ARB:
+  return 2;
+   case GL_GEOMETRY_SHADER_INVOCATIONS:
+  return 3;
+   case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB:
+  return 4;
+   case GL_CLIPPING_INPUT_PRIMITIVES_ARB:
+  return 5;
+   case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:
+  return 6;
+   case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:
+  return 7;
+   case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
+  return 8;
+   case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
+  return 9;
+   case GL_COMPUTE_SHADER_INVOCATIONS_ARB:
+  return 10;
+   default:
+  return 0;
+   }
+}
 
 static void
 st_BeginQuery(struct gl_context *ctx, struct gl_query_object *q)
@@ -164,7 +198,7 @@ st_BeginQuery(struct gl_context *ctx, struct 
gl_query_object *q)
  ret = pipe->end_query(pipe, stq->pq_begin);
} else {
   if (!stq->pq) {
- stq->pq = pipe->create_query(pipe, type, q->Stream);
+ stq->pq = pipe->create_query(pipe, type, target_to_index(q));
  stq->type = type;
   }
   if (stq->pq)
@@ -383,46 +417,8 @@ st_StoreQueryResult(struct gl_context *ctx, struct 
gl_query_object *q,
 
if (pname == GL_QUERY_RESULT_AVAILABLE) {
   index = -1;
-   } else if (stq->type == PIPE_QUERY_PIPELINE_STATISTICS) {
-  switch (q->Target) {
-  case GL_VERTICES_SUBMITTED_ARB:
- index = 0;
- break;
-  case GL_PRIMITIVES_SUBMITTED_ARB:
- index = 1;
- break;
-  case GL_VERTEX_SHADER_INVOCATIONS_ARB:
- index = 2;
- break;
-  case GL_GEOMETRY_SHADER_INVOCATIONS:
- index = 3;
- break;
-  case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB:
- index = 4;
- break;
-  case GL_CLIPPING_INPUT_PRIMITIVES_ARB:
- index = 5;
- break;
-  case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:
- index = 6;
- break;
-  case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:
- index = 7;
- break;
-  case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
- index = 8;
- break;
-  case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
- index = 9;
- break;
-  case GL_COMPUTE_SHADER_INVOCATIONS_ARB:
- index = 10;
- break;
-  default:
- unreachable("Unexpected target");
-  }
} else {
-  index = 0;
+  index = target_to_index(q);
}
 
pipe->get_query_result_resource(pipe, stq->pq, wait, result_type, index,
-- 
2.19.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/15] radv: add support for VK_EXT_transform_feedback

2018-10-14 Thread Andre Heider

Hi,

On 13/10/2018 14:57, Samuel Pitoiset wrote:

Hi,

This series implements VK_EXT_transform_feedback for RADV. We tested it
quite a lot with DXVK and also with RenderDoc, and it should be stable enough.

Please review, thanks!


on Tonga, this series is:

Tested-by: Andre Heider 

On wine/dxvk/witcher3 I can now see those annoying sirens too ;)

Thanks,
Andre
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/13] nir: Add a pass for gathering transform feedback info

2018-10-14 Thread Alejandro Piñeiro
Just in case you missed my Rb the first time you sent this patch:

Reviewed-by: Alejandro Piñeiro 


On 13/10/18 15:09, Jason Ekstrand wrote:
> This is different from the GL_ARB_spirv pass because it generates a much
> simpler data structure that isn't tied to OpenGL and mtypes.h.
> ---
>  src/compiler/Makefile.sources  |   4 +-
>  src/compiler/nir/meson.build   |   2 +
>  src/compiler/nir/nir_gather_xfb_info.c | 150 +
>  src/compiler/nir/nir_xfb_info.h|  59 ++
>  4 files changed, 214 insertions(+), 1 deletion(-)
>  create mode 100644 src/compiler/nir/nir_gather_xfb_info.c
>  create mode 100644 src/compiler/nir/nir_xfb_info.h
>
> diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
> index d3b06564832..46ed5e47b46 100644
> --- a/src/compiler/Makefile.sources
> +++ b/src/compiler/Makefile.sources
> @@ -216,6 +216,7 @@ NIR_FILES = \
>   nir/nir_format_convert.h \
>   nir/nir_from_ssa.c \
>   nir/nir_gather_info.c \
> + nir/nir_gather_xfb_info.c \
>   nir/nir_gs_count_vertices.c \
>   nir/nir_inline_functions.c \
>   nir/nir_instr_set.c \
> @@ -307,7 +308,8 @@ NIR_FILES = \
>   nir/nir_validate.c \
>   nir/nir_vla.h \
>   nir/nir_worklist.c \
> - nir/nir_worklist.h
> + nir/nir_worklist.h \
> + nir/nir_xfb_info.h
>  
>  SPIRV_GENERATED_FILES = \
>   spirv/spirv_info.c \
> diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
> index 090aa7a628f..b416e561eb0 100644
> --- a/src/compiler/nir/meson.build
> +++ b/src/compiler/nir/meson.build
> @@ -100,6 +100,7 @@ files_libnir = files(
>'nir_format_convert.h',
>'nir_from_ssa.c',
>'nir_gather_info.c',
> +  'nir_gather_xfb_info.c',
>'nir_gs_count_vertices.c',
>'nir_inline_functions.c',
>'nir_instr_set.c',
> @@ -192,6 +193,7 @@ files_libnir = files(
>'nir_vla.h',
>'nir_worklist.c',
>'nir_worklist.h',
> +  'nir_xfb_info.h',
>'../spirv/GLSL.ext.AMD.h',
>'../spirv/GLSL.std.450.h',
>'../spirv/gl_spirv.c',
> diff --git a/src/compiler/nir/nir_gather_xfb_info.c 
> b/src/compiler/nir/nir_gather_xfb_info.c
> new file mode 100644
> index 000..a53703bb9bf
> --- /dev/null
> +++ b/src/compiler/nir/nir_gather_xfb_info.c
> @@ -0,0 +1,150 @@
> +/*
> + * Copyright © 2018 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
> DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "nir_xfb_info.h"
> +
> +#include 
> +
> +static void
> +add_var_xfb_outputs(nir_xfb_info *xfb,
> +nir_variable *var,
> +unsigned *location,
> +unsigned *offset,
> +const struct glsl_type *type)
> +{
> +   if (glsl_type_is_array(type) || glsl_type_is_matrix(type)) {
> +  unsigned length = glsl_get_length(type);
> +  const struct glsl_type *child_type = glsl_get_array_element(type);
> +  for (unsigned i = 0; i < length; i++)
> + add_var_xfb_outputs(xfb, var, location, offset, child_type);
> +   } else if (glsl_type_is_struct(type)) {
> +  unsigned length = glsl_get_length(type);
> +  for (unsigned i = 0; i < length; i++) {
> + const struct glsl_type *child_type = glsl_get_struct_field(type, i);
> + add_var_xfb_outputs(xfb, var, location, offset, child_type);
> +  }
> +   } else {
> +  assert(var->data.xfb_buffer < NIR_MAX_XFB_BUFFERS);
> +  if (xfb->buffers_written & (1 << var->data.xfb_buffer)) {
> + assert(xfb->strides[var->data.xfb_buffer] == var->data.xfb_stride);
> + assert(xfb->buffer_to_stream[var->data.xfb_buffer] == 
> var->data.stream);
> +  } else {
> + xfb->buffers_written |= (1 << var->data.xfb_buffer);
> + xfb->strides[var->data.xfb_buffer] = var->data.xfb_stride;
> + xfb->buffer_to_stream[var->data.xfb_buffer] = var->data.stream;
> +

[Mesa-dev] [PATCH 2/3] anv: Stop generating weak references for instance entrypoints

2018-10-14 Thread Jason Ekstrand
We don't need weak references to instance entrypoints because we never
have more than one of each so we don't need the NULL fall-back.  This
also helps us avoid forgetting things because we now get link errors for
missing instance entrypoints.
---
 src/intel/vulkan/anv_entrypoints_gen.py | 13 -
 1 file changed, 13 deletions(-)

diff --git a/src/intel/vulkan/anv_entrypoints_gen.py 
b/src/intel/vulkan/anv_entrypoints_gen.py
index beb658b8660..25a532fd706 100644
--- a/src/intel/vulkan/anv_entrypoints_gen.py
+++ b/src/intel/vulkan/anv_entrypoints_gen.py
@@ -227,19 +227,6 @@ ${strmap(device_strmap, 'device')}
  * either pick the correct entry point.
  */
 
-% for e in instance_entrypoints:
-  % if e.alias:
-<% continue %>
-  % endif
-  % if e.guard is not None:
-#ifdef ${e.guard}
-  % endif
-  ${e.return_type} ${e.prefixed_name('anv')}(${e.decl_params()}) __attribute__ 
((weak));
-  % if e.guard is not None:
-#endif // ${e.guard}
-  % endif
-% endfor
-
 const struct anv_instance_dispatch_table anv_instance_dispatch_table = {
 % for e in instance_entrypoints:
   % if e.guard is not None:
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] anv: Add a dummy implementation of GetPhysicalDevicePresentRectanglesKHR

2018-10-14 Thread Jason Ekstrand
Not really sure what we're supposed to do with this one but we should do
something.
---
 src/intel/vulkan/anv_wsi.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c
index 5ed1d711689..4f3f3fc57af 100644
--- a/src/intel/vulkan/anv_wsi.c
+++ b/src/intel/vulkan/anv_wsi.c
@@ -294,3 +294,14 @@ VkResult anv_GetDeviceGroupSurfacePresentModesKHR(
 
return VK_SUCCESS;
 }
+
+VkResult anv_GetPhysicalDevicePresentRectanglesKHR(
+    VkPhysicalDevice                            physicalDevice,
+    VkSurfaceKHR                                surface,
+    uint32_t*                                   pRectCount,
+    VkRect2D*                                   pRects)
+{
+   /* TODO: What should I be doing here? */
+   *pRectCount = 0;
+   return VK_SUCCESS;
+}
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] anv: Define trampolines as the weak functions

2018-10-14 Thread Jason Ekstrand
Instead of having weak references to the anv functions and separate
trampoline functions with their own dispatch table, just make the
trampoline functions weak.  This gets rid of a dispatch table and
potentially lets the compiler delete the unused weak function.  The
end result is a reduction in the .text section of 5.7K and a reduction
in the .data section of 1.4K.

Before:

   text    data     bss     dec     hex filename
3190329  282232    8960 3481521  351fb1 _install/lib64/libvulkan_intel.so

After:

   text    data     bss     dec     hex filename
3184548  280792    8960 3474300  35037c _install/lib64/libvulkan_intel.so
---
 src/intel/vulkan/anv_device.c   |  5 +-
 src/intel/vulkan/anv_entrypoints_gen.py | 65 -
 2 files changed, 21 insertions(+), 49 deletions(-)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index a2551452eb1..f68226217b4 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -653,12 +653,9 @@ VkResult anv_CreateInstance(
   if (!anv_device_entrypoint_is_enabled(i, instance->app_info.api_version,
 &instance->enabled_extensions, 
NULL)) {
  instance->device_dispatch.entrypoints[i] = NULL;
-  } else if (anv_device_dispatch_table.entrypoints[i] != NULL) {
- instance->device_dispatch.entrypoints[i] =
-anv_device_dispatch_table.entrypoints[i];
   } else {
  instance->device_dispatch.entrypoints[i] =
-anv_tramp_device_dispatch_table.entrypoints[i];
+anv_device_dispatch_table.entrypoints[i];
   }
}
 
diff --git a/src/intel/vulkan/anv_entrypoints_gen.py 
b/src/intel/vulkan/anv_entrypoints_gen.py
index 25a532fd706..792e3b6c90c 100644
--- a/src/intel/vulkan/anv_entrypoints_gen.py
+++ b/src/intel/vulkan/anv_entrypoints_gen.py
@@ -92,7 +92,6 @@ extern const struct anv_instance_dispatch_table 
anv_instance_dispatch_table;
 %for layer in LAYERS:
 extern const struct anv_device_dispatch_table ${layer}_device_dispatch_table;
 %endfor
-extern const struct anv_device_dispatch_table anv_tramp_device_dispatch_table;
 
 % for e in instance_entrypoints:
   % if e.alias:
@@ -247,7 +246,26 @@ const struct anv_instance_dispatch_table 
anv_instance_dispatch_table = {
 % if e.guard is not None:
 #ifdef ${e.guard}
 % endif
-${e.return_type} ${e.prefixed_name(layer)}(${e.decl_params()}) 
__attribute__ ((weak));
+% if layer == 'anv':
+  ${e.return_type} __attribute__ ((weak))
+  ${e.prefixed_name('anv')}(${e.decl_params()})
+  {
+% if e.params[0].type == 'VkDevice':
+  ANV_FROM_HANDLE(anv_device, anv_device, ${e.params[0].name});
+  return anv_device->dispatch.${e.name}(${e.call_params()});
+% elif e.params[0].type == 'VkCommandBuffer':
+  ANV_FROM_HANDLE(anv_cmd_buffer, anv_cmd_buffer, ${e.params[0].name});
+  return 
anv_cmd_buffer->device->dispatch.${e.name}(${e.call_params()});
+% elif e.params[0].type == 'VkQueue':
+  ANV_FROM_HANDLE(anv_queue, anv_queue, ${e.params[0].name});
+  return anv_queue->device->dispatch.${e.name}(${e.call_params()});
+% else:
+  assert(!"Unhandled device child trampoline case: 
${e.params[0].type}");
+% endif
+  }
+% else:
+  ${e.return_type} ${e.prefixed_name(layer)}(${e.decl_params()}) 
__attribute__ ((weak));
+% endif
 % if e.guard is not None:
 #endif // ${e.guard}
 % endif
@@ -267,49 +285,6 @@ const struct anv_instance_dispatch_table 
anv_instance_dispatch_table = {
 % endfor
 
 
-/** Trampoline entrypoints for all device functions */
-
-% for e in device_entrypoints:
-  % if e.alias:
-<% continue %>
-  % endif
-  % if e.guard is not None:
-#ifdef ${e.guard}
-  % endif
-  static ${e.return_type}
-  ${e.prefixed_name('anv_tramp')}(${e.decl_params()})
-  {
-% if e.params[0].type == 'VkDevice':
-  ANV_FROM_HANDLE(anv_device, anv_device, ${e.params[0].name});
-  return anv_device->dispatch.${e.name}(${e.call_params()});
-% elif e.params[0].type == 'VkCommandBuffer':
-  ANV_FROM_HANDLE(anv_cmd_buffer, anv_cmd_buffer, ${e.params[0].name});
-  return anv_cmd_buffer->device->dispatch.${e.name}(${e.call_params()});
-% elif e.params[0].type == 'VkQueue':
-  ANV_FROM_HANDLE(anv_queue, anv_queue, ${e.params[0].name});
-  return anv_queue->device->dispatch.${e.name}(${e.call_params()});
-% else:
-  assert(!"Unhandled device child trampoline case: ${e.params[0].type}");
-% endif
-  }
-  % if e.guard is not None:
-#endif // ${e.guard}
-  % endif
-% endfor
-
-const struct anv_device_dispatch_table anv_tramp_device_dispatch_table = {
-% for e in device_entrypoints:
-  % if e.guard is not None:
-#ifdef ${e.guard}
-  % endif
-.${e.name} = ${e.prefixed_name('anv_tramp')},
-  % if e.guard is not None:
-#endif // ${e.guard}
-  % endif
-% endfor
-};
-
-
 /** Return true if the core version 

Re: [Mesa-dev] [PATCH 7/7] i965/fs: Lower 64-bit MOV operations

2018-10-14 Thread Jason Ekstrand
Yeah

This is where things get interesting.  Do you know where the MOVs are
coming from?  Any scalar 64-bit SSA MOVs with scalar sources I would expect
to get deleted by copy propagation.  We could still get some from phis but
the right thing to do there is likely to have a 64-bit phi lowering pass
that chews them up and dumps out pairs of 32-bit phis or maybe vec2s
instead of uint64_ts.  Apart from those, we still have 64-bit UBO and input
pulls which look 64-bit in NIR but are actually 32-bit in the back-end.  We
could probably lower those in NIR if we wanted.

Sorry for rambling; I'm just trying to iterate all the remaining sources of
64-bit values once all the ALU ops are gone.  It may or may not be worth it
to try and get rid of them all vs. just letting the back end deal with it.
No real solutions yet, just disjointed thoughts, I'm afraid.

--Jason

On Sun, Oct 14, 2018 at 5:12 PM Matt Turner  wrote:

> FINISHME: Lower them in NIR instead?
> ---
>  src/intel/compiler/brw_fs.cpp | 21 -
>  1 file changed, 20 insertions(+), 1 deletion(-)
>
> diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
> index 69726ed70e8..9e50df59356 100644
> --- a/src/intel/compiler/brw_fs.cpp
> +++ b/src/intel/compiler/brw_fs.cpp
> @@ -2403,9 +2403,28 @@ fs_visitor::opt_algebraic()
>  {
> bool progress = false;
>
> -   foreach_block_and_inst(block, fs_inst, inst, cfg) {
> +   foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
>switch (inst->opcode) {
>case BRW_OPCODE_MOV:
> + if (!devinfo->has_64bit_types &&
> + (inst->dst.type == BRW_REGISTER_TYPE_DF ||
> +  inst->dst.type == BRW_REGISTER_TYPE_UQ ||
> +  inst->dst.type == BRW_REGISTER_TYPE_Q)) {
> +assert(inst->dst.type == inst->src[0].type);
> +assert(!inst->saturate);
> +assert(!inst->src[0].abs);
> +assert(!inst->src[0].negate);
> +const brw::fs_builder ibld(this, block, inst);
> +
> +ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1),
> + subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1));
> +ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0),
> + subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0));
> +
> +inst->remove(block);
> +progress = true;
> + }
> +
>   if (inst->src[0].file != IMM)
>  break;
>
> --
> 2.16.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/7] i965/fs: Add 64-bit int immediate support to dump_instructions()

2018-10-14 Thread Jason Ekstrand
Reviewed-by: Jason Ekstrand 

On Sun, Oct 14, 2018 at 5:12 PM Matt Turner  wrote:

> ---
>  src/intel/compiler/brw_fs.cpp   | 6 ++
>  src/intel/compiler/brw_shader.h | 2 ++
>  2 files changed, 8 insertions(+)
>
> diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
> index 23a25fedca5..69726ed70e8 100644
> --- a/src/intel/compiler/brw_fs.cpp
> +++ b/src/intel/compiler/brw_fs.cpp
> @@ -6023,6 +6023,12 @@ fs_visitor::dump_instruction(backend_instruction
> *be_inst, FILE *file)
>   case BRW_REGISTER_TYPE_UD:
>  fprintf(file, "%uu", inst->src[i].ud);
>  break;
> + case BRW_REGISTER_TYPE_Q:
> +fprintf(file, "%" PRId64 "q", inst->src[i].d64);
> +break;
> + case BRW_REGISTER_TYPE_UQ:
> +fprintf(file, "%" PRIu64 "uq", inst->src[i].u64);
> +break;
>   case BRW_REGISTER_TYPE_VF:
>  fprintf(file, "[%-gF, %-gF, %-gF, %-gF]",
>  brw_vf_to_float((inst->src[i].ud >>  0) & 0xff),
> diff --git a/src/intel/compiler/brw_shader.h
> b/src/intel/compiler/brw_shader.h
> index 7d97ddbd868..e1d598b8781 100644
> --- a/src/intel/compiler/brw_shader.h
> +++ b/src/intel/compiler/brw_shader.h
> @@ -89,6 +89,8 @@ struct backend_reg : private brw_reg
> using brw_reg::f;
> using brw_reg::d;
> using brw_reg::ud;
> +   using brw_reg::d64;
> +   using brw_reg::u64;
>  };
>  #endif
>
> --
> 2.16.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/7] nir/int64: Implement lowering of shift operations

2018-10-14 Thread Jason Ekstrand
On Sun, Oct 14, 2018 at 5:12 PM Matt Turner  wrote:

> ---
>  src/compiler/nir/nir.h |   1 +
>  src/compiler/nir/nir_lower_int64.c | 142
> +
>  2 files changed, 143 insertions(+)
>
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 12cbd030e21..2c477126acc 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -3001,6 +3001,7 @@ typedef enum {
> nir_lower_ineg64= (1 << 7),
> nir_lower_logic64   = (1 << 8),
> nir_lower_minmax64  = (1 << 9),
> +   nir_lower_shift64   = (1 << 10),
>  } nir_lower_int64_options;
>
>  bool nir_lower_int64(nir_shader *shader, nir_lower_int64_options options);
> diff --git a/src/compiler/nir/nir_lower_int64.c
> b/src/compiler/nir/nir_lower_int64.c
> index 9cdc8a9d592..25882d3a858 100644
> --- a/src/compiler/nir/nir_lower_int64.c
> +++ b/src/compiler/nir/nir_lower_int64.c
> @@ -90,6 +90,138 @@ lower_ixor64(nir_builder *b, nir_ssa_def *x,
> nir_ssa_def *y)
>  nir_ixor(b, x_hi, y_hi));
>  }
>
> +static nir_ssa_def *
> +lower_ishl64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> +{
> +   /* Implemented as
> +*
> +* uint64_t lshift(uint64_t x, int c)
> +* {
> +*if (c == 0) return x;
> +*
> +*uint32_t lo = LO(x), hi = HI(x);
> +*
> +*if (c < 32) {
> +*   uint32_t lo_shifted = lo << (c & 0x1f);
> +*   uint32_t hi_shifted = hi << (c & 0x1f);
> +*   uint32_t lo_shifted_hi = lo >> (abs(32 - c) & 0x1f);
>

Why the abs and the &?  It's already predicated on c < 32, and negative or
OOB shifts already have undefined results.


> +*   return pack_64(lo_shifted, hi_shifted | lo_shifted_hi);
> +*} else {
> +*   uint32_t lo_shifted_hi = lo << (abs(32 - c) & 0x1f);
> +*   return pack_64(0, lo_shifted_hi);
> +*}
> +* }
> +*/
> +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> +
> +   nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd(b, y, nir_imm_int(b,
> -32)));
>

This is iabs(c - 32) (which yields the same result but isn't the same
expression) and doesn't have the & 0x1f.


> +   nir_ssa_def *lo_shifted = nir_ishl(b, x_lo, y);
> +   nir_ssa_def *hi_shifted = nir_ishl(b, x_hi, y);
>

In general, all of the 0x1f are missing.  While not having them works on
i965, there's no guarantee it works in general.  Maybe we should add them
in and have an i965-specific optimization to delete them again?  Maybe it's
ok to just not have them.  In any case, the code down here should match the
code above or there should be a very good comment saying why it doesn't.


> +   nir_ssa_def *lo_shifted_hi = nir_ushr(b, x_lo, reverse_count);
> +
> +   nir_ssa_def *res_if_lt_32 =
> +  nir_pack_64_2x32_split(b, lo_shifted,
> +nir_ior(b, hi_shifted, lo_shifted_hi));
> +   nir_ssa_def *res_if_ge_32 =
> +  nir_pack_64_2x32_split(b, nir_imm_int(b, 0),
> +nir_ishl(b, x_lo, reverse_count));
> +
> +   return nir_bcsel(b,
> +nir_ieq(b, y, nir_imm_int(b, 0)), x,
> +nir_bcsel(b, nir_uge(b, y, nir_imm_int(b, 32)),
> + res_if_ge_32, res_if_lt_32));
> +}
> +
> +static nir_ssa_def *
> +lower_ishr64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> +{
> +   /* Implemented as
> +*
> +* uint64_t arshift(uint64_t x, int c)
> +* {
> +*if (c == 0) return x;
> +*
> +*uint32_t lo = LO(x);
> +*int32_t  hi = HI(x);
> +*
> +*if (c < 32) {
> +*   uint32_t lo_shifted = lo >> (c & 0x1f);
> +*   uint32_t hi_shifted = hi >> (c & 0x1f);
> +*   uint32_t hi_shifted_lo = hi << (abs(32 - c) & 0x1f);
> +*   return pack_64(hi_shifted, hi_shifted_lo | lo_shifted);
> +*} else {
> +*   uint32_t hi_shifted = hi >> 31;
> +*   uint32_t hi_shifted_lo = hi >> (abs(32 - c) & 0x1f);
> +*   return pack_64(hi_shifted, hi_shifted_lo);
> +*}
> +* }
> +*/
> +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> +
> +   nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd(b, y, nir_imm_int(b,
> -32)));
> +   nir_ssa_def *lo_shifted = nir_ushr(b, x_lo, y);
> +   nir_ssa_def *hi_shifted = nir_ishr(b, x_hi, y);
> +   nir_ssa_def *hi_shifted_lo = nir_ishl(b, x_hi, reverse_count);
> +
> +   nir_ssa_def *res_if_lt_32 =
> +  nir_pack_64_2x32_split(b, nir_ior(b, lo_shifted, hi_shifted_lo),
> +hi_shifted);
> +   nir_ssa_def *res_if_ge_32 =
> +  nir_pack_64_2x32_split(b, nir_ishr(b, x_hi, reverse_count),
> +nir_ishr(b, x_hi, nir_imm_int(b, 31)));
> +
> +   return nir_bcsel(b,
> +nir_ieq(b, y, nir_imm_int(b, 0)), x,
> +nir_b

[Mesa-dev] [PATCH 1/2] nir/bitmap: emit bitmap sampler definition for bitmap lowering.

2018-10-14 Thread Dave Airlie
From: Dave Airlie 

The bitmap lowering code references a sampler that we never declare;
this fixes it by declaring the sampler.
---
 src/compiler/nir/nir_lower_bitmap.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/src/compiler/nir/nir_lower_bitmap.c 
b/src/compiler/nir/nir_lower_bitmap.c
index a4d9498576c..e32e1bb83c1 100644
--- a/src/compiler/nir/nir_lower_bitmap.c
+++ b/src/compiler/nir/nir_lower_bitmap.c
@@ -77,6 +77,17 @@ get_texcoord(nir_shader *shader)
return texcoord;
 }
 
+static void
+add_sampler(nir_shader *shader,
+   const nir_lower_bitmap_options *options)
+{
+   nir_variable *sampler = nir_variable_create(shader,
+   nir_var_uniform,
+   
glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT),
+   "s_bitmap");
+   sampler->data.location = options->sampler;
+}
+
 static void
 lower_bitmap(nir_shader *shader, nir_builder *b,
  const nir_lower_bitmap_options *options)
@@ -86,6 +97,7 @@ lower_bitmap(nir_shader *shader, nir_builder *b,
nir_ssa_def *cond;
nir_intrinsic_instr *discard;
 
+   add_sampler(shader, options);
texcoord = nir_load_var(b, get_texcoord(shader));
 
tex = nir_tex_instr_create(shader, 1);
-- 
2.17.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] nir/drawpix: add missing samplers for draw pixel lowering.

2018-10-14 Thread Dave Airlie
From: Dave Airlie 

The drawpixel lowering references undeclared samplers, and is also
missing a texture handle.
---
 src/compiler/nir/nir_lower_drawpixels.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/src/compiler/nir/nir_lower_drawpixels.c 
b/src/compiler/nir/nir_lower_drawpixels.c
index 462b9c308b2..3e86cc08470 100644
--- a/src/compiler/nir/nir_lower_drawpixels.c
+++ b/src/compiler/nir/nir_lower_drawpixels.c
@@ -66,6 +66,24 @@ get_texcoord(lower_drawpixels_state *state)
return nir_load_var(&state->b, state->texcoord);
 }
 
+static void
+add_samplers(lower_drawpixels_state *state)
+{
+   nir_variable *sampler = nir_variable_create(state->shader,
+   nir_var_uniform,
+   
glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT),
+   "s_drawpix");
+   sampler->data.location = state->options->drawpix_sampler;
+
+   if (state->options->pixel_maps) {
+  nir_variable *sampler = nir_variable_create(state->shader,
+  nir_var_uniform,
+  
glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT),
+  "s_pixelmap");
+  sampler->data.location = state->options->pixelmap_sampler;
+   }
+}
+
 static nir_variable *
 create_uniform(nir_shader *shader, const char *name,
const gl_state_index16 state_tokens[STATE_LENGTH])
@@ -123,6 +141,7 @@ lower_color(lower_drawpixels_state *state, 
nir_intrinsic_instr *intr)
 
b->cursor = nir_before_instr(&intr->instr);
 
+   add_samplers(state);
texcoord = get_texcoord(state);
 
/* replace load_var(gl_Color) w/ texture sample:
@@ -175,6 +194,7 @@ lower_color(lower_drawpixels_state *state, 
nir_intrinsic_instr *intr)
   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
   tex->coord_components = 2;
   tex->sampler_index = state->options->pixelmap_sampler;
+  tex->texture_index = state->options->pixelmap_sampler;
   tex->dest_type = nir_type_float;
   tex->src[0].src_type = nir_tex_src_coord;
   tex->src[0].src = nir_src_for_ssa(nir_channels(b, def, 0xc));
-- 
2.17.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/7] nir/int64: Add some more lowering helpers

2018-10-14 Thread Jason Ekstrand

On October 14, 2018 17:12:34 Matt Turner  wrote:


From: Jason Ekstrand 

[mattst88]: Found in an old branch of Jason's.

Jason implemented: inot, iand, ior, iadd, isub, ineg, iabs, compare,
  imin, imax, umin, umax
Matt implemented:  ixor, imov, bcsel
---
src/compiler/nir/nir_lower_int64.c | 186 +
1 file changed, 186 insertions(+)

diff --git a/src/compiler/nir/nir_lower_int64.c 
b/src/compiler/nir/nir_lower_int64.c

index 0d7f165b406..6b269830801 100644
--- a/src/compiler/nir/nir_lower_int64.c
+++ b/src/compiler/nir/nir_lower_int64.c
@@ -24,6 +24,192 @@
#include "nir.h"
#include "nir_builder.h"

+static nir_ssa_def *
+lower_imov64(nir_builder *b, nir_ssa_def *x)
+{
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+
+   return nir_pack_64_2x32_split(b, nir_imov(b, x_lo), nir_imov(b, x_hi));


You don't really need the movs...


+}
+
+static nir_ssa_def *
+lower_bcsel64(nir_builder *b, nir_ssa_def *cond, nir_ssa_def *x, 
nir_ssa_def *y)

+{
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
+   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
+
+   return nir_pack_64_2x32_split(b, nir_bcsel(b, cond, x_lo, y_lo),
+nir_bcsel(b, cond, x_hi, y_hi));
+}
+
+static nir_ssa_def *
+lower_inot64(nir_builder *b, nir_ssa_def *x)
+{
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+
+   return nir_pack_64_2x32_split(b, nir_inot(b, x_lo), nir_inot(b, x_hi));
+}
+
+static nir_ssa_def *
+lower_iand64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
+   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
+
+   return nir_pack_64_2x32_split(b, nir_iand(b, x_lo, y_lo),
+nir_iand(b, x_hi, y_hi));
+}
+
+static nir_ssa_def *
+lower_ior64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
+   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
+
+   return nir_pack_64_2x32_split(b, nir_ior(b, x_lo, y_lo),
+nir_ior(b, x_hi, y_hi));
+}
+
+static nir_ssa_def *
+lower_ixor64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
+   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
+
+   return nir_pack_64_2x32_split(b, nir_ixor(b, x_lo, y_lo),
+nir_ixor(b, x_hi, y_hi));
+}
+
+static nir_ssa_def *
+lower_iadd64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
+   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
+
+   nir_ssa_def *res_lo = nir_iadd(b, x_lo, y_lo);
+   nir_ssa_def *carry = nir_b2i(b, nir_ult(b, res_lo, x_lo));
+   nir_ssa_def *res_hi = nir_iadd(b, carry, nir_iadd(b, x_hi, y_hi));
+
+   return nir_pack_64_2x32_split(b, res_lo, res_hi);
+}
+
+static nir_ssa_def *
+lower_isub64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
+   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
+
+   nir_ssa_def *res_lo = nir_isub(b, x_lo, y_lo);
+   /* In NIR, true is represented by ~0 which is -1 */


We've had discussions (had some at XDC this year) about changing booleans 
to one-bit which would break this.  Doing b2i would be safer but this does 
work for now.



+   nir_ssa_def *borrow = nir_ult(b, x_lo, y_lo);
+   nir_ssa_def *res_hi = nir_iadd(b, nir_isub(b, x_hi, y_hi), borrow);
+
+   return nir_pack_64_2x32_split(b, res_lo, res_hi);
+}
+
+static nir_ssa_def *
+lower_ineg64(nir_builder *b, nir_ssa_def *x)
+{
+   /* Since isub is the same number of instructions (with better dependencies)
+* as iadd, subtraction is actually more efficient for ineg than the usual
+* 2's complement "flip the bits and add one".
+*/
+   return lower_isub64(b, nir_imm_int64(b, 0), x);
+}
+
+static nir_ssa_def *
+lower_iabs64(nir_builder *b, nir_ssa_def *x)
+{
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+   nir_ssa_def *x_is_neg = nir_ilt(b, x_hi, nir_imm_int(b, 0));
+   return nir_bcsel(b, x_is_neg, lowe

[Mesa-dev] [PATCH 3/7] nir/int64: Wire up lowering functions

2018-10-14 Thread Matt Turner
FINISHME: Figure out what to do about lowering mov operations. Currently
if we attempt to lower them in NIR we get stuck in an infinite loop. The
last patch of this series lowers them in the backend instead, but I'm
certainly open to ideas.
---
 src/compiler/nir/nir.h |  9 +++-
 src/compiler/nir/nir_lower_int64.c | 93 --
 2 files changed, 98 insertions(+), 4 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 9527079a9ef..12cbd030e21 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2993,7 +2993,14 @@ typedef enum {
nir_lower_imul64 = (1 << 0),
nir_lower_isign64 = (1 << 1),
/** Lower all int64 modulus and division opcodes */
-   nir_lower_divmod64 = (1 << 2),
+   nir_lower_divmod64  = (1 << 2),
+   nir_lower_bcsel64   = (1 << 3),
+   nir_lower_icmp64= (1 << 4),
+   nir_lower_iadd64= (1 << 5),
+   nir_lower_iabs64= (1 << 6),
+   nir_lower_ineg64= (1 << 7),
+   nir_lower_logic64   = (1 << 8),
+   nir_lower_minmax64  = (1 << 9),
 } nir_lower_int64_options;
 
 bool nir_lower_int64(nir_shader *shader, nir_lower_int64_options options);
diff --git a/src/compiler/nir/nir_lower_int64.c 
b/src/compiler/nir/nir_lower_int64.c
index 6b269830801..9cdc8a9d592 100644
--- a/src/compiler/nir/nir_lower_int64.c
+++ b/src/compiler/nir/nir_lower_int64.c
@@ -403,6 +403,33 @@ opcode_to_options_mask(nir_op opcode)
case nir_op_imod:
case nir_op_irem:
   return nir_lower_divmod64;
+   // FINISHME: case nir_op_imov: 
+   case nir_op_bcsel:
+  return nir_lower_bcsel64;
+   case nir_op_ieq:
+   case nir_op_ine:
+   case nir_op_ult:
+   case nir_op_ilt:
+   case nir_op_uge:
+   case nir_op_ige:
+  return nir_lower_icmp64;
+   case nir_op_iadd:
+   case nir_op_isub:
+  return nir_lower_iadd64;
+   case nir_op_imin:
+   case nir_op_imax:
+   case nir_op_umin:
+   case nir_op_umax:
+  return nir_lower_minmax64;
+   case nir_op_iabs:
+  return nir_lower_iabs64;
+   case nir_op_ineg:
+  return nir_lower_ineg64;
+   case nir_op_iand:
+   case nir_op_ior:
+   case nir_op_ixor:
+   case nir_op_inot:
+  return nir_lower_logic64;
default:
   return 0;
}
@@ -430,6 +457,41 @@ lower_int64_alu_instr(nir_builder *b, nir_alu_instr *alu)
   return lower_imod64(b, src[0], src[1]);
case nir_op_irem:
   return lower_irem64(b, src[0], src[1]);
+   case nir_op_imov:
+  return lower_imov64(b, src[0]);
+   case nir_op_bcsel:
+  return lower_bcsel64(b, src[0], src[1], src[2]);
+   case nir_op_ieq:
+   case nir_op_ine:
+   case nir_op_ult:
+   case nir_op_ilt:
+   case nir_op_uge:
+   case nir_op_ige:
+  return lower_int64_compare(b, alu->op, src[0], src[1]);
+   case nir_op_iadd:
+  return lower_iadd64(b, src[0], src[1]);
+   case nir_op_isub:
+  return lower_isub64(b, src[0], src[1]);
+   case nir_op_imin:
+  return lower_imin64(b, src[0], src[1]);
+   case nir_op_imax:
+  return lower_imax64(b, src[0], src[1]);
+   case nir_op_umin:
+  return lower_umin64(b, src[0], src[1]);
+   case nir_op_umax:
+  return lower_umax64(b, src[0], src[1]);
+   case nir_op_iabs:
+  return lower_iabs64(b, src[0]);
+   case nir_op_ineg:
+  return lower_ineg64(b, src[0]);
+   case nir_op_iand:
+  return lower_iand64(b, src[0], src[1]);
+   case nir_op_ior:
+  return lower_ior64(b, src[0], src[1]);
+   case nir_op_ixor:
+  return lower_ixor64(b, src[0], src[1]);
+   case nir_op_inot:
+  return lower_inot64(b, src[0]);
default:
   unreachable("Invalid ALU opcode to lower");
}
@@ -448,9 +510,34 @@ lower_int64_impl(nir_function_impl *impl, 
nir_lower_int64_options options)
 continue;
 
  nir_alu_instr *alu = nir_instr_as_alu(instr);
- assert(alu->dest.dest.is_ssa);
- if (alu->dest.dest.ssa.bit_size != 64)
-continue;
+ switch (alu->op) {
+ case nir_op_bcsel:
+assert(alu->src[1].src.is_ssa);
+assert(alu->src[2].src.is_ssa);
+assert(alu->src[1].src.ssa->bit_size ==
+   alu->src[2].src.ssa->bit_size);
+if (alu->src[1].src.ssa->bit_size != 64)
+   continue;
+break;
+ case nir_op_ieq:
+ case nir_op_ine:
+ case nir_op_ult:
+ case nir_op_ilt:
+ case nir_op_uge:
+ case nir_op_ige:
+assert(alu->src[0].src.is_ssa);
+assert(alu->src[1].src.is_ssa);
+assert(alu->src[0].src.ssa->bit_size ==
+   alu->src[1].src.ssa->bit_size);
+if (alu->src[0].src.ssa->bit_size != 64)
+   continue;
+break;
+ default:
+assert(alu->dest.dest.is_ssa);
+if (alu->dest.dest.ssa.bit_size != 64)
+   continue;
+break;
+ }
 
  if (!(options & opcode_to_options_mask(alu->op)))
 continue;
-- 
2.16.4

___

[Mesa-dev] [PATCH 5/7] nir/int64: Call nir_lower_int64() in a loop

2018-10-14 Thread Matt Turner
Unfortunately some int64 lowerings generate more int64 operations, so we
need to call this function a few times. Also call
nir_lower_alu_to_scalar() beforehand to make more int64 operations
available for lowering.
---
 src/intel/compiler/brw_nir.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index f61baee230a..066724c58a6 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -670,12 +670,16 @@ brw_preprocess_nir(const struct brw_compiler *compiler, 
nir_shader *nir)
 */
OPT(nir_opt_algebraic);
 
+   if (is_scalar) {
+  OPT(nir_lower_alu_to_scalar);
+   }
+
/* Lower int64 instructions before nir_optimize so that loop unrolling
 * sees their actual cost.
 */
-   nir_lower_int64(nir, nir_lower_imul64 |
-nir_lower_isign64 |
-nir_lower_divmod64);
+   while (nir_lower_int64(nir, nir_lower_imul64 |
+   nir_lower_isign64 |
+   nir_lower_divmod64));
 
nir = brw_nir_optimize(nir, compiler, is_scalar, true);
 
-- 
2.16.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/7] i965: Lower int64 operations if there is not hardware support

2018-10-14 Thread Matt Turner
---
 src/intel/compiler/brw_nir.c | 14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 066724c58a6..7eb9c385694 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -677,9 +677,17 @@ brw_preprocess_nir(const struct brw_compiler *compiler, 
nir_shader *nir)
/* Lower int64 instructions before nir_optimize so that loop unrolling
 * sees their actual cost.
 */
-   while (nir_lower_int64(nir, nir_lower_imul64 |
-   nir_lower_isign64 |
-   nir_lower_divmod64));
+   nir_lower_int64_options options =
+  nir_lower_imul64 | nir_lower_isign64 | nir_lower_divmod64;
+   options |= devinfo->has_64bit_types ? 0 : (nir_lower_bcsel64 |
+  nir_lower_icmp64 |
+  nir_lower_iadd64 |
+  nir_lower_iabs64 |
+  nir_lower_ineg64 |
+  nir_lower_logic64 |
+  nir_lower_minmax64 |
+  nir_lower_shift64);
+   while (nir_lower_int64(nir, options));
 
nir = brw_nir_optimize(nir, compiler, is_scalar, true);
 
-- 
2.16.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/7] nir/int64: Implement lowering of shift operations

2018-10-14 Thread Matt Turner
---
 src/compiler/nir/nir.h |   1 +
 src/compiler/nir/nir_lower_int64.c | 142 +
 2 files changed, 143 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 12cbd030e21..2c477126acc 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3001,6 +3001,7 @@ typedef enum {
nir_lower_ineg64= (1 << 7),
nir_lower_logic64   = (1 << 8),
nir_lower_minmax64  = (1 << 9),
+   nir_lower_shift64   = (1 << 10),
 } nir_lower_int64_options;
 
 bool nir_lower_int64(nir_shader *shader, nir_lower_int64_options options);
diff --git a/src/compiler/nir/nir_lower_int64.c 
b/src/compiler/nir/nir_lower_int64.c
index 9cdc8a9d592..25882d3a858 100644
--- a/src/compiler/nir/nir_lower_int64.c
+++ b/src/compiler/nir/nir_lower_int64.c
@@ -90,6 +90,138 @@ lower_ixor64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
 nir_ixor(b, x_hi, y_hi));
 }
 
+static nir_ssa_def *
+lower_ishl64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   /* Implemented as
+*
+* uint64_t lshift(uint64_t x, int c)
+* {
+*if (c == 0) return x;
+*
+*uint32_t lo = LO(x), hi = HI(x);
+*
+*if (c < 32) {
+*   uint32_t lo_shifted = lo << (c & 0x1f);
+*   uint32_t hi_shifted = hi << (c & 0x1f);
+*   uint32_t lo_shifted_hi = lo >> (abs(32 - c) & 0x1f);
+*   return pack_64(lo_shifted, hi_shifted | lo_shifted_hi);
+*} else {
+*   uint32_t lo_shifted_hi = lo << (abs(32 - c) & 0x1f);
+*   return pack_64(0, lo_shifted_hi);
+*}
+* }
+*/
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+
+   nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd(b, y, nir_imm_int(b, 
-32)));
+   nir_ssa_def *lo_shifted = nir_ishl(b, x_lo, y);
+   nir_ssa_def *hi_shifted = nir_ishl(b, x_hi, y);
+   nir_ssa_def *lo_shifted_hi = nir_ushr(b, x_lo, reverse_count);
+
+   nir_ssa_def *res_if_lt_32 =
+  nir_pack_64_2x32_split(b, lo_shifted,
+nir_ior(b, hi_shifted, lo_shifted_hi));
+   nir_ssa_def *res_if_ge_32 =
+  nir_pack_64_2x32_split(b, nir_imm_int(b, 0),
+nir_ishl(b, x_lo, reverse_count));
+
+   return nir_bcsel(b,
+nir_ieq(b, y, nir_imm_int(b, 0)), x,
+nir_bcsel(b, nir_uge(b, y, nir_imm_int(b, 32)),
+ res_if_ge_32, res_if_lt_32));
+}
+
+static nir_ssa_def *
+lower_ishr64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   /* Implemented as
+*
+* uint64_t arshift(uint64_t x, int c)
+* {
+*if (c == 0) return x;
+*
+*uint32_t lo = LO(x);
+*int32_t  hi = HI(x);
+*
+*if (c < 32) {
+*   uint32_t lo_shifted = lo >> (c & 0x1f);
+*   uint32_t hi_shifted = hi >> (c & 0x1f);
+*   uint32_t hi_shifted_lo = hi << (abs(32 - c) & 0x1f);
+*   return pack_64(hi_shifted, hi_shifted_lo | lo_shifted);
+*} else {
+*   uint32_t hi_shifted = hi >> 31;
+*   uint32_t hi_shifted_lo = hi >> (abs(32 - c) & 0x1f);
+*   return pack_64(hi_shifted, hi_shifted_lo);
+*}
+* }
+*/
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+
+   nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd(b, y, nir_imm_int(b, 
-32)));
+   nir_ssa_def *lo_shifted = nir_ushr(b, x_lo, y);
+   nir_ssa_def *hi_shifted = nir_ishr(b, x_hi, y);
+   nir_ssa_def *hi_shifted_lo = nir_ishl(b, x_hi, reverse_count);
+
+   nir_ssa_def *res_if_lt_32 =
+  nir_pack_64_2x32_split(b, nir_ior(b, lo_shifted, hi_shifted_lo),
+hi_shifted);
+   nir_ssa_def *res_if_ge_32 =
+  nir_pack_64_2x32_split(b, nir_ishr(b, x_hi, reverse_count),
+nir_ishr(b, x_hi, nir_imm_int(b, 31)));
+
+   return nir_bcsel(b,
+nir_ieq(b, y, nir_imm_int(b, 0)), x,
+nir_bcsel(b, nir_uge(b, y, nir_imm_int(b, 32)),
+ res_if_ge_32, res_if_lt_32));
+}
+
+static nir_ssa_def *
+lower_ushr64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   /* Implemented as
+*
+* uint64_t rshift(uint64_t x, int c)
+* {
+*if (c == 0) return x;
+*
+*uint32_t lo = LO(x), hi = HI(x);
+*
+*if (c < 32) {
+*   uint32_t lo_shifted = lo >> (c & 0x1f);
+*   uint32_t hi_shifted = hi >> (c & 0x1f);
+*   uint32_t hi_shifted_lo = hi << (abs(32 - c) & 0x1f);
+*   return pack_64(hi_shifted, hi_shifted_lo | lo_shifted);
+*} else {
+*   uint32_t hi_shifted_lo = hi >> (abs(32 - c) & 0x1f);
+*   return pack_64(0, hi_shifted_lo);
+*}
+* }
+*/
+
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_6

[Mesa-dev] [PATCH 7/7] i965/fs: Lower 64-bit MOV operations

2018-10-14 Thread Matt Turner
FINISHME: Lower them in NIR instead?
---
 src/intel/compiler/brw_fs.cpp | 21 -
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 69726ed70e8..9e50df59356 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -2403,9 +2403,28 @@ fs_visitor::opt_algebraic()
 {
bool progress = false;
 
-   foreach_block_and_inst(block, fs_inst, inst, cfg) {
+   foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
   switch (inst->opcode) {
   case BRW_OPCODE_MOV:
+ if (!devinfo->has_64bit_types &&
+ (inst->dst.type == BRW_REGISTER_TYPE_DF ||
+  inst->dst.type == BRW_REGISTER_TYPE_UQ ||
+  inst->dst.type == BRW_REGISTER_TYPE_Q)) {
+assert(inst->dst.type == inst->src[0].type);
+assert(!inst->saturate);
+assert(!inst->src[0].abs);
+assert(!inst->src[0].negate);
+const brw::fs_builder ibld(this, block, inst);
+
+ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1),
+ subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1));
+ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0),
+ subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0));
+
+inst->remove(block);
+progress = true;
+ }
+
  if (inst->src[0].file != IMM)
 break;
 
-- 
2.16.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/7] nir/int64: Add some more lowering helpers

2018-10-14 Thread Matt Turner
From: Jason Ekstrand 

[mattst88]: Found in an old branch of Jason's.

Jason implemented: inot, iand, ior, iadd, isub, ineg, iabs, compare,
   imin, imax, umin, umax
Matt implemented:  ixor, imov, bcsel
---
 src/compiler/nir/nir_lower_int64.c | 186 +
 1 file changed, 186 insertions(+)

diff --git a/src/compiler/nir/nir_lower_int64.c 
b/src/compiler/nir/nir_lower_int64.c
index 0d7f165b406..6b269830801 100644
--- a/src/compiler/nir/nir_lower_int64.c
+++ b/src/compiler/nir/nir_lower_int64.c
@@ -24,6 +24,192 @@
 #include "nir.h"
 #include "nir_builder.h"
 
+static nir_ssa_def *
+lower_imov64(nir_builder *b, nir_ssa_def *x)
+{
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+
+   return nir_pack_64_2x32_split(b, nir_imov(b, x_lo), nir_imov(b, x_hi));
+}
+
+static nir_ssa_def *
+lower_bcsel64(nir_builder *b, nir_ssa_def *cond, nir_ssa_def *x, nir_ssa_def 
*y)
+{
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
+   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
+
+   return nir_pack_64_2x32_split(b, nir_bcsel(b, cond, x_lo, y_lo),
+nir_bcsel(b, cond, x_hi, y_hi));
+}
+
+static nir_ssa_def *
+lower_inot64(nir_builder *b, nir_ssa_def *x)
+{
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+
+   return nir_pack_64_2x32_split(b, nir_inot(b, x_lo), nir_inot(b, x_hi));
+}
+
+static nir_ssa_def *
+lower_iand64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
+   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
+
+   return nir_pack_64_2x32_split(b, nir_iand(b, x_lo, y_lo),
+nir_iand(b, x_hi, y_hi));
+}
+
+static nir_ssa_def *
+lower_ior64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
+   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
+
+   return nir_pack_64_2x32_split(b, nir_ior(b, x_lo, y_lo),
+nir_ior(b, x_hi, y_hi));
+}
+
+static nir_ssa_def *
+lower_ixor64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
+   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
+
+   return nir_pack_64_2x32_split(b, nir_ixor(b, x_lo, y_lo),
+nir_ixor(b, x_hi, y_hi));
+}
+
+static nir_ssa_def *
+lower_iadd64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
+   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
+
+   nir_ssa_def *res_lo = nir_iadd(b, x_lo, y_lo);
+   nir_ssa_def *carry = nir_b2i(b, nir_ult(b, res_lo, x_lo));
+   nir_ssa_def *res_hi = nir_iadd(b, carry, nir_iadd(b, x_hi, y_hi));
+
+   return nir_pack_64_2x32_split(b, res_lo, res_hi);
+}
+
+static nir_ssa_def *
+lower_isub64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
+   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
+
+   nir_ssa_def *res_lo = nir_isub(b, x_lo, y_lo);
+   /* In NIR, true is represented by ~0 which is -1 */
+   nir_ssa_def *borrow = nir_ult(b, x_lo, y_lo);
+   nir_ssa_def *res_hi = nir_iadd(b, nir_isub(b, x_hi, y_hi), borrow);
+
+   return nir_pack_64_2x32_split(b, res_lo, res_hi);
+}
+
+static nir_ssa_def *
+lower_ineg64(nir_builder *b, nir_ssa_def *x)
+{
+   /* Since isub is the same number of instructions (with better dependencies)
+* as iadd, subtraction is actually more efficient for ineg than the usual
+* 2's complement "flip the bits and add one".
+*/
+   return lower_isub64(b, nir_imm_int64(b, 0), x);
+}
+
+static nir_ssa_def *
+lower_iabs64(nir_builder *b, nir_ssa_def *x)
+{
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+   nir_ssa_def *x_is_neg = nir_ilt(b, x_hi, nir_imm_int(b, 0));
+   return nir_bcsel(b, x_is_neg, lower_ineg64(b, x), x);
+}
+
+static nir_ssa_def *
+lower_int64_compare(nir_builder *b, nir_op op, nir_ssa_def *x, nir_ssa_def *y)
+{
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+   nir_ss

[Mesa-dev] [PATCH 1/7] i965/fs: Add 64-bit int immediate support to dump_instructions()

2018-10-14 Thread Matt Turner
---
 src/intel/compiler/brw_fs.cpp   | 6 ++
 src/intel/compiler/brw_shader.h | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 23a25fedca5..69726ed70e8 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -6023,6 +6023,12 @@ fs_visitor::dump_instruction(backend_instruction 
*be_inst, FILE *file)
  case BRW_REGISTER_TYPE_UD:
 fprintf(file, "%uu", inst->src[i].ud);
 break;
+ case BRW_REGISTER_TYPE_Q:
+fprintf(file, "%" PRId64 "q", inst->src[i].d64);
+break;
+ case BRW_REGISTER_TYPE_UQ:
+fprintf(file, "%" PRIu64 "uq", inst->src[i].u64);
+break;
  case BRW_REGISTER_TYPE_VF:
 fprintf(file, "[%-gF, %-gF, %-gF, %-gF]",
 brw_vf_to_float((inst->src[i].ud >>  0) & 0xff),
diff --git a/src/intel/compiler/brw_shader.h b/src/intel/compiler/brw_shader.h
index 7d97ddbd868..e1d598b8781 100644
--- a/src/intel/compiler/brw_shader.h
+++ b/src/intel/compiler/brw_shader.h
@@ -89,6 +89,8 @@ struct backend_reg : private brw_reg
using brw_reg::f;
using brw_reg::d;
using brw_reg::ud;
+   using brw_reg::d64;
+   using brw_reg::u64;
 };
 #endif
 
-- 
2.16.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 02/15] nir: fix compacting varyings when XFB outputs are present

2018-10-14 Thread Timothy Arceri

What happened to v2?

On 13/10/18 11:57 pm, Samuel Pitoiset wrote:

We shouldn't try to compact any varyings known as always
active IO, especially XFB outputs. For example, if one
component of an xfb output is also used as input varying
in the next stage, it shouldn't be compacted.

Because we look at the input varyings from the consumer
stage, we don't know if one of them is an XFB output. One
solution is to mark all components as used when
always_active_io is true to avoid wrong remapping.

Signed-off-by: Samuel Pitoiset 
---
  src/compiler/nir/nir_linking_helpers.c | 9 +
  1 file changed, 9 insertions(+)

diff --git a/src/compiler/nir/nir_linking_helpers.c 
b/src/compiler/nir/nir_linking_helpers.c
index 85712a7cb1..88014e9a1d 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -236,6 +236,15 @@ get_slot_component_masks_and_interp_types(struct exec_list 
*var_list,
 get_interp_type(var, default_to_smooth_interp);
  interp_loc[location + i] = get_interp_loc(var);
  
+if (var->data.always_active_io) {

+   /* Mark all components as used to avoid repacking xfb varyings
+* wrongly. For instance, if one component of an xfb output is
+* also used as input varying in the next stage.
+*/
+   comps[location + i] |= 0xf;
+   continue;
+}
+
  if (dual_slot) {
 if (i & 1) {
comps[location + i] |= ((1 << comps_slot2) - 1);


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/13] nir: Preserve offsets in lower_io_to_scalar_early

2018-10-14 Thread Timothy Arceri
For xfb in glsl we just set always_active_io for varyings on each side 
of the interface which should avoid this and the linking problems from 
the two patches before this.


Trying to lower/split xfb varyings just seems like asking for trouble; I 
suggested that Samuel create a linking helper to set always_active_io where 
needed [1].


[1] https://lists.freedesktop.org/archives/mesa-dev/2018-October/206750.html

On 14/10/18 12:09 am, Jason Ekstrand wrote:

---
  src/compiler/nir/nir_lower_io_to_scalar.c | 8 
  1 file changed, 8 insertions(+)

diff --git a/src/compiler/nir/nir_lower_io_to_scalar.c 
b/src/compiler/nir/nir_lower_io_to_scalar.c
index f0c2a6a95d6..a7373d0e791 100644
--- a/src/compiler/nir/nir_lower_io_to_scalar.c
+++ b/src/compiler/nir/nir_lower_io_to_scalar.c
@@ -192,6 +192,10 @@ lower_load_to_scalar_early(nir_builder *b, 
nir_intrinsic_instr *intr,
   chan_var = nir_variable_clone(var, b->shader);
   chan_var->data.location_frac =  var->data.location_frac + i;
   chan_var->type = glsl_channel_type(chan_var->type);
+ if (var->data.explicit_offset) {
+unsigned comp_size = glsl_get_bit_size(chan_var->type) / 8;
+chan_var->data.offset = var->data.offset + i * comp_size;
+ }
  
   chan_vars[var->data.location_frac + i] = chan_var;
  
@@ -246,6 +250,10 @@ lower_store_output_to_scalar_early(nir_builder *b, nir_intrinsic_instr *intr,

   chan_var = nir_variable_clone(var, b->shader);
   chan_var->data.location_frac =  var->data.location_frac + i;
   chan_var->type = glsl_channel_type(chan_var->type);
+ if (var->data.explicit_offset) {
+unsigned comp_size = glsl_get_bit_size(chan_var->type) / 8;
+chan_var->data.offset = var->data.offset + i * comp_size;
+ }
  
   chan_vars[var->data.location_frac + i] = chan_var;
  


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv: Split dispatch tables into device and instance

2018-10-14 Thread Lionel Landwerlin

Nice!

Reviewed-by: Lionel Landwerlin 

On 13/10/2018 19:35, Jason Ekstrand wrote:

There's no reason why we need to generate trampoline functions for instance
functions or carry N copies of the instance dispatch table around for
every hardware generation.  Splitting the tables and being more
conservative shaves about 34K off .text and about 4K off .data when
built with clang.

Before splitting dispatch tables:

   text    data     bss     dec     hex filename
3224305  286216    8960 3519481  35b3f9 _install/lib64/libvulkan_intel.so

After splitting dispatch tables:

   text    data     bss     dec     hex filename
3190325  282232    8960 3481517  351fad _install/lib64/libvulkan_intel.so
---
  src/intel/vulkan/anv_device.c   |  61 --
  src/intel/vulkan/anv_entrypoints_gen.py | 242 +---
  src/intel/vulkan/anv_private.h  |  18 +-
  3 files changed, 230 insertions(+), 91 deletions(-)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 6a24d1086d8..a2551452eb1 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -637,14 +637,28 @@ VkResult anv_CreateInstance(
/* Vulkan requires that entrypoints for extensions which have not been
 * enabled must not be advertised.
 */
-  if (!anv_entrypoint_is_enabled(i, instance->app_info.api_version,
- &instance->enabled_extensions, NULL)) {
+  if (!anv_instance_entrypoint_is_enabled(i, 
instance->app_info.api_version,
+  &instance->enabled_extensions)) {
   instance->dispatch.entrypoints[i] = NULL;
-  } else if (anv_dispatch_table.entrypoints[i] != NULL) {
- instance->dispatch.entrypoints[i] = anv_dispatch_table.entrypoints[i];
} else {
   instance->dispatch.entrypoints[i] =
-anv_tramp_dispatch_table.entrypoints[i];
+anv_instance_dispatch_table.entrypoints[i];
+  }
+   }
+
+   for (unsigned i = 0; i < ARRAY_SIZE(instance->device_dispatch.entrypoints); 
i++) {
+  /* Vulkan requires that entrypoints for extensions which have not been
+   * enabled must not be advertised.
+   */
+  if (!anv_device_entrypoint_is_enabled(i, instance->app_info.api_version,
+&instance->enabled_extensions, 
NULL)) {
+ instance->device_dispatch.entrypoints[i] = NULL;
+  } else if (anv_device_dispatch_table.entrypoints[i] != NULL) {
+ instance->device_dispatch.entrypoints[i] =
+anv_device_dispatch_table.entrypoints[i];
+  } else {
+ instance->device_dispatch.entrypoints[i] =
+anv_tramp_device_dispatch_table.entrypoints[i];
}
 }
  
@@ -1349,11 +1363,15 @@ PFN_vkVoidFunction anv_GetInstanceProcAddr(

 if (instance == NULL)
return NULL;
  
-   int idx = anv_get_entrypoint_index(pName);

-   if (idx < 0)
-  return NULL;
+   int idx = anv_get_instance_entrypoint_index(pName);
+   if (idx >= 0)
+  return instance->dispatch.entrypoints[idx];
+
+   idx = anv_get_device_entrypoint_index(pName);
+   if (idx >= 0)
+  return instance->device_dispatch.entrypoints[idx];
  
-   return instance->dispatch.entrypoints[idx];

+   return NULL;
  }
  
  /* With version 1+ of the loader interface the ICD should expose

@@ -1381,7 +1399,7 @@ PFN_vkVoidFunction anv_GetDeviceProcAddr(
 if (!device || !pName)
return NULL;
  
-   int idx = anv_get_entrypoint_index(pName);

+   int idx = anv_get_device_entrypoint_index(pName);
 if (idx < 0)
return NULL;
  
@@ -1531,25 +1549,25 @@ VkResult anv_EnumerateDeviceExtensionProperties(

  static void
  anv_device_init_dispatch(struct anv_device *device)
  {
-   const struct anv_dispatch_table *genX_table;
+   const struct anv_device_dispatch_table *genX_table;
 switch (device->info.gen) {
 case 11:
-  genX_table = &gen11_dispatch_table;
+  genX_table = &gen11_device_dispatch_table;
break;
 case 10:
-  genX_table = &gen10_dispatch_table;
+  genX_table = &gen10_device_dispatch_table;
break;
 case 9:
-  genX_table = &gen9_dispatch_table;
+  genX_table = &gen9_device_dispatch_table;
break;
 case 8:
-  genX_table = &gen8_dispatch_table;
+  genX_table = &gen8_device_dispatch_table;
break;
 case 7:
if (device->info.is_haswell)
- genX_table = &gen75_dispatch_table;
+ genX_table = &gen75_device_dispatch_table;
else
- genX_table = &gen7_dispatch_table;
+ genX_table = &gen7_device_dispatch_table;
break;
 default:
unreachable("unsupported gen\n");
@@ -1559,14 +1577,15 @@ anv_device_init_dispatch(struct anv_device *device)
/* Vulkan requires that entrypoints for extensions which have not been
 * enabled must not be advertised.
 */
-  if (!anv_entrypoint_is_enable

Re: [Mesa-dev] [PATCH] anv: Implement VK_EXT_pci_bus_info

2018-10-14 Thread Jason Ekstrand
Here I was reveling in the triviality of my fixed-pci-path implementation 
and you had to show me up by implementing it properly. :-P


Implementing it properly is a better plan because we know discrete is coming.

Reviewed-by: Jason Ekstrand 

On October 14, 2018 07:12:58 Lionel Landwerlin 
 wrote:



Even though Intel GPUs are always at the same PCI location, all the
info we need is already provided by libdrm. Let's be future proof.

Signed-off-by: Lionel Landwerlin 
---
src/intel/vulkan/anv_device.c  | 24 +++-
src/intel/vulkan/anv_extensions.py |  1 +
src/intel/vulkan/anv_private.h |  6 ++
3 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 6a24d1086d8..1c7942f1b8e 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -309,9 +309,10 @@ anv_physical_device_free_disk_cache(struct anv_physical_device *device)

static VkResult
anv_physical_device_init(struct anv_physical_device *device,
 struct anv_instance *instance,
- const char *primary_path,
- const char *path)
+ drmDevicePtr drm_device)
{
+   const char *primary_path = drm_device->nodes[DRM_NODE_PRIMARY];
+   const char *path = drm_device->nodes[DRM_NODE_RENDER];
   VkResult result;
   int fd;
   int master_fd = -1;
@@ -342,6 +343,11 @@ anv_physical_device_init(struct anv_physical_device *device,

  device->no_hw = true;
   }

+   device->pci_info.domain = drm_device->businfo.pci->domain;
+   device->pci_info.bus = drm_device->businfo.pci->bus;
+   device->pci_info.device = drm_device->businfo.pci->dev;
+   device->pci_info.function = drm_device->businfo.pci->func;
+
   device->name = gen_get_device_name(device->chipset_id);
   if (!gen_get_device_info(device->chipset_id, &device->info)) {
  result = vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
@@ -715,9 +721,7 @@ anv_enumerate_devices(struct anv_instance *instance)
  devices[i]->deviceinfo.pci->vendor_id == 0x8086) {

 result = anv_physical_device_init(&instance->physicalDevice,
-instance,
-devices[i]->nodes[DRM_NODE_PRIMARY],
-devices[i]->nodes[DRM_NODE_RENDER]);
+   instance, devices[i]);
 if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
break;
  }
@@ -1166,6 +1170,16 @@ void anv_GetPhysicalDeviceProperties2(
 break;
  }

+  case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
+ VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
+(VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
+ properties->pciDomain = pdevice->pci_info.domain;
+ properties->pciBus = pdevice->pci_info.bus;
+ properties->pciDevice = pdevice->pci_info.device;
+ properties->pciFunction = pdevice->pci_info.function;
+ break;
+  }
+
  case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: {
 VkPhysicalDevicePointClippingProperties *properties =
(VkPhysicalDevicePointClippingProperties *) ext;
diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py

index d4915c95013..1329ef74026 100644
--- a/src/intel/vulkan/anv_extensions.py
+++ b/src/intel/vulkan/anv_extensions.py
@@ -121,6 +121,7 @@ EXTENSIONS = [
Extension('VK_EXT_external_memory_dma_buf',   1, True),
Extension('VK_EXT_global_priority',   1,
  'device->has_context_priority'),
+Extension('VK_EXT_pci_bus_info',  1, True),
Extension('VK_EXT_shader_viewport_index_layer',   1, True),
Extension('VK_EXT_shader_stencil_export', 1, 'device->info.gen >= 9'),

Extension('VK_EXT_vertex_attribute_divisor',  3, True),
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 5b4c286bf38..763dc9bb4ce 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -841,6 +841,12 @@ struct anv_physical_device {
boolno_hw;
charpath[20];
const char *name;
+struct {
+   uint16_t domain;
+   uint8_t  bus;
+   uint8_t  device;
+   uint8_t  function;
+}   pci_info;
struct gen_device_info  info;
/** Amount of "GPU memory" we want to advertise
 *
--
2.19.1




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] anv: Implement VK_EXT_pci_bus_info

2018-10-14 Thread Lionel Landwerlin
Even though Intel GPUs are always at the same PCI location, all the
info we need is already provided by libdrm. Let's be future-proof.

Signed-off-by: Lionel Landwerlin 
---
 src/intel/vulkan/anv_device.c  | 24 +++-
 src/intel/vulkan/anv_extensions.py |  1 +
 src/intel/vulkan/anv_private.h |  6 ++
 3 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 6a24d1086d8..1c7942f1b8e 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -309,9 +309,10 @@ anv_physical_device_free_disk_cache(struct anv_physical_device *device)
 static VkResult
 anv_physical_device_init(struct anv_physical_device *device,
  struct anv_instance *instance,
- const char *primary_path,
- const char *path)
+ drmDevicePtr drm_device)
 {
+   const char *primary_path = drm_device->nodes[DRM_NODE_PRIMARY];
+   const char *path = drm_device->nodes[DRM_NODE_RENDER];
VkResult result;
int fd;
int master_fd = -1;
@@ -342,6 +343,11 @@ anv_physical_device_init(struct anv_physical_device *device,
   device->no_hw = true;
}
 
+   device->pci_info.domain = drm_device->businfo.pci->domain;
+   device->pci_info.bus = drm_device->businfo.pci->bus;
+   device->pci_info.device = drm_device->businfo.pci->dev;
+   device->pci_info.function = drm_device->businfo.pci->func;
+
device->name = gen_get_device_name(device->chipset_id);
if (!gen_get_device_info(device->chipset_id, &device->info)) {
   result = vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
@@ -715,9 +721,7 @@ anv_enumerate_devices(struct anv_instance *instance)
   devices[i]->deviceinfo.pci->vendor_id == 0x8086) {
 
  result = anv_physical_device_init(&instance->physicalDevice,
-instance,
-devices[i]->nodes[DRM_NODE_PRIMARY],
-devices[i]->nodes[DRM_NODE_RENDER]);
+   instance, devices[i]);
  if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
 break;
   }
@@ -1166,6 +1170,16 @@ void anv_GetPhysicalDeviceProperties2(
  break;
   }
 
+  case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
+ VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
+(VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
+ properties->pciDomain = pdevice->pci_info.domain;
+ properties->pciBus = pdevice->pci_info.bus;
+ properties->pciDevice = pdevice->pci_info.device;
+ properties->pciFunction = pdevice->pci_info.function;
+ break;
+  }
+
   case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: {
  VkPhysicalDevicePointClippingProperties *properties =
 (VkPhysicalDevicePointClippingProperties *) ext;
diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py
index d4915c95013..1329ef74026 100644
--- a/src/intel/vulkan/anv_extensions.py
+++ b/src/intel/vulkan/anv_extensions.py
@@ -121,6 +121,7 @@ EXTENSIONS = [
 Extension('VK_EXT_external_memory_dma_buf',   1, True),
 Extension('VK_EXT_global_priority',   1,
   'device->has_context_priority'),
+Extension('VK_EXT_pci_bus_info',  1, True),
 Extension('VK_EXT_shader_viewport_index_layer',   1, True),
 Extension('VK_EXT_shader_stencil_export', 1, 'device->info.gen >= 9'),
 Extension('VK_EXT_vertex_attribute_divisor',  3, True),
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 5b4c286bf38..763dc9bb4ce 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -841,6 +841,12 @@ struct anv_physical_device {
 boolno_hw;
 charpath[20];
 const char *name;
+struct {
+   uint16_t domain;
+   uint8_t  bus;
+   uint8_t  device;
+   uint8_t  function;
+}   pci_info;
 struct gen_device_info  info;
 /** Amount of "GPU memory" we want to advertise
  *
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] vulkan: Add the fuchsia headers

2018-10-14 Thread Lionel Landwerlin

Acked-by: Lionel Landwerlin 

On 13/10/2018 16:00, Jason Ekstrand wrote:

These were missing in the last couple of spec updates.
---
  include/vulkan/vulkan.h |  4 +++
  include/vulkan/vulkan_fuchsia.h | 58 +
  2 files changed, 62 insertions(+)
  create mode 100644 include/vulkan/vulkan_fuchsia.h

diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h
index d05c8490a5b..cf9d85ae597 100644
--- a/include/vulkan/vulkan.h
+++ b/include/vulkan/vulkan.h
@@ -24,6 +24,10 @@
  #include "vulkan_android.h"
  #endif
  
+#ifdef VK_USE_PLATFORM_FUCHSIA

+#include <zircon/types.h>
+#include "vulkan_fuchsia.h"
+#endif
  
  #ifdef VK_USE_PLATFORM_IOS_MVK

  #include "vulkan_ios.h"
diff --git a/include/vulkan/vulkan_fuchsia.h b/include/vulkan/vulkan_fuchsia.h
new file mode 100644
index 000..e0ed5455adc
--- /dev/null
+++ b/include/vulkan/vulkan_fuchsia.h
@@ -0,0 +1,58 @@
+#ifndef VULKAN_FUCHSIA_H_
+#define VULKAN_FUCHSIA_H_ 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** Copyright (c) 2015-2018 The Khronos Group Inc.
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+** http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+/*
+** This header is generated from the Khronos Vulkan XML API Registry.
+**
+*/
+
+
+#define VK_FUCHSIA_imagepipe_surface 1
+#define VK_FUCHSIA_IMAGEPIPE_SURFACE_SPEC_VERSION 1
+#define VK_FUCHSIA_IMAGEPIPE_SURFACE_EXTENSION_NAME "VK_FUCHSIA_imagepipe_surface"
+
+typedef VkFlags VkImagePipeSurfaceCreateFlagsFUCHSIA;
+
+typedef struct VkImagePipeSurfaceCreateInfoFUCHSIA {
+VkStructureType sType;
+const void* pNext;
+VkImagePipeSurfaceCreateFlagsFUCHSIAflags;
+zx_handle_t imagePipeHandle;
+} VkImagePipeSurfaceCreateInfoFUCHSIA;
+
+
+typedef VkResult (VKAPI_PTR *PFN_vkCreateImagePipeSurfaceFUCHSIA)(VkInstance instance, const VkImagePipeSurfaceCreateInfoFUCHSIA* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR VkResult VKAPI_CALL vkCreateImagePipeSurfaceFUCHSIA(
+VkInstance  instance,
+const VkImagePipeSurfaceCreateInfoFUCHSIA*  pCreateInfo,
+const VkAllocationCallbacks*pAllocator,
+VkSurfaceKHR*   pSurface);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev