date:20180314

Re: [Mesa-dev] [PATCH 1/5] radeonsi: remove fast color clear for single-sample buffers

2018-03-14 Thread Dieter Nützel


For the series (1-3)

Tested-by: Dieter Nützel 

Are these numbers OK?

Triangle,Radeon RX 580 Series (POLARIS10 / DRM 3.25.0 / 
4.16.0-rc1-1.g7262353-default+, LLVM 7.0.0),3.1 Mesa 18.1.0-devel 
(git-a8cc051d2e),1920,1080,YES,Off,5240,6,0,314471


Or should I retest without this series?

Dieter

Am 11.03.2018 19:11, schrieb Marek Olšák:

From: Marek Olšák 

This should improve the score for the GpuTest Triangle benchmark.
Vulkan doesn't use this either.
---
 src/gallium/drivers/radeon/r600_pipe_common.h |  1 -
 src/gallium/drivers/radeon/r600_texture.c | 11 +---
 src/gallium/drivers/radeonsi/si_clear.c   | 37 
++-

 src/gallium/drivers/radeonsi/si_state.c   |  6 -
 4 files changed, 3 insertions(+), 52 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 7941903..9701757 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -209,21 +209,20 @@ struct r600_cmask_info {
 struct r600_texture {
struct r600_resourceresource;

struct radeon_surf  surface;
uint64_tsize;
struct r600_texture *flushed_depth_texture;

/* Colorbuffer compression and fast clear. */
struct r600_fmask_info  fmask;
struct r600_cmask_info  cmask;
-   struct r600_resource*cmask_buffer;
uint64_tdcc_offset; /* 0 = disabled */
unsignedcb_color_info; /* fast clear enable bit 
*/
unsignedcolor_clear_value[2];
unsignedlast_msaa_resolve_target_micro_mode;
unsignednum_level0_transfers;

/* Depth buffer compression and fast clear. */
uint64_thtile_offset;
float   depth_clear_value;
 	uint16_t			dirty_level_mask; /* each bit says if that mipmap is 
compressed */

diff --git a/src/gallium/drivers/radeon/r600_texture.c
b/src/gallium/drivers/radeon/r600_texture.c
index 125e7ef..03bc955 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -405,26 +405,22 @@ void si_texture_discard_cmask(struct si_screen 
*sscreen,

 {
if (!rtex->cmask.size)
return;

assert(rtex->resource.b.b.nr_samples <= 1);

/* Disable CMASK. */
memset(&rtex->cmask, 0, sizeof(rtex->cmask));
rtex->cmask.base_address_reg = rtex->resource.gpu_address >> 8;
rtex->dirty_level_mask = 0;
-
rtex->cb_color_info &= ~S_028C70_FAST_CLEAR(1);

-   if (rtex->cmask_buffer != &rtex->resource)
-   r600_resource_reference(&rtex->cmask_buffer, NULL);
-
/* Notify all contexts about the change. */
p_atomic_inc(&sscreen->dirty_tex_counter);
p_atomic_inc(&sscreen->compressed_colortex_counter);
 }

 static bool r600_can_disable_dcc(struct r600_texture *rtex)
 {
/* We can't disable DCC if it can be written by another process. */
return rtex->dcc_offset &&
   (!rtex->resource.b.is_shared ||
@@ -813,24 +809,20 @@ static boolean r600_texture_get_handle(struct
pipe_screen* screen,
  slice_size, whandle);
 }

 static void r600_texture_destroy(struct pipe_screen *screen,
 struct pipe_resource *ptex)
 {
struct r600_texture *rtex = (struct r600_texture*)ptex;
struct r600_resource *resource = &rtex->resource;

r600_texture_reference(&rtex->flushed_depth_texture, NULL);
-
-   if (rtex->cmask_buffer != &rtex->resource) {
-   r600_resource_reference(&rtex->cmask_buffer, NULL);
-   }
pb_reference(&resource->buf, NULL);
r600_resource_reference(&rtex->dcc_separate_buffer, NULL);
r600_resource_reference(&rtex->last_dcc_separate_buffer, NULL);
FREE(rtex);
 }

 static const struct u_resource_vtbl r600_texture_vtbl;

 /* The number of samples can be specified independently of the 
texture. */

 void si_texture_get_fmask_info(struct si_screen *sscreen,
@@ -1262,21 +1254,20 @@ r600_texture_create_object(struct pipe_screen 
*screen,

rtex->db_compatible = true;

if (!(sscreen->debug_flags & DBG(NO_HYPERZ)))
r600_texture_allocate_htile(sscreen, rtex);
}
} else {
if (base->nr_samples > 1) {
if (!buf) {
r600_texture_allocate_fmask(sscreen, rtex);
r600_texture_allocate_cmask(sscreen, rtex);
-   rtex->cmask_buffer = &rtex->resource;
}
if (!rtex->fmask.size || !rtex->cmask.size

Re: [Mesa-dev] [PATCH 1/1] nir: Use a freelist in nir_opt_dce to avoid spamming ralloc

2018-03-14 Thread Thomas Helland

Yup, most definitely. I just have one more thing to test before
sending out a V2. I've toyed around with arrays and sets and
stuff to see if there are better options than a linked list.
At least for now the answer is: "no, there isn't", but I'm gonna
test u_vector for this use later today to see if that is even better.
Expect new patch this evening CET.

2018-03-14 20:58 GMT+01:00 Dieter Nützel :
> Hello Thomas,
>
> is this useful even after '[Mesa-dev] [PATCH 0/2] V2: Use hash table cloning
> in copy propagation' landed?
>
> I'm running both together with Dave's '[Mesa-dev] [PATCH] radv/winsys:
> replace bo list searchs with a hash table.' patch.
>
> Dieter
>
>
> Am 24.01.2018 08:33, schrieb Thomas Helland:
>>
>> 2018-01-21 23:58 GMT+01:00 Eric Anholt :
>>>
>>> Thomas Helland  writes:
>>>
 Also, allocate worklist_elem in groups of 20, to reduce the burden of
 allocation. Do not use rzalloc, as there is no need. This lets us drop
 the number of calls to ralloc from approximately 10% of all calls to
 ralloc(130 000 calls), down to a mere 2000 calls to ralloc_array_size.
 This cuts the runtime of shader-db by 1%, while at the same time
 reducing the number of stalled cycles, executed cycles, and executed
 instructions by about 1 % as reported by perf. I did a five-run
 benchmark pre and post and got a statistical variance less than 0.1% pre
 and post. This was with i965's ir validation polluting the benchmark, so
 the numbers are even better in release builds.

 Performance change as found with perf-diff:
 4.74% -0.23%  libc-2.26.so[.] _int_malloc
 1.88% -0.21%  libc-2.26.so[.] malloc
 2.27% +0.16%  libmesa_dri_drivers.so  [.] match_value.part.7
 2.95% -0.12%  libc-2.26.so[.] _int_free
   +0.11%  libmesa_dri_drivers.so  [.] worklist_push
 1.22% -0.08%  libc-2.26.so[.] malloc_consolidate
 0.16% -0.06%  libmesa_dri_drivers.so  [.] mark_live_cb
 1.21% +0.06%  libmesa_dri_drivers.so  [.] match_expression.part.6
 0.75% -0.05%  libc-2.26.so[.] cfree@GLIBC_2.2.5
 0.50% -0.05%  libmesa_dri_drivers.so  [.] ralloc_size
 0.57% +0.04%  libmesa_dri_drivers.so  [.] nir_replace_instr
 1.29% -0.04%  libmesa_dri_drivers.so  [.] unsafe_free
>>>
>>>
>>> I'm curious, since a NIR instruction worklist seems like a generally
>>> useful thing to have:
>>>
>>> Could nir_worklist.c keep the implementation of this?
>>>
>>> Also, I wonder if it wouldn't be even better to have a u_dynarray of
>>> instructions in the worklist, with push/pop on the end of the array, and
>>> a struct set tracking the instructions in the array to avoid
>>> double-adding.  I actually don't know if that would be better or not, so
>>> I'd be happy with the worklist management just moved to nir_worklist.c.
>>
>>
>> I'll look into this to see what I can do. nir_worklist.c at this time has
>> only
>> a block worklist. This numbers all the blocks, uses a bitset for checking
>> if the item is present, and uses an array with an index pointing to the
>> start of the queue of blocks in the buffer.
>>
>> The same scheme could be easily used for ssa-defs, as these are
>> also numbered. I actually did this for the VRP pass I wrote years ago.
>>
>> However, for instructions we do not have a way of numbering them,
>> so a different scheme would have to be used. A dynarray + set type
>> of thing, as you're suggesting, might get us where we want.
>> I'll see what I can come up with.
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v4 12/18] i965/blorp: Update the fast clear color address.

2018-03-14 Thread Jordan Justen

For the subject, instead of 'address', what about something like:

i965/blorp: Update the fast clear value buffer

On 2018-03-08 08:49:05, Rafael Antognolli wrote:
> On Gen10, whenever we do a fast clear, blorp will update the clear color
> state buffer for us, as long as we set the clear color address
> correctly.
> 
> However, on a hiz clear, if the surface is already on the fast clear
> state we skip the actual fast clear operation and, before gen10, only
> updated the miptree. On gen10+ we need to update the clear value state
> buffer too, since blorp will not be doing a fast clear and updating it
> for us.
> 
> v4:
>  - do not use clear_value_size in the for loop
>  - Get the address of the clear color from the aux buffer or the
>  clear_color_bo, depending on which one is available.
>  - let core blorp update the clear color, but also update it when we
>  skip a fast clear depth.
> 
> Signed-off-by: Rafael Antognolli 
> ---
>  src/mesa/drivers/dri/i965/brw_blorp.c | 11 +++
>  src/mesa/drivers/dri/i965/brw_clear.c | 22 ++
>  2 files changed, 33 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
> b/src/mesa/drivers/dri/i965/brw_blorp.c
> index ffd957fb866..914aeeace7a 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.c
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.c
> @@ -185,6 +185,17 @@ blorp_surf_for_miptree(struct brw_context *brw,
>  
>surf->aux_addr.buffer = aux_buf->bo;
>surf->aux_addr.offset = aux_buf->offset;
> +
> +  if (devinfo->gen >= 10) {
> + /* If we have a CCS surface and clear_color_bo set, use that bo as
> +  * storage for the indirect clear color. Otherwise, use the extra
> +  * space at the end of the aux_buffer.
> +  */
> + surf->clear_color_addr = (struct blorp_address) {
> +.buffer = aux_buf->clear_color_bo,
> +.offset = aux_buf->clear_color_offset,
> + };
> +  }
> } else {
>surf->aux_addr = (struct blorp_address) {
>   .buffer = NULL,
> diff --git a/src/mesa/drivers/dri/i965/brw_clear.c 
> b/src/mesa/drivers/dri/i965/brw_clear.c
> index 8aa83722ee9..63c0b241898 100644
> --- a/src/mesa/drivers/dri/i965/brw_clear.c
> +++ b/src/mesa/drivers/dri/i965/brw_clear.c
> @@ -108,6 +108,7 @@ brw_fast_clear_depth(struct gl_context *ctx)
> struct intel_mipmap_tree *mt = depth_irb->mt;
> struct gl_renderbuffer_attachment *depth_att = 
> &fb->Attachment[BUFFER_DEPTH];
> const struct gen_device_info *devinfo = &brw->screen->devinfo;
> +   bool same_clear_value = true;
>  
> if (devinfo->gen < 6)
>return false;
> @@ -213,6 +214,7 @@ brw_fast_clear_depth(struct gl_context *ctx)
>}
>  
>intel_miptree_set_depth_clear_value(ctx, mt, clear_value);
> +  same_clear_value = false;
> }
>  
> bool need_clear = false;
> @@ -232,6 +234,26 @@ brw_fast_clear_depth(struct gl_context *ctx)
> * state then simply updating the miptree fast clear value is 
> sufficient
> * to change their clear value.
> */
> +  if (devinfo->gen >= 10 && !same_clear_value) {
> + /* Before gen10, it was enough to just update the clear value in the
> +  * miptree. But on gen10+, we let blorp update the clear value state
> +  * buffer when doing a fast clear. Since we are skipping the fast
> +  * clear here, we need to update the clear color ourselves.
> +  */
> + uint32_t clear_offset = mt->hiz_buf->clear_color_offset;
> + union isl_color_value clear_color = { .f32 = { clear_value, } };
> +
> + /* We can't update the clear color while the hardware is still using
> +  * the previous one for a resolve or sampling from it. So make sure
> +  * that there's no pending commands at this point.
> +  */
> + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL);

I think we talked about potential performance concerns over the stall,
but we decided it was probably unlikely that an application would
clear the buffer multiple times with different values.

I just wanted to mention it in case anyone else has other opinions on
it.

11 - 12 Reviewed-by: Jordan Justen 

> + for (int i = 0; i < 4; i++) {
> +brw_store_data_imm32(brw, mt->hiz_buf->clear_color_bo,
> + clear_offset + i * 4, clear_color.u32[i]);
> + }
> + brw_emit_pipe_control_flush(brw, 
> PIPE_CONTROL_STATE_CACHE_INVALIDATE);
> +  }
>return true;
> }
>  
> -- 
> 2.14.3
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] vbo: Correctly handle source arrays in vbo_split_copy.

2018-03-14 Thread Mathias Fröhlich

Hi,

On Wednesday, 14 March 2018 22:28:28 CET Brian Paul wrote:
> Reviewed-by: Brian Paul 

Thanks, pushed!

best
Mathias


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [RFC] nir: Add a deref instruction type

2018-03-14 Thread Jason Ekstrand

This commit adds a new instruction type to NIR for handling derefs.
Nothing uses it yet but this adds the data structure as well as all of
the code to validate, print, clone, and [de]serialize them.

Cc: Rob Clark 
Cc: Connor Abbott 

---

This is not tested beyond compile testing.  I'm sending it out ahead so
that people can comment on the instruction data structure.  I think this
should handle all the SPIR-V use-cases fairly nicely as well as the
use-cases we have today.

 src/compiler/nir/nir.c   | 49 +++
 src/compiler/nir/nir.h   | 47 +-
 src/compiler/nir/nir_clone.c | 45 +
 src/compiler/nir/nir_print.c | 46 ++
 src/compiler/nir/nir_serialize.c | 85 
 src/compiler/nir/nir_validate.c  | 67 +++
 6 files changed, 338 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
index a97b119..1023eb9 100644
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -469,6 +469,26 @@ nir_alu_instr_create(nir_shader *shader, nir_op op)
return instr;
 }
 
+nir_deref_instr *
+nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
+{
+   nir_deref_instr *instr =
+  rzalloc_size(shader, sizeof(nir_deref_instr));
+
+   instr_init(&instr->instr, nir_instr_type_deref);
+
+   instr->deref_type = deref_type;
+   if (deref_type != nir_deref_type_var)
+  src_init(&instr->parent);
+
+   if (deref_type == nir_deref_type_array_indirect)
+  src_init(&instr->arr.indirect);
+
+   dest_init(&instr->dest);
+
+   return instr;
+}
+
 nir_jump_instr *
 nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
 {
@@ -1198,6 +1218,12 @@ visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb 
cb, void *state)
 }
 
 static bool
+visit_deref_dest(nir_deref_instr *instr, nir_foreach_dest_cb cb, void *state)
+{
+   return cb(&instr->dest, state);
+}
+
+static bool
 visit_intrinsic_dest(nir_intrinsic_instr *instr, nir_foreach_dest_cb cb,
  void *state)
 {
@@ -1238,6 +1264,8 @@ nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb 
cb, void *state)
switch (instr->type) {
case nir_instr_type_alu:
   return visit_alu_dest(nir_instr_as_alu(instr), cb, state);
+   case nir_instr_type_deref:
+  return visit_deref_dest(nir_instr_as_deref(instr), cb, state);
case nir_instr_type_intrinsic:
   return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state);
case nir_instr_type_tex:
@@ -1349,6 +1377,23 @@ visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb 
cb, void *state)
 }
 
 static bool
+visit_deref_instr_src(nir_deref_instr *instr,
+  nir_foreach_src_cb cb, void *state)
+{
+   if (instr->deref_type != nir_deref_type_var) {
+  if (!visit_src(&instr->parent, cb, state))
+ return false;
+   }
+
+   if (instr->deref_type == nir_deref_type_array_indirect) {
+  if (!visit_src(&instr->arr.indirect, cb, state))
+ return false;
+   }
+
+   return true;
+}
+
+static bool
 visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state)
 {
for (unsigned i = 0; i < instr->num_srcs; i++) {
@@ -1436,6 +1481,10 @@ nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, 
void *state)
   if (!visit_alu_src(nir_instr_as_alu(instr), cb, state))
  return false;
   break;
+   case nir_instr_type_deref:
+  if (!visit_deref_instr_src(nir_instr_as_deref(instr), cb, state))
+ return false;
+  break;
case nir_instr_type_intrinsic:
   if (!visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state))
  return false;
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 839d403..a40a3a0 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -421,6 +421,7 @@ typedef struct nir_register {
 
 typedef enum {
nir_instr_type_alu,
+   nir_instr_type_deref,
nir_instr_type_call,
nir_instr_type_tex,
nir_instr_type_intrinsic,
@@ -888,7 +889,10 @@ bool nir_alu_srcs_equal(const nir_alu_instr *alu1, const 
nir_alu_instr *alu2,
 typedef enum {
nir_deref_type_var,
nir_deref_type_array,
-   nir_deref_type_struct
+   nir_deref_type_struct,
+   nir_deref_type_array_direct,
+   nir_deref_type_array_indirect,
+   nir_deref_type_array_wildcard,
 } nir_deref_type;
 
 typedef struct nir_deref {
@@ -950,6 +954,42 @@ nir_deref_tail(nir_deref *deref)
 typedef struct {
nir_instr instr;
 
+   /** The type of this deref instruction */
+   nir_deref_type deref_type;
+
+   /** The mode of the underlying variable */
+   nir_variable_mode mode;
+
+   /** The dereferenced type of the resulting pointer value */
+   const struct glsl_type *type;
+
+   union {
+  /** Variable being dereferenced if deref_type is a deref_var */
+  nir_variable *var;
+
+  /** Parent deref if deref_type is not deref_var */
+  nir_src parent;
+   };
+
+   /** Addi

[Mesa-dev] [Bug 105464] Reading per-patch outputs in Tessellation Control Shader returns undefined values

2018-03-14 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=105464

--- Comment #6 from Clément Guérin  ---
I can confirm that the tessellation demo is broken without Philip's patch on
mesa 03e37ec6d7 and llvm-svn 327550 on R9 Fury.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH mesa 1/2] Add processor topology calculation implementation for Darwin/OSX targets.

2018-03-14 Thread Cherniak, Bruce

Code looks good.  I'm not able to test it, but it's isolated within #defined( 
__APPLE__).
Thanks for the contribution.

Reviewed-by: Bruce Cherniak  

> On Mar 14, 2018, at 6:19 PM, Jeremy Huddleston Sequoia  
> wrote:
> 
> From: Apple SWE 
> 
> The implementation for bootstrapping SWR on Darwin targets is based on the 
> Linux version.
> Instead of reading the output of /proc/cpuinfo, sysctlbyname is used to 
> determine the
> physical identifiers, processor identifiers, core counts and thread-processor 
> affinities.
> 
> With this patch, it is possible to use SWR as an alternate renderer on OSX to 
> softpipe and
> llvmpipe.
> 
> Reviewed-by: Jeremy Huddleston Sequoia 
> Signed-off-by: Jeremy Huddleston Sequoia 
> ---
> .../drivers/swr/rasterizer/core/threads.cpp| 56 +-
> 1 file changed, 55 insertions(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp 
> b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
> index 4d79168d2d..3eb20abcbf 100644
> --- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
> +++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
> @@ -36,6 +36,11 @@
> #include 
> #endif
> 
> +#ifdef __APPLE__
> +#include 
> +#include 
> +#endif
> +
> #include "common/os.h"
> #include "context.h"
> #include "frontend.h"
> @@ -219,6 +224,56 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, 
> uint32_t& out_numThread
> 
> #elif defined(__APPLE__)
> 
> +auto numProcessors = 0;
> +auto numCores = 0;
> +auto numPhysicalIds = 0;
> +
> +int value;
> +size_t size = sizeof(value);
> +
> +int result = sysctlbyname("hw.packages", &value, &size, NULL, 0);
> +SWR_ASSERT(result == 0);
> +numPhysicalIds = value;
> +
> +result = sysctlbyname("hw.logicalcpu", &value, &size, NULL, 0);
> +SWR_ASSERT(result == 0);
> +numProcessors = value;
> +
> +result = sysctlbyname("hw.physicalcpu", &value, &size, NULL, 0);
> +SWR_ASSERT(result == 0);
> +numCores = value;
> +
> +out_nodes.resize(numPhysicalIds);
> +
> +for (auto physId = 0; physId < numPhysicalIds; ++physId)
> +{
> +auto &numaNode = out_nodes[physId];
> +auto procId = 0;
> +
> +numaNode.cores.resize(numCores);
> +
> +while (procId < numProcessors)
> +{
> +for (auto coreId = 0; coreId < numaNode.cores.size(); ++coreId, 
> ++procId)
> +{
> +auto &core = numaNode.cores[coreId];
> +
> +core.procGroup = coreId;
> +core.threadIds.push_back(procId);
> +}
> +}
> +}
> +
> +out_numThreadsPerProcGroup = 0;
> +
> +for (auto &node : out_nodes)
> +{
> +for (auto &core : node.cores)
> +{
> +out_numThreadsPerProcGroup += core.threadIds.size();
> +}
> +}
> +
> #else
> 
> #error Unsupported platform
> @@ -253,7 +308,6 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, 
> uint32_t& out_numThread
> }
> }
> 
> -
> void bindThread(SWR_CONTEXT* pContext, uint32_t threadId, uint32_t 
> procGroupId = 0, bool bindProcGroup=false)
> {
> // Only bind threads when MAX_WORKER_THREADS isn't set.
> -- 
> 2.16.1 (Apple Git-102)
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 04/50] glsl: Add "built-in" functions to do eq(fp64, fp64)

2018-03-14 Thread Dave Airlie

> Is a mix really warranted here? Could just use
> return result && !(isaNaN || isbNaN) (since the other mix part is just
> false I think mix is a bit overkill, albeit it might not really make a
> difference).
> Actually I think it should be simplified, you don't need to check both
> vars for NaN (because if just one is NaN, the comparison(s) will be
> false anyway).
> so just return result && !isaNaN

This saves 2 instructions, granted that is a minor drop in a very large
ocean, but seems fine to just do it.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH mesa 2/2] sched.h needs to be imported on Darwin/OSX targets.

2018-03-14 Thread Cherniak, Bruce

We don't currently build SWR on OS X, I've had difficulty building Mesa on OS X 
in general.
But, I'd be very interested in learning.  Thanks for the patch.

Reviewed-by: Bruce Cherniak  

> On Mar 14, 2018, at 6:19 PM, Jeremy Huddleston Sequoia  
> wrote:
> 
> From: Apple SWE 
> 
> sched_yield is used but the include reference on Darwin is missing. This patch
> conditionally guards on Darwin/OSX to import sched.h first.
> 
> Reviewed-by: Jeremy Huddleston Sequoia 
> Signed-off-by: Jeremy Huddleston Sequoia 
> ---
> src/gallium/drivers/swr/swr_fence.cpp | 4 
> 1 file changed, 4 insertions(+)
> 
> diff --git a/src/gallium/drivers/swr/swr_fence.cpp 
> b/src/gallium/drivers/swr/swr_fence.cpp
> index 3005eb9aaa..b05ac8cec0 100644
> --- a/src/gallium/drivers/swr/swr_fence.cpp
> +++ b/src/gallium/drivers/swr/swr_fence.cpp
> @@ -29,6 +29,10 @@
> #include "swr_screen.h"
> #include "swr_fence.h"
> 
> +#ifdef __APPLE__
> +#include 
> +#endif
> +
> #if defined(PIPE_CC_MSVC) // portable thread yield
>#define sched_yield SwitchToThread
> #endif
> -- 
> 2.16.1 (Apple Git-102)
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] FLAG-DAY: NIR derefs

2018-03-14 Thread Rob Clark

On Wed, Mar 14, 2018 at 8:44 PM, Connor Abbott  wrote:
> On Wed, Mar 14, 2018 at 6:07 PM, Rob Clark  wrote:
>> On Wed, Mar 14, 2018 at 7:42 PM, Connor Abbott  wrote:
>>> On Wed, Mar 14, 2018 at 5:05 PM, Rob Clark  wrote:
 On Wed, Mar 14, 2018 at 4:58 PM, Connor Abbott  wrote:
> FWIW, the way I imagined doing this was something like:
>
> 1. Add nir_deref_instr and nir_deref_type_pointer. At this point, just
> make everything assert if the base deref isn't a nir_deref_var. This
> will be a bit of a flag-day, but also very mechanical. It'll also help
> us catch cases where we don't handle new-style derefs later.
> 2. Add a pass to flatten nir_deref_type_pointer into
> nir_deref_type_var if possible (i.e. if there's a clear chain up to
> the base variable without any phi nodes or whatever). This should
> always be possible for GLSL, as well as SPIR-V unless
> KHR_variable_pointers is enabled. We'll use this to avoid too much
> churn in drivers, passes that haven't been updated, etc. We might also
> want a pass to do the opposite, for converting passes where we don't
> want to have codepaths for both forms at once.

 btw, does it seem reasonable to assert that deref instruction src's
 are *always* in SSA form?  That seems reasonable to me since they will
 be mostly lowered away before the driver sees them (and I think makes
 some of the operation on them easier), and I can't think of any way
 for them *not* to be SSA (since they aren't real instructions).
>>>
>>> I think so... as long as you don't lower locals to regs before
>>> lowering everything to explicit address arithmetic. Although, with the
>>> physical memory model, it's just another source like any other so I'm
>>> not sure if there's a point.
>>>
>>
>> I think w/ phys memory model, we could lower away the deref's before
>> going to regs.  That *seems* like a reasonable requirement to me.
>>

 If so, my rough thoughts are a deref instruction chain (formed by ssa
 links to previous deref instruction) either start w/
 nir_deref_instr_pointer or nir_deref_instruction_var instructions at
 the head of the list (to start, I guess you could ignore adding the
 nir_deref_instr_pointer instruction and I could add that for
 clover/spirv work).  Followed by N links of struct/array deref_link
 instructions that have two ssa src's (one that is previous deref
 instruction and one that is array or struct member offset)
>>>
>>> Why would you need a separate nir_deref_instr_pointer? Do you want to
>>> put information like what type of pointer it is in there? Maybe we
>>> could just make that part of every nir_deref_instr instead?
>>
>> well, in clc you could hypothetically do something like:
>>
>>   __global struct Foo *f = (struct Foo *)0x1234;
>>
>> so you don't necessarily have a var at the start of your deref chain.
>>
>> More realistic example is:
>>
>>   ptr->a.b->c.d
>>
>> which is really two deref chains, first starting at a var, second
>> starting at an ssa ptr (which I think realistically ends up needing to
>> be a fat pointer to deal w/ cl's multiple address spaces[1]), with an
>> intermediate load_global or load_shared intrinsic in between.
>>
>> Anyways, don't want to derail the conversion to deref instructions too
>> much, but I do think we need something different for "var" vs "ptr"
>> (and the nice thing about deref chains is this should be easier to
>> add)
>
> My point was that you don't really need a distinction, as long as
> deref instructions can accept any old pointer. In your second example,
> there would be a struct deref, a load, and then a second struct deref
> using the result of the load. This is similar to how it's done in
> LLVM.
>

I guess that comes down to how we define what a pointer is..

if it is abstract enough to deal with both logical pointers (ie.
something that refers back to a var of some sort) or physical
pointers, I guess that can work.. at this point I'm not too picky
about the color of that bikeshed ;-)

fwiw, on the topic of abstracting deref chains in prep of changing things:

  https://github.com/freedreno/mesa/commits/deref-chains

I've gotten (I think) about halfway through going through the
intr->variables[n]->var callsites, and will pick it up in the morning.
I think we need to do similar for nir_tex_instr (but that looks like
only a few call-sites).  But as long as we can assume deref chains are
in SSA I think some refactoring like that up front will make the
conversion easier (ie. accessors will be easy to convert over to new
world order).

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH shaderdb] run: -p option accepts hex format pci-id

2018-03-14 Thread Dongwon Kim

-p option now takes hex format pci-id of target architecture.

Signed-off-by: Dongwon Kim 
---
 run.c | 35 +--
 1 file changed, 25 insertions(+), 10 deletions(-)

diff --git a/run.c b/run.c
index 69e64c7..3db97ec 100644
--- a/run.c
+++ b/run.c
@@ -356,7 +356,8 @@ const struct platform platforms[] = {
 void print_usage(const char *prog_name)
 {
 fprintf(stderr,
-"Usage: %s [-d ] [-j ] [-o ] [-p 
] \n",
+"Usage: %s [-d ] [-j ] [-o ] [-p 
] \n",
 prog_name);
 }
 
@@ -456,6 +457,7 @@ main(int argc, char **argv)
 break;
 case 'p': {
 const struct platform *platform = NULL;
+
 for (unsigned i = 0; i < ARRAY_SIZE(platforms); i++) {
 if (strcasecmp(optarg, platforms[i].name) == 0) {
 platform = platforms + i;
@@ -463,17 +465,30 @@ main(int argc, char **argv)
 }
 }
 
-if (platform == NULL) {
-fprintf(stderr, "Invalid platform.\nValid platforms are:");
-for (unsigned i = 0; i < ARRAY_SIZE(platforms); i++)
-fprintf(stderr, " %s", platforms[i].name);
-fprintf(stderr, "\n");
-return -1;
+if (platform) {
+printf("### Compiling for %s(PCI_ID=%s) ###\n", platform->name,
+   platform->pci_id);
+setenv("INTEL_DEVID_OVERRIDE", platform->pci_id, 1);
+break;
 }
 
-printf("### Compiling for %s ###\n", platform->name);
-setenv("INTEL_DEVID_OVERRIDE", platform->pci_id, 1);
-break;
+if (optarg[0] == '0' && optarg[1] == 'x') {
+/* check if rest of given string indicates hex number */
+if (strtol(optarg, NULL, 16) > 0) {
+setenv("INTEL_DEVID_OVERRIDE", optarg, 1);
+printf("### Compiling for GEN arch with PCI_ID=%s ###\n",
+   optarg);
+break;
+}
+}
+
+fprintf(stderr, "Invalid platform.\nValid platforms are:");
+for (unsigned i = 0; i < ARRAY_SIZE(platforms); i++)
+fprintf(stderr, " %s", platforms[i].name);
+
+fprintf(stderr, "\n");
+fprintf(stderr, "Or\nPCI-ID of other supported platform.\n");
+return -1;
 }
 case 'j':
 max_threads = atoi(optarg);
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] FLAG-DAY: NIR derefs

2018-03-14 Thread Connor Abbott

On Wed, Mar 14, 2018 at 6:07 PM, Rob Clark  wrote:
> On Wed, Mar 14, 2018 at 7:42 PM, Connor Abbott  wrote:
>> On Wed, Mar 14, 2018 at 5:05 PM, Rob Clark  wrote:
>>> On Wed, Mar 14, 2018 at 4:58 PM, Connor Abbott  wrote:
 FWIW, the way I imagined doing this was something like:

 1. Add nir_deref_instr and nir_deref_type_pointer. At this point, just
 make everything assert if the base deref isn't a nir_deref_var. This
 will be a bit of a flag-day, but also very mechanical. It'll also help
 us catch cases where we don't handle new-style derefs later.
 2. Add a pass to flatten nir_deref_type_pointer into
 nir_deref_type_var if possible (i.e. if there's a clear chain up to
 the base variable without any phi nodes or whatever). This should
 always be possible for GLSL, as well as SPIR-V unless
 KHR_variable_pointers is enabled. We'll use this to avoid too much
 churn in drivers, passes that haven't been updated, etc. We might also
 want a pass to do the opposite, for converting passes where we don't
 want to have codepaths for both forms at once.
>>>
>>> btw, does it seem reasonable to assert that deref instruction src's
>>> are *always* in SSA form?  That seems reasonable to me since they will
>>> be mostly lowered away before the driver sees them (and I think makes
>>> some of the operation on them easier), and I can't think of any way
>>> for them *not* to be SSA (since they aren't real instructions).
>>
>> I think so... as long as you don't lower locals to regs before
>> lowering everything to explicit address arithmetic. Although, with the
>> physical memory model, it's just another source like any other so I'm
>> not sure if there's a point.
>>
>
> I think w/ phys memory model, we could lower away the deref's before
> going to regs.  That *seems* like a reasonable requirement to me.
>
>>>
>>> If so, my rough thoughts are a deref instruction chain (formed by ssa
>>> links to previous deref instruction) either start w/
>>> nir_deref_instr_pointer or nir_deref_instruction_var instructions at
>>> the head of the list (to start, I guess you could ignore adding the
>>> nir_deref_instr_pointer instruction and I could add that for
>>> clover/spirv work).  Followed by N links of struct/array deref_link
>>> instructions that have two ssa src's (one that is previous deref
>>> instruction and one that is array or struct member offset)
>>
>> Why would you need a separate nir_deref_instr_pointer? Do you want to
>> put information like what type of pointer it is in there? Maybe we
>> could just make that part of every nir_deref_instr instead?
>
> well, in clc you could hypothetically do something like:
>
>   __global struct Foo *f = (struct Foo *)0x1234;
>
> so you don't necessarily have a var at the start of your deref chain.
>
> More realistic example is:
>
>   ptr->a.b->c.d
>
> which is really two deref chains, first starting at a var, second
> starting at an ssa ptr (which I think realistically ends up needing to
> be a fat pointer to deal w/ cl's multiple address spaces[1]), with an
> intermediate load_global or load_shared intrinsic in between.
>
> Anyways, don't want to derail the conversion to deref instructions too
> much, but I do think we need something different for "var" vs "ptr"
> (and the nice thing about deref chains is this should be easier to
> add)

My point was that you don't really need a distinction, as long as
deref instructions can accept any old pointer. In your second example,
there would be a struct deref, a load, and then a second struct deref
using the result of the load. This is similar to how it's done in
LLVM.

>
> BR,
> -R
>
> [1] kinda a different topic.. short version is I'm leaning towards a
> nir_deref_instr_pointer taking a two component vector as its src so
> it can be lowered to an if/else chain to deal with different address
> spaces, and then let opt passes clean things up so driver ends up with
> either load/store_global or load/store_local, etc
>
>
>>
>>>
 3. Modify nir_lower_io to handle new-style derefs, especially for
 shared variables (i.e. KHR_variable_pointers for anv). We might have
 to modify a few other passes, too.
 4. Add the required deref lowering passes to all drivers.
 5. Rewrite glsl_to_nir and spirv_to_nir to emit the new-style derefs.
 At the very least, we should be using this to implement the shared
 variable bits of KHR_variable_pointers. If we add stride/offset
 annotations to nir_deref_instr for UBO's and SSBO's, then we might
 also be able to get rid of the vtn_deref stuff entirely (although I'm
 not sure if that should be a goal right now).
>>>
>>> I think I might try to prototype something where we convert vtn over
>>> to new-style deref instructions, plus a pass to lower to old style
>>> deref chains.  It partly comes down to how quickly I can finish a
>>> couple other things, and how much I can't sleep on a long-ass flight.
>>> (I guess even if throw-away

Re: [Mesa-dev] [PATCH v4 10/18] i965/miptree: Add new BO for clear color.

2018-03-14 Thread Jordan Justen

What about a subject like this?

i965/miptree: Add new clear color BO for winsys aux buffers

On 2018-03-08 08:49:03, Rafael Antognolli wrote:
> Add an extra BO to store clear color when we receive the aux buffer from
> the window system. Since we have no control over the aux buffer size in
> this case, we need the new BO to store only the clear color.
> 
> Signed-off-by: Rafael Antognolli 
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 18 ++
>  1 file changed, 18 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 22d0ae89367..a8b89d9170a 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -969,6 +969,23 @@ create_ccs_buf_for_image(struct brw_context *brw,
>return false;
> }
>  
> +   /* On gen10+ we start using an extra space in the aux buffer to store the
> +* indirect clear color. However, if we imported an image from the window
> +* system with CCS, we don't have the extra space at the end of the aux
> +* buffer. So create a new bo here that will store that clear color.
> +*/
> +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> +   if (devinfo->gen >= 10) {
> +  mt->mcs_buf->clear_color_bo =
> + brw_bo_alloc(brw->bufmgr, "clear_color_bo",
> +  brw->isl_dev.ss.clear_color_state_size, 64);
> +  if (!mt->mcs_buf->clear_color_bo) {
> + free(mt->mcs_buf);
> + mt->mcs_buf = NULL;
> + return false;
> +  }
> +   }
> +
> mt->mcs_buf->bo = image->bo;
> brw_bo_reference(image->bo);
>  
> @@ -1211,6 +1228,7 @@ intel_miptree_aux_buffer_free(struct 
> intel_miptree_aux_buffer *aux_buf)
>return;
>  
> brw_bo_unreference(aux_buf->bo);
> +   brw_bo_unreference(aux_buf->clear_color_bo);

Should this be added in the previous patch?

Should it only happen when gen >= 10? I guess it will be null for gen
< 10, so this will be a no-op.

-Jordan

>  
> free(aux_buf);
>  }
> -- 
> 2.14.3
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH mesa 1/2] Add processor topology calculation implementation for Darwin/OSX targets.

2018-03-14 Thread Jeremy Sequoia



Sent from my iPhone...

> On Mar 14, 2018, at 16:41, Matt Turner  wrote:
> 
> Subject should have a swr prefix or similar.
> 
> On Wed, Mar 14, 2018 at 4:19 PM, Jeremy Huddleston Sequoia
>  wrote:
>> From: Apple SWE 
> 
> Explain?

I didn’t author it.  Author is an Apple Employee in SWE who does not want 
attribution.

> 
>> 
>> The implementation for bootstrapping SWR on Darwin targets is based on the 
>> Linux version.
>> Instead of reading the output of /proc/cpuinfo, sysctlbyname is used to 
>> determine the
>> physical identifiers, processor identifiers, core counts and 
>> thread-processor affinities.
>> 
>> With this patch, it is possible to use SWR as an alternate renderer on OSX 
>> to softpipe and
>> llvmpipe.
> 
> All of these look like they're too long to fit in 80 columns in git show.
> 
>> Reviewed-by: Jeremy Huddleston Sequoia 
> 
> I'm guessing you're just pushing code someone else wrote...
> 
>> Signed-off-by: Jeremy Huddleston Sequoia 
> 
> We don't  have a DCO, so Signed-off-by has no meaning. Worse, in the
> case we added a DCO in the future, all of the stupid Signed-off-by's
> people have been cargo culting for years would confuse everything.
> Please stop doing it.

Ok.  I’ll remove it.

> 
> Please don't push code to maintained drivers without going through the
> mailing list. I feel like I shouldn't have to say that.

In the past there hasn’t been much care about code in __APPLE__, so I figured 
it wasn’t that big of a deal, but if you are interested in reviewing these 
changes, that’s great to hear.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] anv/pipeline: set active_stages early

2018-03-14 Thread Caio Marcelo de Oliveira Filho

Since the intermediate states of active_stages are not used,
i.e. active_stages is read only after all stages were set into it,
just set its value before compiling the shaders.

This will allow us to conditionally run certain passes based on what
other shaders are being used, e.g. a certain pass might only be
applicable to the vertex shader if there's no geometry or tessellation
shader being used.
---
 src/intel/vulkan/anv_pipeline.c  | 12 +---
 src/intel/vulkan/genX_pipeline.c |  1 +
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index cb34f3be77..fba0039240 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -501,7 +501,6 @@ anv_pipeline_add_compiled_stage(struct anv_pipeline 
*pipeline,
 struct anv_shader_bin *shader)
 {
pipeline->shaders[stage] = shader;
-   pipeline->active_stages |= mesa_to_vk_shader_stage(stage);
 }
 
 static VkResult
@@ -1334,11 +1333,18 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = {};
struct anv_shader_module *modules[MESA_SHADER_STAGES] = {};
for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
-  gl_shader_stage stage = ffs(pCreateInfo->pStages[i].stage) - 1;
+  VkShaderStageFlagBits vk_stage = pCreateInfo->pStages[i].stage;
+  gl_shader_stage stage = ffs(vk_stage) - 1;
   pStages[stage] = &pCreateInfo->pStages[i];
   modules[stage] = anv_shader_module_from_handle(pStages[stage]->module);
+  pipeline->active_stages |= vk_stage;
}
 
+   if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
+  pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
+
+   assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
+
if (modules[MESA_SHADER_VERTEX]) {
   result = anv_pipeline_compile_vs(pipeline, cache, pCreateInfo,
modules[MESA_SHADER_VERTEX],
@@ -1378,7 +1384,7 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
  goto compile_fail;
}
 
-   assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
+   assert(pipeline->shaders[MESA_SHADER_VERTEX]);
 
anv_pipeline_setup_l3_config(pipeline, false);
 
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index 9c08bc2033..eb2d414735 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -1787,6 +1787,7 @@ compute_pipeline_create(
pipeline->needs_data_cache = false;
 
assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT);
+   pipeline->active_stages |= VK_SHADER_STAGE_COMPUTE_BIT;
ANV_FROM_HANDLE(anv_shader_module, module,  pCreateInfo->stage.module);
result = anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module,
 pCreateInfo->stage.pName,
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] anv/pipeline: fail if tcs/tes compile fail

2018-03-14 Thread Caio Marcelo de Oliveira Filho

---
 src/intel/vulkan/anv_pipeline.c | 16 +---
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 9cfd16df2a..cb34f3be77 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -1349,13 +1349,15 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
}
 
if (modules[MESA_SHADER_TESS_EVAL]) {
-  anv_pipeline_compile_tcs_tes(pipeline, cache, pCreateInfo,
-   modules[MESA_SHADER_TESS_CTRL],
-   pStages[MESA_SHADER_TESS_CTRL]->pName,
-   
pStages[MESA_SHADER_TESS_CTRL]->pSpecializationInfo,
-   modules[MESA_SHADER_TESS_EVAL],
-   pStages[MESA_SHADER_TESS_EVAL]->pName,
-   
pStages[MESA_SHADER_TESS_EVAL]->pSpecializationInfo);
+  result = anv_pipeline_compile_tcs_tes(pipeline, cache, pCreateInfo,
+modules[MESA_SHADER_TESS_CTRL],
+
pStages[MESA_SHADER_TESS_CTRL]->pName,
+
pStages[MESA_SHADER_TESS_CTRL]->pSpecializationInfo,
+modules[MESA_SHADER_TESS_EVAL],
+
pStages[MESA_SHADER_TESS_EVAL]->pName,
+
pStages[MESA_SHADER_TESS_EVAL]->pSpecializationInfo);
+  if (result != VK_SUCCESS)
+ goto compile_fail;
}
 
if (modules[MESA_SHADER_GEOMETRY]) {
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v4 08/18] intel/blorp: Update clear color state buffer during fast clears.

2018-03-14 Thread Jordan Justen

On 2018-03-08 08:49:01, Rafael Antognolli wrote:
> We always want to update the fast clear color during a fast clear on
> i965. On anv, we doing that before a resolve, but by adding support to

s/we/we are/ ?

Patches 5 - 9:
Reviewed-by: Jordan Justen 

> blorp, we can do a similar thing and update it during a fast clear
> instead.
> 
> The goal is to remove some code from anv that does such update, and
> centralize everything in blorp, hopefully removing a lot of code
> duplication. It also allows us to have a similar behavior on gen < 9 and
> gen >= 10.
> 
> Signed-off-by: Rafael Antognolli 
> ---
>  src/intel/blorp/blorp_genX_exec.h | 48 
> +++
>  1 file changed, 48 insertions(+)
> 
> diff --git a/src/intel/blorp/blorp_genX_exec.h 
> b/src/intel/blorp/blorp_genX_exec.h
> index c68767a2faa..eef6ed8291a 100644
> --- a/src/intel/blorp/blorp_genX_exec.h
> +++ b/src/intel/blorp/blorp_genX_exec.h
> @@ -1642,6 +1642,51 @@ blorp_emit_gen8_hiz_op(struct blorp_batch *batch,
>  }
>  #endif
>  
> +static void
> +blorp_update_clear_color(struct blorp_batch *batch,
> + const struct brw_blorp_surface_info *info,
> + enum isl_aux_op op)
> +{
> +   if (info->clear_color_addr.buffer && op == ISL_AUX_OP_FAST_CLEAR) {
> +#if GEN_GEN >= 9
> +  for (int i = 0; i < 4; i++) {
> + blorp_emit(batch, GENX(MI_STORE_DATA_IMM), sdi) {
> +sdi.Address = info->clear_color_addr;
> +sdi.Address.offset += i * 4;
> +sdi.ImmediateData = info->clear_color.u32[i];
> + }
> +  }
> +#elif GEN_GEN >= 7
> +  blorp_emit(batch, GENX(MI_STORE_DATA_IMM), sdi) {
> + sdi.Address = info->clear_color_addr;
> + sdi.ImmediateData = ISL_CHANNEL_SELECT_RED   << 25 |
> + ISL_CHANNEL_SELECT_GREEN << 22 |
> + ISL_CHANNEL_SELECT_BLUE  << 19 |
> + ISL_CHANNEL_SELECT_ALPHA << 16;
> + if (isl_format_has_int_channel(info->view.format)) {
> +for (unsigned i = 0; i < 4; i++) {
> +   assert(info->clear_color.u32[i] == 0 ||
> +  info->clear_color.u32[i] == 1);
> +}
> +sdi.ImmediateData |= (info->clear_color.u32[0] != 0) << 31;
> +sdi.ImmediateData |= (info->clear_color.u32[1] != 0) << 30;
> +sdi.ImmediateData |= (info->clear_color.u32[2] != 0) << 29;
> +sdi.ImmediateData |= (info->clear_color.u32[3] != 0) << 28;
> + } else {
> +for (unsigned i = 0; i < 4; i++) {
> +   assert(info->clear_color.f32[i] == 0.0f ||
> +  info->clear_color.f32[i] == 1.0f);
> +}
> +sdi.ImmediateData |= (info->clear_color.f32[0] != 0.0f) << 31;
> +sdi.ImmediateData |= (info->clear_color.f32[1] != 0.0f) << 30;
> +sdi.ImmediateData |= (info->clear_color.f32[2] != 0.0f) << 29;
> +sdi.ImmediateData |= (info->clear_color.f32[3] != 0.0f) << 28;
> + }
> +  }
> +#endif
> +   }
> +}
> +
>  /**
>   * \brief Execute a blit or render pass operation.
>   *
> @@ -1654,6 +1699,9 @@ blorp_emit_gen8_hiz_op(struct blorp_batch *batch,
>  static void
>  blorp_exec(struct blorp_batch *batch, const struct blorp_params *params)
>  {
> +   blorp_update_clear_color(batch, &params->dst, params->fast_clear_op);
> +   blorp_update_clear_color(batch, &params->depth, params->hiz_op);
> +
>  #if GEN_GEN >= 8
> if (params->hiz_op != ISL_AUX_OP_NONE) {
>blorp_emit_gen8_hiz_op(batch, params);
> -- 
> 2.14.3
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH mesa 1/2] Add processor topology calculation implementation for Darwin/OSX targets.

2018-03-14 Thread Matt Turner

On Wed, Mar 14, 2018 at 5:07 PM, Jeremy Sequoia  wrote:
>> Please don't push code to maintained drivers without going through the
>> mailing list. I feel like I shouldn't have to say that.
>
> In the past there hasn’t been much care about code in __APPLE__, so I figured 
> it wasn’t that big of a deal, but if you are interested in reviewing these 
> changes, that’s great to hear.

I'd be interested if it was for my driver, and I assume other
maintainers feel the same way. I'd at least like to check for things
like whether the added code is inside a function. :)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] configure: remove unneeded XCB_REQUIRED

2018-03-14 Thread Andres Gomez

On Thu, 2018-03-15 at 09:37 +1000, Dave Airlie wrote:
> On 15 March 2018 at 09:29, Andres Gomez  wrote:
> > It is only used for dri3 and xcb-dri3 and xcb-present were already
> > mandating the minimal version, which is incoherent with the xcb one.
> 
> We are in the middle of a thread discussing this area already,
> probably don't need
> this patch at this stage. Let's solve the problem first.

Ouch!

I did really miss the thread. Thanks for the heads up. I drop this
patch and will follow the thread.

Thanks!

-- 
Br,

Andres
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] FLAG-DAY: NIR derefs

2018-03-14 Thread Rob Clark

On Wed, Mar 14, 2018 at 7:42 PM, Connor Abbott  wrote:
> On Wed, Mar 14, 2018 at 5:05 PM, Rob Clark  wrote:
>> On Wed, Mar 14, 2018 at 4:58 PM, Connor Abbott  wrote:
>>> FWIW, the way I imagined doing this was something like:
>>>
>>> 1. Add nir_deref_instr and nir_deref_type_pointer. At this point, just
>>> make everything assert if the base deref isn't a nir_deref_var. This
>>> will be a bit of a flag-day, but also very mechanical. It'll also help
>>> us catch cases where we don't handle new-style derefs later.
>>> 2. Add a pass to flatten nir_deref_type_pointer into
>>> nir_deref_type_var if possible (i.e. if there's a clear chain up to
>>> the base variable without any phi nodes or whatever). This should
>>> always be possible for GLSL, as well as SPIR-V unless
>>> KHR_variable_pointers is enabled. We'll use this to avoid too much
>>> churn in drivers, passes that haven't been updated, etc. We might also
>>> want a pass to do the opposite, for converting passes where we don't
>>> want to have codepaths for both forms at once.
>>
>> btw, does it seem reasonable to assert that deref instruction src's
>> are *always* in SSA form?  That seems reasonable to me since they will
>> be mostly lowered away before the driver sees them (and I think makes
>> some of the operation on them easier), and I can't think of any way
>> for them *not* to be SSA (since they aren't real instructions).
>
> I think so... as long as you don't lower locals to regs before
> lowering everything to explicit address arithmetic. Although, with the
> physical memory model, it's just another source like any other so I'm
> not sure if there's a point.
>

I think w/ phys memory model, we could lower away the deref's before
going to regs.  That *seems* like a reasonable requirement to me.

>>
>> If so, my rough thoughts are a deref instruction chain (formed by ssa
>> links to previous deref instruction) either start w/
>> nir_deref_instr_pointer or nir_deref_instruction_var instructions at
>> the head of the list (to start, I guess you could ignore adding the
>> nir_deref_instr_pointer instruction and I could add that for
>> clover/spirv work).  Followed by N links of struct/array deref_link
>> instructions that have two ssa src's (one that is previous deref
>> instruction and one that is array or struct member offset)
>
> Why would you need a separate nir_deref_instr_pointer? Do you want to
> put information like what type of pointer it is in there? Maybe we
> could just make that part of every nir_deref_instr instead?

well, in clc you could hypothetically do something like:

  __global struct Foo *f = (struct Foo *)0x1234;

so you don't necessarily have a var at the start of your deref chain.

More realistic example is:

  ptr->a.b->c.d

which is really two deref chains, first starting at a var, second
starting at an ssa ptr (which I think realistically ends up needing to
be a fat pointer to deal w/ cl's multiple address spaces[1]), with an
intermediate load_global or load_shared intrinsic in between.

Anyways, don't want to derail the conversion to deref instructions too
much, but I do think we need something different for "var" vs "ptr"
(and the nice thing about deref chains is this should be easier to
add)

BR,
-R

[1] kinda a different topic.. short version is I'm leaning towards a
nir_deref_instr_pointer taking a two component vector as its src so
it can be lowered to an if/else chain to deal with different address
spaces, and then let opt passes clean things up so driver ends up with
either load/store_global or load/store_local, etc


>
>>
>>> 3. Modify nir_lower_io to handle new-style derefs, especially for
>>> shared variables (i.e. KHR_variable_pointers for anv). We might have
>>> to modify a few other passes, too.
>>> 4. Add the required deref lowering passes to all drivers.
>>> 5. Rewrite glsl_to_nir and spirv_to_nir to emit the new-style derefs.
>>> At the very least, we should be using this to implement the shared
>>> variable bits of KHR_variable_pointers. If we add stride/offset
>>> annotations to nir_deref_instr for UBO's and SSBO's, then we might
>>> also be able to get rid of the vtn_deref stuff entirely (although I'm
>>> not sure if that should be a goal right now).
>>
>> I think I might try to prototype something where we convert vtn over
>> to new-style deref instructions, plus a pass to lower to old style
>> deref chains.  It partly comes down to how quickly I can finish a
>> couple other things, and how much I can't sleep on a long-ass flight.
>> (I guess even if throw-away, if it gives some idea of what to do or
>> what not to do it might be useful?)
>>
>> Anyways, as far as decoupling this from backend drivers, I think a
>> nir_intr_get_var(intr, n) instruction to replace open coded
>> intr->variables[0]->var could go a long way.  (In the new world this
>> would follow ssa links to previous deref instruction to find the
>> nir_deref_instruction_var.)  I'll try typing this up in a few minutes.
>>
>>> At this point

Re: [Mesa-dev] FLAG-DAY: NIR derefs

2018-03-14 Thread Connor Abbott

On Wed, Mar 14, 2018 at 5:05 PM, Rob Clark  wrote:
> On Wed, Mar 14, 2018 at 4:58 PM, Connor Abbott  wrote:
>> FWIW, the way I imagined doing this was something like:
>>
>> 1. Add nir_deref_instr and nir_deref_type_pointer. At this point, just
>> make everything assert if the base deref isn't a nir_deref_var. This
>> will be a bit of a flag-day, but also very mechanical. It'll also help
>> us catch cases where we don't handle new-style derefs later.
>> 2. Add a pass to flatten nir_deref_type_pointer into
>> nir_deref_type_var if possible (i.e. if there's a clear chain up to
>> the base variable without any phi nodes or whatever). This should
>> always be possible for GLSL, as well as SPIR-V unless
>> KHR_variable_pointers is enabled. We'll use this to avoid too much
>> churn in drivers, passes that haven't been updated, etc. We might also
>> want a pass to do the opposite, for converting passes where we don't
>> want to have codepaths for both forms at once.
>
> btw, does it seem reasonable to assert that deref instruction src's
> are *always* in SSA form?  That seems reasonable to me since they will
> be mostly lowered away before the driver sees them (and I think makes
> some of the operation on them easier), and I can't think of any way
> for them *not* to be SSA (since they aren't real instructions).

I think so... as long as you don't lower locals to regs before
lowering everything to explicit address arithmetic. Although, with the
physical memory model, it's just another source like any other so I'm
not sure if there's a point.

>
> If so, my rough thoughts are a deref instruction chain (formed by ssa
> links to previous deref instruction) either start w/
> nir_deref_instr_pointer or nir_deref_instruction_var instructions at
> the head of the list (to start, I guess you could ignore adding the
> nir_deref_instr_pointer instruction and I could add that for
> clover/spirv work).  Followed by N links of struct/array deref_link
> instructions that have two ssa src's (one that is previous deref
> instruction and one that is array or struct member offset)

Why would you need a separate nir_deref_instr_pointer? Do you want to
put information like what type of pointer it is in there? Maybe we
could just make that part of every nir_deref_instr instead?

>
>> 3. Modify nir_lower_io to handle new-style derefs, especially for
>> shared variables (i.e. KHR_variable_pointers for anv). We might have
>> to modify a few other passes, too.
>> 4. Add the required deref lowering passes to all drivers.
>> 5. Rewrite glsl_to_nir and spirv_to_nir to emit the new-style derefs.
>> At the very least, we should be using this to implement the shared
>> variable bits of KHR_variable_pointers. If we add stride/offset
>> annotations to nir_deref_instr for UBO's and SSBO's, then we might
>> also be able to get rid of the vtn_deref stuff entirely (although I'm
>> not sure if that should be a goal right now).
>
> I think I might try to prototype something where we convert vtn over
> to new-style deref instructions, plus a pass to lower to old style
> deref chains.  It partly comes down to how quickly I can finish a
> couple other things, and how much I can't sleep on a long-ass flight.
> (I guess even if throw-away, if it gives some idea of what to do or
> what not to do it might be useful?)
>
> Anyways, as far as decoupling this from backend drivers, I think a
> nir_intr_get_var(intr, n) instruction to replace open coded
> intr->variables[0]->var could go a long way.  (In the new world this
> would follow ssa links to previous deref instruction to find the
> nir_deref_instruction_var.)  I'll try typing this up in a few minutes.
>
>> At this point, we can fix things up and move everything else over to
>> new-style derefs at our leisure. Also, it should now be pretty
>> straightforward to add support for shared variable pointers to radv
>> without lowering everything to offsets up-front, which is nice.
>>
>> Connor
>>
>>
>> On Wed, Mar 14, 2018 at 2:32 PM, Jason Ekstrand  wrote:
>>> All,
>>>
>>> Connor and I along with several others have been discussing for a while
>>> changing the way NIR dereferences work.  In particular, adding a new
>>> nir_deref_instr type where the first one in the chain takes a variable and
>>> is followed by a series of instructions which take another deref instruction
>>> and do an array or structure dereference on it.
>>>
>>> Much of the motivation for this is some of the upcoming SPIR-V stuff where
>>> we have more real pointers and deref chains don't really work anymore.  It
>>> will also allow for things such as CSE of common derefs which could make
>>> analysis easier.  This is similar to what LLVM does and it's working very
>>> well for them.
>>>
>>> The reason for this e-mail is that this is going to be a flag-day change.
>>> We've been talking about it for a while but this is going to be a major and
>>> fairly painful change in the short term so no one has actually done it.
>>> It's time we finally j

Re: [Mesa-dev] [PATCH mesa 1/2] Add processor topology calculation implementation for Darwin/OSX targets.

2018-03-14 Thread Matt Turner

Subject should have a swr prefix or similar.

On Wed, Mar 14, 2018 at 4:19 PM, Jeremy Huddleston Sequoia
 wrote:
> From: Apple SWE 

Explain?

>
> The implementation for bootstrapping SWR on Darwin targets is based on the 
> Linux version.
> Instead of reading the output of /proc/cpuinfo, sysctlbyname is used to 
> determine the
> physical identifiers, processor identifiers, core counts and thread-processor 
> affinities.
>
> With this patch, it is possible to use SWR as an alternate renderer on OSX to 
> softpipe and
> llvmpipe.

All of these look like they're too long to fit in 80 columns in git show.

> Reviewed-by: Jeremy Huddleston Sequoia 

I'm guessing you're just pushing code someone else wrote...

> Signed-off-by: Jeremy Huddleston Sequoia 

We don't  have a DCO, so Signed-off-by has no meaning. Worse, in the
case we added a DCO in the future, all of the stupid Signed-off-by's
people have been cargo culting for years would confuse everything.
Please stop doing it.

Please don't push code to maintained drivers without going through the
mailing list. I feel like I shouldn't have to say that.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] configure: remove unneeded XCB_REQUIRED

2018-03-14 Thread Dave Airlie

On 15 March 2018 at 09:29, Andres Gomez  wrote:
> It is only used for dri3 and xcb-dri3 and xcb-present were already
> mandating the minimal version, which is incoherent with the xcb one.

We are in the middle of a thread discussing this area already,
probably don't need
this patch at this stage. Let's solve the problem first.

Dave.
>
> This also makes configure.ac more homogeneous with other modules, like
> dri2 or glx, which also need xcb but get the minimal version from
> xcb-dri2 or xcb-glx, respectively.
>
> Cc: Emil Velikov 
> Cc: Eric Engestrom 
> Signed-off-by: Andres Gomez 
> ---
>  configure.ac | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/configure.ac b/configure.ac
> index 621dc328d90..4392a427699 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -90,7 +90,6 @@ LIBVA_REQUIRED=0.38.0
>  VDPAU_REQUIRED=1.1
>  WAYLAND_REQUIRED=1.11
>  WAYLAND_PROTOCOLS_REQUIRED=1.8
> -XCB_REQUIRED=1.9.3
>  XCBDRI2_REQUIRED=1.8
>  XCBDRI3_REQUIRED=1.13
>  XCBGLX_REQUIRED=1.8.1
> @@ -1850,7 +1849,7 @@ fi
>  if test x"$enable_dri3" = xyes; then
>  DEFINES="$DEFINES -DHAVE_DRI3"
>
> -dri3_modules="x11-xcb xcb >= $XCB_REQUIRED xcb-dri3 >= $XCBDRI3_REQUIRED 
> xcb-xfixes xcb-present >= $XCBPRESENT_REQUIRED xcb-sync xshmfence >= 
> $XSHMFENCE_REQUIRED"
> +dri3_modules="x11-xcb xcb xcb-dri3 >= $XCBDRI3_REQUIRED xcb-xfixes 
> xcb-present >= $XCBPRESENT_REQUIRED xcb-sync xshmfence >= $XSHMFENCE_REQUIRED"
>  PKG_CHECK_MODULES([XCB_DRI3], [$dri3_modules])
>  fi
>
> --
> 2.16.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH mesa 2/2] sched.h needs to be imported on Darwin/OSX targets.

2018-03-14 Thread Jeremy Huddleston Sequoia

From: Apple SWE 

sched_yield is used but the include reference on Darwin is missing. This patch
conditionally guards on Darwin/OSX to import sched.h first.

Reviewed-by: Jeremy Huddleston Sequoia 
Signed-off-by: Jeremy Huddleston Sequoia 
---
 src/gallium/drivers/swr/swr_fence.cpp | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/gallium/drivers/swr/swr_fence.cpp 
b/src/gallium/drivers/swr/swr_fence.cpp
index 3005eb9aaa..b05ac8cec0 100644
--- a/src/gallium/drivers/swr/swr_fence.cpp
+++ b/src/gallium/drivers/swr/swr_fence.cpp
@@ -29,6 +29,10 @@
 #include "swr_screen.h"
 #include "swr_fence.h"
 
+#ifdef __APPLE__
+#include <sched.h>
+#endif
+
 #if defined(PIPE_CC_MSVC) // portable thread yield
#define sched_yield SwitchToThread
 #endif
-- 
2.16.1 (Apple Git-102)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH mesa 1/2] Add processor topology calculation implementation for Darwin/OSX targets.

2018-03-14 Thread Jeremy Huddleston Sequoia

From: Apple SWE 

The implementation for bootstrapping SWR on Darwin targets is based on the 
Linux version.
Instead of reading the output of /proc/cpuinfo, sysctlbyname is used to 
determine the
physical identifiers, processor identifiers, core counts and thread-processor 
affinities.

With this patch, it is possible to use SWR as an alternate renderer on OSX to 
softpipe and
llvmpipe.

Reviewed-by: Jeremy Huddleston Sequoia 
Signed-off-by: Jeremy Huddleston Sequoia 
---
 .../drivers/swr/rasterizer/core/threads.cpp| 56 +-
 1 file changed, 55 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp 
b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index 4d79168d2d..3eb20abcbf 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -36,6 +36,11 @@
 #include 
 #endif
 
+#ifdef __APPLE__
+#include 
+#include 
+#endif
+
 #include "common/os.h"
 #include "context.h"
 #include "frontend.h"
@@ -219,6 +224,56 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, 
uint32_t& out_numThread
 
 #elif defined(__APPLE__)
 
+auto numProcessors = 0;
+auto numCores = 0;
+auto numPhysicalIds = 0;
+
+int value;
+size_t size = sizeof(value);
+
+int result = sysctlbyname("hw.packages", &value, &size, NULL, 0);
+SWR_ASSERT(result == 0);
+numPhysicalIds = value;
+
+result = sysctlbyname("hw.logicalcpu", &value, &size, NULL, 0);
+SWR_ASSERT(result == 0);
+numProcessors = value;
+
+result = sysctlbyname("hw.physicalcpu", &value, &size, NULL, 0);
+SWR_ASSERT(result == 0);
+numCores = value;
+
+out_nodes.resize(numPhysicalIds);
+
+for (auto physId = 0; physId < numPhysicalIds; ++physId)
+{
+auto &numaNode = out_nodes[physId];
+auto procId = 0;
+
+numaNode.cores.resize(numCores);
+
+while (procId < numProcessors)
+{
+for (auto coreId = 0; coreId < numaNode.cores.size(); ++coreId, 
++procId)
+{
+auto &core = numaNode.cores[coreId];
+
+core.procGroup = coreId;
+core.threadIds.push_back(procId);
+}
+}
+}
+
+out_numThreadsPerProcGroup = 0;
+
+for (auto &node : out_nodes)
+{
+for (auto &core : node.cores)
+{
+out_numThreadsPerProcGroup += core.threadIds.size();
+}
+}
+
 #else
 
 #error Unsupported platform
@@ -253,7 +308,6 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, 
uint32_t& out_numThread
 }
 }
 
-
 void bindThread(SWR_CONTEXT* pContext, uint32_t threadId, uint32_t procGroupId 
= 0, bool bindProcGroup=false)
 {
 // Only bind threads when MAX_WORKER_THREADS isn't set.
-- 
2.16.1 (Apple Git-102)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] configure: remove unneeded XCB_REQUIRED

2018-03-14 Thread Andres Gomez

It is only used for dri3 and xcb-dri3 and xcb-present were already
mandating the minimal version, which is incoherent with the xcb one.

This also makes configure.ac more homogeneous with other modules, like
dri2 or glx, which also need xcb but get the minimal version from
xcb-dri2 or xcb-glx, respectively.

Cc: Emil Velikov 
Cc: Eric Engestrom 
Signed-off-by: Andres Gomez 
---
 configure.ac | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/configure.ac b/configure.ac
index 621dc328d90..4392a427699 100644
--- a/configure.ac
+++ b/configure.ac
@@ -90,7 +90,6 @@ LIBVA_REQUIRED=0.38.0
 VDPAU_REQUIRED=1.1
 WAYLAND_REQUIRED=1.11
 WAYLAND_PROTOCOLS_REQUIRED=1.8
-XCB_REQUIRED=1.9.3
 XCBDRI2_REQUIRED=1.8
 XCBDRI3_REQUIRED=1.13
 XCBGLX_REQUIRED=1.8.1
@@ -1850,7 +1849,7 @@ fi
 if test x"$enable_dri3" = xyes; then
 DEFINES="$DEFINES -DHAVE_DRI3"
 
-dri3_modules="x11-xcb xcb >= $XCB_REQUIRED xcb-dri3 >= $XCBDRI3_REQUIRED 
xcb-xfixes xcb-present >= $XCBPRESENT_REQUIRED xcb-sync xshmfence >= 
$XSHMFENCE_REQUIRED"
+dri3_modules="x11-xcb xcb xcb-dri3 >= $XCBDRI3_REQUIRED xcb-xfixes 
xcb-present >= $XCBPRESENT_REQUIRED xcb-sync xshmfence >= $XSHMFENCE_REQUIRED"
 PKG_CHECK_MODULES([XCB_DRI3], [$dri3_modules])
 fi
 
-- 
2.16.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] anv/pipeline: don't pass constant view index in multiview

2018-03-14 Thread Caio Marcelo de Oliveira Filho

On Tue, Feb 27, 2018 at 12:13:52PM -0800, Jason Ekstrand wrote:
> > diff --git a/src/intel/vulkan/anv_nir_lower_multiview.c
> > b/src/intel/vulkan/anv_nir_lower_multiview.c
> > index d2aefdee62..365a70d757 100644
> > --- a/src/intel/vulkan/anv_nir_lower_multiview.c
> > +++ b/src/intel/vulkan/anv_nir_lower_multiview.c
> > @@ -72,7 +72,8 @@ build_view_index(struct lower_multiview_state *state)
> >b->cursor = nir_before_block(nir_start_block(b->impl));
> >
> >assert(state->view_mask != 0);
> > -  if (0 && _mesa_bitcount(state->view_mask) == 1) {
> > +  if (_mesa_bitcount(state->view_mask) == 1) {
> >
> 
> Yes, I think it's probably safe to turn this on now.  Originally, I had it
> commented out because I was afraid of not getting enough test coverage.
> For all I know, we still aren't getting enough test coverage but I think
> we've proven by now that the calculations below work.

There are now CTS tests covering multiview, including cases with a
single view set in the view_mask.


> > +  /* Unless there is only one possible view index (that would be set
> > +   * directly), pass it to the next stage. */
> >
> 
> With multi-line comments, we usually put the "*/" on its own line.

Just sent v2 fixing this.


Thanks,
Caio
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 105442] Hang when running nine ff lighting shader with radeonsi

2018-03-14 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=105442

Axel Davy  changed:

   What|Removed |Added

Summary|Hang when running nine ff   |Hang when running nine ff
   |lighting shader |lighting shader with
   ||radeonsi

--- Comment #1 from Axel Davy  ---
I tested with a slightly older version of llvm I had locally that was based on
llvm git from june, and the same behaviour occurs. It could be a radeonsi bug
introduced in the llvm asm generation or an llvm bug uncovered by a change in
that asm.

I filed a bug on the llvm side as it's likely involved:
https://bugs.llvm.org/show_bug.cgi?id=36704

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 1/2] anv/pipeline: use less instructions for multiview

2018-03-14 Thread Caio Marcelo de Oliveira Filho

The view_index is encoded in the remainder of dividing instance id by
the number of views in the view mask (n). In the general case (handled
by the else clause), there is a need to map from 0..n-1 into the
number of the view being masked. For that a map is encoded.

In the case only the first n bits in the mask are set, the mapping is
trivial, 0..n-1 already represent what view is being referred to.

That case was in the original patch that added
anv_nir_lower_multiview.c but disabled.
---
 src/intel/vulkan/anv_nir_lower_multiview.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/vulkan/anv_nir_lower_multiview.c 
b/src/intel/vulkan/anv_nir_lower_multiview.c
index 88e6f9af87..d2aefdee62 100644
--- a/src/intel/vulkan/anv_nir_lower_multiview.c
+++ b/src/intel/vulkan/anv_nir_lower_multiview.c
@@ -86,7 +86,7 @@ build_view_index(struct lower_multiview_state *state)
 nir_umod(b, nir_load_instance_id(b),
 nir_imm_int(b, _mesa_bitcount(state->view_mask)));
 
- if (0 && util_is_power_of_two(state->view_mask + 1)) {
+ if (util_is_power_of_two(state->view_mask + 1)) {
 /* If we have a full view mask, then compacted is what we want */
 state->view_index = compacted;
  } else {
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 2/2] anv/pipeline: don't pass constant view index in multiview

2018-03-14 Thread Caio Marcelo de Oliveira Filho

If view mask has only one bit set, view index is effectively a
constant, so doesn't need to be passed to the next stages, just always
set it.

Part of this was in the original patch that added
anv_nir_lower_multiview.c but disabled.

v2: Fixed comment style.

---
 src/intel/vulkan/anv_nir_lower_multiview.c | 18 --
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/src/intel/vulkan/anv_nir_lower_multiview.c 
b/src/intel/vulkan/anv_nir_lower_multiview.c
index d2aefdee62..a81210adbc 100644
--- a/src/intel/vulkan/anv_nir_lower_multiview.c
+++ b/src/intel/vulkan/anv_nir_lower_multiview.c
@@ -72,7 +72,8 @@ build_view_index(struct lower_multiview_state *state)
   b->cursor = nir_before_block(nir_start_block(b->impl));
 
   assert(state->view_mask != 0);
-  if (0 && _mesa_bitcount(state->view_mask) == 1) {
+  if (_mesa_bitcount(state->view_mask) == 1) {
+ /* Set the view index directly. */
  state->view_index = nir_imm_int(b, ffs(state->view_mask) - 1);
   } else if (state->builder.shader->info.stage == MESA_SHADER_VERTEX) {
  /* We only support 16 viewports */
@@ -210,11 +211,16 @@ anv_nir_lower_multiview(nir_shader *shader, uint32_t 
view_mask)
   assert(view_index->parent_instr->block == nir_start_block(entrypoint));
   b->cursor = nir_after_instr(view_index->parent_instr);
 
-  nir_variable *view_index_out =
- nir_variable_create(shader, nir_var_shader_out,
- glsl_int_type(), "view index");
-  view_index_out->data.location = VARYING_SLOT_VIEW_INDEX;
-  nir_store_var(b, view_index_out, view_index, 0x1);
+  /* Unless there is only one possible view index (that would be set
+   * directly), pass it to the next stage.
+   */
+  if (_mesa_bitcount(state.view_mask) != 1) {
+ nir_variable *view_index_out =
+nir_variable_create(shader, nir_var_shader_out,
+glsl_int_type(), "view index");
+ view_index_out->data.location = VARYING_SLOT_VIEW_INDEX;
+ nir_store_var(b, view_index_out, view_index, 0x1);
+  }
 
   nir_variable *layer_id_out =
  nir_variable_create(shader, nir_var_shader_out,
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 105442] Hang when running nine ff lighting shader

2018-03-14 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=105442

Axel Davy  changed:

   What|Removed |Added

 QA Contact|dri-devel@lists.freedesktop |mesa-dev@lists.freedesktop.
   |.org|org
   Assignee|dri-devel@lists.freedesktop |mesa-dev@lists.freedesktop.
   |.org|org

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH shaderdb 2/3] run: new '--pci-id' option for overriding pci-id

2018-03-14 Thread Dongwon Kim

I see. Because user will be putting PCI_ID instead if a specific device
variant is required. I shouldn't have been confused in the first
place :-).

Thanks,

On Wed, Mar 14, 2018 at 04:03:05PM -0700, Kenneth Graunke wrote:
> On Wednesday, March 14, 2018 3:43:18 PM PDT Dongwon Kim wrote:
> > Yeah, thought about that (checking name then -> try to parse it as PCI-ID)
> > but didn't implement it because it won't work when there are multiple
> > different PCI-ID bound to same 'name' (e.g. want to use a specific PCI-ID
> > hsw). But wait a minute, I think the opposite way (check if it's PCI-ID
> > first) should cover that case
> > 
> > I will upload v2 with this change shortly.
> 
> It should work either way... 'hsw' would pick some arbitrary Haswell
> PCI ID (if you don't care which one), and 0xD26 would pick a specific
> Haswell PCI ID.
> 
> --Ken


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] FLAG-DAY: NIR derefs

2018-03-14 Thread Rob Clark

On Wed, Mar 14, 2018 at 4:58 PM, Connor Abbott  wrote:
> FWIW, the way I imagined doing this was something like:
>
> 1. Add nir_deref_instr and nir_deref_type_pointer. At this point, just
> make everything assert if the base deref isn't a nir_deref_var. This
> will be a bit of a flag-day, but also very mechanical. It'll also help
> us catch cases where we don't handle new-style derefs later.
> 2. Add a pass to flatten nir_deref_type_pointer into
> nir_deref_type_var if possible (i.e. if there's a clear chain up to
> the base variable without any phi nodes or whatever). This should
> always be possible for GLSL, as well as SPIR-V unless
> KHR_variable_pointers is enabled. We'll use this to avoid too much
> churn in drivers, passes that haven't been updated, etc. We might also
> want a pass to do the opposite, for converting passes where we don't
> want to have codepaths for both forms at once.

btw, does it seem reasonable to assert that deref instruction src's
are *always* in SSA form?  That seems reasonable to me since they will
be mostly lowered away before the driver sees them (and I think makes
some of the operation on them easier), and I can't think of any way
for them *not* to be SSA (since they aren't real instructions).

If so, my rough thoughts are a deref instruction chain (formed by ssa
links to previous deref instruction) either start w/
nir_deref_instr_pointer or nir_deref_instruction_var instructions at
the head of the list (to start, I guess you could ignore adding the
nir_deref_instr_pointer instruction and I could add that for
clover/spirv work).  Followed by N links of struct/array deref_link
instructions that have two ssa src's (one that is previous deref
instruction and one that is array or struct member offset)

> 3. Modify nir_lower_io to handle new-style derefs, especially for
> shared variables (i.e. KHR_variable_pointers for anv). We might have
> to modify a few other passes, too.
> 4. Add the required deref lowering passes to all drivers.
> 5. Rewrite glsl_to_nir and spirv_to_nir to emit the new-style derefs.
> At the very least, we should be using this to implement the shared
> variable bits of KHR_variable_pointers. If we add stride/offset
> annotations to nir_deref_instr for UBO's and SSBO's, then we might
> also be able to get rid of the vtn_deref stuff entirely (although I'm
> not sure if that should be a goal right now).

I think I might try to prototype something where we convert vtn over
to new-style deref instructions, plus a pass to lower to old style
deref chains.  It partly comes down to how quickly I can finish a
couple other things, and how much I can't sleep on a long-ass flight.
(I guess even if throw-away, if it gives some idea of what to do or
what not to do it might be useful?)

Anyways, as far as decoupling this from backend drivers, I think a
nir_intr_get_var(intr, n) instruction to replace open coded
intr->variables[0]->var could go a long way.  (In the new world this
would follow ssa links to previous deref instruction to find the
nir_deref_instruction_var.)  I'll try typing this up in a few minutes.

> At this point, we can fix things up and move everything else over to
> new-style derefs at our leisure. Also, it should now be pretty
> straightforward to add support for shared variable pointers to radv
> without lowering everything to offsets up-front, which is nice.
>
> Connor
>
>
> On Wed, Mar 14, 2018 at 2:32 PM, Jason Ekstrand  wrote:
>> All,
>>
>> Connor and I along with several others have been discussing for a while
>> changing the way NIR dereferences work.  In particular, adding a new
>> nir_deref_instr type where the first one in the chain takes a variable and
>> is followed by a series of instructions which take another deref instruction
>> and do an array or structure dereference on it.
>>
>> Much of the motivation for this is some of the upcoming SPIR-V stuff where
>> we have more real pointers and deref chains don't really work anymore.  It
>> will also allow for things such as CSE of common derefs which could make
>> analysis easier.  This is similar to what LLVM does and it's working very
>> well for them.
>>
>> The reason for this e-mail is that this is going to be a flag-day change.
>> We've been talking about it for a while but this is going to be a major and
>> fairly painful change in the short term so no one has actually done it.
>> It's time we finally just suck it up and make it happen.  While we will try
>> to make the change as incrementally and reviewably as possible but there is
>> a real limit as to what is possible here.  My plan is to start cracking away
>> at this on Monday and hopefully have something working for i965/anv by the
>> end of the week or maybe some time the week after.  If anyone has something
>> to say in opposition, please speak up now and not after I've spent a week
>> straight frantically hacking on NIR.
>>
>> I would like everyone to be respectful of the fact that this will be a major
>> change and very pai

Re: [Mesa-dev] [PATCH shaderdb 2/3] run: new '--pci-id' option for overriding pci-id

2018-03-14 Thread Kenneth Graunke

On Wednesday, March 14, 2018 3:43:18 PM PDT Dongwon Kim wrote:
> Yeah, thought about that (checking name then -> try to parse it as PCI-ID)
> but didn't implement it because it won't work when there are multiple
> different PCI-ID bound to same 'name' (e.g. want to use a specific PCI-ID
> hsw). But wait a minute, I think the opposite way (check if it's PCI-ID
> first) should cover that case
> 
> I will upload v2 with this change shortly.

It should work either way... 'hsw' would pick some arbitrary Haswell
PCI ID (if you don't care which one), and 0xD26 would pick a specific
Haswell PCI ID.

--Ken

signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH shaderdb 3/3] run: shader program file created via GetProgramBinary (v3)

2018-03-14 Thread Dongwon Kim

Thanks for the review, Ken
I agree on most of your proposals.
I will upload another version shortly.

On Wed, Mar 14, 2018 at 03:10:25PM -0700, Kenneth Graunke wrote:
> On Monday, February 26, 2018 2:17:05 PM PDT Dongwon Kim wrote:
> > extraction of linked binary program to a file using glGetProgramBinary.
> > This file is intended to be loaded by glProgramBinary in the graphic
> > application running on the target system.
> > 
> > To enable this feature, a new option '--bin' has to be passed to the
> > program execution.
> > 
> > v2: 1. define MAX_LOG_LEN and use it as the size of gl log
> > 2. define MAX_PROG_SIZE and use it as the max size of extracted
> >shader_program
> > 3. out_file is now pointer allocated by strdup for the file name
> > 
> > v3: 1. automatically using original shader test file's name +  ".bin"
> >as a filename for program binary - better way to cover the case
> >with batch compilation of many shader test files in the same
> >directory
> > 2. remove --out= since it is now unnecessary (due to v3-1.)
> >to provide custom file name. Instead, option, "--bin", which is
> >basically a flag that enables getting program binary as a file.
> > 3. Now it tries to get the length of binary by reading program's
> >GL_PROGRAM_BINARY_LENGTH_OES parameter
> > 
> > Signed-off-by: Dongwon Kim 
> > ---
> >  run.c | 68 
> > +++
> >  1 file changed, 64 insertions(+), 4 deletions(-)
> > 
> > diff --git a/run.c b/run.c
> > index d066567..bbab5d9 100644
> > --- a/run.c
> > +++ b/run.c
> > @@ -52,6 +52,9 @@
> >  
> >  #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
> >  
> > +#define MAX_LOG_LEN 4096
> > +#define MAX_PROG_SIZE (10*1024*1024) /* maximum 10MB for shader program */
> > +
> >  struct context_info {
> >  char *extension_string;
> >  int extension_string_len;
> > @@ -358,18 +361,20 @@ const struct platform platforms[] = {
> >  enum
> >  {
> >  PCI_ID_OVERRIDE_OPTION = CHAR_MAX + 1,
> > +LOADABLE_PROGRAM_BINARY_OPTION,
> >  };
> >  
> >  const struct option const long_options[] =
> >  {
> >  {"pciid", required_argument, NULL, PCI_ID_OVERRIDE_OPTION},
> > +{"bin", no_argument, NULL, LOADABLE_PROGRAM_BINARY_OPTION},
> 
> This sounds like we're loading binaries.  Can we call it
> GENERATE_PROGRAM_BINARY_OPTION instead?

Yeah, I will change this.

> 
> >  {NULL, 0, NULL, 0}
> >  };
> >  
> >  void print_usage(const char *prog_name)
> >  {
> >  fprintf(stderr,
> > -"Usage: %s [-d ] [-j ] [-o ] [-p 
> > ] [--pciid=]  > *.shader_test files>\n",
> > +"Usage: %s [-d ] [-j ] [-o ] [-p 
> > ] [--pciid=]  > *.shader_test files>\n",
> >  prog_name);
> >  }
> >  
> > @@ -450,6 +455,7 @@ main(int argc, char **argv)
> >  int opt;
> >  bool platf_overridden = 0;
> >  bool pci_id_overridden = 0;
> > +bool enable_prog_bin = 0;
> 
> Maybe generate_prog_bin here as well.

sure.

> 
> >  
> >  max_threads = omp_get_max_threads();
> >  
> > @@ -518,6 +524,9 @@ main(int argc, char **argv)
> >  setenv("INTEL_DEVID_OVERRIDE", optarg, 1);
> >  pci_id_overridden = 1;
> >  break;
> > +case LOADABLE_PROGRAM_BINARY_OPTION:
> > +enable_prog_bin = 1;
> > +break;
> >  default:
> >  fprintf(stderr, "Unknown option: %x\n", opt);
> >  print_usage(argv[0]);
> > @@ -858,18 +867,18 @@ main(int argc, char **argv)
> >  }
> >  } else if (type == TYPE_CORE || type == TYPE_COMPAT || type == 
> > TYPE_ES) {
> >  GLuint prog = glCreateProgram();
> > +GLint param;
> 
> So...putting this here means that you're not going to support generating
> program binaries for SSO-based programs.  That seems a bit unfortunate...
> 

I can consider this later.

> >  
> >  for (unsigned i = 0; i < num_shaders; i++) {
> >  GLuint s = glCreateShader(shader[i].type);
> >  glShaderSource(s, 1, &shader[i].text, 
> > &shader[i].length);
> >  glCompileShader(s);
> >  
> > -GLint param;
> >  glGetShaderiv(s, GL_COMPILE_STATUS, &param);
> >  if (unlikely(!param)) {
> > -GLchar log[4096];
> > +GLchar log[MAX_LOG_LEN];
> >  GLsizei length;
> > -glGetShaderInfoLog(s, 4096, &length, log);
> > +glGetShaderInfoLog(s, sizeof(log), &length, log);
> 
> It would be nice to make a helper function for getting the info log and
> printing an error, since you've now got it twice.  Should probably be a
> separate patch (and include the MAX_LOG_LEN change).
> 

I will work on it (another patch.)

> >  
> >  fprintf(stderr, "ERROR: %s fa

Re: [Mesa-dev] [PATCH shaderdb 2/3] run: new '--pci-id' option for overriding pci-id

2018-03-14 Thread Dongwon Kim

Yeah, I am using "intel_run" script that lets "run" use intel_stub
layer instead. Pretty useful..

On Wed, Mar 14, 2018 at 02:54:08PM -0700, Kenneth Graunke wrote:
> On Monday, February 12, 2018 5:26:15 PM PDT Dongwon Kim wrote:
> > Add a new option, '--pciid' to override a pci id of the target arch
> > to support cross-architecture shader compilation. Not like "-p" option,
> > it is for accepting any GFX devices supported by the driver.
> > 
> > Setting both "-p" and "--pciid" is blocked to avoid conflict.
> > 
> > Signed-off-by: Dongwon Kim 
> > ---
> >  run.c | 44 ++--
> >  1 file changed, 42 insertions(+), 2 deletions(-)
> 
> Oh, another thing I forgot to mention - you might find intel_run and
> intel_stub.c to be useful.  They allow you to emulate any Intel GPU
> for purposes of running shader-db, without needing to have any graphics
> hardware present in your system.  This can be useful when building on
> Xeon build servers or the like...


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH shaderdb 2/3] run: new '--pci-id' option for overriding pci-id

2018-03-14 Thread Dongwon Kim

Yeah, thought about that (checking name then -> try to parse it as PCI-ID)
but didn't implement it because it won't work when there are multiple
different PCI-ID bound to same 'name' (e.g. want to use a specific PCI-ID
hsw). But wait a minute, I think the opposite way (check if it's PCI-ID
first) should cover that case

I will upload v2 with this change shortly.

On Wed, Mar 14, 2018 at 02:52:36PM -0700, Kenneth Graunke wrote:
> On Monday, February 12, 2018 5:26:15 PM PDT Dongwon Kim wrote:
> > Add a new option, '--pciid' to override a pci id of the target arch
> > to support cross-architecture shader compilation. Not like "-p" option,
> > it is for accepting any GFX devices supported by the driver.
> > 
> > Setting both "-p" and "--pciid" is blocked to avoid conflict.
> > 
> > Signed-off-by: Dongwon Kim 
> > ---
> >  run.c | 44 ++--
> >  1 file changed, 42 insertions(+), 2 deletions(-)
> 
> Hi Dongwon,
> 
> It looks like this does the exact same thing as -p, but it accepts
> arbitrary numerical PCI IDs instead of a platform name.  IMHO, I think
> we should just make -p try parsing it as a number if it doesn't match
> any of the names.  Then either -p bdw or -p 0x1616 would work.
> 
> It seems much less complicated that way.
> 
> --Ken


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] Releasing 18.0

2018-03-14 Thread Kenneth Graunke

Hi Emil, all,

I think we've closed the last of the Mesa 18.0 blocker bugs.  It looks
like there are some patches nominated for the 18.0 branch still (fixing
some of those issues), but assuming things are merged, I think we're
ready to release.

I checked with Mark and Jason on IRC and they seemed fine with shipping.

Thanks!
--Ken


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH shader-db 4/4] run: handling binding of attribute variable name

2018-03-14 Thread Kenneth Graunke

On Friday, March 9, 2018 2:28:36 PM PDT Dongwon Kim wrote:
> Optional binding of variables can be processed before linking shader
> objects for creating shader program. It is activated by adding lines
> with a keyword "BindAttribLoc" followed by name and index as,
> 
> "BindAttribLoc name_str1 "
> 
> For example,
> 
> [require]
> ..
> BindAttribLoc vertex 1
> BindAttribLoc coord 2
> BindAttribLoc col 3
> 
> This makes the shader-db run
> 
> glBindAttribLocation(p, 1, "vertex");
> glBindAttribLocation(p, 2, "coord");
> glBindAttribLocation(p, 3, "col");
> 
> before glLinkProgram() to include these binding info in binary shader
> program.
> 
> Signed-off-by: Dongwon Kim 

Matt, do you have an opinion on this?  This seems like the sort of
commands that would normally go in the [test] block, rather than the
[require] block.  But it looks like shader_runner doesn't have any
syntax for glBindAttribLocation today.

It's definitely a useful thing to have if we're going to use run.c
to produce shader binaries for ARB_get_program_binary...

> ---
>  run.c | 79 
> +++
>  1 file changed, 79 insertions(+)
> 
> diff --git a/run.c b/run.c
> index bbab5d9..fe2a97a 100644
> --- a/run.c
> +++ b/run.c
> @@ -76,6 +76,12 @@ struct shader {
>  int type;
>  };
>  
> +struct binding_var {
> +char *name;
> +GLint index;
> +struct binding_var *next;
> +};
> +
>  static bool
>  extension_in_string(const char *haystack, const char *needle)
>  {
> @@ -105,6 +111,10 @@ extension_in_string(const char *haystack, const char 
> *needle)
>  return false;
>  }
>  
> +#define SKIP_SPACES(str) while (*(str) == ' ') str++
> +
> +struct binding_var binding_head = {"NULL", -1, NULL};
> +
>  static struct shader *
>  get_shaders(const struct context_info *core, const struct context_info 
> *compat,
>  const struct context_info *es,
> @@ -120,6 +130,7 @@ get_shaders(const struct context_info *core, const struct 
> context_info *compat,
>  static const char *fp_req = "\nGL_ARB_fragment_program";
>  static const char *vp_req = "\nGL_ARB_vertex_program";
>  static const char *sso_req = "\nSSO ENABLED";
> +static const char *binding = "\nBindAttribLoc";
>  static const char *gs = "geometry shader]\n";
>  static const char *fs = "fragment ";
>  static const char *vs = "vertex ";
> @@ -186,11 +197,13 @@ get_shaders(const struct context_info *core, const 
> struct context_info *compat,
>  const struct context_info *info = *type == TYPE_CORE ? core : compat;
>  
>  const char *extension_text = text;
> +
>  while ((extension_text = memmem(extension_text, end_text - 
> extension_text,
>  "\nGL_", strlen("\nGL_"))) != NULL) {
>  extension_text += 1;
>  const char *newline = memchr(extension_text, '\n',
>   end_text - extension_text);
> +
>  if (memmem(info->extension_string, info->extension_string_len,
> extension_text, newline - extension_text) == NULL) {
>  fprintf(stderr, "SKIP: %s requires unavailable extension %.*s\n",
> @@ -202,6 +215,62 @@ get_shaders(const struct context_info *core, const 
> struct context_info *compat,
>  }
>  }
>  
> +/* process binding */
> +struct binding_var *binding_prev = &binding_head;
> +const char *pre_binding_text = text;
> +
> +while ((pre_binding_text = memmem(pre_binding_text, end_text - 
> pre_binding_text,
> +  binding, strlen(binding))) != NULL) {
> +pre_binding_text += strlen(binding);
> +
> +const char *newline = memchr(pre_binding_text, '\n', end_text - 
> pre_binding_text);
> +
> +SKIP_SPACES(pre_binding_text);
> +
> +char *endword = memchr(pre_binding_text, ' ', newline - 
> pre_binding_text);
> +
> +/* if there's no more space in the same line */
> +if (!endword) {
> +fprintf(stderr, "SKIP: can't find attr index for this 
> binding\n");
> +continue;
> +}
> +
> +char *binding_name = (char *)calloc(1, endword - pre_binding_text + 
> 1);
> +
> +strncpy(binding_name, pre_binding_text, endword - pre_binding_text);
> +
> +pre_binding_text = endword;
> +
> +SKIP_SPACES(pre_binding_text);
> +if (*pre_binding_text == '\n') {
> +fprintf(stderr, "SKIP: can't find attr variable name for this 
> binding\n");
> +continue;
> +}
> +
> +endword = memchr(pre_binding_text, ' ', newline - pre_binding_text);
> +
> +if (!endword)
> +endword = (char *)newline;
> +
> +char *index_string = calloc(1, endword - pre_binding_text + 1);
> +strncpy(index_string, pre_binding_text, endword - pre_binding_text);
> +
> +struct binding_var *binding_new = malloc(sizeof(struct binding_var));
> +
> +binding_new->index =

Re: [Mesa-dev] [PATCH shaderdb 3/3] run: shader program file created via GetProgramBinary (v3)

2018-03-14 Thread Kenneth Graunke

On Monday, February 26, 2018 2:17:05 PM PDT Dongwon Kim wrote:
> extraction of linked binary program to a file using glGetProgramBinary.
> This file is intended to be loaded by glProgramBinary in the graphic
> application running on the target system.
> 
> To enable this feature, a new option '--bin' has to be passed to the
> program execution.
> 
> v2: 1. define MAX_LOG_LEN and use it as the size of gl log
> 2. define MAX_PROG_SIZE and use it as the max size of extracted
>shader_program
> 3. out_file is now pointer allocated by strdup for the file name
> 
> v3: 1. automatically using original shader test file's name +  ".bin"
>as a filename for program binary - better way to cover the case
>with batch compilation of many shader test files in the same
>directory
> 2. remove --out= since it is now unnecessary (due to v3-1.)
>to provide custom file name. Instead, option, "--bin", which is
>basically a flag that enables getting program binary as a file.
> 3. Now it tries to get the length of binary by reading program's
>GL_PROGRAM_BINARY_LENGTH_OES parameter
> 
> Signed-off-by: Dongwon Kim 
> ---
>  run.c | 68 
> +++
>  1 file changed, 64 insertions(+), 4 deletions(-)
> 
> diff --git a/run.c b/run.c
> index d066567..bbab5d9 100644
> --- a/run.c
> +++ b/run.c
> @@ -52,6 +52,9 @@
>  
>  #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
>  
> +#define MAX_LOG_LEN 4096
> +#define MAX_PROG_SIZE (10*1024*1024) /* maximum 10MB for shader program */
> +
>  struct context_info {
>  char *extension_string;
>  int extension_string_len;
> @@ -358,18 +361,20 @@ const struct platform platforms[] = {
>  enum
>  {
>  PCI_ID_OVERRIDE_OPTION = CHAR_MAX + 1,
> +LOADABLE_PROGRAM_BINARY_OPTION,
>  };
>  
>  const struct option const long_options[] =
>  {
>  {"pciid", required_argument, NULL, PCI_ID_OVERRIDE_OPTION},
> +{"bin", no_argument, NULL, LOADABLE_PROGRAM_BINARY_OPTION},

This sounds like we're loading binaries.  Can we call it
GENERATE_PROGRAM_BINARY_OPTION instead?

>  {NULL, 0, NULL, 0}
>  };
>  
>  void print_usage(const char *prog_name)
>  {
>  fprintf(stderr,
> -"Usage: %s [-d ] [-j ] [-o ] [-p 
> ] [--pciid=]  *.shader_test files>\n",
> +"Usage: %s [-d ] [-j ] [-o ] [-p 
> ] [--pciid=]  *.shader_test files>\n",
>  prog_name);
>  }
>  
> @@ -450,6 +455,7 @@ main(int argc, char **argv)
>  int opt;
>  bool platf_overridden = 0;
>  bool pci_id_overridden = 0;
> +bool enable_prog_bin = 0;

Maybe generate_prog_bin here as well.

>  
>  max_threads = omp_get_max_threads();
>  
> @@ -518,6 +524,9 @@ main(int argc, char **argv)
>  setenv("INTEL_DEVID_OVERRIDE", optarg, 1);
>  pci_id_overridden = 1;
>  break;
> +case LOADABLE_PROGRAM_BINARY_OPTION:
> +enable_prog_bin = 1;
> +break;
>  default:
>  fprintf(stderr, "Unknown option: %x\n", opt);
>  print_usage(argv[0]);
> @@ -858,18 +867,18 @@ main(int argc, char **argv)
>  }
>  } else if (type == TYPE_CORE || type == TYPE_COMPAT || type == 
> TYPE_ES) {
>  GLuint prog = glCreateProgram();
> +GLint param;

So...putting this here means that you're not going to support generating
program binaries for SSO-based programs.  That seems a bit unfortunate...

>  
>  for (unsigned i = 0; i < num_shaders; i++) {
>  GLuint s = glCreateShader(shader[i].type);
>  glShaderSource(s, 1, &shader[i].text, &shader[i].length);
>  glCompileShader(s);
>  
> -GLint param;
>  glGetShaderiv(s, GL_COMPILE_STATUS, &param);
>  if (unlikely(!param)) {
> -GLchar log[4096];
> +GLchar log[MAX_LOG_LEN];
>  GLsizei length;
> -glGetShaderInfoLog(s, 4096, &length, log);
> +glGetShaderInfoLog(s, sizeof(log), &length, log);

It would be nice to make a helper function for getting the info log and
printing an error, since you've now got it twice.  Should probably be a
separate patch (and include the MAX_LOG_LEN change).

>  
>  fprintf(stderr, "ERROR: %s failed to compile:\n%s\n",
>  current_shader_name, log);
> @@ -879,6 +888,57 @@ main(int argc, char **argv)
>  }
>  
>  glLinkProgram(prog);
> +
> +glGetProgramiv(prog, GL_LINK_STATUS, &param);
> +if (unlikely(!param)) {
> +   GLchar log[MAX_LOG_LEN];
> +   GLsizei length;
> +   glGetProgramInfoLog(prog, sizeof(log), &length, log);
> +
> +   fprintf(stderr, "ERROR: fa

Re: [Mesa-dev] [PATCH shaderdb 2/3] run: new '--pci-id' option for overriding pci-id

2018-03-14 Thread Kenneth Graunke

On Monday, February 12, 2018 5:26:15 PM PDT Dongwon Kim wrote:
> Add a new option, '--pciid' to override a pci id of the target arch
> to support cross-architecture shader compilation. Unlike the "-p" option,
> it accepts any GFX device supported by the driver.
> 
> Setting both "-p" and "--pciid" is blocked to avoid conflict.
> 
> Signed-off-by: Dongwon Kim 
> ---
>  run.c | 44 ++--
>  1 file changed, 42 insertions(+), 2 deletions(-)

Oh, another thing I forgot to mention - you might find intel_run and
intel_stub.c to be useful.  They allow you to emulate any Intel GPU
for purposes of running shader-db, without needing to have any graphics
hardware present in your system.  This can be useful when building on
Xeon build servers or the like...

signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH shaderdb 2/3] run: new '--pci-id' option for overriding pci-id

2018-03-14 Thread Kenneth Graunke

On Monday, February 12, 2018 5:26:15 PM PDT Dongwon Kim wrote:
> Add a new option, '--pciid' to override a pci id of the target arch
> to support cross-architecture shader compilation. Unlike the "-p" option,
> it accepts any GFX device supported by the driver.
> 
> Setting both "-p" and "--pciid" is blocked to avoid conflict.
> 
> Signed-off-by: Dongwon Kim 
> ---
>  run.c | 44 ++--
>  1 file changed, 42 insertions(+), 2 deletions(-)

Hi Dongwon,

It looks like this does the exact same thing as -p, but it accepts
arbitrary numerical PCI IDs instead of a platform name.  IMHO, I think
we should just make -p try parsing it as a number if it doesn't match
any of the names.  Then either -p bdw or -p 0x1616 would work.

It seems much less complicated that way.

--Ken

signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] vbo: Correctly handle source arrays in vbo_split_copy.

2018-03-14 Thread Brian Paul

Reviewed-by: Brian Paul 


On Wed, Mar 14, 2018 at 2:47 PM,  wrote:

> From: Mathias Fröhlich 
>
> Hi,
>
> Seems that the big patch did break something.
> Below the fix.
>
> please review
>
> best
>
> Mathias
>
>
>
>
> The original approach did optimize away a bit too many fields.
> Reestablish the pointer into the original array and correctly feed that
> one.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105471
> Fixes: 64d2a20480547d5897fd9d7b8fd306f2625138cb
> mesa: Make gl_vertex_array contain pointers to first order VAO members.
> Signed-off-by: Mathias Fröhlich 
> ---
>  src/mesa/vbo/vbo_split_copy.c | 8 +---
>  1 file changed, 5 insertions(+), 3 deletions(-)
>
> diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c
> index 09b5b3b651..96828a073f 100644
> --- a/src/mesa/vbo/vbo_split_copy.c
> +++ b/src/mesa/vbo/vbo_split_copy.c
> @@ -63,6 +63,7 @@ struct copy_context {
> struct {
>GLuint attr;
>GLuint size;
> +  const struct gl_vertex_array *array;
>const GLubyte *src_ptr;
>
>struct gl_vertex_buffer_binding dstbinding;
> @@ -258,7 +259,7 @@ elt(struct copy_context *copy, GLuint elt_idx)
>GLuint i;
>
>for (i = 0; i < copy->nr_varying; i++) {
> - const struct gl_vertex_array *srcarray = &copy->array[i];
> + const struct gl_vertex_array *srcarray = copy->varying[i].array;
>   const struct gl_vertex_buffer_binding* srcbinding
>  = srcarray->BufferBinding;
>   const GLubyte *srcptr
> @@ -449,6 +450,7 @@ replay_init(struct copy_context *copy)
>   GLuint j = copy->nr_varying++;
>
>   copy->varying[j].attr = i;
> + copy->varying[j].array = &copy->array[i];
>   copy->varying[j].size = attr_size(attrib);
>   copy->vertex_size += attr_size(attrib);
>
> @@ -520,7 +522,7 @@ replay_init(struct copy_context *copy)
> /* Setup new vertex arrays to point into the output buffer:
>  */
> for (offset = 0, i = 0; i < copy->nr_varying; i++) {
> -  const struct gl_vertex_array *src = &copy->array[i];
> +  const struct gl_vertex_array *src = copy->varying[i].array;
>const struct gl_array_attributes *srcattr = src->VertexAttrib;
>struct gl_vertex_array *dst = &copy->dstarray[i];
>struct gl_vertex_buffer_binding *dstbind =
> &copy->varying[i].dstbinding;
> @@ -576,7 +578,7 @@ replay_finish(struct copy_context *copy)
> /* Unmap VBO's */
> for (i = 0; i < copy->nr_varying; i++) {
>struct gl_buffer_object *vbo =
> - copy->array[i].BufferBinding->BufferObj;
> + copy->varying[i].array->BufferBinding->BufferObj;
>if (_mesa_is_bufferobj(vbo) && _mesa_bufferobj_mapped(vbo,
> MAP_INTERNAL))
>   ctx->Driver.UnmapBuffer(ctx, vbo, MAP_INTERNAL);
> }
> --
> 2.14.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] autotools+dri3: allow building against older xcb (v4)

2018-03-14 Thread Dave Airlie

On 15 March 2018 at 03:25, Daniel Stone  wrote:
> Hi,
>
> On 14 March 2018 at 13:04, Rob Clark  wrote:
>> I'm not sure everyone wants to be updating their dri3 in a forced
>> march setting, this allows a nicer approach, esp when you want
>> to build on distros that aren't brand new.
>
> I don't have that much of an opinion on whether the dependency should
> be mandatory or not. I originally had #ifdefs and removed them when
> reviewers asked me to. If people want to add them back, fine by me.
>
> That being said, these patches need changes, per comments below. One
> thing missing entirely is making the version negotiation conditional:
> when we call query_version for DRI3/Present, we need to make the
> version we pass in conditional on whether or not we have new XCB.
> Probably also wise to ifdef the multiplane_available variables, so
> it's really obvious where any users are missing ifdefs.

I was trying to minimise the ifdeffery, we could minimise the present ones
with #ifndef #define #endif blocks at the top, the dri3 one I think we just have
to live with.

>
> I'm happy to test this tomorrow and submit a new version if that's
> easier for people.

Please do.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] FLAG-DAY: NIR derefs

2018-03-14 Thread Connor Abbott

FWIW, the way I imagined doing this was something like:

1. Add nir_deref_instr and nir_deref_type_pointer. At this point, just
make everything assert if the base deref isn't a nir_deref_var. This
will be a bit of a flag-day, but also very mechanical. It'll also help
us catch cases where we don't handle new-style derefs later.
2. Add a pass to flatten nir_deref_type_pointer into
nir_deref_type_var if possible (i.e. if there's a clear chain up to
the base variable without any phi nodes or whatever). This should
always be possible for GLSL, as well as SPIR-V unless
KHR_variable_pointers is enabled. We'll use this to avoid too much
churn in drivers, passes that haven't been updated, etc. We might also
want a pass to do the opposite, for converting passes where we don't
want to have codepaths for both forms at once.
3. Modify nir_lower_io to handle new-style derefs, especially for
shared variables (i.e. KHR_variable_pointers for anv). We might have
to modify a few other passes, too.
4. Add the required deref lowering passes to all drivers.
5. Rewrite glsl_to_nir and spirv_to_nir to emit the new-style derefs.
At the very least, we should be using this to implement the shared
variable bits of KHR_variable_pointers. If we add stride/offset
annotations to nir_deref_instr for UBO's and SSBO's, then we might
also be able to get rid of the vtn_deref stuff entirely (although I'm
not sure if that should be a goal right now).

At this point, we can fix things up and move everything else over to
new-style derefs at our leisure. Also, it should now be pretty
straightforward to add support for shared variable pointers to radv
without lowering everything to offsets up-front, which is nice.

Connor

On Wed, Mar 14, 2018 at 2:32 PM, Jason Ekstrand  wrote:
> All,
>
> Connor and I along with several others have been discussing for a while
> changing the way NIR dereferences work.  In particular, adding a new
> nir_deref_instr type where the first one in the chain takes a variable and
> is followed by a series of instructions which take another deref instruction
> and do an array or structure dereference on it.
>
> Much of the motivation for this is some of the upcoming SPIR-V stuff where
> we have more real pointers and deref chains don't really work anymore.  It
> will also allow for things such as CSE of common derefs which could make
> analysis easier.  This is similar to what LLVM does and it's working very
> well for them.
>
> The reason for this e-mail is that this is going to be a flag-day change.
> We've been talking about it for a while but this is going to be a major and
> fairly painful change in the short term so no one has actually done it.
> It's time we finally just suck it up and make it happen.  We will try
> to make the change as incrementally and reviewably as possible, but there is
> a real limit as to what is possible here.  My plan is to start cracking away
> at this on Monday and hopefully have something working for i965/anv by the
> end of the week or maybe some time the week after.  If anyone has something
> to say in opposition, please speak up now and not after I've spent a week
> straight frantically hacking on NIR.
>
> I would like everyone to be respectful of the fact that this will be a major
> change and very painful to rebase.  If you've got outstanding NIR, GLSL, or
> SPIR-V work that is likely to conflict with this, please try to land it
> before Monday so that we can avoid rebase conflicts.  If you have interest
> in reviewing this, please try to be responsive so that we can get it
> reviewed and landed before it becomes too painful.  I'll try to send out
> some preview patches as I go so that the data structures themselves can get
> some review before the rest of the changes have been made.
>
> I'm also asking for help from Rob, Bas, and Eric if there are changes needed
> in any of their drivers.  I suspect the impact on back-end drivers will be
> low because most of them don't use derefs directly, but it would be good if
> people were on hand to help catch bugs if nothing else.
>
> Thanks,
>
> --Jason Ekstrand
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] vbo: Correctly handle source arrays in vbo_split_copy.

2018-03-14 Thread Mathias . Froehlich

From: Mathias Fröhlich 

Hi,

Seems that the big patch did break something.
Below the fix.

please review

best

Mathias




The original approach did optimize away a bit too many fields.
Reestablish the pointer into the original array and correctly feed that
one.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105471
Fixes: 64d2a20480547d5897fd9d7b8fd306f2625138cb
mesa: Make gl_vertex_array contain pointers to first order VAO members.
Signed-off-by: Mathias Fröhlich 
---
 src/mesa/vbo/vbo_split_copy.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c
index 09b5b3b651..96828a073f 100644
--- a/src/mesa/vbo/vbo_split_copy.c
+++ b/src/mesa/vbo/vbo_split_copy.c
@@ -63,6 +63,7 @@ struct copy_context {
struct {
   GLuint attr;
   GLuint size;
+  const struct gl_vertex_array *array;
   const GLubyte *src_ptr;
 
   struct gl_vertex_buffer_binding dstbinding;
@@ -258,7 +259,7 @@ elt(struct copy_context *copy, GLuint elt_idx)
   GLuint i;
 
   for (i = 0; i < copy->nr_varying; i++) {
- const struct gl_vertex_array *srcarray = &copy->array[i];
+ const struct gl_vertex_array *srcarray = copy->varying[i].array;
  const struct gl_vertex_buffer_binding* srcbinding
 = srcarray->BufferBinding;
  const GLubyte *srcptr
@@ -449,6 +450,7 @@ replay_init(struct copy_context *copy)
  GLuint j = copy->nr_varying++;
 
  copy->varying[j].attr = i;
+ copy->varying[j].array = &copy->array[i];
  copy->varying[j].size = attr_size(attrib);
  copy->vertex_size += attr_size(attrib);
 
@@ -520,7 +522,7 @@ replay_init(struct copy_context *copy)
/* Setup new vertex arrays to point into the output buffer:
 */
for (offset = 0, i = 0; i < copy->nr_varying; i++) {
-  const struct gl_vertex_array *src = &copy->array[i];
+  const struct gl_vertex_array *src = copy->varying[i].array;
   const struct gl_array_attributes *srcattr = src->VertexAttrib;
   struct gl_vertex_array *dst = &copy->dstarray[i];
   struct gl_vertex_buffer_binding *dstbind = &copy->varying[i].dstbinding;
@@ -576,7 +578,7 @@ replay_finish(struct copy_context *copy)
/* Unmap VBO's */
for (i = 0; i < copy->nr_varying; i++) {
   struct gl_buffer_object *vbo =
- copy->array[i].BufferBinding->BufferObj;
+ copy->varying[i].array->BufferBinding->BufferObj;
   if (_mesa_is_bufferobj(vbo) && _mesa_bufferobj_mapped(vbo, MAP_INTERNAL))
  ctx->Driver.UnmapBuffer(ctx, vbo, MAP_INTERNAL);
}
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 105506] Vulkan MSAA is broken on SI

2018-03-14 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=105506

--- Comment #5 from Jason Ekstrand  ---
I recommend you file a bug against the validation layers as at least the first
of the two comments Bas made should be invalid.  The second is valid but does
not do what you want.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] FLAG-DAY: NIR derefs

2018-03-14 Thread Jason Ekstrand

All,

Connor and I along with several others have been discussing for a while
changing the way NIR dereferences work.  In particular, adding a new
nir_deref_instr type where the first one in the chain takes a variable and
is followed by a series of instructions which take another deref
instruction and do an array or structure dereference on it.

Much of the motivation for this is some of the upcoming SPIR-V stuff where
we have more real pointers and deref chains don't really work anymore.  It
will also allow for things such as CSE of common derefs which could make
analysis easier.  This is similar to what LLVM does and it's working very
well for them.

The reason for this e-mail is that this is going to be a flag-day change.
We've been talking about it for a while but this is going to be a major and
fairly painful change in the short term so no one has actually done it.
It's time we finally just suck it up and make it happen.  We will try
to make the change as incrementally and reviewably as possible, but there is
a real limit as to what is possible here.  My plan is to start cracking
away at this on Monday and hopefully have something working for i965/anv by
the end of the week or maybe some time the week after.  If anyone has
something to say in opposition, please speak up now and not after I've
spent a week straight frantically hacking on NIR.

I would like everyone to be respectful of the fact that this will be a
major change and very painful to rebase.  If you've got outstanding NIR,
GLSL, or SPIR-V work that is likely to conflict with this, please try to
land it before Monday so that we can avoid rebase conflicts.  If you have
interest in reviewing this, please try to be responsive so that we can get
it reviewed and landed before it becomes too painful.  I'll try to send out
some preview patches as I go so that the data structures themselves can get
some review before the rest of the changes have been made.

I'm also asking for help from Rob, Bas, and Eric if there are changes
needed in any of their drivers.  I suspect the impact on back-end drivers
will be low because most of them don't use derefs directly, but it would be
good if people were on hand to help catch bugs if nothing else.

Thanks,

--Jason Ekstrand
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/4] st/nine: Fix bad tracking of bound vs textures

2018-03-14 Thread Dieter Nützel


For the series

Tested-by: Dieter Nützel 

on Polaris 20 (RX580) with several Wine-staging (Nine) apps, but I have to
note that I do NOT have Guild Wars 2 or Torchlight...


Dieter

Am 13.03.2018 23:09, schrieb Axel Davy:

An incorrect formula was used to compute bound_samplers_mask_vs.
Since s is always above 8 for vs and the variable is encoded on 8 bits,
it was always 0.
This resulted in committing the samplers on every call when
there was at least one texture read in the vs shader.

Signed-off-by: Axel Davy 
---
 src/gallium/state_trackers/nine/nine_state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/nine/nine_state.c
b/src/gallium/state_trackers/nine/nine_state.c
index 26b2dea3bd..c81a05a952 100644
--- a/src/gallium/state_trackers/nine/nine_state.c
+++ b/src/gallium/state_trackers/nine/nine_state.c
@@ -980,7 +980,7 @@ update_textures_and_samplers(struct NineDevice9 
*device)

 context->changed.sampler[s] = ~0;
 }

-context->bound_samplers_mask_vs |= (1 << s);
+context->bound_samplers_mask_vs |= (1 << i);
 }

 cso_set_sampler_views(context->cso, PIPE_SHADER_VERTEX,
num_textures, view);

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 105506] Vulkan MSAA is broken on SI

2018-03-14 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=105506

Bas Nieuwenhuizen  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |NOTOURBUG

--- Comment #4 from Bas Nieuwenhuizen  ---
As discussed on #dri-devel, the example application contains several layout
issues such as

1) using UNDEFINED as the source layout for the vkCmdResolveImage.
2) using UNDEFINED as the initial layout for the second renderpass when you
want to preserve contents.

These caused issues with MSAA on a Vega. I am not completely certain that
fixing these will fix SI. Please fix these and then take a look in renderdoc on
where it is going wrong.

If you still suspect the driver after that we can take another look.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [RFC] Mesa release improvements - Feature and Stable releases

2018-03-14 Thread Andres Gomez

On Wed, 2018-03-14 at 16:02 +, Emil Velikov wrote:

[...]
> 
> Just double-checking:
> I would suspect you're not suggesting removing the existing email/poke scheme?

Partially. The "announce" mail for the pre-branching period will still
happen, pointing to the "Metabug" in which to add the WIP features that
developers intend to land before the deadline.

If some of the developers just reply by mail/IRC/you-name-it, then it
will be the release manager's task to add the blocking bugs with the WIP
features, as a way of documenting them.

> Providing another means to devs to track/handle things is good IMHO.
> Whether developers will like it is up-to them. Everyone, your input is
> appreciated!
> 
> 
> I'm slightly worried that it might cause extra confusion.
> Some crude examples follow:
>  - I don't use bugzilla/etc to track my feature work - most teams

I don't think much interaction/documentation is needed. Just mention
the WIP feature and update its status eventually ... and only for the
ones developer X wants to have at branchpoint Y before that happens.
The rest of the work of developer X doesn't need to be in Bugzilla.

>  - Do I open another bug, or list my feature in the metabug - seeming
> an ongoing theme with metabugs

I think it should be a new blocking bug but I'm open to just document
it in the Metabug.

>  - Do I add the bug, reply to the email or both

Preferably, just add the bug.

Once the bug is created and all the parties are in Cc for the bug, I
understand there is no need for any other way of communication. I'm
still open to reconsidering, though.

-- 
Br,

Andres
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/1] nir: Use a freelist in nir_opt_dce to avoid spamming ralloc

2018-03-14 Thread Dieter Nützel


Hello Thomas,

is this useful even after '[Mesa-dev] [PATCH 0/2] V2: Use hash table 
cloning in copy propagation' landed?


I'm running both together with Dave's '[Mesa-dev] [PATCH] radv/winsys: 
replace bo list searchs with a hash table.' patch.


Dieter

Am 24.01.2018 08:33, schrieb Thomas Helland:

2018-01-21 23:58 GMT+01:00 Eric Anholt :

Thomas Helland  writes:


Also, allocate worklist_elem in groups of 20, to reduce the burden of
allocation. Do not use rzalloc, as there is no need. This lets us 
drop

the number of calls to ralloc from approximately 10% of all calls to
ralloc(130 000 calls), down to a mere 2000 calls to 
ralloc_array_size.

This cuts the runtime of shader-db by 1%, while at the same time
reducing the number of stalled cycles, executed cycles, and executed
instructions by about 1 % as reported by perf. I did a five-run
benchmark pre and post and got a statistical variance less than 0.1% 
pre
and post. This was with i965's ir validation polluting the benchmark, 
so

the numbers are even better in release builds.

Performance change as found with perf-diff:
4.74% -0.23%  libc-2.26.so[.] _int_malloc
1.88% -0.21%  libc-2.26.so[.] malloc
2.27% +0.16%  libmesa_dri_drivers.so  [.] match_value.part.7
2.95% -0.12%  libc-2.26.so[.] _int_free
  +0.11%  libmesa_dri_drivers.so  [.] worklist_push
1.22% -0.08%  libc-2.26.so[.] malloc_consolidate
0.16% -0.06%  libmesa_dri_drivers.so  [.] mark_live_cb
1.21% +0.06%  libmesa_dri_drivers.so  [.] match_expression.part.6
0.75% -0.05%  libc-2.26.so[.] cfree@GLIBC_2.2.5
0.50% -0.05%  libmesa_dri_drivers.so  [.] ralloc_size
0.57% +0.04%  libmesa_dri_drivers.so  [.] nir_replace_instr
1.29% -0.04%  libmesa_dri_drivers.so  [.] unsafe_free


I'm curious, since a NIR instruction worklist seems like a generally
useful thing to have:

Could nir_worklist.c keep the implementation of this?

Also, I wonder if it wouldn't be even better to have a u_dynarray of
instructions in the worklist, with push/pop on the end of the array, 
and

a struct set tracking the instructions in the array to avoid
double-adding.  I actually don't know if that would be better or not, 
so
I'd be happy with the worklist management just moved to 
nir_worklist.c.


I'll look into this to see what I can do. nir_worklist.c at this time 
has only
a block worklist. This numbers all the blocks, uses a bitset for 
checking

if the item is present, and uses an array with an index pointing to the
start of the queue of blocks in the buffer.

The same scheme could be easily used for ssa-defs, as these are
also numbered. I actually did this for the VRP pass I wrote years ago.

However, for instructions we do not have a way of numbering them,
so a different scheme would have to be used. A dynarray + set type
of thing, as you're suggesting, might get us where we want.
I'll see what I can come up with.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] st/mesa: Factorize duplicate code in st_BlitFramebuffer()

2018-03-14 Thread Marek Olšák

Pushed! Thanks.

Marek

On Tue, Feb 20, 2018 at 8:30 AM, Guillaume Charifi
 wrote:
> ---
>  src/mesa/state_tracker/st_cb_blit.c | 76 
> +
>  1 file changed, 26 insertions(+), 50 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_cb_blit.c 
> b/src/mesa/state_tracker/st_cb_blit.c
> index 8aa849b3fc..564ad5 100644
> --- a/src/mesa/state_tracker/st_cb_blit.c
> +++ b/src/mesa/state_tracker/st_cb_blit.c
> @@ -174,53 +174,29 @@ st_BlitFramebuffer(struct gl_context *ctx,
> if (mask & GL_COLOR_BUFFER_BIT) {
>struct gl_renderbuffer_attachment *srcAtt =
>   &readFB->Attachment[readFB->_ColorReadBufferIndex];
> +  GLuint i;
>
>blit.mask = PIPE_MASK_RGBA;
>
>if (srcAtt->Type == GL_TEXTURE) {
>   struct st_texture_object *srcObj = 
> st_texture_object(srcAtt->Texture);
> - GLuint i;
>
>   if (!srcObj || !srcObj->pt) {
>  return;
>   }
>
> - for (i = 0; i < drawFB->_NumColorDrawBuffers; i++) {
> -struct st_renderbuffer *dstRb =
> -   st_renderbuffer(drawFB->_ColorDrawBuffers[i]);
> + blit.src.resource = srcObj->pt;
> + blit.src.level = srcAtt->TextureLevel;
> + blit.src.box.z = srcAtt->Zoffset + srcAtt->CubeMapFace;
> + blit.src.format = srcObj->pt->format;
>
> -if (dstRb) {
> -   struct pipe_surface *dstSurf;
> -
> -   st_update_renderbuffer_surface(st, dstRb);
> -
> -   dstSurf = dstRb->surface;
> -
> -   if (dstSurf) {
> -  blit.dst.resource = dstSurf->texture;
> -  blit.dst.level = dstSurf->u.tex.level;
> -  blit.dst.box.z = dstSurf->u.tex.first_layer;
> -  blit.dst.format = dstSurf->format;
> -
> -  blit.src.resource = srcObj->pt;
> -  blit.src.level = srcAtt->TextureLevel;
> -  blit.src.box.z = srcAtt->Zoffset + srcAtt->CubeMapFace;
> -  blit.src.format = srcObj->pt->format;
> -
> -  if (!ctx->Color.sRGBEnabled)
> - blit.src.format = util_format_linear(blit.src.format);
> -
> -  st->pipe->blit(st->pipe, &blit);
> -  dstRb->defined = true; /* front buffer tracking */
> -   }
> -}
> - }
> + if (!ctx->Color.sRGBEnabled)
> +blit.src.format = util_format_linear(blit.src.format);
>}
>else {
>   struct st_renderbuffer *srcRb =
>  st_renderbuffer(readFB->_ColorReadBuffer);
>   struct pipe_surface *srcSurf;
> - GLuint i;
>
>   if (!srcRb)
>  return;
> @@ -232,31 +208,31 @@ st_BlitFramebuffer(struct gl_context *ctx,
>
>   srcSurf = srcRb->surface;
>
> - for (i = 0; i < drawFB->_NumColorDrawBuffers; i++) {
> -struct st_renderbuffer *dstRb =
> -   st_renderbuffer(drawFB->_ColorDrawBuffers[i]);
> + blit.src.resource = srcSurf->texture;
> + blit.src.level = srcSurf->u.tex.level;
> + blit.src.box.z = srcSurf->u.tex.first_layer;
> + blit.src.format = srcSurf->format;
> +  }
>
> -if (dstRb) {
> -   struct pipe_surface *dstSurf;
> +  for (i = 0; i < drawFB->_NumColorDrawBuffers; i++) {
> + struct st_renderbuffer *dstRb =
> +st_renderbuffer(drawFB->_ColorDrawBuffers[i]);
>
> -   st_update_renderbuffer_surface(st, dstRb);
> + if (dstRb) {
> +struct pipe_surface *dstSurf;
>
> -   dstSurf = dstRb->surface;
> +st_update_renderbuffer_surface(st, dstRb);
>
> -   if (dstSurf) {
> -  blit.dst.resource = dstSurf->texture;
> -  blit.dst.level = dstSurf->u.tex.level;
> -  blit.dst.box.z = dstSurf->u.tex.first_layer;
> -  blit.dst.format = dstSurf->format;
> +dstSurf = dstRb->surface;
>
> -  blit.src.resource = srcSurf->texture;
> -  blit.src.level = srcSurf->u.tex.level;
> -  blit.src.box.z = srcSurf->u.tex.first_layer;
> -  blit.src.format = srcSurf->format;
> +if (dstSurf) {
> +   blit.dst.resource = dstSurf->texture;
> +   blit.dst.level = dstSurf->u.tex.level;
> +   blit.dst.box.z = dstSurf->u.tex.first_layer;
> +   blit.dst.format = dstSurf->format;
>
> -  st->pipe->blit(st->pipe, &blit);
> -  dstRb->defined = true; /* front buffer tracking */
> -   }
> +   st->pipe->blit(st->pipe, &blit);
> +   dstRb->defined = true; /* front buffer tracking */
>  }
>   }
>}
> --
> 2.14.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.fr

Re: [Mesa-dev] [PATCH 1/3] wayland-drm: Expose server-side xbgr2101010 and abgr2101010 formats.

2018-03-14 Thread Ilia Mirkin

On Tue, Mar 13, 2018 at 5:30 AM, Daniel Stone  wrote:
> Hi Mario,
>
> On 12 March 2018 at 20:45, Mario Kleiner  wrote:
>> This way the wayland server can signal support for these formats
>> to wayland EGL clients. This is currently used by nouveau for 10
>> bpc support.
>>
>> Tested with glmark2-wayland and glmark2-es2-wayland under weston
>> to now expose 10 bpc EGL configs under nouveau.
>
> Do we need a way to ensure that the backend driver does actually
> support BGR for texturing? AFAIK, if a client happens to select a BGR
> config on other drivers now - using a compositor which does not
> implement wl_drm - this will break for them.

I think in practice, every hw driver can support both for texturing if
it can support one, since swizzles are always possible (due to
ARB_texture_swizzle).

In practice at least nouveau prior to Mario's patches only supported
it one way. I just checked r600, radeonsi, i965 and freedreno, and
they appear to support both for texturing. I think that covers the
majority of the likely 10bpc users.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] egl/dri2: move wayland header inclusion where applicable

2018-03-14 Thread Andy Furniss


Emil Velikov wrote:

From: Emil Velikov 

Instead of indirectly pulling the wayland headers everywhere, use
forward declarations and #include only as needed.

Should effectively fix build errors like the following:

make[5]: Entering directory
'/.../src/gallium/state_trackers/omx/tizonia'
CC   h264dprc.lo
In file included from h264dprc.c:45:0:
.../src/egl/drivers/dri2/egl_dri2.h:47:10: fatal error:
wayland/wayland-egl/wayland-egl-backend.h: No such file or directory
   #include "wayland/wayland-egl/wayland-egl-backend.h"

Cc: Andy Furniss 
Cc: Dylan Baker 
Signed-off-by: Emil Velikov 
---
Dylan had an epiphany a minute after I hit Send. Sorry about that.

Gents, this should remove the need for any of the following patches.
Please give this a try, manually reverting the meson fix.


Autotools build is good for me with this patch.



Thanks!

https://patchwork.freedesktop.org/patch/208770/
https://patchwork.freedesktop.org/patch/208306/
https://patchwork.freedesktop.org/patch/208322/
---
  src/egl/drivers/dri2/egl_dri2.c |  1 +
  src/egl/drivers/dri2/egl_dri2.h | 12 +---
  src/egl/drivers/dri2/platform_wayland.c |  2 ++
  3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index 864f7eb0c68..535806e4bfe 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -48,6 +48,7 @@
  #include 
  
  #ifdef HAVE_WAYLAND_PLATFORM

+#include 
  #include "wayland-drm.h"
  #include "wayland-drm-client-protocol.h"
  #include "linux-dmabuf-unstable-v1-client-protocol.h"
diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h
index bd637f73c9d..adabc527f85 100644
--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -43,9 +43,15 @@
  #endif
  
  #ifdef HAVE_WAYLAND_PLATFORM

-#include 
-#include "wayland/wayland-egl/wayland-egl-backend.h"
-/* forward declarations of protocol elements */
+/* forward declarations to avoid pulling wayland headers everywhere */
+struct wl_egl_window;
+struct wl_event_queue;
+struct wl_callback;
+struct wl_display;
+struct wl_drm;
+struct wl_registry;
+struct wl_shm;
+struct wl_surface;
  struct zwp_linux_dmabuf_v1;
  #endif
  
diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c

index 877f7933b9a..94f7defa657 100644
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -49,6 +49,8 @@
  #include "wayland-drm-client-protocol.h"
  #include "linux-dmabuf-unstable-v1-client-protocol.h"
  
+#include "wayland/wayland-egl/wayland-egl-backend.h"

+
  #ifndef DRM_FORMAT_MOD_INVALID
  #define DRM_FORMAT_MOD_INVALID ((1ULL << 56) - 1)
  #endif



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/5] i965/miptree: Map with movntdqa for linear buffers only

2018-03-14 Thread Nanley Chery

On Tue, Jan 09, 2018 at 11:17:01PM -0800, Scott D Phillips wrote:
> Removes a place where gtt mapping is used.
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 

This patch is
Reviewed-by: Nanley Chery 

> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index e4a3f163d2..fa4ae06399 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -3707,7 +3707,8 @@ intel_miptree_map(struct brw_context *brw,
>  #if defined(USE_SSE41)
> } else if (!(mode & GL_MAP_WRITE_BIT) &&
>!mt->compressed && cpu_has_sse4_1 &&
> -  (mt->surf.row_pitch % 16 == 0)) {
> +  (mt->surf.row_pitch % 16 == 0) &&
> +  (mt->surf.tiling == ISL_TILING_LINEAR)) {
>intel_miptree_map_movntdqa(brw, mt, map, level, slice);
>  #endif
> } else if (mt->surf.tiling != ISL_TILING_LINEAR) {
> @@ -3752,6 +3753,7 @@ intel_miptree_unmap(struct brw_context *brw,
> } else if (!(map->mode & GL_MAP_WRITE_BIT) &&
>!mt->compressed && cpu_has_sse4_1 &&
>(mt->surf.row_pitch % 16 == 0) &&
> +  (mt->surf.tiling == ISL_TILING_LINEAR) &&
>map->buffer) {
>intel_miptree_unmap_movntdqa(brw, mt, map, level, slice);
>  #endif
> -- 
> 2.14.3
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 105507] Crash when destroying a newly resized EGLsurface with wayland egl (dri2)

2018-03-14 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=105507

--- Comment #2 from Daniel Stone  ---
You could place any orphaned wl_buffers on a per-surface list instead, and
spin at destruction until that list is emptied. I won't have the time to look
into it myself for a while though.

Johan - which test hits this?

-- 
You are receiving this mail because:
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 105507] Crash when destroying a newly resized EGLsurface with wayland egl (dri2)

2018-03-14 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=105507

Emil Velikov  changed:

   What|Removed |Added

 CC||dan...@fooishbar.org

--- Comment #1 from Emil Velikov  ---
Daniel, any suggestions?

-- 
You are receiving this mail because:
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] egl/dri2: move wayland header inclusion where applicable

2018-03-14 Thread Emil Velikov

From: Emil Velikov 

Instead of indirectly pulling the wayland headers everywhere, use
forward declarations and #include only as needed.

Should effectively fix build errors like the following:

make[5]: Entering directory
'/.../src/gallium/state_trackers/omx/tizonia'
   CC   h264dprc.lo
In file included from h264dprc.c:45:0:
.../src/egl/drivers/dri2/egl_dri2.h:47:10: fatal error:
wayland/wayland-egl/wayland-egl-backend.h: No such file or directory
  #include "wayland/wayland-egl/wayland-egl-backend.h"

Cc: Andy Furniss 
Cc: Dylan Baker 
Signed-off-by: Emil Velikov 
---
Dylan had an epiphany a minute after I hit Send. Sorry about that.

Gents, this should remove the need for any of the following patches.
Please give this a try, manually reverting the meson fix.

Thanks!

https://patchwork.freedesktop.org/patch/208770/
https://patchwork.freedesktop.org/patch/208306/
https://patchwork.freedesktop.org/patch/208322/
---
 src/egl/drivers/dri2/egl_dri2.c |  1 +
 src/egl/drivers/dri2/egl_dri2.h | 12 +---
 src/egl/drivers/dri2/platform_wayland.c |  2 ++
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index 864f7eb0c68..535806e4bfe 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -48,6 +48,7 @@
 #include 
 
 #ifdef HAVE_WAYLAND_PLATFORM
+#include 
 #include "wayland-drm.h"
 #include "wayland-drm-client-protocol.h"
 #include "linux-dmabuf-unstable-v1-client-protocol.h"
diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h
index bd637f73c9d..adabc527f85 100644
--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -43,9 +43,15 @@
 #endif
 
 #ifdef HAVE_WAYLAND_PLATFORM
-#include 
-#include "wayland/wayland-egl/wayland-egl-backend.h"
-/* forward declarations of protocol elements */
+/* forward declarations to avoid pulling wayland headers everywhere */
+struct wl_egl_window;
+struct wl_event_queue;
+struct wl_callback;
+struct wl_display;
+struct wl_drm;
+struct wl_registry;
+struct wl_shm;
+struct wl_surface;
 struct zwp_linux_dmabuf_v1;
 #endif
 
diff --git a/src/egl/drivers/dri2/platform_wayland.c 
b/src/egl/drivers/dri2/platform_wayland.c
index 877f7933b9a..94f7defa657 100644
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -49,6 +49,8 @@
 #include "wayland-drm-client-protocol.h"
 #include "linux-dmabuf-unstable-v1-client-protocol.h"
 
+#include "wayland/wayland-egl/wayland-egl-backend.h"
+
 #ifndef DRM_FORMAT_MOD_INVALID
 #define DRM_FORMAT_MOD_INVALID ((1ULL << 56) - 1)
 #endif
-- 
2.16.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/4] st/nine: Fix bad tracking of bound vs textures

2018-03-14 Thread Patrick Rudolph

On Tue, 2018-03-13 at 23:09 +0100, Axel Davy wrote:
> An incorrect formula was used to compute bound_samplers_mask_vs.
> Since s is above always 8 for vs and the variable is encoded on 8
> bits,
> it was always 0.
> This resulted in commiting the samplers every call when
> there was at least one texture read in the vs shader.
> 
> Signed-off-by: Axel Davy 
The series is 
Reviewed-by: Patrick Rudolph 

Please also include it into 17.3 stable.

> ---
>  src/gallium/state_trackers/nine/nine_state.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/gallium/state_trackers/nine/nine_state.c
> b/src/gallium/state_trackers/nine/nine_state.c
> index 26b2dea3bd..c81a05a952 100644
> --- a/src/gallium/state_trackers/nine/nine_state.c
> +++ b/src/gallium/state_trackers/nine/nine_state.c
> @@ -980,7 +980,7 @@ update_textures_and_samplers(struct NineDevice9
> *device)
>  context->changed.sampler[s] = ~0;
>  }
>  
> -context->bound_samplers_mask_vs |= (1 << s);
> +context->bound_samplers_mask_vs |= (1 << i);
>  }
>  
>  cso_set_sampler_views(context->cso, PIPE_SHADER_VERTEX,
> num_textures, view);

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] tizonia egl build fail

2018-03-14 Thread Emil Velikov

On 13 March 2018 at 19:20, Dylan Baker  wrote:
> Quoting Andy Furniss (2018-03-06 15:12:37)
>> make[5]: Entering directory
>> '/mnt/sdc1/Gits/mesa/src/gallium/state_trackers/omx/tizonia'
>>CC   h264dprc.lo
>> In file included from h264dprc.c:45:0:
>> ../../../../../src/egl/drivers/dri2/egl_dri2.h:47:10: fatal error:
>> wayland/wayland-egl/wayland-egl-backend.h: No such file or directory
>>   #include "wayland/wayland-egl/wayland-egl-backend.h"
>>^~~
>> compilation terminated.

> Emil, this was the other patch.
>
Thanks Dylan!

Please include the error in the commit message of your (better) patch.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] autotools+dri3: allow building against older xcb (v4)

2018-03-14 Thread Daniel Stone

Hi,

On 14 March 2018 at 13:04, Rob Clark  wrote:
> I'm not sure everyone wants to be updating their dri3 in a forced
> march setting, this allows a nicer approach, esp when you want
> to build on distro that aren't brand new.

I don't have that much of an opinion on whether the dependency should
be mandatory or not. I originally had #ifdefs and removed them when
reviewers asked me to. If people want to add them back, fine by me.

That being said, these patches need changes, per comments below. One
thing missing entirely is making the version negotiation conditional:
when we call query_version for DRI3/Present, we need to make the
version we pass in conditional on whether or not we have new XCB.
Probably also wise to ifdef the multiplane_available variables, so
it's really obvious where any users are missing ifdefs.

I'm happy to test this tomorrow and submit a new version if that's
easier for people.

> @@ -327,6 +327,7 @@ dri3_create_image_khr_pixmap_from_buffers(_EGLDisplay 
> *disp, _EGLContext *ctx,
>EGLClientBuffer buffer,
>const EGLint *attr_list)
>  {
> +#ifdef HAVE_DRI3_MODIFIERS
> struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
> struct dri2_egl_image *dri2_img;
> xcb_dri3_buffers_from_pixmap_cookie_t bp_cookie;
> @@ -376,6 +377,9 @@ dri3_create_image_khr_pixmap_from_buffers(_EGLDisplay 
> *disp, _EGLContext *ctx,
> }
>
> return &dri2_img->base;
> +#else
> +   return NULL;
> +#endif
>  }

Just ifdef out the entire function, don't return NULL.

> @@ -1272,6 +1276,7 @@ dri3_alloc_render_buffer(struct loader_dri3_drawable 
> *draw, unsigned int format,
> pixmap = xcb_generate_id(draw->conn);
> if (draw->multiplanes_available &&
> buffer->modifier != DRM_FORMAT_MOD_INVALID) {
> +#ifdef HAVE_DRI3_MODIFIERS
>xcb_dri3_pixmap_from_buffers(draw->conn,
> pixmap,
> draw->drawable,
> @@ -1284,6 +1289,7 @@ dri3_alloc_render_buffer(struct loader_dri3_drawable 
> *draw, unsigned int format,
> depth, buffer->cpp * 8,
> buffer->modifier,
> buffer_fds);
> +#endif
> } else {
>xcb_dri3_pixmap_from_buffer(draw->conn,
>pixmap,

This ifdef needs to wrap the branch, so that the single-buffer
xcb_dri3_pixmap_from_buffer() always gets called if we built against
old XCB, else new-server + old-XCB-Mesa never allocates a render
buffer for X11 surfaces.

> @@ -1567,7 +1575,7 @@ dri3_get_pixmap_buffer(__DRIdrawable *driDrawable, 
> unsigned int format,
>(sync_fence = xcb_generate_id(draw->conn)),
>false,
>fence_fd);
> -
> +#ifdef HAVE_DRI3_MODIFIERS
> if (draw->multiplanes_available &&
> draw->ext->image->base.version >= 15 &&
> draw->ext->image->createImageFromDmaBufs2) {
> @@ -1586,7 +1594,9 @@ dri3_get_pixmap_buffer(__DRIdrawable *driDrawable, 
> unsigned int format,
>width = bps_reply->width;
>height = bps_reply->height;
>free(bps_reply);
> -   } else {
> +   } else
> +#endif
> +   {
>xcb_dri3_buffer_from_pixmap_cookie_t bp_cookie;
>xcb_dri3_buffer_from_pixmap_reply_t *bp_reply;

Jason complained about control flow being intermingled with #ifdefs
like this. I don't have any suggestions as to how to do it better
though, which is why I did it like this in the first place.

Cheers,
Daniel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 5/8] intel: devinfo: add helper functions to fill fusing masks values

2018-03-14 Thread Lionel Landwerlin

There are a couple of ways we can get the fusing information from the
kernel :

  - Through DRM_I915_GETPARAM with the SLICE_MASK/SUBSLICE_MASK
parameters

  - Through the new DRM_IOCTL_I915_QUERY by requesting the
DRM_I915_QUERY_TOPOLOGY_INFO

The second method is more accurate and also gives us the EUs fusing
masks. It's also a requirement for CNL as this platform has asymmetric
subslices and the first method SUBSLICE_MASK value is assumed uniform
across slices.

Signed-off-by: Lionel Landwerlin 
---
 src/intel/dev/gen_device_info.c | 129 
 src/intel/dev/gen_device_info.h |  11 
 2 files changed, 140 insertions(+)

diff --git a/src/intel/dev/gen_device_info.c b/src/intel/dev/gen_device_info.c
index c1bdc997f2c..a8c9f7738b2 100644
--- a/src/intel/dev/gen_device_info.c
+++ b/src/intel/dev/gen_device_info.c
@@ -28,8 +28,11 @@
 #include 
 #include "gen_device_info.h"
 #include "compiler/shader_enums.h"
+#include "util/bitscan.h"
 #include "util/macros.h"
 
+#include 
+
 /**
  * Get the PCI ID for the device name.
  *
@@ -913,6 +916,132 @@ fill_masks(struct gen_device_info *devinfo)
}
 }
 
+static void
+reset_masks(struct gen_device_info *devinfo)
+{
+   devinfo->subslice_slice_stride =
+  devinfo->eu_subslice_stride =
+  devinfo->eu_slice_stride = 0;
+
+   devinfo->num_slices =
+  devinfo->num_eu_per_subslice = 0;
+   memset(devinfo->num_subslices, 0, sizeof(devinfo->num_subslices));
+
+   memset(&devinfo->slice_masks, 0, sizeof(devinfo->slice_masks));
+   memset(devinfo->subslice_masks, 0, sizeof(devinfo->subslice_masks));
+   memset(devinfo->eu_masks, 0, sizeof(devinfo->eu_masks));
+}
+
+void
+gen_device_info_update_from_masks(struct gen_device_info *devinfo,
+  uint32_t slice_mask,
+  uint32_t subslice_mask,
+  uint32_t n_eus)
+{
+   reset_masks(devinfo);
+
+   assert((slice_mask & 0xff) == slice_mask);
+
+   devinfo->slice_masks = slice_mask;
+   devinfo->num_slices = __builtin_popcount(devinfo->slice_masks);
+
+   uint32_t max_slices = util_last_bit(slice_mask);
+   uint32_t max_subslices = util_last_bit(subslice_mask);
+   devinfo->subslice_slice_stride = DIV_ROUND_UP(max_subslices, 8);
+   uint32_t n_subslices = 0;
+   for (int s = 0; s < util_last_bit(slice_mask); s++) {
+  if ((slice_mask & (1UL << s)) == 0)
+ continue;
+
+  for (int b = 0; b < devinfo->subslice_slice_stride; b++) {
+ int subslice_offset = s * devinfo->subslice_slice_stride + b;
+
+ devinfo->subslice_masks[subslice_offset] =
+(subslice_mask >> (b * 8)) & 0xff;
+ devinfo->num_subslices[s] +=
+__builtin_popcount(devinfo->subslice_masks[subslice_offset]);
+  }
+
+  n_subslices += devinfo->num_subslices[s];
+   }
+
+   /* We expect the total number of EUs to be uniformly distributed throughout
+* the subslices.
+*/
+   assert((n_eus % n_subslices) == 0);
+   devinfo->num_eu_per_subslice = n_eus / n_subslices;
+
+   devinfo->eu_subslice_stride = DIV_ROUND_UP(devinfo->num_eu_per_subslice, 8);
+   devinfo->eu_slice_stride = devinfo->eu_subslice_stride * max_subslices;
+
+   for (int s = 0; s < max_slices; s++) {
+  if ((slice_mask & (1UL << s)) == 0)
+ continue;
+
+  for (int ss = 0; ss < max_subslices; ss++) {
+ if ((subslice_mask & (1UL << ss)) == 0)
+continue;
+
+ for (int b = 0; b < devinfo->eu_subslice_stride; b++) {
+int eus_offset = s * devinfo->eu_slice_stride +
+   ss * devinfo->eu_subslice_stride + b;
+
+devinfo->eu_masks[eus_offset] =
+   (((1UL << devinfo->num_eu_per_subslice) - 1) >> (b * 8)) & 0xff;
+ }
+  }
+   }
+}
+
+void
+gen_device_info_update_from_topology(struct gen_device_info *devinfo,
+ const struct drm_i915_query_topology_info 
*topology)
+{
+   reset_masks(devinfo);
+
+   devinfo->subslice_slice_stride = topology->subslice_stride;
+
+   devinfo->eu_subslice_stride = DIV_ROUND_UP(topology->max_eus_per_subslice, 
8);
+   devinfo->eu_slice_stride = topology->max_subslices * 
devinfo->eu_subslice_stride;
+
+   assert(sizeof(devinfo->slice_masks) >= DIV_ROUND_UP(topology->max_slices, 
8));
+   memcpy(&devinfo->slice_masks, topology->data, 
DIV_ROUND_UP(topology->max_slices, 8));
+   devinfo->num_slices = __builtin_popcount(devinfo->slice_masks);
+
+   uint32_t subslice_mask_len =
+  topology->max_slices * topology->subslice_stride;
+   assert(sizeof(devinfo->subslice_masks) >= subslice_mask_len);
+   memcpy(devinfo->subslice_masks, &topology->data[topology->subslice_offset],
+  subslice_mask_len);
+
+   uint32_t n_subslices = 0;
+   for (int s = 0; s < topology->max_slices; s++) {
+  if ((devinfo->slice_masks & (1UL << s)) == 0)
+ continue;
+
+  for (int b = 0; b < devinfo->subslice_slice_stride; b++) {
+ de

[Mesa-dev] [PATCH v2 4/8] intel: devinfo: meson: include drm uapi

2018-03-14 Thread Lionel Landwerlin

Already available with the autotools build.

Signed-off-by: Lionel Landwerlin 
---
 src/intel/dev/meson.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/dev/meson.build b/src/intel/dev/meson.build
index 3346fe60c07..9369fd3c0da 100644
--- a/src/intel/dev/meson.build
+++ b/src/intel/dev/meson.build
@@ -28,6 +28,6 @@ files_libintel_dev = files(
 libintel_dev = static_library(
   ['intel_dev'],
   files_libintel_dev,
-  include_directories : [inc_common, inc_intel],
+  include_directories : [inc_common, inc_intel, inc_drm_uapi],
   c_args : [c_vis_args, no_override_init_args],
 )
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 3/8] drm-uapi: bump headers

2018-03-14 Thread Lionel Landwerlin

This commit is meant to be replaced with a proper bump from drm-next.
---
 include/drm-uapi/i915_drm.h | 146 +++-
 1 file changed, 145 insertions(+), 1 deletion(-)

diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h
index 7f28eea4035..9dfebbbe117 100644
--- a/include/drm-uapi/i915_drm.h
+++ b/include/drm-uapi/i915_drm.h
@@ -102,6 +102,46 @@ enum drm_i915_gem_engine_class {
I915_ENGINE_CLASS_INVALID   = -1
 };
 
+/**
+ * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915
+ *
+ */
+
+enum drm_i915_pmu_engine_sample {
+   I915_SAMPLE_BUSY = 0,
+   I915_SAMPLE_WAIT = 1,
+   I915_SAMPLE_SEMA = 2
+};
+
+#define I915_PMU_SAMPLE_BITS (4)
+#define I915_PMU_SAMPLE_MASK (0xf)
+#define I915_PMU_SAMPLE_INSTANCE_BITS (8)
+#define I915_PMU_CLASS_SHIFT \
+   (I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS)
+
+#define __I915_PMU_ENGINE(class, instance, sample) \
+   ((class) << I915_PMU_CLASS_SHIFT | \
+   (instance) << I915_PMU_SAMPLE_BITS | \
+   (sample))
+
+#define I915_PMU_ENGINE_BUSY(class, instance) \
+   __I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY)
+
+#define I915_PMU_ENGINE_WAIT(class, instance) \
+   __I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT)
+
+#define I915_PMU_ENGINE_SEMA(class, instance) \
+   __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
+
+#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
+
+#define I915_PMU_ACTUAL_FREQUENCY  __I915_PMU_OTHER(0)
+#define I915_PMU_REQUESTED_FREQUENCY   __I915_PMU_OTHER(1)
+#define I915_PMU_INTERRUPTS__I915_PMU_OTHER(2)
+#define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3)
+
+#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY
+
 /* Each region is a minimum of 16k, and there are at most 255 of them.
  */
 #define I915_NR_TEX_REGIONS 255/* table size 2k - maximum due to use
@@ -278,6 +318,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_PERF_OPEN 0x36
 #define DRM_I915_PERF_ADD_CONFIG   0x37
 #define DRM_I915_PERF_REMOVE_CONFIG0x38
+#define DRM_I915_QUERY 0x39
 
 #define DRM_IOCTL_I915_INITDRM_IOW( DRM_COMMAND_BASE + 
DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH   DRM_IO ( DRM_COMMAND_BASE + 
DRM_I915_FLUSH)
@@ -335,6 +376,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_PERF_OPEN   DRM_IOW(DRM_COMMAND_BASE + 
DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param)
 #define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + 
DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
 #define DRM_IOCTL_I915_PERF_REMOVE_CONFIG  DRM_IOW(DRM_COMMAND_BASE + 
DRM_I915_PERF_REMOVE_CONFIG, __u64)
+#define DRM_IOCTL_I915_QUERY   DRM_IOWR(DRM_COMMAND_BASE + 
DRM_I915_QUERY, struct drm_i915_query)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -1318,7 +1360,9 @@ struct drm_intel_overlay_attrs {
  * active on a given plane.
  */
 
-#define I915_SET_COLORKEY_NONE (1<<0) /* disable color key matching */
+#define I915_SET_COLORKEY_NONE (1<<0) /* Deprecated. Instead set
+   * flags==0 to disable 
colorkeying.
+   */
 #define I915_SET_COLORKEY_DESTINATION  (1<<1)
 #define I915_SET_COLORKEY_SOURCE   (1<<2)
 struct drm_intel_sprite_colorkey {
@@ -1573,6 +1617,106 @@ struct drm_i915_perf_oa_config {
__u64 flex_regs_ptr;
 };
 
+struct drm_i915_query_item {
+   __u64 query_id;
+#define DRM_I915_QUERY_TOPOLOGY_INFO1
+
+   /*
+* When set to zero by userspace, this is filled with the size of the
+* data to be written at the data_ptr pointer. The kernel set this
+* value to a negative value to signal an error on a particular query
+* item.
+*/
+   __s32 length;
+
+   /*
+* Unused for now.
+*/
+   __u32 flags;
+
+   /*
+* Data will be written at the location pointed by data_ptr when the
+* value of length matches the length of the data to be written by the
+* kernel.
+*/
+   __u64 data_ptr;
+};
+
+struct drm_i915_query {
+   __u32 num_items;
+
+   /*
+* Unused for now.
+*/
+   __u32 flags;
+
+   /*
+* This point to an array of num_items drm_i915_query_item structures.
+*/
+   __u64 items_ptr;
+};
+
+/*
+ * Data written by the kernel with query DRM_I915_QUERY_TOPOLOGY_INFO :
+ *
+ * data: contains the 3 pieces of information :
+ *
+ * - the slice mask with one bit per slice telling whether a slice is
+ *   available. The availability of slice X can be queried with the following
+ *   formula :
+ *
+ *   (data[X / 8] >> (X % 8)) & 1
+ *
+ * - the subslice mask for each slice with one bit per subslice tell

[Mesa-dev] [PATCH v2 1/8] intel: devinfo: store number of EUs per subslice

2018-03-14 Thread Lionel Landwerlin

This will be reused to store values reported by the kernel. The main
use case will be for use as the input values of the metric sets
equations for the INTEL_performance_queries extension. By storing this
information in the gen_device_info we make this non GL specific so
this can be reused by Vulkan if we ever have an equivalent extension.

Signed-off-by: Lionel Landwerlin 
---
 src/intel/dev/gen_device_info.c | 35 +--
 src/intel/dev/gen_device_info.h |  5 +
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/src/intel/dev/gen_device_info.c b/src/intel/dev/gen_device_info.c
index 1773009d33c..26c2651f0ff 100644
--- a/src/intel/dev/gen_device_info.c
+++ b/src/intel/dev/gen_device_info.c
@@ -92,6 +92,7 @@ static const struct gen_device_info gen_device_info_i965 = {
.has_negative_rhw_bug = true,
.num_slices = 1,
.num_subslices = { 1, },
+   .num_eu_per_subslice = 8,
.num_thread_per_eu = 4,
.max_vs_threads = 16,
.max_gs_threads = 2,
@@ -110,6 +111,7 @@ static const struct gen_device_info gen_device_info_g4x = {
.is_g4x = true,
.num_slices = 1,
.num_subslices = { 1, },
+   .num_eu_per_subslice = 10,
.num_thread_per_eu = 5,
.max_vs_threads = 32,
.max_gs_threads = 2,
@@ -127,6 +129,7 @@ static const struct gen_device_info gen_device_info_ilk = {
.has_surface_tile_offset = true,
.num_slices = 1,
.num_subslices = { 1, },
+   .num_eu_per_subslice = 12,
.num_thread_per_eu = 6,
.max_vs_threads = 72,
.max_gs_threads = 32,
@@ -147,6 +150,7 @@ static const struct gen_device_info gen_device_info_snb_gt1 
= {
.needs_unlit_centroid_workaround = true,
.num_slices = 1,
.num_subslices = { 1, },
+   .num_eu_per_subslice = 6,
.num_thread_per_eu = 6, /* Not confirmed */
.max_vs_threads = 24,
.max_gs_threads = 21, /* conservative; 24 if rendering disabled. */
@@ -174,6 +178,7 @@ static const struct gen_device_info gen_device_info_snb_gt2 
= {
.needs_unlit_centroid_workaround = true,
.num_slices = 1,
.num_subslices = { 1, },
+   .num_eu_per_subslice = 12,
.num_thread_per_eu = 6, /* Not confirmed */
.max_vs_threads = 60,
.max_gs_threads = 60,
@@ -205,6 +210,7 @@ static const struct gen_device_info gen_device_info_ivb_gt1 
= {
GEN7_FEATURES, .is_ivybridge = true, .gt = 1,
.num_slices = 1,
.num_subslices = { 1, },
+   .num_eu_per_subslice = 6,
.num_thread_per_eu = 6,
.l3_banks = 2,
.max_vs_threads = 36,
@@ -232,6 +238,7 @@ static const struct gen_device_info gen_device_info_ivb_gt2 
= {
GEN7_FEATURES, .is_ivybridge = true, .gt = 2,
.num_slices = 1,
.num_subslices = { 1, },
+   .num_eu_per_subslice = 12,
.num_thread_per_eu = 8, /* Not sure why this isn't a multiple of
 * @max_wm_threads ... */
.l3_banks = 4,
@@ -260,6 +267,7 @@ static const struct gen_device_info gen_device_info_byt = {
GEN7_FEATURES, .is_baytrail = true, .gt = 1,
.num_slices = 1,
.num_subslices = { 1, },
+   .num_eu_per_subslice = 4,
.num_thread_per_eu = 8,
.l3_banks = 1,
.has_llc = false,
@@ -294,6 +302,7 @@ static const struct gen_device_info gen_device_info_hsw_gt1 
= {
HSW_FEATURES, .gt = 1,
.num_slices = 1,
.num_subslices = { 1, },
+   .num_eu_per_subslice = 10,
.num_thread_per_eu = 7,
.l3_banks = 2,
.max_vs_threads = 70,
@@ -321,6 +330,7 @@ static const struct gen_device_info gen_device_info_hsw_gt2 
= {
HSW_FEATURES, .gt = 2,
.num_slices = 1,
.num_subslices = { 2, },
+   .num_eu_per_subslice = 10,
.num_thread_per_eu = 7,
.l3_banks = 4,
.max_vs_threads = 280,
@@ -348,6 +358,7 @@ static const struct gen_device_info gen_device_info_hsw_gt3 
= {
HSW_FEATURES, .gt = 3,
.num_slices = 2,
.num_subslices = { 2, },
+   .num_eu_per_subslice = 10,
.num_thread_per_eu = 7,
.l3_banks = 8,
.max_vs_threads = 280,
@@ -398,6 +409,7 @@ static const struct gen_device_info gen_device_info_bdw_gt1 
= {
.is_broadwell = true,
.num_slices = 1,
.num_subslices = { 2, },
+   .num_eu_per_subslice = 8,
.num_thread_per_eu = 7,
.l3_banks = 2,
.max_cs_threads = 42,
@@ -421,6 +433,7 @@ static const struct gen_device_info gen_device_info_bdw_gt2 
= {
.is_broadwell = true,
.num_slices = 1,
.num_subslices = { 3, },
+   .num_eu_per_subslice = 8,
.num_thread_per_eu = 7,
.l3_banks = 4,
.max_cs_threads = 56,
@@ -444,6 +457,7 @@ static const struct gen_device_info gen_device_info_bdw_gt3 
= {
.is_broadwell = true,
.num_slices = 2,
.num_subslices = { 3, 3, },
+   .num_eu_per_subslice = 8,
.num_thread_per_eu = 7,
.l3_banks = 8,
.max_cs_threads = 56,
@@ -468,6 +482,7 @@ static const struct gen_device_info gen_device_info_chv = {
.has_integer_dword_mul = false,
.num_slices = 1,
.num_subslices = { 2, },
+   .num_eu_per_subslice = 8,
.num_thread_per_eu = 7,
.l3_banks = 2,
.max_vs_threa

[Mesa-dev] [PATCH v2 7/8] i965: perf: add support for new equation operators

2018-03-14 Thread Lionel Landwerlin

Some equations of the CNL metrics started to use operators we haven't
defined yet, just add those.

Signed-off-by: Lionel Landwerlin 
---
 src/mesa/drivers/dri/i965/brw_oa.py | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_oa.py 
b/src/mesa/drivers/dri/i965/brw_oa.py
index 7931c825f08..06995a6bf40 100644
--- a/src/mesa/drivers/dri/i965/brw_oa.py
+++ b/src/mesa/drivers/dri/i965/brw_oa.py
@@ -125,6 +125,18 @@ def emit_umin(tmp_id, args):
 c("uint64_t tmp{0} = MIN({1}, {2});".format(tmp_id, args[1], args[0]))
 return tmp_id + 1
 
+def emit_lshft(tmp_id, args):
+c("uint64_t tmp{0} = {1} << {2};".format(tmp_id, args[1], args[0]))
+return tmp_id + 1
+
+def emit_rshft(tmp_id, args):
+c("uint64_t tmp{0} = {1} >> {2};".format(tmp_id, args[1], args[0]))
+return tmp_id + 1
+
+def emit_and(tmp_id, args):
+c("uint64_t tmp{0} = {1} & {2};".format(tmp_id, args[1], args[0]))
+return tmp_id + 1
+
 ops = {}
 # (n operands, emitter)
 ops["FADD"] = (2, emit_fadd)
@@ -138,6 +150,9 @@ ops["UDIV"] = (2, emit_udiv)
 ops["UMUL"] = (2, emit_umul)
 ops["USUB"] = (2, emit_usub)
 ops["UMIN"] = (2, emit_umin)
+ops["<<"]   = (2, emit_lshft)
+ops[">>"]   = (2, emit_rshft)
+ops["AND"]  = (2, emit_and)
 
 def brkt(subexp):
 if " " in subexp:
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 2/8] intel: devinfo: store slice/subslice/eu masks

2018-03-14 Thread Lionel Landwerlin

We want to store values coming from the kernel but as a first step, we
can generate mask values out of the numbers already stored in the
gen_device_info masks.

Signed-off-by: Lionel Landwerlin 
---
 src/intel/dev/gen_device_info.c | 43 +
 src/intel/dev/gen_device_info.h | 39 -
 2 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/src/intel/dev/gen_device_info.c b/src/intel/dev/gen_device_info.c
index 26c2651f0ff..c1bdc997f2c 100644
--- a/src/intel/dev/gen_device_info.c
+++ b/src/intel/dev/gen_device_info.c
@@ -872,6 +872,47 @@ static const struct gen_device_info 
gen_device_info_icl_1x8 = {
GEN11_FEATURES(1, 1, subslices(1), 6),
 };
 
+/* Generate slice/subslice/eu masks from number of
+ * slices/subslices/eu_per_subslices in the per generation/gt gen_device_info
+ * structure.
+ *
+ * These can be overridden with values reported by the kernel either from
+ * getparam SLICE_MASK/SUBSLICE_MASK values or from the kernel version 4.17+
+ * through the i915 query uapi.
+ */
+static void
+fill_masks(struct gen_device_info *devinfo)
+{
+   devinfo->slice_masks = (1UL << devinfo->num_slices) - 1;
+
+   /* Subslice masks */
+   unsigned max_subslices = 0;
+   for (int s = 0; s < devinfo->num_slices; s++)
+  max_subslices = MAX2(devinfo->num_subslices[s], max_subslices);
+   devinfo->subslice_slice_stride = DIV_ROUND_UP(max_subslices, 8);
+
+   for (int s = 0; s < devinfo->num_slices; s++) {
+  devinfo->subslice_masks[s * devinfo->subslice_slice_stride] =
+ (1UL << devinfo->num_subslices[s]) - 1;
+   }
+
+   /* EU masks */
+   devinfo->eu_subslice_stride = DIV_ROUND_UP(devinfo->num_eu_per_subslice, 8);
+   devinfo->eu_slice_stride = max_subslices * devinfo->eu_subslice_stride;
+
+   for (int s = 0; s < devinfo->num_slices; s++) {
+  for (int ss = 0; ss < devinfo->num_subslices[s]; ss++) {
+ for (int b_eu = 0; b_eu < devinfo->eu_subslice_stride; b_eu++) {
+int subslice_offset =
+   s * devinfo->eu_slice_stride + ss * devinfo->eu_subslice_stride;
+
+devinfo->eu_masks[subslice_offset + b_eu] =
+   (((1UL << devinfo->num_eu_per_subslice) - 1) >> (b_eu * 8)) & 
0xff;
+ }
+  }
+   }
+}
+
 bool
 gen_get_device_info(int devid, struct gen_device_info *devinfo)
 {
@@ -885,6 +926,8 @@ gen_get_device_info(int devid, struct gen_device_info 
*devinfo)
   return false;
}
 
+   fill_masks(devinfo);
+
/* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer:
 *
 * "Scratch Space per slice is computed based on 4 sub-slices.  SW must
diff --git a/src/intel/dev/gen_device_info.h b/src/intel/dev/gen_device_info.h
index 17285ffed88..793ce094850 100644
--- a/src/intel/dev/gen_device_info.h
+++ b/src/intel/dev/gen_device_info.h
@@ -28,10 +28,16 @@
 #include 
 #include 
 
+#include "util/macros.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+#define GEN_DEVICE_MAX_SLICES   (6)  /* Maximum on gen10 */
+#define GEN_DEVICE_MAX_SUBSLICES(8)  /* Maximum on gen11 */
+#define GEN_DEVICE_MAX_EUS_PER_SUBSLICE (10) /* Maximum on Haswell */
+
 /**
  * Intel hardware information and quirks
  */
@@ -112,7 +118,7 @@ struct gen_device_info
/**
 * Number of subslices for each slice (used to be uniform until CNL).
 */
-   unsigned num_subslices[3];
+   unsigned num_subslices[GEN_DEVICE_MAX_SUBSLICES];
 
/**
 * Number of EU per subslice.
@@ -124,6 +130,37 @@ struct gen_device_info
 */
unsigned num_thread_per_eu;
 
+   /**
+* A bit mask of the slices available.
+*/
+   uint8_t slice_masks;
+
+   /**
+* An array of bit mask of the subslices available, use 
subslice_slice_stride
+* to access this array.
+*/
+   uint8_t subslice_masks[GEN_DEVICE_MAX_SLICES *
+  DIV_ROUND_UP(GEN_DEVICE_MAX_SUBSLICES, 8)];
+
+   /**
+* An array of bit mask of EUs available, use eu_slice_stride &
+* eu_subslice_stride to access this array.
+*/
+   uint8_t eu_masks[GEN_DEVICE_MAX_SLICES *
+GEN_DEVICE_MAX_SUBSLICES *
+DIV_ROUND_UP(GEN_DEVICE_MAX_EUS_PER_SUBSLICE, 8)];
+
+   /**
+* Stride to access subslice_masks[].
+*/
+   uint16_t subslice_slice_stride;
+
+   /**
+* Strides to access eu_masks[].
+*/
+   uint16_t eu_slice_stride;
+   uint16_t eu_subslice_stride;
+
unsigned l3_banks;
unsigned max_vs_threads;   /**< Maximum Vertex Shader threads */
unsigned max_tcs_threads;  /**< Maximum Hull Shader threads */
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 0/8] i965: add support for performance queries on CNL

2018-03-14 Thread Lionel Landwerlin

Hi all,

Here is a v2 to enable perf queries on CNL.

It moves some of the data stored in the brw_context into
gen_device_info. This makes it reusable by other APIs (Vulkan) if they
develop perf query capabilities in the future.

One of the patches is quite big; you can look at this series on my
GitHub:

   https://github.com/djdeath/mesa/tree/wip/djdeath/query-topology

Thanks,


Lionel Landwerlin (8):
  intel: devinfo: store number of EUs per subslice
  intel: devinfo: store slice/subslice/eu masks
  drm-uapi: bump headers
  intel: devinfo: meson: include drm uapi
  intel: devinfo: add helper functions to fill fusing masks values
  i965: perf: query topology
  i965: perf: add support for new equation operators
  i965: add performance query support on CNL

 include/drm-uapi/i915_drm.h   |   146 +-
 src/intel/dev/gen_device_info.c   |   207 +-
 src/intel/dev/gen_device_info.h   |55 +-
 src/intel/dev/meson.build | 2 +-
 src/mesa/drivers/dri/i965/Makefile.am | 1 +
 src/mesa/drivers/dri/i965/Makefile.sources| 4 +-
 src/mesa/drivers/dri/i965/brw_oa.py   |15 +
 src/mesa/drivers/dri/i965/brw_oa_cnl.xml  | 10410 
 src/mesa/drivers/dri/i965/brw_performance_query.c |   185 +-
 src/mesa/drivers/dri/i965/meson.build | 2 +-
 10 files changed, 10949 insertions(+), 78 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/brw_oa_cnl.xml

--
2.16.2
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 6/8] i965: perf: query topology

2018-03-14 Thread Lionel Landwerlin

With the introduction of asymmetric slices in CNL, we cannot rely on
the previous SUBSLICE_MASK getparam to tell userspace what subslices
are available.

We introduce a new uAPI in the kernel driver to report exactly which
parts of the GPU are fused and require this to be available on Gen10+.

Prior generations can continue to rely on GETPARAM on older kernels.

This patch is quite a lot of code because we have to support lots of
different kernel versions, ranging from not providing any information
(for Haswell on 4.13 through 4.17), to being able to query through
GETPARAM (for gen8/9 on 4.13 through 4.17), to finally requiring 4.17
for Gen10+.

This change stores topology information in a unified way on
brw_context.topology from the various kernel APIs. And then generates
the appropriate values for the equations from that unified topology.

v2: Move slice/subslice masks fields to gen_device_info (Rafael)

Signed-off-by: Lionel Landwerlin 
Acked-by: Rafael Antognolli 
---
 src/mesa/drivers/dri/i965/brw_performance_query.c | 182 +-
 1 file changed, 111 insertions(+), 71 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index 13eff31ee61..3b52db6e74e 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -1921,6 +1921,101 @@ init_oa_configs(struct brw_context *brw)
}
 }
 
+static bool
+query_topology(struct brw_context *brw)
+{
+   __DRIscreen *screen = brw->screen->driScrnPriv;
+   struct drm_i915_query_item item = {
+  .query_id = DRM_I915_QUERY_TOPOLOGY_INFO,
+   };
+   struct drm_i915_query query = {
+  .num_items = 1,
+  .items_ptr = (uintptr_t) &item,
+   };
+
+   if (drmIoctl(screen->fd, DRM_IOCTL_I915_QUERY, &query))
+  return false;
+
+   struct drm_i915_query_topology_info *topo_info =
+  (struct drm_i915_query_topology_info *) calloc(1, item.length);
+   item.data_ptr = (uintptr_t) topo_info;
+
+   if (drmIoctl(screen->fd, DRM_IOCTL_I915_QUERY, &query) ||
+   item.length <= 0)
+  return false;
+
+   gen_device_info_update_from_topology(&brw->screen->devinfo,
+topo_info);
+
+   free(topo_info);
+
+   return true;
+}
+
+static bool
+getparam_topology(struct brw_context *brw)
+{
+   __DRIscreen *screen = brw->screen->driScrnPriv;
+   drm_i915_getparam_t gp;
+   int ret;
+
+   int slice_mask = 0;
+   gp.param = I915_PARAM_SLICE_MASK;
+   gp.value = &slice_mask;
+   ret = drmIoctl(screen->fd, DRM_IOCTL_I915_GETPARAM, &gp);
+   if (ret)
+  return false;
+
+   int subslice_mask = 0;
+   gp.param = I915_PARAM_SUBSLICE_MASK;
+   gp.value = &subslice_mask;
+   ret = drmIoctl(screen->fd, DRM_IOCTL_I915_GETPARAM, &gp);
+   if (ret)
+  return false;
+
+   gen_device_info_update_from_masks(&brw->screen->devinfo,
+ slice_mask,
+ subslice_mask,
+ brw->screen->eu_total);
+
+   return true;
+}
+
+static void
+compute_topology_builtins(struct brw_context *brw)
+{
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+
+   brw->perfquery.sys_vars.slice_mask = devinfo->slice_masks;
+   brw->perfquery.sys_vars.n_eu_slices = devinfo->num_slices;
+
+   for (int i = 0; i < sizeof(devinfo->subslice_masks[i]); i++) {
+  brw->perfquery.sys_vars.n_eu_sub_slices +=
+ __builtin_popcount(devinfo->subslice_masks[i]);
+   }
+
+   for (int i = 0; i < sizeof(devinfo->eu_masks); i++)
+  brw->perfquery.sys_vars.n_eus += 
__builtin_popcount(devinfo->eu_masks[i]);
+
+   brw->perfquery.sys_vars.eu_threads_count =
+  brw->perfquery.sys_vars.n_eus * devinfo->num_thread_per_eu;
+
+   /* At the moment the subslice mask builtin has groups of 3bits for each
+* slice.
+*
+* Ideally equations would be updated to have a slice/subslice query
+* function/operator.
+*/
+   brw->perfquery.sys_vars.subslice_mask = 0;
+   for (int s = 0; s < util_last_bit(devinfo->slice_masks); s++) {
+  for (int ss = 0; ss < (devinfo->subslice_slice_stride * 8); ss++) {
+ if (devinfo->subslice_masks[s * devinfo->subslice_slice_stride +
+ ss / 8] & (1UL << (ss % 8)))
+brw->perfquery.sys_vars.subslice_mask |= 1UL << (s * 3 + ss);
+  }
+   }
+}
+
 static bool
 init_oa_sys_vars(struct brw_context *brw)
 {
@@ -1934,83 +2029,28 @@ init_oa_sys_vars(struct brw_context *brw)
if (!read_sysfs_drm_device_file_uint64(brw,  "gt_max_freq_mhz", 
&max_freq_mhz))
   return false;
 
-   brw->perfquery.sys_vars.gt_min_freq = min_freq_mhz * 100;
-   brw->perfquery.sys_vars.gt_max_freq = max_freq_mhz * 100;
-   brw->perfquery.sys_vars.timestamp_frequency = devinfo->timestamp_frequency;
-
-   brw->perfquery.sys_vars.revision = intel_device_get_revision(screen->fd);
-   brw->perfquery.sys_vars.n_eu_slices = devinfo

Re: [Mesa-dev] [PATCH v3] i965/miptree: Use cpu tiling/detiling when mapping

2018-03-14 Thread Chris Wilson

Quoting Nanley Chery (2018-03-14 17:14:15)
> On Mon, Mar 12, 2018 at 10:52:55AM -0700, Scott D Phillips wrote:
> > Rename the (un)map_gtt functions to (un)map_map (map by
> > returning a map) and add new functions (un)map_tiled_memcpy that
> > return a shadow buffer populated with the intel_tiled_memcpy
> > functions.
> >
> > Tiling/detiling with the cpu will be the only way to handle Yf/Ys
> > tiling, when support is added for those formats.
> >
> > v2: Compute extents properly in the x|y-rounded-down case (Chris Wilson)
> >
> > v3: Add units to parameter names of tile_extents (Nanley Chery)
> > Use _mesa_align_malloc for the shadow copy (Nanley)
> > Continue using gtt maps on gen4 (Nanley)
> > ---
> >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 94 
> > ---
> >  1 file changed, 86 insertions(+), 8 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > index c6213b21629..fba17bf5b7b 100644
> > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > @@ -31,6 +31,7 @@
> >  #include "intel_image.h"
> >  #include "intel_mipmap_tree.h"
> >  #include "intel_tex.h"
> > +#include "intel_tiled_memcpy.h"
> >  #include "intel_blit.h"
> >  #include "intel_fbo.h"
> >  
> > @@ -3046,10 +3047,10 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree 
> > *mt)
> >  }
> >  
> >  static void
> > -intel_miptree_map_gtt(struct brw_context *brw,
> > -  struct intel_mipmap_tree *mt,
> > -  struct intel_miptree_map *map,
> > -  unsigned int level, unsigned int slice)
> > +intel_miptree_map_map(struct brw_context *brw,
> > +  struct intel_mipmap_tree *mt,
> > +  struct intel_miptree_map *map,
> > +  unsigned int level, unsigned int slice)
> >  {
> > unsigned int bw, bh;
> > void *base;
> > @@ -3093,11 +3094,81 @@ intel_miptree_map_gtt(struct brw_context *brw,
> >  }
> >  
> >  static void
> > -intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
> > +intel_miptree_unmap_map(struct intel_mipmap_tree *mt)
> >  {
> > intel_miptree_unmap_raw(mt);
> >  }
> >  
> > +/* Compute extent parameters for use with tiled_memcpy functions.
> > + * xs are in units of bytes and ys are in units of strides. */
> > +static inline void
> > +tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map,
> > + unsigned int level, unsigned int slice, unsigned int *x1_B,
> > + unsigned int *x2_B, unsigned int *y1_el, unsigned int *y2_el)
> > +{
> > +   unsigned int block_width, block_height;
> > +   unsigned int x0_el, y0_el;
> > +
> > +   _mesa_get_format_block_size(mt->format, &block_width, &block_height);
> > +
> > +   assert(map->x % block_width == 0);
> > +   assert(map->y % block_height == 0);
> > +
> > +   intel_miptree_get_image_offset(mt, level, slice, &x0_el, &y0_el);
> > +   *x1_B = (map->x / block_width + x0_el) * mt->cpp;
> > +   *y1_el = map->y / block_height + y0_el;
> > +   *x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * mt->cpp;
> > +   *y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el;
> > +}
> > +
> > +static void
> > +intel_miptree_map_tiled_memcpy(struct brw_context *brw,
> > +   struct intel_mipmap_tree *mt,
> > +   struct intel_miptree_map *map,
> > +   unsigned int level, unsigned int slice)
> > +{
> > +   unsigned int x1, x2, y1, y2;
> > +   tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2);
> > +   map->stride = _mesa_format_row_stride(mt->format, map->w);
> > +   map->buffer = map->ptr = _mesa_align_malloc(map->stride * (y2 - y1), 
> > 16);
> > +
> > +   assert(map->ptr);
> > +
> > +   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
> 
> It looks like we'll generate extra copies using this function, but only
> in a few corner cases. I think the following places should be using the
> INVALIDATE flag, but aren't:
> * _mesa_store_cleartexsubimage
> * generate_mipmap_uncompressed
> 
> > +  char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
> > +  src += mt->offset;
> > +
> 
> It seems possible that the buffer object had a WC memory type during
> rendering. In that case, we need an sfence here right?
> 
> This stuff is pretty new to me, so perhaps others would like to chime
> in.
> 
> > +  tiled_to_linear(x1, x2, y1, y2, map->ptr, src, map->stride,
> > +  mt->surf.row_pitch, brw->has_swizzling, 
> > mt->surf.tiling,
> > +  memcpy);
> > +
> > +  intel_miptree_unmap_raw(mt);
> > +   }
> > +}
> > +
> > +static void
> > +intel_miptree_unmap_tiled_memcpy(struct brw_context *brw,
> > + struct intel_mipmap_tree *mt,
> > + struct intel_miptree_map *map,
> > + unsigned int lev

Re: [Mesa-dev] [PATCH v3] i965/miptree: Use cpu tiling/detiling when mapping

2018-03-14 Thread Nanley Chery

On Mon, Mar 12, 2018 at 10:52:55AM -0700, Scott D Phillips wrote:
> Rename the (un)map_gtt functions to (un)map_map (map by
> returning a map) and add new functions (un)map_tiled_memcpy that
> return a shadow buffer populated with the intel_tiled_memcpy
> functions.
>
> Tiling/detiling with the cpu will be the only way to handle Yf/Ys
> tiling, when support is added for those formats.
>
> v2: Compute extents properly in the x|y-rounded-down case (Chris Wilson)
>
> v3: Add units to parameter names of tile_extents (Nanley Chery)
> Use _mesa_align_malloc for the shadow copy (Nanley)
> Continue using gtt maps on gen4 (Nanley)
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 94 
> ---
>  1 file changed, 86 insertions(+), 8 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index c6213b21629..fba17bf5b7b 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -31,6 +31,7 @@
>  #include "intel_image.h"
>  #include "intel_mipmap_tree.h"
>  #include "intel_tex.h"
> +#include "intel_tiled_memcpy.h"
>  #include "intel_blit.h"
>  #include "intel_fbo.h"
>  
> @@ -3046,10 +3047,10 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
>  }
>  
>  static void
> -intel_miptree_map_gtt(struct brw_context *brw,
> -  struct intel_mipmap_tree *mt,
> -  struct intel_miptree_map *map,
> -  unsigned int level, unsigned int slice)
> +intel_miptree_map_map(struct brw_context *brw,
> +  struct intel_mipmap_tree *mt,
> +  struct intel_miptree_map *map,
> +  unsigned int level, unsigned int slice)
>  {
> unsigned int bw, bh;
> void *base;
> @@ -3093,11 +3094,81 @@ intel_miptree_map_gtt(struct brw_context *brw,
>  }
>  
>  static void
> -intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
> +intel_miptree_unmap_map(struct intel_mipmap_tree *mt)
>  {
> intel_miptree_unmap_raw(mt);
>  }
>  
> +/* Compute extent parameters for use with tiled_memcpy functions.
> + * xs are in units of bytes and ys are in units of strides. */
> +static inline void
> +tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map,
> + unsigned int level, unsigned int slice, unsigned int *x1_B,
> + unsigned int *x2_B, unsigned int *y1_el, unsigned int *y2_el)
> +{
> +   unsigned int block_width, block_height;
> +   unsigned int x0_el, y0_el;
> +
> +   _mesa_get_format_block_size(mt->format, &block_width, &block_height);
> +
> +   assert(map->x % block_width == 0);
> +   assert(map->y % block_height == 0);
> +
> +   intel_miptree_get_image_offset(mt, level, slice, &x0_el, &y0_el);
> +   *x1_B = (map->x / block_width + x0_el) * mt->cpp;
> +   *y1_el = map->y / block_height + y0_el;
> +   *x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * mt->cpp;
> +   *y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el;
> +}
> +
> +static void
> +intel_miptree_map_tiled_memcpy(struct brw_context *brw,
> +   struct intel_mipmap_tree *mt,
> +   struct intel_miptree_map *map,
> +   unsigned int level, unsigned int slice)
> +{
> +   unsigned int x1, x2, y1, y2;
> +   tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2);
> +   map->stride = _mesa_format_row_stride(mt->format, map->w);
> +   map->buffer = map->ptr = _mesa_align_malloc(map->stride * (y2 - y1), 16);
> +
> +   assert(map->ptr);
> +
> +   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {

It looks like we'll generate extra copies using this function, but only
in a few corner cases. I think the following places should be using the
INVALIDATE flag, but aren't:
* _mesa_store_cleartexsubimage
* generate_mipmap_uncompressed

> +  char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
> +  src += mt->offset;
> +

It seems possible that the buffer object had a WC memory type during
rendering. In that case, we need an sfence here right?

This stuff is pretty new to me, so perhaps others would like to chime
in.

> +  tiled_to_linear(x1, x2, y1, y2, map->ptr, src, map->stride,
> +  mt->surf.row_pitch, brw->has_swizzling, 
> mt->surf.tiling,
> +  memcpy);
> +
> +  intel_miptree_unmap_raw(mt);
> +   }
> +}
> +
> +static void
> +intel_miptree_unmap_tiled_memcpy(struct brw_context *brw,
> + struct intel_mipmap_tree *mt,
> + struct intel_miptree_map *map,
> + unsigned int level,
> + unsigned int slice)
> +{
> +   if (map->mode & GL_MAP_WRITE_BIT) {
> +  unsigned int x1, x2, y1, y2;
> +  tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2);
> +
> +  char *dst = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
> +  dst

Re: [Mesa-dev] [PATCH] meson: require amdgpu >= 2.4.91

2018-03-14 Thread Marek Olšák

For the series:

Reviewed-by: Marek Olšák 

You can push the series now if you want to. That would be simplest.

Marek

On Wed, Mar 14, 2018 at 12:10 PM, Dylan Baker  wrote:
> Signed-off-by: Dylan Baker 
>
> ---
>
> Marek, can you either squash this into your other patch or push this along 
> with
> that?
>
>  meson.build | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/meson.build b/meson.build
> index 51b470253f5..e21ac74a1e3 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -1036,7 +1036,7 @@ dep_libdrm_nouveau = []
>  dep_libdrm_etnaviv = []
>  dep_libdrm_freedreno = []
>  if with_amd_vk or with_gallium_radeonsi
> -  dep_libdrm_amdgpu = dependency('libdrm_amdgpu', version : '>= 2.4.90')
> +  dep_libdrm_amdgpu = dependency('libdrm_amdgpu', version : '>= 2.4.91')
>  endif
>  if (with_gallium_radeonsi or with_dri_r100 or with_dri_r200 or
>  with_gallium_r300 or with_gallium_r600)
> --
> 2.16.2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] autotools+dri3: allow building against older xcb (v4)

2018-03-14 Thread Mathias Fröhlich

Hi,

On Wednesday, 14 March 2018 15:51:03 CET Brian Paul wrote:
> FWIW, I'd like to see this sooner rather than later too.  I spent 
> several hours yesterday trying to update our build script to 
> build/install XCB 1.13 on Fedora, Ubuntu, etc. without totally succeeding.

I did just rebuild the 1.13 srpm from koji.fedora... on fedora27.
I have put those I have into my freedesktop home directory:

/home/frohlich/xcb-1.13-rpms

Not that I want to maintain them, but I am sharing what currently
works for me.

IMO not just requiring the most recent version of such a system library would 
be a very good thing!

best

Mathias

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] meson+dri3: allow building against older xcb (v3)

2018-03-14 Thread Dylan Baker

Quoting Dylan Baker (2018-03-14 09:42:36)
> Quoting Rob Clark (2018-03-14 06:04:58)
> > Similar to previous patch, make xcb 1.13 optional.
> > 
> > Signed-off-by: Rob Clark 
> > ---
> >  meson.build | 11 ---
> >  1 file changed, 8 insertions(+), 3 deletions(-)
> > 
> > diff --git a/meson.build b/meson.build
> > index c201644c372..0e2f73e67b6 100644
> > --- a/meson.build
> > +++ b/meson.build
> > @@ -1235,9 +1235,14 @@ if with_platform_x11
> >  dep_xcb_dri2 = dependency('xcb-dri2', version : '>= 1.8')
> >  
> >  if with_dri3
> > -  pre_args += ['-DHAVE_DRI3', '-DHAVE_DRI3_MODIFIERS']
> > -  dep_xcb_dri3 = dependency('xcb-dri3', version : '>= 1.13')
> > -  dep_xcb_present = dependency('xcb-present', version: '>= 1.13')
> > +  pre_args += '-DHAVE_DRI3'
> > +  dep_xcb_dri3 = dependency('xcb-dri3')
> > +  dep_xcb_present = dependency('xcb-present')
> > +  # until xcb-dri3 has been around long enough to make a 
> > hard-dependency:
> > +  if (dep_xcb_dri3.version().version_compare('>= 1.13') and
> > +  dep_xcb_present.version().version_compare('>= 1.13'))
> > +pre_args += '-DHAVE_DRI3_MODIFIERS'
> > +  endif
> >dep_xcb_sync = dependency('xcb-sync')
> >dep_xshmfence = dependency('xshmfence', version : '>= 1.1')
> >  endif
> > -- 
> > 2.14.3
> > 
> 
> I don't care about building against old versions of xcb either, but this is 
> very
> minimally intrusive so I don't have a problem with it,
> 
> Reviewed-by: Dylan Baker 

Oh, just this patch, I'm not qualified to review the first one :)

Dylan


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] meson+dri3: allow building against older xcb (v3)

2018-03-14 Thread Dylan Baker

Quoting Rob Clark (2018-03-14 06:04:58)
> Similar to previous patch, make xcb 1.13 optional.
> 
> Signed-off-by: Rob Clark 
> ---
>  meson.build | 11 ---
>  1 file changed, 8 insertions(+), 3 deletions(-)
> 
> diff --git a/meson.build b/meson.build
> index c201644c372..0e2f73e67b6 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -1235,9 +1235,14 @@ if with_platform_x11
>  dep_xcb_dri2 = dependency('xcb-dri2', version : '>= 1.8')
>  
>  if with_dri3
> -  pre_args += ['-DHAVE_DRI3', '-DHAVE_DRI3_MODIFIERS']
> -  dep_xcb_dri3 = dependency('xcb-dri3', version : '>= 1.13')
> -  dep_xcb_present = dependency('xcb-present', version: '>= 1.13')
> +  pre_args += '-DHAVE_DRI3'
> +  dep_xcb_dri3 = dependency('xcb-dri3')
> +  dep_xcb_present = dependency('xcb-present')
> +  # until xcb-dri3 has been around long enough to make a hard-dependency:
> +  if (dep_xcb_dri3.version().version_compare('>= 1.13') and
> +  dep_xcb_present.version().version_compare('>= 1.13'))
> +pre_args += '-DHAVE_DRI3_MODIFIERS'
> +  endif
>dep_xcb_sync = dependency('xcb-sync')
>dep_xshmfence = dependency('xshmfence', version : '>= 1.1')
>  endif
> -- 
> 2.14.3
> 

I don't care about building against old versions of xcb either, but this is very
minimally intrusive so I don't have a problem with it,

Reviewed-by: Dylan Baker 


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 105510] Unable to build swrAVX@sha/rasterizer_core_threads.cpp

2018-03-14 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=105510

Clayton Craft  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #1 from Clayton Craft  ---
The offending commit was reverted.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] configure.ac: blacklist libdrm 2.4.90

2018-03-14 Thread Emil Velikov

On 14 March 2018 at 01:03, Marek Olšák  wrote:
> From: Marek Olšák 
>
> Cc: 18.0 17.3 17.2 
> ---
>  configure.ac | 7 +++
>  1 file changed, 7 insertions(+)
>
> diff --git a/configure.ac b/configure.ac
> index 621dc32..e29ce68 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -2601,20 +2601,27 @@ if test -n "$with_gallium_drivers"; then
>  radeon_llvm_check $LLVM_REQUIRED_R600 "r600"
>
>  llvm_add_component "asmparser" "r600"
>  llvm_add_component "bitreader" "r600"
>  fi
>  ;;
>  xradeonsi)
>  HAVE_GALLIUM_RADEONSI=yes
>  PKG_CHECK_MODULES([RADEON], [libdrm >= $LIBDRM_RADEON_REQUIRED 
> libdrm_radeon >= $LIBDRM_RADEON_REQUIRED])
>  PKG_CHECK_MODULES([AMDGPU], [libdrm >= $LIBDRM_AMDGPU_REQUIRED 
> libdrm_amdgpu >= $LIBDRM_AMDGPU_REQUIRED])
> +
> +# Blacklist libdrm_amdgpu 2.4.90 because it breaks older radeonsi
> +libdrm_version=`pkg-config libdrm_amdgpu --modversion`
> +if test "x$libdrm_version" = x2.4.90; then
> +AC_MSG_ERROR([radeonsi can't use libdrm 2.4.90 due to a 
> compatibility issue. Use a newer or older version.])
> +fi
> +
Please include a reference next to the check.
Pretty much anything will do - fd.o/other bug report, ML thread,
failing app, other.

With that the series is:
Reviewed-by: Emil Velikov 

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 105396] tc compatible htile sets depth of htiles of discarded fragments to 1.0

2018-03-14 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=105396

--- Comment #2 from James Legg  ---
https://patchwork.freedesktop.org/patch/208935/ fixes it for me on my RX 480,
but I haven't had any reviews on that patch yet and I'm not sure if I'm heading
in the right direction. It would also be good to test this on other GPUs
including Vega.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] meson: radeonsi cannot be built with drm 2.4.89

2018-03-14 Thread Dylan Baker

Signed-off-by: Dylan Baker 

Cc: 18.0 17.3 17.2 
Cc: Emil Velikov 
---

Emil, I don't know what the appropriate thing to do is here, this is the meson
equivalent of Marek's first patch, but this doesn't make any sense on master.

 meson.build | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/meson.build b/meson.build
index 7ecd1fd67de..cb43c82c908 100644
--- a/meson.build
+++ b/meson.build
@@ -979,7 +979,9 @@ dep_libdrm_nouveau = []
 dep_libdrm_etnaviv = []
 dep_libdrm_freedreno = []
 if with_amd_vk or with_gallium_radeonsi
-  dep_libdrm_amdgpu = dependency('libdrm_amdgpu', version : '>= 2.4.89')
+  dep_libdrm_amdgpu = dependency(
+'libdrm_amdgpu', version : ['>= 2.4.89', '!= 2.4.90']
+  )
 endif
 if (with_gallium_radeonsi or with_dri_r100 or with_dri_r200 or
 with_gallium_r300 or with_gallium_r600)
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] meson: require amdgpu >= 2.4.91

2018-03-14 Thread Dylan Baker

Signed-off-by: Dylan Baker 

---

Marek, can you either squash this into your other patch or push this along with
that?

 meson.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/meson.build b/meson.build
index 51b470253f5..e21ac74a1e3 100644
--- a/meson.build
+++ b/meson.build
@@ -1036,7 +1036,7 @@ dep_libdrm_nouveau = []
 dep_libdrm_etnaviv = []
 dep_libdrm_freedreno = []
 if with_amd_vk or with_gallium_radeonsi
-  dep_libdrm_amdgpu = dependency('libdrm_amdgpu', version : '>= 2.4.90')
+  dep_libdrm_amdgpu = dependency('libdrm_amdgpu', version : '>= 2.4.91')
 endif
 if (with_gallium_radeonsi or with_dri_r100 or with_dri_r200 or
 with_gallium_r300 or with_gallium_r600)
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] autotools+dri3: allow building against older xcb (v4)

2018-03-14 Thread Emil Velikov

On 14 March 2018 at 14:51, Brian Paul  wrote:
> On 03/14/2018 08:40 AM, Rob Clark wrote:
>>
>> On Wed, Mar 14, 2018 at 10:29 AM, Emil Velikov 
>> wrote:
>>>
>>> On 14 March 2018 at 13:04, Rob Clark  wrote:

 From: Dave Airlie 

 I'm not sure everyone wants to be updating their dri3 in a forced
 march setting, this allows a nicer approach, esp when you want
 to build on distro that aren't brand new.

 I'm sure there are plenty of ways this patch could be cleaner,
 and I've also not built it against an updated dri3.

 For meson I've just left it alone, since if you are using meson
 you probably don't mind xcb updates, and if you are using meson
 you can fix this better than me.

 v3: just don't put a version in for dri3/present without
 modifiers, should allow building with 1.11 as well
 v4: small fix to meson build

 (feel free to supply meson followups)

>>> IIRC Matt seemed also on board with making the new xcb a hard
>>> requirement.
>>>
>>> I don't know the exact usecase he was thinking, yet gut feeling
>>> suggests that my earlier suggestion [1] should work for everyone -
>>> Dave, Matt, Marek, etc.
>>> If the concern is writing the code - I can help ;-)
>>>
>>
>> I care *significantly* less about the build against 1.12, run against
>> 1.13 case than I do about getting back to the point where I don't have
>> to carry around these patches to build mesa (otherwise it is just a
>> matter of time before I accidentally push them just because I needed
>> them to test whatever it was that I was intending to push ;-)
>>
>> If someone wants to build on top of this and make something more
>> fancy, by all means.  But I really would like to push something that
>> removes the 1.13 dependency like today(ish), whether that be this
>> patchset or reverting the patches that added the 1.13 dependency and
>> trying again later.
>
>
> FWIW, I'd like to see this sooner rather than later too.  I spent several
> hours yesterday trying to update our build script to build/install XCB 1.13
> on Fedora, Ubuntu, etc. without totally succeeding.
>
I could have saved you some time, if the script is available somewhere ;-)

FTR the oibaf repo has the Ubuntu bits.. Up-to-date distros like
Gentoo, Arch are fine as well - doubt they are your target audience,
though.

-Emil

[1] https://launchpad.net/~oibaf/+archive/ubuntu/graphics-drivers
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 105510] Unable to build swrAVX@sha/rasterizer_core_threads.cpp

2018-03-14 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=105510

Bug ID: 105510
   Summary: Unable to build swrAVX@sha/rasterizer_core_threads.cpp
   Product: Mesa
   Version: git
  Hardware: Other
OS: All
Status: NEW
  Keywords: regression
  Severity: normal
  Priority: medium
 Component: Drivers/Gallium/swr
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: clayton.a.cr...@intel.com
QA Contact: mesa-dev@lists.freedesktop.org

I have bisected this failure to the following commit:

commit de0d10db93d85de79c7b4451c4851ace2976f8f4
Author: Apple SWE 
Date:   Tue Mar 13 18:24:26 2018 -0700

Add processor topology calculation implementation for Darwin/OSX targets.


Full output from build failure:

23:00:35 FAILED:
src/gallium/drivers/swr/swrAVX@sha/rasterizer_core_threads.cpp.o 
23:00:35 ccache g++  -Isrc/gallium/drivers/swr/swrAVX@sha
-Isrc/gallium/drivers/swr -I../src/gallium/drivers/swr
-Isrc/gallium/drivers/swr/rasterizer -I../src/gallium/drivers/swr/rasterizer
-I../src/gallium/drivers/swr/rasterizer/archrast
-Isrc/gallium/drivers/swr/rasterizer/jitter
-I../src/gallium/drivers/swr/rasterizer/jitter
-Isrc/gallium/drivers/swr/rasterizer/core
-I../src/gallium/drivers/swr/rasterizer/core
-Isrc/gallium/drivers/swr/rasterizer/codegen
-I../src/gallium/drivers/swr/rasterizer/codegen
-Isrc/gallium/drivers/swr/rasterizer/core/backends -I/usr/lib/llvm-4.0/include
-fdiagnostics-color=always -pipe -D_FILE_OFFSET_BITS=64 -Wall -Winvalid-pch
-Wnon-virtual-dtor -std=c++11 -O2 -g '-DVERSION="18.1.0-devel"'
-DPACKAGE_VERSION=VERSION
'-DPACKAGE_BUGREPORT="https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa";'
-DGLX_USE_TLS -DHAVE_ST_VDPAU -DENABLE_ST_OMX_BELLAGIO -DHAVE_X11_PLATFORM
-DGLX_INDIRECT_RENDERING -DGLX_DIRECT_RENDERING -DGLX_USE_DRM
-DHAVE_DRM_PLATFORM -DHAVE_SURFACELESS_PLATFORM -DENABLE_SHADER_CACHE
-DHAVE___BUILTIN_BSWAP32 -DHAVE___BUILTIN_BSWAP64 -DHAVE___BUILTIN_CLZ
-DHAVE___BUILTIN_CLZLL -DHAVE___BUILTIN_CTZ -DHAVE___BUILTIN_EXPECT
-DHAVE___BUILTIN_FFS -DHAVE___BUILTIN_FFSLL -DHAVE___BUILTIN_POPCOUNT
-DHAVE___BUILTIN_POPCOUNTLL -DHAVE___BUILTIN_UNREACHABLE
-DHAVE_FUNC_ATTRIBUTE_CONST -DHAVE_FUNC_ATTRIBUTE_FLATTEN
-DHAVE_FUNC_ATTRIBUTE_MALLOC -DHAVE_FUNC_ATTRIBUTE_PURE
-DHAVE_FUNC_ATTRIBUTE_UNUSED -DHAVE_FUNC_ATTRIBUTE_WARN_UNUSED_RESULT
-DHAVE_FUNC_ATTRIBUTE_WEAK -DHAVE_FUNC_ATTRIBUTE_FORMAT
-DHAVE_FUNC_ATTRIBUTE_PACKED -DHAVE_FUNC_ATTRIBUTE_RETURNS_NONNULL
-DHAVE_FUNC_ATTRIBUTE_VISIBILITY -DHAVE_FUNC_ATTRIBUTE_ALIAS
-DHAVE_FUNC_ATTRIBUTE_NORETURN -DUSE_SSE41 -DUSE_GCC_ATOMIC_BUILTINS
-DUSE_X86_64_ASM -DMAJOR_IN_SYSMACROS -DHAVE_SYS_SYSCTL_H -DHAVE_LINUX_FUTEX_H
-DHAVE_STRTOF -DHAVE_MKOSTEMP -DHAVE_POSIX_MEMALIGN -DHAVE_TIMESPEC_GET
-DHAVE_MEMFD_CREATE -DHAVE_STRTOD_L -DHAVE_DLADDR -DHAVE_DL_ITERATE_PHDR
-DHAVE_LIBDRM -DHAVE_ZLIB -DHAVE_PTHREAD -DHAVE_LLVM=0x0400
-DMESA_LLVM_VERSION_PATCH=1 -DHAVE_WAYLAND_PLATFORM -DWL_HIDE_DEPRECATED
-DHAVE_DRI3 -Wall -fno-math-errno -fno-trapping-math -Wno-non-virtual-dtor
-fPIC -D__STDC_CONSTANT_MACROS -D_GNU_SOURCE -D__STDC_FORMAT_MACROS
-D__STDC_LIMIT_MACROS -pthread -fvisibility=hidden -fno-strict-aliasing -mavx
-DKNOB_ARCH=KNOB_ARCH_AVX -MD -MQ
'src/gallium/drivers/swr/swrAVX@sha/rasterizer_core_threads.cpp.o' -MF
'src/gallium/drivers/swr/swrAVX@sha/rasterizer_core_threads.cpp.o.d' -o
'src/gallium/drivers/swr/swrAVX@sha/rasterizer_core_threads.cpp.o' -c
../src/gallium/drivers/swr/rasterizer/core/threads.cpp
23:00:35 ../src/gallium/drivers/swr/rasterizer/core/threads.cpp:268:18: error:
‘sysctlbyname’ was not declared in this scope
23:00:35  int result = sysctlbyname("hw.packages", &value, &size, NULL, 0);
23:00:35   ^~~~
23:00:35 ../src/gallium/drivers/swr/rasterizer/core/threads.cpp:268:18: note:
suggested alternative: ‘ttyname’
23:00:35  int result = sysctlbyname("hw.packages", &value, &size, NULL, 0);
23:00:35   ^~~~
23:00:35   ttyname
23:00:35 In file included from
../src/gallium/drivers/swr/rasterizer/common/os.h:267:0,
23:00:35  from
../src/gallium/drivers/swr/rasterizer/core/threads.cpp:44:
23:00:35 ../src/gallium/drivers/swr/rasterizer/common/swr_assert.h:65:26:
error: expected unqualified-id before ‘do’
23:00:35  #define _SWR_MACRO_START do {
23:00:35   ^
23:00:35 ../src/gallium/drivers/swr/rasterizer/common/swr_assert.h:131:5: note:
in expansion of macro ‘_SWR_MACRO_START’
23:00:35  _SWR_MACRO_START \
23:00:35  ^~~~
23:00:35 ../src/gallium/drivers/swr/rasterizer/common/swr_assert.h:151:41:
note: in expansion of macro ‘_SWR_ASSERT’
23:00:35  #define SWR_ASSERT(e, ...)  _SWR_ASSERT(true, e,
##__VA_ARGS__)
23:00:35  ^~~
23:00:35 ../src/gallium/drivers/swr/rasterizer/core/threads.cpp:269:5: note: in
expansion of macro ‘SWR_ASSERT’
23:00:35  SWR_

Re: [Mesa-dev] [PATCH 2/2] fixup! dri3: allow building against older xcb (v3)

2018-03-14 Thread Dylan Baker

Quoting Dylan Baker (2018-03-13 19:45:37)
> Wrap it in parens and it can span multiple lines
> 
> On March 13, 2018 5:40:15 PM PDT, Rob Clark  wrote:
> 
> On Tue, Mar 13, 2018 at 7:47 PM, Rob Clark  wrote:
>  On Tue, Mar 13, 2018 at 7:27 PM, Rob Clark  
> wrote:
>  On Tue, Mar 13, 2018 at 7:10 PM, Dylan Baker 
>  wrote:
>  Quoting Rob Clark (2018-03-13 16:04:00)
>  ---
>  I'm a bit unsure about the xcb-present version 
> dependency, as that was
>  added in a different commit.  OTOH I guess Dave is 
> building vulkan with
>  his patch so it is perhaps not a built-time dependency.
> 
>   meson.build | 11 ---
>   1 file changed, 8 insertions(+), 3 deletions(-)
> 
>  diff --git a/meson.build b/meson.build
>  index c201644c372..30f1919e6f5 100644
>  --- a/meson.build
>  +++ b/meson.build
>  @@ -1235,9 +1235,14 @@ if with_platform_x11
>   dep_xcb_dri2 = dependency('xcb-dri2', version : '>= 
> 1.8')
> 
>   if with_dri3
>  -  pre_args += ['-DHAVE_DRI3', 
> '-DHAVE_DRI3_MODIFIERS']
>  -  dep_xcb_dri3 = dependency('xcb-dri3', version : 
> '>= 1.13')
>  -  dep_xcb_present = dependency('xcb-present', 
> version: '>= 1.13')
>  +  pre_args += '-DHAVE_DRI3'
>  +  dep_xcb_dri3 = dependency('xcb-dri3')
>  +  dep_xcb_present = dependency('xcb-present')
>  +  # until xcb-dri3 has been around long enough to 
> make a hard-dependency:
>  +  dep_xcb_dri3_modifiers = dependency('xcb-dri3', 
> version : '>= 1.13', required : false)
>  +  if dep_xcb_dri3_modifiers.found()
> 
>  I think you could simplify this by doing:
> 
>   if dep_xcb_dri3.version().version_compare('>= 1.13')
> 
>  ahh, yeah, and I guess that will get rid of the confusing error 
> msg
>  about xcb-dri3 1.13 not found..
> 
> 
> 
>  Or should we be checking for xcb_dri3 and xcb_present >= 
> 1.13?
> 
>  I'm not entirely sure why we were checking for xcb-present >= 
> 1.13..
>  if that is actually a build time requirement then I think Dave's
>  initial patch needs some more ifdef..  (but otoh, if it was, I 
> guess
>  he would have noticed.)
> 
>  Anyways, I did a build w/ anv+radv enabled with xcb-present == 
> 1.12
>  (and xcb-dri3 1.12).. and 
> 61309c2a727d52d543207d6ae79fcb3e68b5cff3
>  looks like it just cares about >= 1.12 of both of those 
> (although not
>  sure if it is a compile time dependency).
> 
>  So *possibly* for both meson and autotools we should require 
> 1.12, and
>  optionally 1.13 for HAVE_DRI3_MODIFIERS?
> 
> 
>  so mystery solved, Dave #ifdef'd out the present dependencies too ;-)
> 
>  so this is what I end up with:
> 
>  @@ -1235,9 +1235,14 @@ if with_platform_x11
>   dep_xcb_dri2 = dependency('xcb-dri2', version : '>= 1.8')
> 
>   if with_dri3
>  -  pre_args += ['-DHAVE_DRI3', '-DHAVE_DRI3_MODIFIERS']
>  -  dep_xcb_dri3 = dependency('xcb-dri3', version : '>= 1.13')
>  -  dep_xcb_present = dependency('xcb-present', version: '>= 
> 1.13')
>  +  pre_args += '-DHAVE_DRI3'
>  +  dep_xcb_dri3 = dependency('xcb-dri3')
>  +  dep_xcb_present = dependency('xcb-present')
>  +  # until xcb-dri3 has been around long enough to make a 
> hard-dependency:
>  +  if dep_xcb_dri3.version().version_compare('>= 1.13') and
>  + dep_xcb_present.version().version_compare('>= 1.13')

Sorry, I was replying from mobile last night,

  if (dep_xcb_dri3.version().version_compare('>= 1.13') and
  dep_xcb_present.version().version_compare('>= 1.13'))

will work. Meson's recursive descent parser sometimes leaves something to be
desired.

> 
> hmm, annoyingly enough I found that on rawhide (meson 0.45.0), I seem
> to need the entire if statement on a single line, instead of split in
> two like this.
> 
> BR,
> -R
> 
>  +pre_args += '-DHAVE_DRI3_MODIFIERS'
>  +  endif
> dep_xcb_sync = dependency('xcb-sync')
> dep_xshmfence = dependency('xshmfence', version : '>= 1.1')
>   endif
> 


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [RFC] Mesa release improvements - Feature and Stable releases

2018-03-14 Thread Emil Velikov

On 14 March 2018 at 11:20, Andres Gomez  wrote:
> Hi,
>
> On Mon, 2018-03-12 at 18:02 +, Emil Velikov wrote:
>> Hi Andres,
>>
>> On 12 March 2018 at 15:57, Andres Gomez  wrote:
>> >
>
> [...]
>
>> >
>> > 18.1 example:
>> >
>> >1. Create a Metabug for the 18.1 branch point.
>> >2. Announce the Metabug in mesa-dev and give 1 week (?) for developers
>> >   to complete their features. Advice to block the Metabug with other
>> >   feature bugs.
>> >3. Developers create bugs with the WIP features they want to include in
>> >   18.1 and block the Metabug.
>> >4. After 1 week, check the status
>> >* If there are no blockers, close the Metabug and create the 18.1
>> >   branch point.
>> >* If there are blockers; coordinate with the developers of the
>> >   blockers and decide whether to give a bit more of margin if the
>> >   feature is almost complete or just remove the blocking bugs
>> >   leaving the WIP features out, close the Metabug and create the
>> >   18.1 branch point.
>> >5. Release 18.1-0-rc1.
>> >6. Create a Metabug to track the status of the final 18.1.0 release.
>> >7. Block this Metabug with regressions found from 18.1.0-rcX.
>> >8. Once we reach stability, close the Metabug and announce the final
>> >   release of 18.1.0.
>> >
>>
>> I might sound a bit negative, yet I'm not sure what this brings us.
>> Can you please elaborate?
>>
>> The original goal is to have the time based releases, as opposed to
>> feature ones.
>> That was reiterated by developers not too long ago.
>
> Ugh!
>
> I had very similar comments from Juan, so I may have explained myself
> very badly ...
>
Guessing that I might have read more than what was said :-\

>> So far, there has been an announcement email 2-4 weeks before the
>> branch point, aiming to:
>>  - remind, and
>>  - seek feedback about required features
>>
>> The email was also followed by weekly ping/reminder.
>>
>> IIRC suggestions and requests that are made in timely fashion* have
>> always been accepted.
>> If we adopt the above approach, this will:
>>  - lead to noticeable delays in the branch point, which combined with
>>  - the current delays getting the blocking bugs fixed, equals
>>  - even greater delays and less time based releases
>>
>> Furthermore I'm a bit worried that this might have negative impact on
>> developers:
>> I don't know any instances, yet some developers may put extra pressure
>> on themselves trying to get 'too many' features merged. Leading to
>> stress, burn out and others.
>>
>>
>> Perhaps we can somehow utilise your suggestion while ensuring that my
>> grim 'predictions' do not come true?
>
> My suggestion is not to change the paradigm (time based vs feature
> based releases) but rather to have better visibility of how the time
> based feature releases are done.
>
> In other words, I'm not expecting to delay the time of the branchpoint.
> I still believe we can have tiny flexibility for features that are just
> about to land. I also believe this is the current way we are working,
> isn't it?
>
> The proposal only intends to have a central point (a Metabug) in which
> to track the status of the branch point rather than just in several
> mails and in multiple pings which may happen by different ways (mail,
> IRC, ... ?).
>
> And the same for tracking the final release.
>
> WDYT? Is this too complicated or time consuming for the release manager
> at the given time? Do you think it would be useful?
>
Just double-checking:
I would suspect you're not suggesting removing the existing email/poke scheme?

Providing another means to devs to track/handle things is good IMHO.
Whether developers will like it is up to them. Everyone, your input is
appreciated!


I'm slightly worried that it might cause extra confusion.
Some crude examples follow:
 - I don't use bugzilla/etc to track my feature work - most teams
 - Do I open another bug, or list my feature in the metabug - seeming
an ongoing theme with metabugs
 - Do I add the bug, reply to the email or both


-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/6] spirv/radv: add AMD_gcn_shader capability, remove current extensions

2018-03-14 Thread Alejandro Piñeiro

On 14/03/18 16:08, Daniel Schürmann wrote:
>
> On 14.03.2018 16:03, Alejandro Piñeiro wrote:
>> On 14/03/18 15:55, Daniel Schürmann wrote:
>>> Not sure, if I'm asked here :)
>>> As AMD_gcn_shader seems to be the only extension without new
>>> capability,
>>> I am fine with just handling it as if.
>> Well, I was exactly asking this, if everybody involved is fine with
>> this. Bonus points to get a review to this patch.
>>
>>> Additionally, we might want to rename it to gcn_shader to be consistent
>>> (or add the vendor names to all capabilities).
>> Makes sense.
>>
>>> Do you want to introduce one field per capability or have some
>>> capabilities merged (like now)?
>> Which capabilities are merged?
> storage_16bit: SpvCapabilityStorageUniformBufferBlock16,
> SpvCapabilityStorageUniform16, SpvCapabilityStoragePushConstant16,
> SpvCapabilityStorageInputOutput16
> variable_pointers: SpvCapabilityVariablePointersStorageBuffer,
> SpvCapabilityVariablePointers
> subgroup_arithmetic: SpvCapabilityGroupNonUniformArithmetic,
> SpvCapabilityGroupNonUniformClustered
> subgroup_shuffle: SpvCapabilityGroupNonUniformShuffle,
> SpvCapabilityGroupNonUniformShuffleRelative
> tessellation: SpvCapabilityTessellation,
> SpvCapabilityTessellationPointSize

Oh true. Thanks for the detailed list. So now replying to your question:
I think that it would be better to keep capabilities merged. Mostly
because it is working right now, and I don't see any big advantage to
start to split it, unless we want to start to fine-grain spirv_to_nir
support for each capability defined at each extension, and that seems a
little overkill.

>>>
>>> On 11.03.2018 16:25, Alejandro Piñeiro wrote:
 FWIW, this is the patch that Im more interested to get a review. It is
 also the one that probably would need some discussion. Fortunately
 this
 one can be reviewed independently of the rest of the patches, so the
 others can wait a little. Getting this into would make the rebase of
 this series more easy.

 So: ping (please)


 On 08/03/18 16:00, Alejandro Piñeiro wrote:
> So now, during spirv_to_nir, it uses the capability instead of the
> extension. Note that we are really doing here is treating
> SPV_AMD_gcn_shader as other supported extensions. SPV_AMD_gcn_shader
> is not the first SPV extension supported. For example, the capability
> draw_parameters infers if the extension
> SPV_KHR_shader_draw_parameters
> is supported or not.
>
> This could be seen as counter-intuitive, and that it would be easier
> to define which extensions are supported, and based our checks on
> that, but we need to take into account that some capabilities are
> optional from core, and others came from new extensions.
>
> Also this commit would make the implementation of
> ARB_spirv_extensions
> easier.
> ---
>
> Note that I'm aware that this can be somewhat confusing at first. But
> most of the SPV extensions defines a new capability, so it makes
> sense
> to add one, and compute the other based on that. As I mention on a
> different patch on this series, it was easier to compute extensions
> from capabilities, instead of the other way around, because core
> SPIR-V defines optional capabilities without the need of an
> extension.
>
> Having said so, I have read the SPV_AMD_gcn_shader, and it doesn't
> define a new capability (the first one I see that doesn't do
> that), so
> I'm somewhat forcing that here.
>
>
>    src/amd/vulkan/radv_shader.c  | 2 --
>    src/compiler/shader_info.h    | 4 
>    src/compiler/spirv/nir_spirv.h    | 1 -
>    src/compiler/spirv/spirv_to_nir.c | 2 +-
>    4 files changed, 1 insertion(+), 8 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_shader.c
> b/src/amd/vulkan/radv_shader.c
> index 85672e600d7..46017290654 100644
> --- a/src/amd/vulkan/radv_shader.c
> +++ b/src/amd/vulkan/radv_shader.c
> @@ -214,8 +214,6 @@ radv_shader_compile_to_nir(struct radv_device
> *device,
>    .multiview = true,
>    .subgroup_basic = true,
>    .variable_pointers = true,
> -    },
> -    .exts = {
>    .AMD_gcn_shader = true,
>    },
>    };
> diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h
> index b1e200070f7..502b7901370 100644
> --- a/src/compiler/shader_info.h
> +++ b/src/compiler/shader_info.h
> @@ -51,10 +51,6 @@ struct spirv_supported_capabilities {
>   bool subgroup_quad;
>   bool subgroup_shuffle;
>   bool subgroup_vote;
> -};
> -
> -/* The supported extensions which add extended instructions */
> -struct spirv_supported_extensions {
>   bool AMD_gcn_shader;
>    };
>    diff --git a/src/compiler/sp

Re: [Mesa-dev] soft fp64 support - main body (glsl/gallium)

2018-03-14 Thread Elie Tournier

On Tue, Mar 13, 2018 at 04:54:27PM -0700, Matt Turner wrote:
> On Mon, Mar 12, 2018 at 9:24 PM, Dave Airlie  wrote:
> > This is the main code for the soft fp64 work. It's mostly Elie's
> > code with a bunch of changes by me.
> >
> > This patchset has all the glsl lowering code. (using float64.glsl,
> > yes I know checked in files are bad, but not bad enough for anyone
> > to have solved int64.glsl yet, so we have a precedent).
> 
Hi Matt

> Have you thought about making a NIR backend for R600?
> 
> Elie sent patches for lowering fp64 operations in NIR, and it's what
> I'm going to start from when I do the analogous project for some
> future Intel hardware. It's sad to duplicate all of this code, much
> less all of this effort.
Let me know when you start this project, I will be happy to help.

Some (bad) idea here. Can we wire the GLSL IR version for your hardware?
Once the NIR version is finished, we just remove it.

> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] nir/vtn: fix OpConvertXToY

2018-03-14 Thread Jason Ekstrand

Samuel sent out a patch to fix this already which I've reviewed.  Feel free
to merge it.  I gave him comments on the second so maybe best to leave that
one be.

That said, I do sort-of like your macro...

On Wed, Mar 14, 2018 at 8:08 AM, Rob Clark  wrote:

> These opcodes don't care about src/dst type, only src/dst size.
>
> Signed-off-by: Rob Clark 
> ---
>  src/compiler/spirv/vtn_alu.c | 14 ++
>  1 file changed, 10 insertions(+), 4 deletions(-)
>
> diff --git a/src/compiler/spirv/vtn_alu.c b/src/compiler/spirv/vtn_alu.c
> index f0b69b38f83..1ca1f951200 100644
> --- a/src/compiler/spirv/vtn_alu.c
> +++ b/src/compiler/spirv/vtn_alu.c
> @@ -349,11 +349,17 @@ vtn_nir_alu_op_for_spirv_opcode(struct vtn_builder
> *b,
>
> /* Conversions: */
> case SpvOpQuantizeToF16: return nir_op_fquantize2f16;
> +
> +#define nir_type_cast(type, basetype) (nir_alu_type_get_type_size(type)
> | nir_type_ ## basetype)
> +#define typed_conversion_op(src, srctype, dst, dsttype) \
> +  nir_type_conversion_op(nir_type_cast(src, srctype),
> nir_type_cast(dst, dsttype), nir_rounding_mode_undef)
> +
> +   case SpvOpConvertFToU:  return typed_conversion_op(src, float, dst,
> uint);
> +   case SpvOpConvertFToS:  return typed_conversion_op(src, float, dst,
>  int);
> +   case SpvOpConvertSToF:  return typed_conversion_op(src,   int, dst,
> float);
> +   case SpvOpConvertUToF:  return typed_conversion_op(src,  uint, dst,
> float);
> +
> case SpvOpUConvert:
> -   case SpvOpConvertFToU:
> -   case SpvOpConvertFToS:
> -   case SpvOpConvertSToF:
> -   case SpvOpConvertUToF:
> case SpvOpSConvert:
> case SpvOpFConvert:
>return nir_type_conversion_op(src, dst, nir_rounding_mode_undef);
> --
> 2.14.3
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] nir/vtn: fix OpConvertXToY

2018-03-14 Thread Rob Clark

These opcodes don't care about src/dst type, only src/dst size.

Signed-off-by: Rob Clark 
---
 src/compiler/spirv/vtn_alu.c | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/compiler/spirv/vtn_alu.c b/src/compiler/spirv/vtn_alu.c
index f0b69b38f83..1ca1f951200 100644
--- a/src/compiler/spirv/vtn_alu.c
+++ b/src/compiler/spirv/vtn_alu.c
@@ -349,11 +349,17 @@ vtn_nir_alu_op_for_spirv_opcode(struct vtn_builder *b,
 
/* Conversions: */
case SpvOpQuantizeToF16: return nir_op_fquantize2f16;
+
+#define nir_type_cast(type, basetype) (nir_alu_type_get_type_size(type) | 
nir_type_ ## basetype)
+#define typed_conversion_op(src, srctype, dst, dsttype) \
+  nir_type_conversion_op(nir_type_cast(src, srctype), nir_type_cast(dst, 
dsttype), nir_rounding_mode_undef)
+
+   case SpvOpConvertFToU:  return typed_conversion_op(src, float, dst,  uint);
+   case SpvOpConvertFToS:  return typed_conversion_op(src, float, dst,   int);
+   case SpvOpConvertSToF:  return typed_conversion_op(src,   int, dst, float);
+   case SpvOpConvertUToF:  return typed_conversion_op(src,  uint, dst, float);
+
case SpvOpUConvert:
-   case SpvOpConvertFToU:
-   case SpvOpConvertFToS:
-   case SpvOpConvertSToF:
-   case SpvOpConvertUToF:
case SpvOpSConvert:
case SpvOpFConvert:
   return nir_type_conversion_op(src, dst, nir_rounding_mode_undef);
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/6] spirv/radv: add AMD_gcn_shader capability, remove current extensions

2018-03-14 Thread Daniel Schürmann



On 14.03.2018 16:03, Alejandro Piñeiro wrote:

On 14/03/18 15:55, Daniel Schürmann wrote:

Not sure, if I'm asked here :)
As AMD_gcn_shader seems to be the only extension without new capability,
I am fine with just handling it as if.

Well, I was exactly asking this, if everybody involved is fine with
this. Bonus points to get a review to this patch.


Additionally, we might want to rename it to gcn_shader to be consistent
(or add the vendor names to all capabilities).

Makes sense.


Do you want to introduce one field per capability or have some
capabilities merged (like now)?

Which capabilities are merged?
storage_16bit: SpvCapabilityStorageUniformBufferBlock16, 
SpvCapabilityStorageUniform16, SpvCapabilityStoragePushConstant16, 
SpvCapabilityStorageInputOutput16
variable_pointers: SpvCapabilityVariablePointersStorageBuffer, 
SpvCapabilityVariablePointers
subgroup_arithmetic: SpvCapabilityGroupNonUniformArithmetic, 
SpvCapabilityGroupNonUniformClustered
subgroup_shuffle: SpvCapabilityGroupNonUniformShuffle, 
SpvCapabilityGroupNonUniformShuffleRelative

tessellation: SpvCapabilityTessellation, SpvCapabilityTessellationPointSize


On 11.03.2018 16:25, Alejandro Piñeiro wrote:

FWIW, this is the patch that I'm more interested to get a review. It is
also the one that probably would need some discussion. Fortunately this
one can be reviewed independently of the rest of the patches, so the
others can wait a little. Getting this in would make the rebase of
this series easier.

So: ping (please)


On 08/03/18 16:00, Alejandro Piñeiro wrote:

So now, during spirv_to_nir, it uses the capability instead of the
extension. Note that what we are really doing here is treating
SPV_AMD_gcn_shader as other supported extensions. SPV_AMD_gcn_shader
is not the first SPV extension supported. For example, the capability
draw_parameters infers if the extension SPV_KHR_shader_draw_parameters
is supported or not.

This could be seen as counter-intuitive, and that it would be easier
to define which extensions are supported, and based our checks on
that, but we need to take into account that some capabilities are
optional from core, and others came from new extensions.

Also this commit would make the implementation of ARB_spirv_extensions
easier.
---

Note that I'm aware that this can be somewhat confusing at first. But
most of the SPV extensions defines a new capability, so it makes sense
to add one, and compute the other based on that. As I mention on a
different patch on this series, it was easier to compute extensions
from capabilities, instead of the other way around, because core
SPIR-V defines optional capabilities without the need of an extension.

Having said so, I have read the SPV_AMD_gcn_shader, and it doesn't
define a new capability (the first one I see that doesn't do that), so
I'm somewhat forcing that here.


   src/amd/vulkan/radv_shader.c  | 2 --
   src/compiler/shader_info.h    | 4 
   src/compiler/spirv/nir_spirv.h    | 1 -
   src/compiler/spirv/spirv_to_nir.c | 2 +-
   4 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/amd/vulkan/radv_shader.c
b/src/amd/vulkan/radv_shader.c
index 85672e600d7..46017290654 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -214,8 +214,6 @@ radv_shader_compile_to_nir(struct radv_device
*device,
   .multiview = true,
   .subgroup_basic = true,
   .variable_pointers = true,
-    },
-    .exts = {
   .AMD_gcn_shader = true,
   },
   };
diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h
index b1e200070f7..502b7901370 100644
--- a/src/compiler/shader_info.h
+++ b/src/compiler/shader_info.h
@@ -51,10 +51,6 @@ struct spirv_supported_capabilities {
  bool subgroup_quad;
  bool subgroup_shuffle;
  bool subgroup_vote;
-};
-
-/* The supported extensions which add extended instructions */
-struct spirv_supported_extensions {
  bool AMD_gcn_shader;
   };
   diff --git a/src/compiler/spirv/nir_spirv.h
b/src/compiler/spirv/nir_spirv.h
index 87d4120c380..d2766abb7f9 100644
--- a/src/compiler/spirv/nir_spirv.h
+++ b/src/compiler/spirv/nir_spirv.h
@@ -60,7 +60,6 @@ struct spirv_to_nir_options {
  bool lower_workgroup_access_to_offsets;
    struct spirv_supported_capabilities caps;
-   struct spirv_supported_extensions exts;
    struct {
     void (*func)(void *private_data,
diff --git a/src/compiler/spirv/spirv_to_nir.c
b/src/compiler/spirv/spirv_to_nir.c
index 66b87c049bb..6aa4a4d6b6f 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -374,7 +374,7 @@ vtn_handle_extension(struct vtn_builder *b,
SpvOp opcode,
     if (strcmp((const char *)&w[2], "GLSL.std.450") == 0) {
    val->ext_handler = vtn_handle_glsl450_instruction;
     } else if ((strcmp((const char *)&w[2],
"SPV_AMD_gcn_shader") == 0)
-    && (b->options && b->options

Re: [Mesa-dev] [PATCH 2/6] spirv/radv: add AMD_gcn_shader capability, remove current extensions

2018-03-14 Thread Alejandro Piñeiro

On 14/03/18 15:55, Daniel Schürmann wrote:
> Not sure, if I'm asked here :)
> As AMD_gcn_shader seems to be the only extension without new capability,
> I am fine with just handling it as if.

Well, I was exactly asking this, if everybody involved is fine with
this. Bonus points to get a review to this patch.

>
> Additionally, we might want to rename it to gcn_shader to be consistent
> (or add the vendor names to all capabilities).

Makes sense.

>
> Do you want to introduce one field per capability or have some
> capabilities merged (like now)?

Which capabilities are merged?

>
>
> On 11.03.2018 16:25, Alejandro Piñeiro wrote:
>> FWIW, this is the patch I'm most interested in getting reviewed. It is
>> also the one that probably would need some discussion. Fortunately this
>> one can be reviewed independently of the rest of the patches, so the
>> others can wait a little. Getting this in would make the rebase of
>> this series easier.
>>
>> So: ping (please)
>>
>>
>> On 08/03/18 16:00, Alejandro Piñeiro wrote:
>>> So now, during spirv_to_nir, it uses the capability instead of the
>>> extension. Note that what we are really doing here is treating
>>> SPV_AMD_gcn_shader like other supported extensions. SPV_AMD_gcn_shader
>>> is not the first SPV extension supported. For example, the capability
>>> draw_parameters infers if the extension SPV_KHR_shader_draw_parameters
>>> is supported or not.
>>>
>>> This could be seen as counter-intuitive, and it might seem easier
>>> to define which extensions are supported, and base our checks on
>>> that, but we need to take into account that some capabilities are
>>> optional from core, and others come from new extensions.
>>>
>>> Also this commit would make the implementation of ARB_spirv_extensions
>>> easier.
>>> ---
>>>
>>> Note that I'm aware that this can be somewhat confusing at first. But
>>> most of the SPV extensions define a new capability, so it makes sense
>>> to add one, and compute the other based on that. As I mention in a
>>> different patch in this series, it was easier to compute extensions
>>> from capabilities, instead of the other way around, because core
>>> SPIR-V defines optional capabilities without the need of an extension.
>>>
>>> Having said so, I have read the SPV_AMD_gcn_shader, and it doesn't
>>> define a new capability (the first one I see that doesn't do that), so
>>> I'm somewhat forcing that here.
>>>
>>>
>>>   src/amd/vulkan/radv_shader.c  | 2 --
>>>   src/compiler/shader_info.h    | 4 
>>>   src/compiler/spirv/nir_spirv.h    | 1 -
>>>   src/compiler/spirv/spirv_to_nir.c | 2 +-
>>>   4 files changed, 1 insertion(+), 8 deletions(-)
>>>
>>> diff --git a/src/amd/vulkan/radv_shader.c
>>> b/src/amd/vulkan/radv_shader.c
>>> index 85672e600d7..46017290654 100644
>>> --- a/src/amd/vulkan/radv_shader.c
>>> +++ b/src/amd/vulkan/radv_shader.c
>>> @@ -214,8 +214,6 @@ radv_shader_compile_to_nir(struct radv_device
>>> *device,
>>>   .multiview = true,
>>>   .subgroup_basic = true,
>>>   .variable_pointers = true,
>>> -    },
>>> -    .exts = {
>>>   .AMD_gcn_shader = true,
>>>   },
>>>   };
>>> diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h
>>> index b1e200070f7..502b7901370 100644
>>> --- a/src/compiler/shader_info.h
>>> +++ b/src/compiler/shader_info.h
>>> @@ -51,10 +51,6 @@ struct spirv_supported_capabilities {
>>>  bool subgroup_quad;
>>>  bool subgroup_shuffle;
>>>  bool subgroup_vote;
>>> -};
>>> -
>>> -/* The supported extensions which add extended instructions */
>>> -struct spirv_supported_extensions {
>>>  bool AMD_gcn_shader;
>>>   };
>>>   diff --git a/src/compiler/spirv/nir_spirv.h
>>> b/src/compiler/spirv/nir_spirv.h
>>> index 87d4120c380..d2766abb7f9 100644
>>> --- a/src/compiler/spirv/nir_spirv.h
>>> +++ b/src/compiler/spirv/nir_spirv.h
>>> @@ -60,7 +60,6 @@ struct spirv_to_nir_options {
>>>  bool lower_workgroup_access_to_offsets;
>>>    struct spirv_supported_capabilities caps;
>>> -   struct spirv_supported_extensions exts;
>>>    struct {
>>>     void (*func)(void *private_data,
>>> diff --git a/src/compiler/spirv/spirv_to_nir.c
>>> b/src/compiler/spirv/spirv_to_nir.c
>>> index 66b87c049bb..6aa4a4d6b6f 100644
>>> --- a/src/compiler/spirv/spirv_to_nir.c
>>> +++ b/src/compiler/spirv/spirv_to_nir.c
>>> @@ -374,7 +374,7 @@ vtn_handle_extension(struct vtn_builder *b,
>>> SpvOp opcode,
>>>     if (strcmp((const char *)&w[2], "GLSL.std.450") == 0) {
>>>    val->ext_handler = vtn_handle_glsl450_instruction;
>>>     } else if ((strcmp((const char *)&w[2],
>>> "SPV_AMD_gcn_shader") == 0)
>>> -    && (b->options && b->options->exts.AMD_gcn_shader)) {
>>> +    && (b->options && b->options->caps.AMD_gcn_shader)) {
>>>    val->ext_handler = vtn_handle_amd_gcn_shader_instruction;
>>>     } else {
>>>    vtn_fail("Un

[Mesa-dev] [Bug 105506] Vulkan MSAA is broken on SI

2018-03-14 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=105506

--- Comment #3 from Turo Lamminen  ---
Created attachment 138108
  --> https://bugs.freedesktop.org/attachment.cgi?id=138108&action=edit
vktrace trace

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [vulkan][intel] SIGBUS, Bus error during command buffer recording

2018-03-14 Thread Vyacheslav Gonakhchyan

Hi!

When recording a command buffer I get an error:

Program received signal SIGBUS, Bus error.

anv_state_stream_alloc (stream=stream@entry=0x9dbf9dd8, size=64,
alignment=alignment@entry=32) at vulkan/anv_allocator.c:913

913   VG_NOACCESS_WRITE(&sb->block, stream->block);
0  in anv_state_stream_alloc of vulkan/anv_allocator.c:913
1  in anv_cmd_buffer_alloc_dynamic_state of vulkan/anv_batch_chain.c:654
2  in anv_cmd_buffer_push_constants of vulkan/anv_cmd_buffer.c:729
3  in cmd_buffer_flush_push_constants of vulkan/genX_cmd_buffer.c:2420
4  in gen9_cmd_buffer_flush_state of vulkan/genX_cmd_buffer.c:2571
5  in gen9_CmdDrawIndexed of vulkan/genX_cmd_buffer.c:2709
6  in ?? of /usr/lib/libVkLayer_core_validation.so
7  in ?? of /usr/lib/libVkLayer_parameter_validation.so
8  in ?? of /usr/lib/libVkLayer_threading.so
9  in vkcmd_create_secondary_command_buffer of vkcmd.c:207
10 in vkcmd_create_secondary_command_buffer_for_inst of vkcmd.c:88
11 in scn_load_scene of scene.c:407
12 in create_scene of main.c:903
13 in main of main.c:583

I enabled validation layers and everything is fine (no output). This
happens for push constants. I use 80 bytes size. I have 128 bytes on my
system. I send the same range for both stages. The function that records
buffer: https://pastebin.com/vN2WjA1W
I use Intel Corporation HD Graphics 630. I increased dedicated memory to
1024MB and nothing changed. I also tried to reduce push constant size to 64
(send only matrix) and it did not help. What could be the reason?

Thanks,
Vyacheslav
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [RFC] Mesa 17.3.x release problems and process improvements

2018-03-14 Thread Andres Gomez

On Mon, 2018-03-12 at 15:48 +, Emil Velikov wrote:
> On 12 March 2018 at 14:20, Andres Gomez  wrote:

[...]

> > On Tue, 2018-03-06 at 19:34 +, Emil Velikov wrote:
> > 
> > [...]
> > 
> > > A few other ideas that were also came to mind:
> > > 
> > >  - Round robin - where me/Igalia team will check for outstanding
> > >patches, backports, etc.
> > 
> > I'm open to this. So far Juan and I have been doing this task while
> > being on release duty but maybe it is better to explicitly agree among us
> > (on a specific policy/shift rotation).
> > 
> 
> If there's an agreement to have a per-team maintainer, this won't
> be needed... I think.
> 
> In the meanwhile, do share how you envision this?

Maybe I'm not understanding your proposal and you have something else
in mind but, as I see it, during the 2 weeks before a bugfix release
happens, this is what I was doing at the beginning of my working day:

 * Check the new landed patches. Identify the ones tagged for the
   stable branch and cross check them with the threads in the -stable
   ML.
 * Apply the nominated patches and let Travis-CI check they were not
   breaking the stable queue.
 * If any nominated patch was breaking Travis-CI or not applying into
   the stable queue (with a trivial conflict resolution), ping the
   author to ask for a backport, or clarification.
 * From the list of landed patches, identify non-nominated ones that
   look like they should get into the stable branch. I did this in a
   looser, more relaxed way.
 * Check in the -stable ML for stagnated threads and poke the authors,
   if needed. I did this more often when getting closer to the release
   date.
 * Nightly we (Igalia) have our own custom automation to run piglit and
   VK-GL-CTS with i965 and the software drivers in search of
   regressions in the stable queue.

> > >  - Have two distinct emails - an announcement and a second RFC that
> > >lists the rejected patches and ones with outstanding backports
> > 
> > I don't think this would be really necessary, especially if we adopt
> > GitLab.
> > 
> 
> The idea is what to do, until we adopt it or any other solution. Would
> the split help people?

To be honest, as long as we keep a review system based on a mailing list, I
think the -stable one suffices, without needing a new one. I'm not
opposed, though.

-- 
Br,

Andres
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/6] spirv/radv: add AMD_gcn_shader capability, remove current extensions

2018-03-14 Thread Daniel Schürmann


Not sure, if I'm asked here :)
As AMD_gcn_shader seems to be the only extension without new capability,
I am fine with just handling it as if.

Additionally, we might want to rename it to gcn_shader to be consistent
(or add the vendor names to all capabilities).

Do you want to introduce one field per capability or have some 
capabilities merged (like now)?



On 11.03.2018 16:25, Alejandro Piñeiro wrote:

FWIW, this is the patch I'm most interested in getting reviewed. It is
also the one that probably would need some discussion. Fortunately this
one can be reviewed independently of the rest of the patches, so the
others can wait a little. Getting this in would make the rebase of
this series easier.

So: ping (please)


On 08/03/18 16:00, Alejandro Piñeiro wrote:

So now, during spirv_to_nir, it uses the capability instead of the
extension. Note that what we are really doing here is treating
SPV_AMD_gcn_shader like other supported extensions. SPV_AMD_gcn_shader
is not the first SPV extension supported. For example, the capability
draw_parameters infers if the extension SPV_KHR_shader_draw_parameters
is supported or not.

This could be seen as counter-intuitive, and it might seem easier
to define which extensions are supported, and base our checks on
that, but we need to take into account that some capabilities are
optional from core, and others come from new extensions.

Also this commit would make the implementation of ARB_spirv_extensions
easier.
---

Note that I'm aware that this can be somewhat confusing at first. But
most of the SPV extensions define a new capability, so it makes sense
to add one, and compute the other based on that. As I mention in a
different patch in this series, it was easier to compute extensions
from capabilities, instead of the other way around, because core
SPIR-V defines optional capabilities without the need of an extension.

Having said so, I have read the SPV_AMD_gcn_shader, and it doesn't
define a new capability (the first one I see that doesn't do that), so
I'm somewhat forcing that here.


  src/amd/vulkan/radv_shader.c  | 2 --
  src/compiler/shader_info.h| 4 
  src/compiler/spirv/nir_spirv.h| 1 -
  src/compiler/spirv/spirv_to_nir.c | 2 +-
  4 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 85672e600d7..46017290654 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -214,8 +214,6 @@ radv_shader_compile_to_nir(struct radv_device *device,
.multiview = true,
.subgroup_basic = true,
.variable_pointers = true,
-   },
-   .exts = {
.AMD_gcn_shader = true,
},
};
diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h
index b1e200070f7..502b7901370 100644
--- a/src/compiler/shader_info.h
+++ b/src/compiler/shader_info.h
@@ -51,10 +51,6 @@ struct spirv_supported_capabilities {
 bool subgroup_quad;
 bool subgroup_shuffle;
 bool subgroup_vote;
-};
-
-/* The supported extensions which add extended instructions */
-struct spirv_supported_extensions {
 bool AMD_gcn_shader;
  };
  
diff --git a/src/compiler/spirv/nir_spirv.h b/src/compiler/spirv/nir_spirv.h

index 87d4120c380..d2766abb7f9 100644
--- a/src/compiler/spirv/nir_spirv.h
+++ b/src/compiler/spirv/nir_spirv.h
@@ -60,7 +60,6 @@ struct spirv_to_nir_options {
 bool lower_workgroup_access_to_offsets;
  
 struct spirv_supported_capabilities caps;

-   struct spirv_supported_extensions exts;
  
 struct {

void (*func)(void *private_data,
diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index 66b87c049bb..6aa4a4d6b6f 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -374,7 +374,7 @@ vtn_handle_extension(struct vtn_builder *b, SpvOp opcode,
if (strcmp((const char *)&w[2], "GLSL.std.450") == 0) {
   val->ext_handler = vtn_handle_glsl450_instruction;
} else if ((strcmp((const char *)&w[2], "SPV_AMD_gcn_shader") == 0)
-&& (b->options && b->options->exts.AMD_gcn_shader)) {
+&& (b->options && b->options->caps.AMD_gcn_shader)) {
   val->ext_handler = vtn_handle_amd_gcn_shader_instruction;
} else {
   vtn_fail("Unsupported extension");


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 105507] Crash when destroying a newly resized EGLsurface with wayland egl (dri2)

2018-03-14 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=105507

Bug ID: 105507
   Summary: Crash when destroying a newly resized EGLsurface with
wayland egl (dri2)
   Product: Mesa
   Version: 17.3
  Hardware: x86-64 (AMD64)
OS: Linux (All)
Status: NEW
  Severity: normal
  Priority: medium
 Component: EGL/Wayland
  Assignee: wayland-b...@lists.freedesktop.org
  Reporter: johan.hels...@qt.io
QA Contact: mesa-dev@lists.freedesktop.org

In dri2_wl_surface_release_buffers, a wl_buffer is not destroyed if it's
locked. Afterwards it's set to null regardless
(dri2_surf->color_buffers[i].wl_buffer = NULL;)

Normally, this is fine, since the buffer will be released by the
wl_buffer_release event when the compositor is done with it. But if the
EGLSurface is destroyed first, then the event queue for the surface (and for
the wl_buffer) is destroyed, and the wl_buffer_release event then causes a
crash because we try to use a destroyed event queue.

One solution would be to maintain a separate list of buffers we tried to
destroy, but couldn't because they were locked. And make sure they are
destroyed in dri2_wl_destroy_surface.

This might not be a problem users frequently run into, but it's causing many
unit tests in Qt to be flaky, and we probably have to blacklist them until this
is fixed (https://bugreports.qt.io/browse/QTBUG-66848)

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] spirv: update arguments for vtn_nir_alu_op_for_spirv_opcode()

2018-03-14 Thread Jason Ekstrand

On March 14, 2018 03:42:04 Samuel Iglesias Gonsálvez  
wrote:



We no longer need the source and destination data types, just
their bit sizes.

Signed-off-by: Samuel Iglesias Gonsálvez 
---
 src/compiler/spirv/spirv_to_nir.c |  4 ++--
 src/compiler/spirv/vtn_alu.c  | 30 ++
 src/compiler/spirv/vtn_private.h  |  2 +-
 3 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c

index 3de45c47371..516fce1ecec 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -1703,8 +1703,8 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
  };

  nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap,
- src_alu_type,
- dst_alu_type);
+ 
nir_alu_type_get_type_size(src_alu_type),
+ 
nir_alu_type_get_type_size(dst_alu_type));

  nir_const_value src[4];

  for (unsigned i = 0; i < count - 4; i++) {
diff --git a/src/compiler/spirv/vtn_alu.c b/src/compiler/spirv/vtn_alu.c
index 15158b39520..3720a9484ff 100644
--- a/src/compiler/spirv/vtn_alu.c
+++ b/src/compiler/spirv/vtn_alu.c
@@ -275,7 +275,7 @@ vtn_handle_bitcast(struct vtn_builder *b, struct 
vtn_ssa_value *dest,

 nir_op
 vtn_nir_alu_op_for_spirv_opcode(struct vtn_builder *b,
 SpvOp opcode, bool *swap,
-nir_alu_type src, nir_alu_type dst)
+unsigned src_bit_size, unsigned dst_bit_size)
 {
/* Indicates that the first two arguments should be swapped.  This is
 * used for implementing greater-than and less-than-or-equal.
@@ -388,8 +388,8 @@ vtn_nir_alu_op_for_spirv_opcode(struct vtn_builder *b,
   default:
  unreachable("Invalid opcode");
   }
-  src_type |= nir_alu_type_get_type_size(src);
-  dst_type |= nir_alu_type_get_type_size(dst);
+  src_type |= src_bit_size;
+  dst_type |= dst_bit_size;
   return nir_type_conversion_op(src_type, dst_type, 
nir_rounding_mode_undef);
}
/* Derivatives: */
@@ -575,10 +575,12 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
case SpvOpFUnordLessThanEqual:
case SpvOpFUnordGreaterThanEqual: {
   bool swap;
-  nir_alu_type src_alu_type = 
nir_get_nir_type_for_glsl_type(vtn_src[0]->type);

-  nir_alu_type dst_alu_type = nir_get_nir_type_for_glsl_type(type);
+  unsigned src_bit_size =
+ 
nir_alu_type_get_type_size(nir_get_nir_type_for_glsl_type(vtn_src[0]->type));

+  unsigned dst_bit_size =
+ nir_alu_type_get_type_size(nir_get_nir_type_for_glsl_type(type));


Just use glsl_type_get_bit_size


   nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap,
-  src_alu_type, dst_alu_type);
+  src_bit_size, dst_bit_size);

   if (swap) {
  nir_ssa_def *tmp = src[0];
@@ -602,10 +604,12 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
case SpvOpFOrdLessThanEqual:
case SpvOpFOrdGreaterThanEqual: {
   bool swap;
-  nir_alu_type src_alu_type = 
nir_get_nir_type_for_glsl_type(vtn_src[0]->type);

-  nir_alu_type dst_alu_type = nir_get_nir_type_for_glsl_type(type);
+  unsigned src_bit_size =
+ 
nir_alu_type_get_type_size(nir_get_nir_type_for_glsl_type(vtn_src[0]->type));

+  unsigned dst_bit_size =
+ nir_alu_type_get_type_size(nir_get_nir_type_for_glsl_type(type));


Same here


   nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap,
-  src_alu_type, dst_alu_type);
+  src_bit_size, dst_bit_size);

   if (swap) {
  nir_ssa_def *tmp = src[0];
@@ -640,10 +644,12 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,

default: {
   bool swap;
-  nir_alu_type src_alu_type = 
nir_get_nir_type_for_glsl_type(vtn_src[0]->type);

-  nir_alu_type dst_alu_type = nir_get_nir_type_for_glsl_type(type);
+  unsigned src_bit_size =
+ 
nir_alu_type_get_type_size(nir_get_nir_type_for_glsl_type(vtn_src[0]->type));

+  unsigned dst_bit_size =
+ nir_alu_type_get_type_size(nir_get_nir_type_for_glsl_type(type));


And here

With those changes made, rb.


   nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap,
-  src_alu_type, dst_alu_type);
+  src_bit_size, dst_bit_size);

   if (swap) {
  nir_ssa_def *tmp = src[0];
diff --git a/src/compiler/spirv/vtn_private.h 
b/src/compiler/spirv/vtn_private.h

index a8fa612384f..70f660fbd48 100644
--- a/src/compiler/

[Mesa-dev] [PATCH 3/3] RFC: nir+vtn: vec8+vec16 support

2018-03-14 Thread Rob Clark

This introduces new vec8 and vec16 instructions (which are the only
instructions taking more than 4 sources), in order to construct 8 and 16
component vectors.

nir_build_alu() is a bit ugly.. perhaps re-work to take an array of
src's?  Current approach should be something the compiler could inline
and optimize reasonably well, but I guess it could do equally well with
an array of sources instead?

I possibly missed some spots.  But probably the best way to track those
down is to get farther through the OpenCL CTS.  I don't expect this should
cause any issues with vulkan or gl, and I guess it should be fine to fix
cl issues as we go.
---
 src/compiler/glsl/glsl_to_nir.cpp |  5 ++-
 src/compiler/nir/nir.h| 30 +++---
 src/compiler/nir/nir_builder.h| 49 ---
 src/compiler/nir/nir_builder_opcodes_h.py |  2 +-
 src/compiler/nir/nir_constant_expressions.py  | 33 +--
 src/compiler/nir/nir_lower_alu_to_scalar.c| 13 --
 src/compiler/nir/nir_lower_io_to_scalar.c |  4 +-
 src/compiler/nir/nir_lower_load_const_to_scalar.c |  2 +-
 src/compiler/nir/nir_opcodes.py   | 39 +-
 src/compiler/nir/nir_print.c  | 19 ++---
 src/compiler/nir/nir_validate.c   |  4 +-
 src/compiler/spirv/spirv_to_nir.c |  6 ++-
 src/compiler/spirv/vtn_alu.c  | 32 +++
 src/compiler/spirv/vtn_glsl450.c  |  5 ++-
 14 files changed, 190 insertions(+), 53 deletions(-)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index 80eb15f1ab1..c6c7b094794 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -1588,7 +1588,10 @@ nir_visitor::visit(ir_expression *ir)
   nir_alu_type dst_type = nir_get_nir_type_for_glsl_base_type(out_type);
   result = nir_build_alu(&b, nir_type_conversion_op(src_type, dst_type,
  nir_rounding_mode_undef),
- srcs[0], NULL, NULL, NULL);
+ srcs[0], NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL);
   /* b2i and b2f don't have fixed bit-size versions so the builder will
* just assume 32 and we have to fix it up here.
*/
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 6a51b7c4ab1..8e5b3a493e0 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -118,16 +118,16 @@ typedef enum {
 } nir_rounding_mode;
 
 typedef union {
-   float f32[4];
-   double f64[4];
-   int8_t i8[4];
-   uint8_t u8[4];
-   int16_t i16[4];
-   uint16_t u16[4];
-   int32_t i32[4];
-   uint32_t u32[4];
-   int64_t i64[4];
-   uint64_t u64[4];
+   float f32[16];
+   double f64[16];
+   int8_t i8[16];
+   uint8_t u8[16];
+   int16_t i16[16];
+   uint16_t u16[16];
+   int32_t i32[16];
+   uint32_t u32[16];
+   int64_t i64[16];
+   uint64_t u64[16];
 } nir_const_value;
 
 typedef struct nir_constant {
@@ -138,7 +138,7 @@ typedef struct nir_constant {
 * by the type associated with the \c nir_variable.  Constants may be
 * scalars, vectors, or matrices.
 */
-   nir_const_value values[4];
+   nir_const_value values[16];
 
/* we could get this from the var->type but makes clone *much* easier to
 * not have to care about the type.
@@ -663,7 +663,7 @@ typedef struct {
 * a statement like "foo.xzw = bar.zyx" would have a writemask of 1101b and
 * a swizzle of {2, x, 1, 0} where x means "don't care."
 */
-   uint8_t swizzle[4];
+   uint8_t swizzle[16];
 } nir_alu_src;
 
 typedef struct {
@@ -678,7 +678,7 @@ typedef struct {
 
bool saturate;
 
-   unsigned write_mask : 4; /* ignored if dest.is_ssa is true */
+   unsigned write_mask : 16; /* ignored if dest.is_ssa is true */
 } nir_alu_dest;
 
 typedef enum {
@@ -807,14 +807,14 @@ typedef struct {
/**
 * The number of components in each input
 */
-   unsigned input_sizes[4];
+   unsigned input_sizes[16];
 
/**
 * The type of vector that each input takes. Note that negate and
 * absolute value are only allowed on inputs with int or float type and
 * behave differently on the two.
 */
-   nir_alu_type input_types[4];
+   nir_alu_type input_types[16];
 
nir_op_algebraic_property algebraic_properties;
 } nir_op_info;
diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
index 36e0ae3ac63..9d1974f7a02 100644
--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -291,7 +291,11 @@ nir_imm_ivec4(nir_builder *build, int x, int y, int z, int 
w)
 
 static inline nir_ssa_def *
 nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
-  nir_ssa_def *src1, nir_ssa_def *src2, nir_ss

[Mesa-dev] [PATCH 1/3] glsl_types: refactor/prep for vec8/vec16

2018-03-14 Thread Rob Clark

Refactor things so there isn't so much typing involved to add new
things.

Also drops a pointless conditional (out of bounds rows or columns
already returns error_type in all paths.. might as well drop it
rather than make the check more convoluted in the next patch by
adding the vec8/vec16 case).

Signed-off-by: Rob Clark 
---
 src/compiler/builtin_type_macros.h |  77 ++--
 src/compiler/glsl_types.cpp| 120 ++---
 src/compiler/glsl_types.h  |   1 +
 3 files changed, 49 insertions(+), 149 deletions(-)

diff --git a/src/compiler/builtin_type_macros.h 
b/src/compiler/builtin_type_macros.h
index 807691824d3..dd8204a1981 100644
--- a/src/compiler/builtin_type_macros.h
+++ b/src/compiler/builtin_type_macros.h
@@ -31,25 +31,24 @@
 DECL_TYPE(error,  GL_INVALID_ENUM, GLSL_TYPE_ERROR, 0, 0)
 DECL_TYPE(void,   GL_INVALID_ENUM, GLSL_TYPE_VOID,  0, 0)
 
-DECL_TYPE(bool,   GL_BOOL, GLSL_TYPE_BOOL,  1, 1)
-DECL_TYPE(bvec2,  GL_BOOL_VEC2,GLSL_TYPE_BOOL,  2, 1)
-DECL_TYPE(bvec3,  GL_BOOL_VEC3,GLSL_TYPE_BOOL,  3, 1)
-DECL_TYPE(bvec4,  GL_BOOL_VEC4,GLSL_TYPE_BOOL,  4, 1)
-
-DECL_TYPE(int,GL_INT,  GLSL_TYPE_INT,   1, 1)
-DECL_TYPE(ivec2,  GL_INT_VEC2, GLSL_TYPE_INT,   2, 1)
-DECL_TYPE(ivec3,  GL_INT_VEC3, GLSL_TYPE_INT,   3, 1)
-DECL_TYPE(ivec4,  GL_INT_VEC4, GLSL_TYPE_INT,   4, 1)
-
-DECL_TYPE(uint,   GL_UNSIGNED_INT,  GLSL_TYPE_UINT, 1, 1)
-DECL_TYPE(uvec2,  GL_UNSIGNED_INT_VEC2, GLSL_TYPE_UINT, 2, 1)
-DECL_TYPE(uvec3,  GL_UNSIGNED_INT_VEC3, GLSL_TYPE_UINT, 3, 1)
-DECL_TYPE(uvec4,  GL_UNSIGNED_INT_VEC4, GLSL_TYPE_UINT, 4, 1)
-
-DECL_TYPE(float,  GL_FLOAT,GLSL_TYPE_FLOAT, 1, 1)
-DECL_TYPE(vec2,   GL_FLOAT_VEC2,   GLSL_TYPE_FLOAT, 2, 1)
-DECL_TYPE(vec3,   GL_FLOAT_VEC3,   GLSL_TYPE_FLOAT, 3, 1)
-DECL_TYPE(vec4,   GL_FLOAT_VEC4,   GLSL_TYPE_FLOAT, 4, 1)
+#define DECL_VEC_TYPE(stype, vtype, btype, etype, ...)   \
+   DECL_TYPE(stype,  etype ##__VA_ARGS__, btype, 1, 1)   \
+   DECL_TYPE(vtype ## 2, etype ##_VEC2 ##__VA_ARGS__, btype, 2, 1)   \
+   DECL_TYPE(vtype ## 3, etype ##_VEC3 ##__VA_ARGS__, btype, 3, 1)   \
+   DECL_TYPE(vtype ## 4, etype ##_VEC4 ##__VA_ARGS__, btype, 4, 1)
+
+DECL_VEC_TYPE(bool,  bvec,   GLSL_TYPE_BOOL,GL_BOOL)
+DECL_VEC_TYPE(int,   ivec,   GLSL_TYPE_INT, GL_INT)
+DECL_VEC_TYPE(uint,  uvec,   GLSL_TYPE_UINT,GL_UNSIGNED_INT)
+DECL_VEC_TYPE(float, vec,GLSL_TYPE_FLOAT,   GL_FLOAT)
+DECL_VEC_TYPE(float16_t, f16vec, GLSL_TYPE_FLOAT16, GL_FLOAT16, _NV)
+DECL_VEC_TYPE(double,dvec,   GLSL_TYPE_DOUBLE,  GL_DOUBLE)
+DECL_VEC_TYPE(int64_t,   i64vec, GLSL_TYPE_INT64,   GL_INT64, _ARB)
+DECL_VEC_TYPE(uint64_t,  u64vec, GLSL_TYPE_UINT64,  GL_UNSIGNED_INT64, _ARB)
+DECL_VEC_TYPE(int16_t,   i16vec, GLSL_TYPE_INT16,   GL_INT16, _NV)
+DECL_VEC_TYPE(uint16_t,  u16vec, GLSL_TYPE_UINT16,  GL_UNSIGNED_INT16, _NV)
+DECL_VEC_TYPE(int8_t,i8vec,  GLSL_TYPE_INT8,GL_INT8, _NV)
+DECL_VEC_TYPE(uint8_t,   u8vec,  GLSL_TYPE_UINT8,   GL_UNSIGNED_INT8, _NV)
 
 DECL_TYPE(mat2,   GL_FLOAT_MAT2,   GLSL_TYPE_FLOAT, 2, 2)
 DECL_TYPE(mat3,   GL_FLOAT_MAT3,   GLSL_TYPE_FLOAT, 3, 3)
@@ -62,11 +61,6 @@ DECL_TYPE(mat3x4, GL_FLOAT_MAT3x4, GLSL_TYPE_FLOAT, 4, 3)
 DECL_TYPE(mat4x2, GL_FLOAT_MAT4x2, GLSL_TYPE_FLOAT, 2, 4)
 DECL_TYPE(mat4x3, GL_FLOAT_MAT4x3, GLSL_TYPE_FLOAT, 3, 4)
 
-DECL_TYPE(float16_t, GL_FLOAT16_NV,GLSL_TYPE_FLOAT16, 1, 1)
-DECL_TYPE(f16vec2,   GL_FLOAT16_VEC2_NV,   GLSL_TYPE_FLOAT16, 2, 1)
-DECL_TYPE(f16vec3,   GL_FLOAT16_VEC3_NV,   GLSL_TYPE_FLOAT16, 3, 1)
-DECL_TYPE(f16vec4,   GL_FLOAT16_VEC4_NV,   GLSL_TYPE_FLOAT16, 4, 1)
-
 DECL_TYPE(f16mat2,   GL_FLOAT16_MAT2_AMD,   GLSL_TYPE_FLOAT16, 2, 2)
 DECL_TYPE(f16mat3,   GL_FLOAT16_MAT3_AMD,   GLSL_TYPE_FLOAT16, 3, 3)
 DECL_TYPE(f16mat4,   GL_FLOAT16_MAT4_AMD,   GLSL_TYPE_FLOAT16, 4, 4)
@@ -78,11 +72,6 @@ DECL_TYPE(f16mat3x4, GL_FLOAT16_MAT3x4_AMD, 
GLSL_TYPE_FLOAT16, 4, 3)
 DECL_TYPE(f16mat4x2, GL_FLOAT16_MAT4x2_AMD, GLSL_TYPE_FLOAT16, 2, 4)
 DECL_TYPE(f16mat4x3, GL_FLOAT16_MAT4x3_AMD, GLSL_TYPE_FLOAT16, 3, 4)
 
-DECL_TYPE(double,  GL_DOUBLE,GLSL_TYPE_DOUBLE, 1, 1)
-DECL_TYPE(dvec2,   GL_DOUBLE_VEC2,   GLSL_TYPE_DOUBLE, 2, 1)
-DECL_TYPE(dvec3,   GL_DOUBLE_VEC3,   GLSL_TYPE_DOUBLE, 3, 1)
-DECL_TYPE(dvec4,   GL_DOUBLE_VEC4,   GLSL_TYPE_DOUBLE, 4, 1)
-
 DECL_TYPE(dmat2,   GL_DOUBLE_MAT2,   GLSL_TYPE_DOUBLE, 2, 2)
 DECL_TYPE(dmat3,   GL_DOUBLE_MAT3,   GLSL_TYPE_DOUBLE, 3, 3)
 DECL_TYPE(dmat4,   GL_DOUBLE_MAT4,   GLSL_TYPE_DOUBLE, 4, 4)
@@ -94,36 +83,6 @@ DECL_TYPE(dmat3x4, GL_DOUBLE_MAT3x4, GLSL_TYPE_DOUBLE, 4, 3)
 DECL_TYPE(dmat4x2, GL_DOUBLE_MAT4x2, GLSL_TYPE_DOUBLE, 2, 4)
 DECL_TYPE(dmat4x3, GL_DOUBLE_MAT4x3, GLSL_TYPE_DOUBLE, 3, 4)
 
-DECL_TYPE(int64_t,  GL_INT64_ARB,  GLSL_TYPE_INT64,   1, 1)
-DECL_TYPE(i64vec2,  GL_INT64_VEC2_ARB, GLSL_TYPE_INT64,   2, 1)
-DECL_TYPE(i64vec3,  GL_INT64_VEC3_ARB, GLSL_TYPE_INT64,   3, 1)
-DECL_TYPE(i64vec4,  GL_INT64_VEC4_ARB, G

[Mesa-dev] [PATCH 2/3] glsl_types: vec8/vec16 support

2018-03-14 Thread Rob Clark

Not used in GL but 8 and 16 component vectors exist in OpenCL.

Signed-off-by: Rob Clark 
---
OpenCL committee: "Sure everyone switched to scalar instruction sets,
but let's double down on the vec4"  :-P

 src/compiler/builtin_type_macros.h |  4 +++-
 src/compiler/glsl_types.cpp|  8 +++-
 src/compiler/nir/nir_print.c   |  4 +++-
 src/compiler/nir/nir_validate.c|  4 +++-
 src/compiler/nir_types.cpp | 10 ++
 src/compiler/spirv/spirv_to_nir.c  |  3 +--
 6 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/src/compiler/builtin_type_macros.h 
b/src/compiler/builtin_type_macros.h
index dd8204a1981..55ad2b89554 100644
--- a/src/compiler/builtin_type_macros.h
+++ b/src/compiler/builtin_type_macros.h
@@ -35,7 +35,9 @@ DECL_TYPE(void,   GL_INVALID_ENUM, GLSL_TYPE_VOID,  0, 0)
DECL_TYPE(stype,  etype ##__VA_ARGS__, btype, 1, 1)   \
DECL_TYPE(vtype ## 2, etype ##_VEC2 ##__VA_ARGS__, btype, 2, 1)   \
DECL_TYPE(vtype ## 3, etype ##_VEC3 ##__VA_ARGS__, btype, 3, 1)   \
-   DECL_TYPE(vtype ## 4, etype ##_VEC4 ##__VA_ARGS__, btype, 4, 1)
+   DECL_TYPE(vtype ## 4, etype ##_VEC4 ##__VA_ARGS__, btype, 4, 1)   \
+   DECL_TYPE(vtype ## 8,  0, btype, 8, 1)   \
+   DECL_TYPE(vtype ## 16, 0, btype, 16, 1)
 
 DECL_VEC_TYPE(bool,  bvec,   GLSL_TYPE_BOOL,GL_BOOL)
 DECL_VEC_TYPE(int,   ivec,   GLSL_TYPE_INT, GL_INT)
diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp
index 8b18f2f3210..b8caddb4066 100644
--- a/src/compiler/glsl_types.cpp
+++ b/src/compiler/glsl_types.cpp
@@ -498,7 +498,12 @@ glsl_type::vec(unsigned components, const glsl_type *const 
ts[])
 {
unsigned n = components;
 
-   if (n == 0 || n > 4)
+   if (components == 8)
+  n = 5;
+   else if (components == 16)
+  n = 6;
+
+   if (n == 0 || n > 6)
   return error_type;
 
return ts[n - 1];
@@ -508,6 +513,7 @@ glsl_type::vec(unsigned components, const glsl_type *const 
ts[])
   static const glsl_type *const ts[] = { \
  sname ## _type, vname ## 2_type,\
  vname ## 3_type, vname ## 4_type,   \
+ vname ## 8_type, vname ## 16_type,  \
   }; \
   glsl_type::vec(components, ts);\
})
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
index 7888dbd3384..21f13097651 100644
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -85,7 +85,9 @@ print_register(nir_register *reg, print_state *state)
   fprintf(fp, "r%u", reg->index);
 }
 
-static const char *sizes[] = { "error", "vec1", "vec2", "vec3", "vec4" };
+static const char *sizes[] = { "error", "vec1", "vec2", "vec3", "vec4",
+   "error", "error", "error", "vec8",
+   "error", "error", "error", "vec16"};
 
 static void
 print_register_decl(nir_register *reg, print_state *state)
diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c
index a49948fbb48..725ba43152c 100644
--- a/src/compiler/nir/nir_validate.c
+++ b/src/compiler/nir/nir_validate.c
@@ -294,7 +294,9 @@ validate_ssa_def(nir_ssa_def *def, validate_state *state)
 
validate_assert(state, def->parent_instr == state->instr);
 
-   validate_assert(state, def->num_components <= 4);
+   validate_assert(state, (def->num_components <= 4) ||
+  (def->num_components == 8) ||
+  (def->num_components == 16));
 
list_validate(&def->uses);
list_validate(&def->if_uses);
diff --git a/src/compiler/nir_types.cpp b/src/compiler/nir_types.cpp
index ee6b06aea63..78b66803f08 100644
--- a/src/compiler/nir_types.cpp
+++ b/src/compiler/nir_types.cpp
@@ -366,15 +366,17 @@ glsl_scalar_type(enum glsl_base_type base_type)
 const glsl_type *
 glsl_vector_type(enum glsl_base_type base_type, unsigned components)
 {
-   assert(components > 1 && components <= 4);
-   return glsl_type::get_instance(base_type, components, 1);
+   const glsl_type *t = glsl_type::get_instance(base_type, components, 1);
+   assert(t != glsl_type::error_type);
+   return t;
 }
 
 const glsl_type *
 glsl_matrix_type(enum glsl_base_type base_type, unsigned rows, unsigned columns)
 {
-   assert(rows > 1 && rows <= 4 && columns >= 1 && columns <= 4);
-   return glsl_type::get_instance(base_type, rows, columns);
+   const glsl_type *t = glsl_type::get_instance(base_type, rows, columns);
+   assert(t != glsl_type::error_type);
+   return t;
 }
 
 const glsl_type *
diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c
index 42a559122a6..953c9b86c3a 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -934,7 +934,6 @@ vtn_type_layout_std430(struct vtn_builder *b, struct vtn_type *type,
 
case vtn_base_type_vector: {
   uint32_t comp_size = glsl_get_bit_size(type->type) / 8;
-  assert(type->length > 0 && type->length <= 4);
   unsigned

1 2 >

1 - 100 of 140 matches

Mail list logo