[Mesa-dev] [PATCH] freedreno: Fix meson build.

2019-01-25 Thread Vinson Lee
../src/gallium/drivers/freedreno/freedreno_resource.c: In function 
‘fd_resource_create_with_modifiers’:
../src/gallium/drivers/freedreno/freedreno_resource.c:884:30: error: 
‘DRM_FORMAT_MOD_QCOM_COMPRESSED’ undeclared (first use in this function)
   allow_ubwc = find_modifier(DRM_FORMAT_MOD_QCOM_COMPRESSED, modifiers, count);
  ^

Fixes: 1ce5d757d04a ("freedreno: core buffer modifier support")
Signed-off-by: Vinson Lee 
---
 src/gallium/drivers/freedreno/meson.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/freedreno/meson.build 
b/src/gallium/drivers/freedreno/meson.build
index 1e3a3037014b..25aa8ecdb455 100644
--- a/src/gallium/drivers/freedreno/meson.build
+++ b/src/gallium/drivers/freedreno/meson.build
@@ -209,7 +209,7 @@ files_libfreedreno = files(
 )
 
 freedreno_includes = [
-  inc_src, inc_include, inc_gallium, inc_gallium_aux,
+  inc_src, inc_include, inc_drm_uapi, inc_gallium, inc_gallium_aux,
   inc_freedreno, include_directories('ir3'),
 ]
 
-- 
2.19.1.dropbox.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/4] Common KMS renderonly support

2019-01-25 Thread Qiang Yu
Thanks Rob, I'm OK with this kmsro approach.

But I have to point out that this will break XServer AIGLX:
1. modesetting DDX will report the display drm driver name like meson
  as DRI2 driver name
2. libglx.so used by xserver will look after meson_dri.so for dlopen
3. then dlsym __driDriverGetExtensions_meson for init

With this serial applied, 2&3 will not be true any more. This may need
the fix in modesetting or libglx.so.

But as AIGLX is mostly not used, so I'm OK for this serial
go into mesa first, just a remind for somebody who cares.

Regards,
Qiang

On Fri, Jan 25, 2019 at 6:36 AM Rob Herring  wrote:
>
> This series aims to make supporting new platforms containing
> renderonly GPUs easier with less copy-n-paste. This hasn't been a big
> issue so far as the current renderonly drivers (vc4 and etnaviv) only
> exists on a few platforms. This is changing with i.MX+freedreno,
> armada+etnaviv and a slew of platforms using Mali lima and panfrost
> drivers.
>
> I've taken the kmsro winsys from Eric, extended the pipe-loader to
> fall back to kmsro, added etnaviv support, and switched imx to use
> kmsro.
>
> I've tested this with the panfrost tree. Help testing on i.MX would be
> nice. A git branch is here[1].
>
> Rob
>
> [1] https://github.com/robherring/mesa winsys-renderonly
>
> Eric Anholt (1):
>   pl111: Rename the pl111 driver to "kmsro".
>
> Rob Herring (3):
>   pipe-loader: Fallback to kmsro driver when no matching driver name
> found
>   kmsro: Add etnaviv renderonly support
>   Switch imx to kmsro and remove the imx winsys
>
>  .travis.yml   |  2 +-
>  Android.mk|  7 ++-
>  Makefile.am   |  2 +-
>  configure.ac  | 28 ---
>  meson.build   | 12 ++---
>  meson_options.txt |  4 +-
>  src/gallium/Android.mk|  3 +-
>  src/gallium/Makefile.am   |  8 +--
>  .../auxiliary/pipe-loader/pipe_loader_drm.c   | 18 +++
>  .../auxiliary/target-helpers/drm_helper.h | 35 +++--
>  .../target-helpers/drm_helper_public.h|  2 +-
>  src/gallium/drivers/imx/Automake.inc  |  9 
>  .../drivers/{pl111 => kmsro}/Android.mk   |  6 +--
>  src/gallium/drivers/kmsro/Automake.inc|  9 
>  .../drivers/{imx => kmsro}/Makefile.am|  4 +-
>  .../drivers/{pl111 => kmsro}/Makefile.sources |  0
>  src/gallium/drivers/pl111/Automake.inc|  9 
>  src/gallium/drivers/pl111/Makefile.am |  8 ---
>  src/gallium/meson.build   | 23 -
>  src/gallium/targets/dri/Makefile.am   |  3 +-
>  src/gallium/targets/dri/meson.build   |  8 +--
>  src/gallium/targets/dri/target.c  |  2 +-
>  src/gallium/winsys/imx/drm/Android.mk | 40 ---
>  src/gallium/winsys/imx/drm/Makefile.am| 35 -
>  src/gallium/winsys/imx/drm/Makefile.sources   |  3 --
>  src/gallium/winsys/imx/drm/imx_drm_public.h   | 34 -
>  src/gallium/winsys/imx/drm/imx_drm_winsys.c   | 50 ---
>  src/gallium/winsys/imx/drm/meson.build| 33 
>  .../winsys/{pl111 => kmsro}/drm/Android.mk|  2 +-
>  .../winsys/{pl111 => kmsro}/drm/Makefile.am   | 12 -
>  src/gallium/winsys/kmsro/drm/Makefile.sources |  3 ++
>  .../drm/kmsro_drm_public.h}   |  8 +--
>  .../drm/kmsro_drm_winsys.c}   | 42 +++-
>  .../winsys/{pl111 => kmsro}/drm/meson.build   | 23 ++---
>  src/gallium/winsys/pl111/drm/Makefile.sources |  3 --
>  35 files changed, 130 insertions(+), 360 deletions(-)
>  delete mode 100644 src/gallium/drivers/imx/Automake.inc
>  rename src/gallium/drivers/{pl111 => kmsro}/Android.mk (91%)
>  create mode 100644 src/gallium/drivers/kmsro/Automake.inc
>  rename src/gallium/drivers/{imx => kmsro}/Makefile.am (55%)
>  rename src/gallium/drivers/{pl111 => kmsro}/Makefile.sources (100%)
>  delete mode 100644 src/gallium/drivers/pl111/Automake.inc
>  delete mode 100644 src/gallium/drivers/pl111/Makefile.am
>  delete mode 100644 src/gallium/winsys/imx/drm/Android.mk
>  delete mode 100644 src/gallium/winsys/imx/drm/Makefile.am
>  delete mode 100644 src/gallium/winsys/imx/drm/Makefile.sources
>  delete mode 100644 src/gallium/winsys/imx/drm/imx_drm_public.h
>  delete mode 100644 src/gallium/winsys/imx/drm/imx_drm_winsys.c
>  delete mode 100644 src/gallium/winsys/imx/drm/meson.build
>  rename src/gallium/winsys/{pl111 => kmsro}/drm/Android.mk (97%)
>  rename src/gallium/winsys/{pl111 => kmsro}/drm/Makefile.am (87%)
>  create mode 100644 src/gallium/winsys/kmsro/drm/Makefile.sources
>  rename src/gallium/winsys/{pl111/drm/pl111_drm_public.h => 
> kmsro/drm/kmsro_drm_public.h} (89%)
>  rename src/gallium/winsys/{pl111/drm/pl111_drm_winsys.c => 
> kmsro/drm/kmsro_drm_winsys.c} (63%)
>  rename 

[Mesa-dev] [PATCH] radeonsi: fix crashing performance counters (division by zero)

2019-01-25 Thread Marek Olšák
From: Marek Olšák 

Fixes: e2b9329f17 "radeonsi: move remaining perfcounter code into 
si_perfcounter.c"
---
 src/gallium/drivers/radeonsi/si_perfcounter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c 
b/src/gallium/drivers/radeonsi/si_perfcounter.c
index 2da14f8868f..d55394f2cba 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -1326,21 +1326,21 @@ void si_init_perfcounters(struct si_screen *screen)
pc->separate_instance = 
debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", false);
 
pc->blocks = CALLOC(num_blocks, sizeof(struct si_pc_block));
if (!pc->blocks)
goto error;
pc->num_blocks = num_blocks;
 
for (i = 0; i < num_blocks; ++i) {
struct si_pc_block *block = >blocks[i];
block->b = [i];
-   block->num_instances = block->b->instances;
+   block->num_instances = MAX2(1, block->b->instances);
 
if (!strcmp(block->b->b->name, "CB") ||
!strcmp(block->b->b->name, "DB"))
block->num_instances = screen->info.max_se;
else if (!strcmp(block->b->b->name, "TCC"))
block->num_instances = screen->info.num_tcc_blocks;
else if (!strcmp(block->b->b->name, "IA"))
block->num_instances = MAX2(1, screen->info.max_se / 2);
 
if (si_pc_block_has_per_instance_groups(pc, block)) {
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] tgsi: remove culldist semantic from docs

2019-01-25 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Wed, Jan 23, 2019 at 8:14 PM Timothy Arceri 
wrote:

> The semantic was removed in e6d93893662d.
> ---
>  src/gallium/docs/source/tgsi.rst | 18 --
>  1 file changed, 18 deletions(-)
>
> diff --git a/src/gallium/docs/source/tgsi.rst
> b/src/gallium/docs/source/tgsi.rst
> index 277f25ca41b..59410cfd663 100644
> --- a/src/gallium/docs/source/tgsi.rst
> +++ b/src/gallium/docs/source/tgsi.rst
> @@ -3205,24 +3205,6 @@ tessellation evaluation shaders, respectively. Only
> the value written in the
>  last vertex processing stage is used.
>
>
> -TGSI_SEMANTIC_CULLDIST
> -""
> -
> -Used as distance to plane for performing application-defined culling
> -of individual primitives against a plane. When components of vertex
> -elements are given this label, these values are assumed to be a
> -float32 signed distance to a plane. Primitives will be completely
> -discarded if the plane distance for all of the vertices in the
> -primitive are < 0. If a vertex has a cull distance of NaN, that
> -vertex counts as "out" (as if its < 0);
> -The limits on both clip and cull distances are bound
> -by the PIPE_MAX_CLIP_OR_CULL_DISTANCE_COUNT define which defines
> -the maximum number of components that can be used to hold the
> -distances and by the PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT
> -which specifies the maximum number of registers which can be
> -annotated with those semantics.
> -
> -
>  TGSI_SEMANTIC_CLIPDIST
>  ""
>
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] MR: Move pln emul to the fs_visitor.

2019-01-25 Thread Rafael Antognolli
Move the pln emul code to the fs_visitor, so we get some optimizations
that don't happen at the fs_generator level, mainly better scheduling.

One big caveat of this change is that we don't use NF types and the
accumulator anymore, but apparently we don't need the extra precision.

https://gitlab.freedesktop.org/mesa/mesa/merge_requests/160

--
Rafael
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] ac/nir_to_llvm: fix clamp shadow reference for more hardware

2019-01-25 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Tue, Jan 22, 2019 at 10:59 PM Timothy Arceri 
wrote:

> Fixes the following piglit test on my VEGA and matches the behaviour in the
> tgsi backend.
>
>
> tests/spec/glsl-1.10/execution/samplers/glsl-fs-shadow2D-clamp-z.shader_test
>
> Fixes: 625dcbbc4566 ("amd/common: pass address components individually to
> ac_build_image_intrinsic")
> ---
>  src/amd/common/ac_nir_to_llvm.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c
> b/src/amd/common/ac_nir_to_llvm.c
> index f509fc31df..b60ef86986 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -3586,7 +3586,7 @@ static void visit_tex(struct ac_nir_context *ctx,
> nir_tex_instr *instr)
>  * It's unnecessary if the original texture format was
>  * Z32_FLOAT, but we don't know that here.
>  */
> -   if (args.compare && ctx->ac.chip_class == VI &&
> ctx->abi->clamp_shadow_reference)
> +   if (args.compare && ctx->ac.chip_class >= VI &&
> ctx->abi->clamp_shadow_reference)
> args.compare = ac_build_clamp(>ac,
> ac_to_float(>ac, args.compare));
>
> /* pack derivatives */
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeonsi: re-initialize query buffers if they are reused

2019-01-25 Thread Marek Olšák
From: Marek Olšák 

Fixes: e0f0d3675d4 "radeonsi: factor si_query_buffer logic out of si_query_hw"
---
 src/gallium/drivers/radeonsi/si_query.c | 20 
 src/gallium/drivers/radeonsi/si_query.h |  1 +
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_query.c 
b/src/gallium/drivers/radeonsi/si_query.c
index 266b9d3ce84..c115e7787b2 100644
--- a/src/gallium/drivers/radeonsi/si_query.c
+++ b/src/gallium/drivers/radeonsi/si_query.c
@@ -542,34 +542,46 @@ void si_query_buffer_reset(struct si_context *sctx, 
struct si_query_buffer *buff
while (buffer->previous) {
struct si_query_buffer *qbuf = buffer->previous;
buffer->previous = qbuf->previous;
 
si_resource_reference(>buf, NULL);
buffer->buf = qbuf->buf; /* move ownership */
FREE(qbuf);
}
buffer->results_end = 0;
 
+   if (!buffer->buf)
+   return;
+
/* Discard even the oldest buffer if it can't be mapped without a 
stall. */
-   if (buffer->buf &&
-   (si_rings_is_buffer_referenced(sctx, buffer->buf->buf, 
RADEON_USAGE_READWRITE) ||
-!sctx->ws->buffer_wait(buffer->buf->buf, 0, 
RADEON_USAGE_READWRITE))) {
+   if (si_rings_is_buffer_referenced(sctx, buffer->buf->buf, 
RADEON_USAGE_READWRITE) ||
+   !sctx->ws->buffer_wait(buffer->buf->buf, 0, 
RADEON_USAGE_READWRITE)) {
si_resource_reference(>buf, NULL);
+   } else {
+   /* Re-initialize it before begin. */
+   buffer->prepare_buffer_before_begin = true;
}
 }
 
 bool si_query_buffer_alloc(struct si_context *sctx, struct si_query_buffer 
*buffer,
   bool (*prepare_buffer)(struct si_context *, struct 
si_query_buffer*),
   unsigned size)
 {
-   if (buffer->buf && buffer->results_end + size >= 
buffer->buf->b.b.width0)
+   if (buffer->buf && buffer->results_end + size >= 
buffer->buf->b.b.width0) {
+   if (buffer->prepare_buffer_before_begin &&
+   prepare_buffer && unlikely(!prepare_buffer(sctx, buffer))) {
+   si_resource_reference(>buf, NULL);
+   return false;
+   }
+   buffer->prepare_buffer_before_begin = false;
return true;
+   }
 
if (buffer->buf) {
struct si_query_buffer *qbuf = MALLOC_STRUCT(si_query_buffer);
memcpy(qbuf, buffer, sizeof(*qbuf));
buffer->previous = qbuf;
}
 
buffer->results_end = 0;
 
/* Queries are normally read by the CPU after
diff --git a/src/gallium/drivers/radeonsi/si_query.h 
b/src/gallium/drivers/radeonsi/si_query.h
index aaf0bd03aca..be393c2fbd2 100644
--- a/src/gallium/drivers/radeonsi/si_query.h
+++ b/src/gallium/drivers/radeonsi/si_query.h
@@ -170,20 +170,21 @@ struct si_query_hw_ops {
  struct si_resource *buffer, uint64_t va);
void (*clear_result)(struct si_query_hw *, union pipe_query_result *);
void (*add_result)(struct si_screen *screen,
   struct si_query_hw *, void *buffer,
   union pipe_query_result *result);
 };
 
 struct si_query_buffer {
/* The buffer where query results are stored. */
struct si_resource  *buf;
+   boolprepare_buffer_before_begin;
/* Offset of the next free result after current query data */
unsignedresults_end;
/* If a query buffer is full, a new buffer is created and the old one
 * is put in here. When we calculate the result, we sum up the samples
 * from all buffers. */
struct si_query_buffer  *previous;
 };
 
 void si_query_buffer_destroy(struct si_screen *sctx, struct si_query_buffer 
*buffer);
 void si_query_buffer_reset(struct si_context *sctx, struct si_query_buffer 
*buffer);
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 23/25] radeonsi: factor si_query_buffer logic out of si_query_hw

2019-01-25 Thread Marek Olšák
Timothy, can you please test the attached fix?

Thanks,
Marek

On Wed, Jan 2, 2019 at 10:58 PM Timothy Arceri 
wrote:

> This commit seems to cause bad stuttering in the Batman Arkham City
> benchmark.
>
> On 7/12/18 1:00 am, Nicolai Hähnle wrote:
> > From: Nicolai Hähnle 
> >
> > This is a move towards using composition instead of inheritance for
> > different query types.
> >
> > This change weakens out-of-memory error reporting somewhat, though this
> > should be acceptable since we didn't consistently report such errors in
> > the first place.
> > ---
> >   src/gallium/drivers/radeonsi/si_perfcounter.c |   8 +-
> >   src/gallium/drivers/radeonsi/si_query.c   | 177 +-
> >   src/gallium/drivers/radeonsi/si_query.h   |  17 +-
> >   src/gallium/drivers/radeonsi/si_texture.c |   7 +-
> >   4 files changed, 99 insertions(+), 110 deletions(-)
> >
> > diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c
> b/src/gallium/drivers/radeonsi/si_perfcounter.c
> > index 0b3d8f89273..f0d10c054c4 100644
> > --- a/src/gallium/drivers/radeonsi/si_perfcounter.c
> > +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
> > @@ -761,23 +761,22 @@ static void si_pc_query_destroy(struct si_screen
> *sscreen,
> >   struct si_query_group *group = query->groups;
> >   query->groups = group->next;
> >   FREE(group);
> >   }
> >
> >   FREE(query->counters);
> >
> >   si_query_hw_destroy(sscreen, rquery);
> >   }
> >
> > -static bool si_pc_query_prepare_buffer(struct si_screen *screen,
> > -struct si_query_hw *hwquery,
> > -struct r600_resource *buffer)
> > +static bool si_pc_query_prepare_buffer(struct si_context *ctx,
> > +struct si_query_buffer *qbuf)
> >   {
> >   /* no-op */
> >   return true;
> >   }
> >
> >   static void si_pc_query_emit_start(struct si_context *sctx,
> >  struct si_query_hw *hwquery,
> >  struct r600_resource *buffer, uint64_t
> va)
> >   {
> >   struct si_query_pc *query = (struct si_query_pc *)hwquery;
> > @@ -1055,23 +1054,20 @@ struct pipe_query *si_create_batch_query(struct
> pipe_context *ctx,
> >   counter->base = group->result_base + j;
> >   counter->stride = group->num_counters;
> >
> >   counter->qwords = 1;
> >   if ((block->b->b->flags & SI_PC_BLOCK_SE) && group->se < 0)
> >   counter->qwords = screen->info.max_se;
> >   if (group->instance < 0)
> >   counter->qwords *= block->num_instances;
> >   }
> >
> > - if (!si_query_hw_init(screen, >b))
> > - goto error;
> > -
> >   return (struct pipe_query *)query;
> >
> >   error:
> >   si_pc_query_destroy(screen, >b.b);
> >   return NULL;
> >   }
> >
> >   static bool si_init_block_names(struct si_screen *screen,
> >   struct si_pc_block *block)
> >   {
> > diff --git a/src/gallium/drivers/radeonsi/si_query.c
> b/src/gallium/drivers/radeonsi/si_query.c
> > index 479a1bbf2c4..5b0fba0ed92 100644
> > --- a/src/gallium/drivers/radeonsi/si_query.c
> > +++ b/src/gallium/drivers/radeonsi/si_query.c
> > @@ -514,86 +514,129 @@ static struct pipe_query
> *si_query_sw_create(unsigned query_type)
> >   query = CALLOC_STRUCT(si_query_sw);
> >   if (!query)
> >   return NULL;
> >
> >   query->b.type = query_type;
> >   query->b.ops = _query_ops;
> >
> >   return (struct pipe_query *)query;
> >   }
> >
> > -void si_query_hw_destroy(struct si_screen *sscreen,
> > -  struct si_query *rquery)
> > +void si_query_buffer_destroy(struct si_screen *sscreen, struct
> si_query_buffer *buffer)
> >   {
> > - struct si_query_hw *query = (struct si_query_hw *)rquery;
> > - struct si_query_buffer *prev = query->buffer.previous;
> > + struct si_query_buffer *prev = buffer->previous;
> >
> >   /* Release all query buffers. */
> >   while (prev) {
> >   struct si_query_buffer *qbuf = prev;
> >   prev = prev->previous;
> >   r600_resource_reference(>buf, NULL);
> >   FREE(qbuf);
> >   }
> >
> > - r600_resource_reference(>buffer.buf, NULL);
> > - r600_resource_reference(>workaround_buf, NULL);
> > - FREE(rquery);
> > + r600_resource_reference(>buf, NULL);
> > +}
> > +
> > +void si_query_buffer_reset(struct si_context *sctx, struct
> si_query_buffer *buffer)
> > +{
> > + /* Discard all query buffers except for the oldest. */
> > + while (buffer->previous) {
> > + struct si_query_buffer *qbuf = buffer->previous;
> > + buffer->previous = qbuf->previous;
> > +
> > + r600_resource_reference(>buf, NULL);
> > + buffer->buf = qbuf->buf; /* move ownership */
> > + 

Re: [Mesa-dev] [Freedreno] [RFC 0/4] freedreno: Move some compiler offset computations to NIR

2019-01-25 Thread Rob Clark
On Fri, Jan 25, 2019 at 10:48 AM Eduardo Lima Mitev  wrote:
>
> There are a bunch of instructions emitted on ir3_compiler_nir related to
> offset computations for IO opcodes (ssbo, image, etc). This small series
> explores the possibility of moving these instructions to NIR, where we
> have higher chances of optimizing them.
>
> The series introduces a new, freedreno specific NIR pass,
> 'ir3_nir_lower_sampler_io' (final name not set). The pass is executed
> early on ir3_optimize_nir(), and the goal is to centralize all these
> computations there, hoping that later NIR passes will produce better
> code than what is currently emitted.

I can think of a few other things that would be interesting to lower
to driver specific nir opcodes (imul and various lowering for tex
instructions come to mind.. but probably also ubo and ssbo address
calculation.. maybe it could even make sense for some of the single
src alu instructions that translate into multiple ir3 instructions,
not sure)..

Are you thinking about having separate passes for each?  I guess at
least for alu instructions we might able to use nir_algebraic so
having things split up might be easier.

> So far, we have just implemented byte-offset computation for image store
> and atomics. This seemed like a good first target given the amount of
> instructions being emitted for it by the backend.
>
> This is an RFC series because there are a few open questions, but we
> wanted to gather feedback already now, in case this effort is something
> not worth it; and also hoping that somebody else will give it a try
> against other shaders and on other gens (we have just tried this on
> a5xx).
>
> * We have so far been unable to see any improvement in generated code
> (not a penalty either). shader-db has not been specially useful. Few
> shaders there exercise image store or image atomic ops, and of those
> that do, most require higher versions of GLSL than what freedreno
> supports, so they get skipped. The few that do actually run, don't
> show any meaningful difference.

I guess it would be easy enough to construct shaders that would
benefit from this, but maybe that is cheating..

Possibly UBO and SSBO is a better target, I guess there you might be
more likely to see patterns of access of successive elements (ie.
foo[idx], foo[idx+1], etc)?

Anyways, since we don't try to do (and I'd rather not do) any sort of
CSE post nir->ir3 I think starting to introduce more ir3 specific
nir->nir lowering seems like a thing we need, so I'm pretty happy that
someone is looking at this :-)

BR,
-R

> Then other shaders picked from tests suites are simple enough not to
> produce any difference in code either.
>
> There is still on-going work looking for cases where the pass helps
> instruction stats, whether writing custom shaders or porting complex
> shader from shader-db to run on GLES 310.
>
> There is though an open question here as to whether moving backend
> code to NIR is a benefit in and of itself.
>
> * The series adds a nir_op_imad opcode that didn't exist before, and
> perhaps not generally useful even for freedreno outside this pass,
> because it maps to IR3_MAD_S24 which is probably not suitable for
> generic integer multiply-add.
>
> * The pass currently has 2 alternative code-paths to emit the
> multiplication by the bytes-per-pixel of an image format. In one
> case, since this value can be obtained at compile time, it is
> emitted as an immediate by nir_imul_imm. The other alternative is
> emitting an nir_imul with an SSA value that will map to
> image_dims[0] at shader runtime.
>
> The former case is uglier but produces better code (a single SHL
> instruction), whereas the latter involves a generic imul, for which
> the backend emits a lot of code to cover for overflow.
>
> The doubt here is whether we should introduce a (lower precision)
> version of imul that maps directly to IR3_IMUL_S.
>
>
> A live (WIP) tree of the series can be found at:
> 
>
> We plan to continue moving computations to the pass if we see
> good opportunities.
>
> Feedback very welcome,
>
> cheers,
> Eduardo
>
> Eduardo Lima Mitev (4):
>   nir: Add a new intrinsic 'load_image_stride'
>   nir: Add a new ALU nir_op_imad
>   ir3/nir: Add a new pass 'ir3_nir_lower_sampler_io'
>   ir3: Use ir3_nir_lower_sampler_io pass
>
>  src/compiler/nir/nir_intrinsics.py   |   2 +
>  src/compiler/nir/nir_opcodes.py  |   1 +
>  src/freedreno/Makefile.sources   |   1 +
>  src/freedreno/ir3/ir3_compiler_nir.c |  61 ++--
>  src/freedreno/ir3/ir3_nir.c  |   1 +
>  src/freedreno/ir3/ir3_nir.h  |   1 +
>  src/freedreno/ir3/ir3_nir_lower_sampler_io.c | 349 +++
>  7 files changed, 383 insertions(+), 33 deletions(-)
>  create mode 100644 src/freedreno/ir3/ir3_nir_lower_sampler_io.c
>
> --
> 2.20.1
>
> ___
> 

Re: [Mesa-dev] Thoughts after hitting 100 merge requests?

2019-01-25 Thread Rob Clark
On Fri, Jan 25, 2019 at 4:26 PM Eric Anholt  wrote:
>
> Rob Clark  writes:
>
> > I guess as discovered with
> > https://gitlab.freedesktop.org/mesa/mesa/merge_requests/47 maybe we
> > should wait to turn on merging MRs via web until we have at least some
> > basic build-test CI wired up.. the downside is slower 'maintainer'
> > response (if I am working on some long running branch I tend to
> > postpone pushing changes from others until I am in a good state to
> > rebase, but at least when I merge things via cmdline I do a build test
> > and sanity test on at at least one driver ;-))
>
> We should be days, not weeks, away from having build-test CI wired up.
> I actually thought Igalia was supposed to be working on it again
> already, but then Eric Engestrom started working on a new variation, so
> I'm not sure what's going on.
>
> I'm skeptical that we need to do an MR policy change given that
> build-breakage and especially unit-test-breakage has been a recurring
> issue in the absence of CI, even before MRs.

Ok, that is good news that it is close, I think if it is just a matter
of days (or even a week or two), we shouldn't change the process.  If
it would be more of a longer term thing I'd worry about breaking the
build too frequently compared to the less convenient cmdline process..

(Hmm, I guess I should take a look at what sort of API gitlab offers,
but that will probably have to wait until after the branchpoint.. tbh
I'd actually be pretty happy w/ a gitlab equiv of 'git pw as -s' for
merging things from cmdline.)

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] meson: drop the xcb-xrandr version requirement

2019-01-25 Thread Marek Olšák
From: Marek Olšák 

autotools doesn't have any requirement. This fixes meson on Ubuntu 16.04.
---
 meson.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/meson.build b/meson.build
index 34e2a032548..7f16c3070fe 100644
--- a/meson.build
+++ b/meson.build
@@ -1389,21 +1389,21 @@ if with_platform_x11
   dep_xxf86vm = dependency('xxf86vm')
 endif
 dep_glproto = dependency('glproto', version : '>= 1.4.14')
   endif
   if (with_egl or (
   with_gallium_vdpau or with_gallium_xvmc or with_gallium_xa or
   with_gallium_omx != 'disabled'))
 dep_xcb_xfixes = dependency('xcb-xfixes')
   endif
   if with_xlib_lease
-dep_xcb_xrandr = dependency('xcb-randr', version : '>= 1.12')
+dep_xcb_xrandr = dependency('xcb-randr')
 dep_xlib_xrandr = dependency('xrandr', version : '>= 1.3')
   endif
 endif
 
 if get_option('gallium-extra-hud')
   pre_args += '-DHAVE_GALLIUM_EXTRA_HUD=1'
 endif
 
 _sensors = get_option('lmsensors')
 if _sensors != 'false'
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] intel/fs: Get rid of fs_inst::equals

2019-01-25 Thread Jason Ekstrand
There are piles of fields that it doesn't check so using it is a lie.
The only reason why it's not causing problem is because it has exactly
one user which only uses it for MOV instructions (which aren't very
interesting) and only on Sandy Bridge and earlier hardware.  Just get
rid of it and inline it in the one place that it's actually used.

Cc: Iago Toral Quiroga 
---
 src/intel/compiler/brw_fs.cpp  | 29 +++--
 src/intel/compiler/brw_ir_fs.h |  1 -
 2 files changed, 7 insertions(+), 23 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 0359eb079f7..6b66116f46b 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -212,27 +212,6 @@ fs_visitor::DEP_RESOLVE_MOV(const fs_builder , int grf)
ubld.MOV(ubld.null_reg_f(), fs_reg(VGRF, grf, BRW_REGISTER_TYPE_F));
 }
 
-bool
-fs_inst::equals(fs_inst *inst) const
-{
-   return (opcode == inst->opcode &&
-   dst.equals(inst->dst) &&
-   src[0].equals(inst->src[0]) &&
-   src[1].equals(inst->src[1]) &&
-   src[2].equals(inst->src[2]) &&
-   saturate == inst->saturate &&
-   predicate == inst->predicate &&
-   conditional_mod == inst->conditional_mod &&
-   mlen == inst->mlen &&
-   base_mrf == inst->base_mrf &&
-   target == inst->target &&
-   eot == inst->eot &&
-   header_size == inst->header_size &&
-   shadow_compare == inst->shadow_compare &&
-   exec_size == inst->exec_size &&
-   offset == inst->offset);
-}
-
 bool
 fs_inst::is_send_from_grf() const
 {
@@ -3410,7 +3389,13 @@ fs_visitor::remove_duplicate_mrf_writes()
   if (inst->opcode == BRW_OPCODE_MOV &&
  inst->dst.file == MRF) {
  fs_inst *prev_inst = last_mrf_move[inst->dst.nr];
-if (prev_inst && inst->equals(prev_inst)) {
+if (prev_inst && prev_inst->opcode == BRW_OPCODE_MOV &&
+ inst->dst.equals(prev_inst->dst) &&
+inst->src[0].equals(prev_inst->src[0]) &&
+ inst->saturate == prev_inst->saturate &&
+ inst->predicate == prev_inst->predicate &&
+ inst->conditional_mod == prev_inst->conditional_mod &&
+ inst->exec_size == prev_inst->exec_size) {
inst->remove(block);
progress = true;
continue;
diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h
index 08e3d83d910..d05357e822e 100644
--- a/src/intel/compiler/brw_ir_fs.h
+++ b/src/intel/compiler/brw_ir_fs.h
@@ -347,7 +347,6 @@ public:
 
void resize_sources(uint8_t num_sources);
 
-   bool equals(fs_inst *inst) const;
bool is_send_from_grf() const;
bool is_partial_write() const;
bool is_copy_payload(const brw::simple_allocator _alloc) const;
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: GLES2 fix for OES float/half-float textures.

2019-01-25 Thread Eric Anholt
Nick Kreeger  writes:

> The OES_texture* extensions for float and half-float are valid when
> GLES2 is present w/ the matching
> OES_texture_float/OES_texture_half_float extensions. This fix ensures
> that these formats are valid for this configuration.

I don't see OES_texture_float.txt specifying these tokens as additions
to the list of valid base internal formats (table 3.8 of the GLES2
spec), so I think this change is invalid.  Sized internalformats were
introduced with GLES3.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] intel/fs: Get rid of fs_inst::equals

2019-01-25 Thread Jason Ekstrand
There are piles of fields that it doesn't check so using it is a lie.
The only reason why it's not causing problem is because it has exactly
one user which only uses it for MOV instructions (which aren't very
interesting) and only on Sandy Bridge and earlier hardware.  Just get
rid of it and inline it in the one place that it's actually used.

Cc: Iago Toral Quiroga 
---
 src/intel/compiler/brw_fs.cpp  | 29 +++--
 src/intel/compiler/brw_ir_fs.h |  1 -
 2 files changed, 7 insertions(+), 23 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 0359eb079f7..6b66116f46b 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -212,27 +212,6 @@ fs_visitor::DEP_RESOLVE_MOV(const fs_builder , int grf)
ubld.MOV(ubld.null_reg_f(), fs_reg(VGRF, grf, BRW_REGISTER_TYPE_F));
 }
 
-bool
-fs_inst::equals(fs_inst *inst) const
-{
-   return (opcode == inst->opcode &&
-   dst.equals(inst->dst) &&
-   src[0].equals(inst->src[0]) &&
-   src[1].equals(inst->src[1]) &&
-   src[2].equals(inst->src[2]) &&
-   saturate == inst->saturate &&
-   predicate == inst->predicate &&
-   conditional_mod == inst->conditional_mod &&
-   mlen == inst->mlen &&
-   base_mrf == inst->base_mrf &&
-   target == inst->target &&
-   eot == inst->eot &&
-   header_size == inst->header_size &&
-   shadow_compare == inst->shadow_compare &&
-   exec_size == inst->exec_size &&
-   offset == inst->offset);
-}
-
 bool
 fs_inst::is_send_from_grf() const
 {
@@ -3410,7 +3389,13 @@ fs_visitor::remove_duplicate_mrf_writes()
   if (inst->opcode == BRW_OPCODE_MOV &&
  inst->dst.file == MRF) {
  fs_inst *prev_inst = last_mrf_move[inst->dst.nr];
-if (prev_inst && inst->equals(prev_inst)) {
+if (prev_inst && prev_inst->opcode == BRW_OPCODE_MOV &&
+ inst->dst.equals(prev_inst->dst) &&
+inst->src[0].equals(prev_inst->src[0]) &&
+ inst->saturate == prev_inst->saturate &&
+ inst->predicate == prev_inst->predicate &&
+ inst->conditional_mod == prev_inst->conditional_mod &&
+ inst->exec_size == prev_inst->exec_size) {
inst->remove(block);
progress = true;
continue;
diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h
index 08e3d83d910..d05357e822e 100644
--- a/src/intel/compiler/brw_ir_fs.h
+++ b/src/intel/compiler/brw_ir_fs.h
@@ -347,7 +347,6 @@ public:
 
void resize_sources(uint8_t num_sources);
 
-   bool equals(fs_inst *inst) const;
bool is_send_from_grf() const;
bool is_partial_write() const;
bool is_copy_payload(const brw::simple_allocator _alloc) const;
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Thoughts after hitting 100 merge requests?

2019-01-25 Thread Eric Anholt
Rob Clark  writes:

> I guess as discovered with
> https://gitlab.freedesktop.org/mesa/mesa/merge_requests/47 maybe we
> should wait to turn on merging MRs via web until we have at least some
> basic build-test CI wired up.. the downside is slower 'maintainer'
> response (if I am working on some long running branch I tend to
> postpone pushing changes from others until I am in a good state to
> rebase, but at least when I merge things via cmdline I do a build test
> and sanity test on at at least one driver ;-))

We should be days, not weeks, away from having build-test CI wired up.
I actually thought Igalia was supposed to be working on it again
already, but then Eric Engestrom started working on a new variation, so
I'm not sure what's going on.

I'm skeptical that we need to do an MR policy change given that
build-breakage and especially unit-test-breakage has been a recurring
issue in the absence of CI, even before MRs.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] meson: add toggle for TLS support in GLX

2019-01-25 Thread Eric Anholt
Patrick Steinhardt  writes:

> The musl libc library does not have any support for the
> "initial-exec" TLS model and is unlikely to ever implement it.
> Thus, TLS support in GLX has been turned off in musl-based
> distributions to work around problems when dlopen'ing drivers.
> While this is easily possible using the autoconf build system by
> passing `--disable-glx-tls`, meson does not yet have such an
> option.
>
> Add a new toggle "glx-tls" that defaults to `true` to gain parity
> with autoconf. If disabled, `GLX_USE_TLS` will not be defined and
> thus mesa will be built without TLS support.

Could you compile-test instead of having an option for people to get
wrong?


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mapi: print function declarations for shared glapi

2019-01-25 Thread Eric Anholt
Emil Velikov  writes:

> From: Emil Velikov 
>
> Earlier commit aimed to remove unneeded function declarations. Namely
> OpenGL entrypoints which are not applicable for OpenGLES*
>
> Although it did not consider the shared glapi which needs all,
> including hidden ones. Resulting in warning/errors like the following
>
> ../build/src/mapi/shared-glapi/glapi_mapi_tmp.h:26014:15:
> error: no previous prototype for ‘shared_dispatch_stub_1414’ 
> [-Werror=missing-prototypes]
>
> This patch addressed that.

Applied, to fix my builds.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/compiler: update validator to account for half-float exec type promotion

2019-01-25 Thread Francisco Jerez
Iago Toral  writes:

> On Thu, 2019-01-24 at 11:45 -0800, Francisco Jerez wrote:
>> Iago Toral  writes:
>> 
>> > On Wed, 2019-01-23 at 06:03 -0800, Francisco Jerez wrote:
>> > > Iago Toral Quiroga  writes:
>> > > 
>> > > > Commit c84ec70b3a72 implemented execution type promotion to 32-
>> > > > bit
>> > > > for
>> > > > conversions involving half-float registers, which empirical
>> > > > testing
>> > > > suggested
>> > > > was required, but it did not incorporate this change into the
>> > > > assembly validator
>> > > > logic. This commits adds that, preventing validation errors
>> > > > like
>> > > > this:
>> > > > 
>> > > 
>> > > I don't think we should be validating empirical assumptions in
>> > > the EU
>> > > validator.
>> > 
>> > I am not sure I get your point, isn't c84ec70b3a72 also based on
>> > empirical testing after all?
>> > 
>> 
>> To some extent, but it doesn't attempt to enforce ISA restrictions
>> based
>> on information obtained empirically.
>> 
>> > 
>> > > > mov(16)  g9<4>B   g3<16,8,2>HF { align1 1H };
>> > > > ERROR: Destination stride must be equal to the ratio of the
>> > > > sizes
>> > > > of the
>> > > >execution data type to the destination type
>> > > > 
>> > > > Fixes: c84ec70b3a72 "intel/fs: Promote execution type to 32-bit
>> > > > when any half-float conversion is needed."
>> > > 
>> > > I don't think this "fixes" anything that ever worked.
>> > 
>> > It is true that the code in that trace above is not something we
>> > can
>> > produce right now, because it is a conversion from HF to B and that
>> > should only happen within the context of
>> > VK_KHR_shader_float16_int8,
>> > however, this is a consequence of the fact that since c84ec70b3a72
>> > there is an inconsistency between what we do at the IR level
>> > regarding
>> > execution size of HF conversions and what the EU validator is
>> > doing,
>> > and from that perspective this is really fixing an inconsistency
>> > that
>> > didn't exist before, and I thought we would want to address that
>> > sooner
>> > rather than later and track it down to the original change that
>> > introduced that inconsistency so we know where this is coming from.
>> > 
>> 
>> The "inconsistency" between the IR's get_exec_type() and the EU
>> validator's execution_type() has existed ever since a05b6f25bf4bfad7
>> removed the HF assert from get_exec_type() without actually
>> implementing
>> the code required to handle HF operands (which is what my commit
>> c84ec70b3a72 did).
>
> I agree with the fact that since a05b6f25bf4bfad7 the validator could
> reject valid code and that had nothing to do with your patch,

The validator rejected the same valid HF code since it was written, that
had nothing to do with neither a05b6f25bf4bfad7 nor with my patch, and
it is the real problem this patch was working around.

> but the inconsistency I am talking about here, that this patch fixes,
> is the one about get_exec_type() in the IR and execution_type() in the
> validator doing different things for HF instructions, which only
> exists since your patch and which you discuss below.
>

The "inconsistency" exists ever since get_exec_type() was introduced
without correct handling of HF types (even though execution_type()
already attempted to handle it).  And I disagree that it's a real
inconsistency except due to the fact that the validator is incorrectly
attempting to validate the alignment of the destination region according
to a rule that doesn't apply to HF types.

>> > Anyway, that was my rationale for the Fixes tag, but if you think
>> > this
>> > is not useful I am happy to drop this patch and just include it as
>> > part
>> > of my series without the tag.
>> > 
>> 
>> I'd like to see the actual regioning restrictions for HF types
>> implemented in the EU validator as part of your series.
>
> Ok, let's see if we can agree on what restrictions should we implement
> then. I can implement this restriction as documented:
>
> "Conversion between Integer and HF (Half Float) must be DWord-aligned
> and strided by a DWord on the destination"
>
> Instead of trying to apply the general one that is currently breaking.
> That will fix the assertion issue. I guess my issue with it was that
> going by your analysis this restriction is not telling the full
> picture, this is what you had to say about this restriction:
>
> "I have a feeling that the reason for this may be that the 16-bit
> pipeline lacks the ability to handle conversions from or to half-float, 
> so the execution type is implicitly promoted to the matching (integer
> or floating-point) 32-bit type where any HF conversion would be
> needed.  And on those the usual alignment restriction of the
> destination to a larger execution type applies."
>
> But I guess your point is that we can ignore these details at the
> validator level and just focus on validating strictly what the PRM
> says. Fair enough.
>
> Unfortunatley, you also mentioned that this restriction *seems* to be
> overriden by 

[Mesa-dev] [PATCH] i965: Always compile fp64 funcs when needed

2019-01-25 Thread Matt Turner
Compilation of user-specified shaders with software fp64 works by
compiling on demand an "fp64-funcs" shader implementing various fp64
operations and then linking it into the "user shader".

In

   commit 64b8c86d37ebb1e1d286c69d642d52b7bcf051d3
   Author: Timothy Arceri 
   Date:   Thu Jan 17 17:16:29 2019 +1100

   glsl: be much more aggressive when skipping shader compilation

we changed the behavior of the shader cache to skip compilation earlier
when we get a cache hit.

After the aforementioned commit, compiling a user program using fp64
would store into the cache an entry for the fp64-funcs shader.
Subsequent compilations of uncached user shaders using fp64 would fail
in compile_fp64_funcs() after finding a cache entry for the fp64-funcs,
but being unprepared to read from the cache.

It's unclear to me how to retrieve the cached NIR of the fp64-funcs (if
it even is cached), so just call _mesa_glsl_compile_shader() with
force_recompile=true in order to ensure we generate the fp64-funcs
successfully.
---
 src/mesa/drivers/dri/i965/brw_program.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
b/src/mesa/drivers/dri/i965/brw_program.c
index c01143decd0..9ab25cf664c 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -40,6 +40,7 @@
 #include "tnl/tnl.h"
 #include "util/ralloc.h"
 #include "compiler/glsl/ir.h"
+#include "compiler/glsl/program.h"
 #include "compiler/glsl/glsl_to_nir.h"
 #include "compiler/glsl/float64_glsl.h"
 
@@ -87,7 +88,7 @@ compile_fp64_funcs(struct gl_context *ctx,
 
sh->Source = float64_source;
sh->CompileStatus = COMPILE_FAILURE;
-   _mesa_compile_shader(ctx, sh);
+   _mesa_glsl_compile_shader(ctx, sh, false, false, true);
 
if (!sh->CompileStatus) {
   if (sh->InfoLog) {
-- 
2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Thoughts on fp64 for GLES?

2019-01-25 Thread Jason Ekstrand
On Fri, Jan 25, 2019 at 1:53 PM Stéphane Marchesin <
stephane.marche...@gmail.com> wrote:

> On Fri, Jan 25, 2019 at 2:25 AM Gert Wollny  wrote:
> >
> > Am Donnerstag, den 24.01.2019, 22:25 -0800 schrieb Stéphane Marchesin:
> > >
> > > Yes, it's for running virgl on top of GLES. To emulate fp64 in GL on
> > > the guest side, we need fp64 on the host...
> >
> > BTW: we could also get it emulated from the guest side. When Elie (in
> > CC)  initially proposed the fp64 emulation series it was for r600 and
> > TGSI was emitted. The created shaders are horribly long and it is
> > certainly not performant, but if it's just for getting OpenGL 4.0
> > exposed it should be good enough.
>
> Yes, Ilia suggested this on IRC yesterday. My impression is that not
> many applications/games need high performance fp64 (it's likely mostly
> compute stuff, which is not our target). I could be wrong though. If
> anyone knows differently, please tell us :)
>

In our experience, we have yet to see an app actually use the extension.
If we do have such apps, it'd be better to do it in hardware when
available.  However, if it's just so that you can claim support, maybe a
GLES extension isn't worth the bother?  I don't have a particularly strong
opinion at the moment beyond "fp64 sounds like the most non-ES thing ever".


> >
> > I'm not sure though how much work it would be to add this to the soft
> > fp64 as it has now landed for NIR, though.
>
> Yes, with virgl not using NIR, I am not sure how much work soft fp64
> will require.
>

The core of the soft fp64 stuff is a library of GLSL functions which
actually implement it.  We just compile them to NIR and then lower fp64
math to function calls and inline.  You could write a lowering pass in
GLSL, TGSI, or a back-end compiler based on those easily enough.

--Jason
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Thoughts on fp64 for GLES?

2019-01-25 Thread Stéphane Marchesin
On Fri, Jan 25, 2019 at 2:25 AM Gert Wollny  wrote:
>
> Am Donnerstag, den 24.01.2019, 22:25 -0800 schrieb Stéphane Marchesin:
> >
> > Yes, it's for running virgl on top of GLES. To emulate fp64 in GL on
> > the guest side, we need fp64 on the host...
>
> BTW: we could also get it emulated from the guest side. When Elie (in
> CC)  initially proposed the fp64 emulation series it was for r600 and
> TGSI was emitted. The created shaders are horribly long and it is
> certainly not performant, but if it's just for getting OpenGL 4.0
> exposed it should be good enough.

Yes, Ilia suggested this on IRC yesterday. My impression is that not
many applications/games need high performance fp64 (it's likely mostly
compute stuff, which is not our target). I could be wrong though. If
anyone knows differently, please tell us :)

>
> I'm not sure though how much work it would be to add this to the soft
> fp64 as it has now landed for NIR, though.

Yes, with virgl not using NIR, I am not sure how much work soft fp64
will require.

Stéphane


>
> Best,
> Gert
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 02/32] intel/blorp: Use isl_surf_get_image_offset_B_tile_el in ccs_ambiguate

2019-01-25 Thread Nanley Chery
On Fri, Oct 12, 2018 at 01:46:32PM -0500, Jason Ekstrand wrote:
> Reviewed-by: Topi Pohjolainen 
> ---
>  src/intel/blorp/blorp_clear.c | 8 ++--
>  1 file changed, 2 insertions(+), 6 deletions(-)
> 


Patches 1 and 2 are:
Reviewed-by: Nanley Chery 

> diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
> index 5b575dccc22..dd974df35d2 100644
> --- a/src/intel/blorp/blorp_clear.c
> +++ b/src/intel/blorp/blorp_clear.c
> @@ -1086,12 +1086,8 @@ blorp_ccs_ambiguate(struct blorp_batch *batch,
> }
>  
> uint32_t offset_B, x_offset_el, y_offset_el;
> -   isl_surf_get_image_offset_el(surf->aux_surf, level, layer, z,
> -_offset_el, _offset_el);
> -   isl_tiling_get_intratile_offset_el(surf->aux_surf->tiling, aux_fmtl->bpb,
> -  surf->aux_surf->row_pitch_B,
> -  x_offset_el, y_offset_el,
> -  _B, _offset_el, _offset_el);
> +   isl_surf_get_image_offset_B_tile_el(surf->aux_surf, level, layer, z,
> +   _B, _offset_el, 
> _offset_el);
> params.dst.addr.offset += offset_B;
>  
> const uint32_t width_px =
> -- 
> 2.19.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 2/4] nir: Add a new ALU nir_op_imad

2019-01-25 Thread Eric Anholt
Eduardo Lima Mitev  writes:

> ir3 compiler has an integer multiply-add instruction (IMAD_S24)
> that is used for different offset calculations in the backend.
> Since we intend to move some of these calculations to NIR, we need
> a new ALU op that can represent it.
> ---
>  src/compiler/nir/nir_opcodes.py | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
> index d32005846a6..b61845fd514 100644
> --- a/src/compiler/nir/nir_opcodes.py
> +++ b/src/compiler/nir/nir_opcodes.py
> @@ -754,6 +754,7 @@ def triop_horiz(name, output_size, src1_size, src2_size, 
> src3_size, const_expr):
> [tuint, tuint, tuint], "", const_expr)
>  
>  triop("ffma", tfloat, "src0 * src1 + src2")
> +triop("imad", tint, "src0 * src1 + src2")

The constant-folding expression should be corrected to what the backend
operation actually does, and you should probably call it imad24 or
something in that case.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 1/4] nir: Add a new intrinsic 'load_image_stride'

2019-01-25 Thread Eric Anholt
Eduardo Lima Mitev  writes:

> This is an internal intrinsic intended to be injected by a
> (freedreno-specific) 'lower_sampler_io' pass that will be introduced
> later in this series; and consumed by ir3_compiler_nir.
>
> The intrinsic will load in SSA values for various constants
> for images (image_dims), namely the format's bytes-per-pixel,
> y-stride and z-stride; for which the backend compiler will emit
> the corresponding uniforms.
>
> const_index[0] is the image index, and const_index[1] is the index
> into image_dims' register file for a given image: 0 for bpp, 1 to
> y-stride and 2 for z-stride.

Can you move this documentation of the meaning of the intrinsic into a
comment in the file?


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nir: allow stitching of non-empty block

2019-01-25 Thread Juan A. Suarez Romero
When stitching two blocks A and B, where A's last instruction is a jump,
it is not required that B is empty; it can be plainly removed.

This can happen in a situation like this:

vec1 1 ssa_1 = load_const (true)
vec1 1 ssa_2 = load_const (false)
block block_1:
[...]
loop {
  vec1 ssa_3 = phi block_1: ssa_2, block_4: ssa_1
  if ssa_3 {
block block_2:
[...]
break
  } else {
block block_3:
  }
  vec1 ssa_4 = 
  if ssa_4 {
block block_4:
continue
  } else {
block block_5:
  }
  block block_6:
  [...]
}

And opt_peel_loop_initial_if is applied. In this case, we would be
ending up stitching block_2 (which finalizes with a jump) with
block_4, which is not empty.

CC: Jason Ekstrand 
---
 src/compiler/nir/nir_control_flow.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/compiler/nir/nir_control_flow.c 
b/src/compiler/nir/nir_control_flow.c
index ddba2e55b45..27508f230d6 100644
--- a/src/compiler/nir/nir_control_flow.c
+++ b/src/compiler/nir/nir_control_flow.c
@@ -550,7 +550,6 @@ stitch_blocks(nir_block *before, nir_block *after)
 */
 
if (nir_block_ends_in_jump(before)) {
-  assert(exec_list_is_empty(>instr_list));
   if (after->successors[0])
  remove_phi_src(after->successors[0], after);
   if (after->successors[1])
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mapi: print function declarations for shared glapi

2019-01-25 Thread Emil Velikov
From: Emil Velikov 

Earlier commit aimed to remove unneeded function declarations. Namely
OpenGL entrypoints which are not applicable for OpenGLES*

Although it did not consider the shared glapi which needs all,
including hidden ones. Resulting in warning/errors like the following

../build/src/mapi/shared-glapi/glapi_mapi_tmp.h:26014:15:
error: no previous prototype for ‘shared_dispatch_stub_1414’ 
[-Werror=missing-prototypes]

This patch addressed that.

Cc: Erik Faye-Lund 
Cc Eric Anholt 
Reported-by: Eric Anholt 
Fixes: 6148cce388f ("mapi: drop unneeded gl_dispatch_stub declarations")
Signed-off-by: Emil Velikov 
---
 src/mapi/mapi_abi.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mapi/mapi_abi.py b/src/mapi/mapi_abi.py
index 4293cc0c6c2..8b436638d2b 100644
--- a/src/mapi/mapi_abi.py
+++ b/src/mapi/mapi_abi.py
@@ -265,7 +265,7 @@ class ABIPrinter(object):
 if not self.need_entry_point(ent):
 continue
 export = self.api_call if not ent.hidden else ''
-if not ent.hidden:
+if not ent.hidden or not self.lib_need_non_hidden_entries:
 decls.append(self._c_decl(ent, prefix, True, export) + ';')
 
 return "\n".join(decls)
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 4/4] radv: add support for push constants inlining when possible

2019-01-25 Thread Samuel Pitoiset
This removes some scalar loads from shaders, but it increases
the number of SET_SH_REG packets. This is currently basic but
it could be improved if needed. Inlining dynamic offsets might
also help.

Original idea from Dave Airlie.

29164 shaders in 15096 tests
Totals:
SGPRS: 1336072 -> 1365241 (2.18 %)
VGPRS: 937784 -> 934592 (-0.34 %)
Spilled SGPRs: 24751 -> 24796 (0.18 %)
Code Size: 50001672 -> 49815524 (-0.37 %) bytes
Max Waves: 208755 -> 208830 (0.04 %)

Totals from affected shaders:
SGPRS: 295018 -> 324187 (9.89 %)
VGPRS: 243108 -> 239916 (-1.31 %)
Spilled SGPRs: 1464 -> 1509 (3.07 %)
Code Size: 8028188 -> 7842040 (-2.32 %) bytes
Max Waves: 69580 -> 69655 (0.11 %)

Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_nir_to_llvm.c   | 23 +++--
 src/amd/common/ac_shader_abi.h|  4 ++
 src/amd/vulkan/radv_cmd_buffer.c  | 78 ++-
 src/amd/vulkan/radv_nir_to_llvm.c | 54 +
 src/amd/vulkan/radv_shader.h  | 10 ++--
 src/amd/vulkan/radv_shader_info.c |  4 ++
 6 files changed, 145 insertions(+), 28 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index f509fc31dff..db1574b5b35 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1392,10 +1392,27 @@ static LLVMValueRef visit_load_push_constant(struct 
ac_nir_context *ctx,
  nir_intrinsic_instr *instr)
 {
LLVMValueRef ptr, addr;
+   LLVMValueRef src0 = get_src(ctx, instr->src[0]);
+   unsigned index = nir_intrinsic_base(instr);
 
-   addr = LLVMConstInt(ctx->ac.i32, nir_intrinsic_base(instr), 0);
-   addr = LLVMBuildAdd(ctx->ac.builder, addr,
-   get_src(ctx, instr->src[0]), "");
+   addr = LLVMConstInt(ctx->ac.i32, index, 0);
+   addr = LLVMBuildAdd(ctx->ac.builder, addr, src0, "");
+
+   /* Load constant values from user SGPRS when possible, otherwise
+* fallback to the default path that loads directly from memory.
+*/
+   if (LLVMIsConstant(src0) &&
+   index == 0 &&
+   instr->dest.ssa.bit_size == 32) {
+   unsigned offset = LLVMConstIntGetZExtValue(src0) / 4;
+   unsigned count = instr->dest.ssa.num_components;
+
+   if (offset + count <= ctx->abi->num_inline_push_consts) {
+   return ac_build_gather_values(>ac,
+ 
ctx->abi->inline_push_consts + offset,
+ count);
+   }
+   }
 
ptr = ac_build_gep0(>ac, ctx->abi->push_constants, addr);
 
diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h
index ee18e6c1923..704c3d107c2 100644
--- a/src/amd/common/ac_shader_abi.h
+++ b/src/amd/common/ac_shader_abi.h
@@ -32,6 +32,8 @@ struct nir_variable;
 
 #define AC_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)
 
+#define AC_MAX_INLINE_PUSH_CONSTS 8
+
 enum ac_descriptor_type {
AC_DESC_IMAGE,
AC_DESC_FMASK,
@@ -66,6 +68,8 @@ struct ac_shader_abi {
 
/* Vulkan only */
LLVMValueRef push_constants;
+   LLVMValueRef inline_push_consts[AC_MAX_INLINE_PUSH_CONSTS];
+   unsigned num_inline_push_consts;
LLVMValueRef view_index;
 
LLVMValueRef outputs[AC_LLVM_MAX_OUTPUTS * 4];
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index aae90290841..f80e2078da0 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -627,6 +627,23 @@ radv_emit_descriptor_pointers(struct radv_cmd_buffer 
*cmd_buffer,
}
 }
 
+static void
+radv_emit_inline_push_consts(struct radv_cmd_buffer *cmd_buffer,
+struct radv_pipeline *pipeline,
+gl_shader_stage stage,
+int idx, int count, uint32_t *values)
+{
+   struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, 
idx);
+   uint32_t base_reg = pipeline->user_data_0[stage];
+   if (loc->sgpr_idx == -1)
+   return;
+
+   assert(loc->num_sgprs == count);
+
+   radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, 
count);
+   radeon_emit_array(cmd_buffer->cs, values, count);
+}
+
 static void
 radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
  struct radv_pipeline *pipeline)
@@ -1900,6 +1917,7 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
radv_get_descriptors_state(cmd_buffer, bind_point);
struct radv_pipeline_layout *layout = pipeline->layout;
struct radv_shader_variant *shader, *prev_shader;
+   bool need_push_constants = false;
unsigned offset;
void *ptr;
uint64_t va;
@@ -1909,37 +1927,55 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
(!layout->push_constant_size && 

[Mesa-dev] [RFC PATCH 1/4] radv: gather more info about push constants

2019-01-25 Thread Samuel Pitoiset
This is needed in order to inline some push constants when possible.
This also adds a new helper for initializing the pass.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_nir_to_llvm.c |  2 ++
 src/amd/vulkan/radv_private.h |  2 ++
 src/amd/vulkan/radv_shader.h  |  4 
 src/amd/vulkan/radv_shader_info.c | 32 ++-
 4 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
b/src/amd/vulkan/radv_nir_to_llvm.c
index 5048d9d2493..b655e2c2e2c 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -3439,6 +3439,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct 
ac_llvm_compiler *ac_llvm,
 
memset(shader_info, 0, sizeof(*shader_info));
 
+   radv_nir_shader_info_init(_info->info);
+
for(int i = 0; i < shader_count; ++i)
radv_nir_shader_info_pass(shaders[i], options, 
_info->info);
 
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 85c18906f84..4c76521a045 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1941,6 +1941,8 @@ void radv_nir_shader_info_pass(const struct nir_shader 
*nir,
   const struct radv_nir_compiler_options *options,
   struct radv_shader_info *info);
 
+void radv_nir_shader_info_init(struct radv_shader_info *info);
+
 struct radeon_winsys_sem;
 
 #define RADV_DEFINE_HANDLE_CASTS(__radv_type, __VkType)\
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 3652a811e80..0f049f9a528 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -162,6 +162,10 @@ struct radv_streamout_info {
 
 struct radv_shader_info {
bool loads_push_constants;
+   uint8_t min_push_constant_used;
+   uint8_t max_push_constant_used;
+   bool has_32bit_push_constants;
+   bool has_indirect_push_constants;
uint32_t desc_set_used_mask;
bool needs_multiview_view_index;
bool uses_invocation_id;
diff --git a/src/amd/vulkan/radv_shader_info.c 
b/src/amd/vulkan/radv_shader_info.c
index 7e5a3789af2..c9cd5fddc53 100644
--- a/src/amd/vulkan/radv_shader_info.c
+++ b/src/amd/vulkan/radv_shader_info.c
@@ -190,6 +190,30 @@ gather_intrinsic_store_deref_info(const nir_shader *nir,
}
 }
 
+static void
+gather_push_constant_info(const nir_shader *nir,
+ const nir_intrinsic_instr *instr,
+ struct radv_shader_info *info)
+{
+   nir_const_value *cval = nir_src_as_const_value(instr->src[0]);
+
+   if (!cval)
+   info->has_indirect_push_constants = true;
+
+   if (instr->dest.ssa.bit_size == 32)
+   info->has_32bit_push_constants = true;
+
+   int base = nir_intrinsic_base(instr);
+   int range = nir_intrinsic_range(instr);
+
+   if (base + range > info->max_push_constant_used)
+   info->max_push_constant_used = base + range;
+   if (base < info->min_push_constant_used)
+   info->min_push_constant_used = base;
+
+   info->loads_push_constants = true;
+}
+
 static void
 gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
  struct radv_shader_info *info)
@@ -243,7 +267,7 @@ gather_intrinsic_info(const nir_shader *nir, const 
nir_intrinsic_instr *instr,
info->uses_prim_id = true;
break;
case nir_intrinsic_load_push_constant:
-   info->loads_push_constants = true;
+   gather_push_constant_info(nir, instr, info);
break;
case nir_intrinsic_vulkan_resource_index:
info->desc_set_used_mask |= (1 << 
nir_intrinsic_desc_set(instr));
@@ -504,6 +528,12 @@ gather_xfb_info(const nir_shader *nir, struct 
radv_shader_info *info)
ralloc_free(xfb);
 }
 
+void
+radv_nir_shader_info_init(struct radv_shader_info *info)
+{
+   info->min_push_constant_used = -1;
+}
+
 void
 radv_nir_shader_info_pass(const struct nir_shader *nir,
  const struct radv_nir_compiler_options *options,
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 3/4] radv: keep track of the number of remaining user SGPRs

2019-01-25 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_nir_to_llvm.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
b/src/amd/vulkan/radv_nir_to_llvm.c
index b655e2c2e2c..11417c5991b 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -588,6 +588,7 @@ set_loc_desc(struct radv_shader_context *ctx, int idx, 
uint8_t *sgpr_idx)
 struct user_sgpr_info {
bool need_ring_offsets;
bool indirect_all_descriptor_sets;
+   uint8_t remaining_sgprs;
 };
 
 static bool needs_view_index_sgpr(struct radv_shader_context *ctx,
@@ -701,6 +702,9 @@ static void allocate_user_sgprs(struct radv_shader_context 
*ctx,
 
if (remaining_sgprs < num_desc_set) {
user_sgpr_info->indirect_all_descriptor_sets = true;
+   user_sgpr_info->remaining_sgprs = remaining_sgprs - 1;
+   } else {
+   user_sgpr_info->remaining_sgprs = remaining_sgprs - 
num_desc_set;
}
 }
 
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 2/4] radv: gather if shaders load dynamic offsets separately

2019-01-25 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_shader.h  | 1 +
 src/amd/vulkan/radv_shader_info.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 0f049f9a528..09bc5c2d4a9 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -162,6 +162,7 @@ struct radv_streamout_info {
 
 struct radv_shader_info {
bool loads_push_constants;
+   bool loads_dynamic_offsets;
uint8_t min_push_constant_used;
uint8_t max_push_constant_used;
bool has_32bit_push_constants;
diff --git a/src/amd/vulkan/radv_shader_info.c 
b/src/amd/vulkan/radv_shader_info.c
index c9cd5fddc53..cabe2a470a0 100644
--- a/src/amd/vulkan/radv_shader_info.c
+++ b/src/amd/vulkan/radv_shader_info.c
@@ -545,6 +545,7 @@ radv_nir_shader_info_pass(const struct nir_shader *nir,
if (options->layout && options->layout->dynamic_offset_count &&
(options->layout->dynamic_shader_stages & 
mesa_to_vk_shader_stage(nir->info.stage))) {
info->loads_push_constants = true;
+   info->loads_dynamic_offsets = true;
}
 
nir_foreach_variable(variable, >inputs)
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] etnaviv: hook up linear texture sampling support

2019-01-25 Thread Lucas Stach
Hi Christian,

Am Montag, den 21.01.2019, 07:50 +0100 schrieb Christian Gmeiner:
> If the GPU supports linear sampling, linear addressing mode
> will be used as default.
> 
> > Signed-off-by: Christian Gmeiner 
> ---
>  src/gallium/drivers/etnaviv/etnaviv_resource.c | 10 +++---
>  src/gallium/drivers/etnaviv/etnaviv_texture.c  |  4 +++-
>  2 files changed, 10 insertions(+), 4 deletions(-)
> 
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_resource.c 
> b/src/gallium/drivers/etnaviv/etnaviv_resource.c
> index 9a7ebf3064e..7d24b1f03bd 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_resource.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_resource.c
> @@ -318,9 +318,9 @@ etna_resource_create(struct pipe_screen *pscreen,
>  {
> struct etna_screen *screen = etna_screen(pscreen);
>  
> -   /* Figure out what tiling and address mode to use -- for now, assume that
> -* texture cannot be linear. there is a capability LINEAR_TEXTURE_SUPPORT
> -* (supported on gc880 and gc2000 at least), but not sure how it works.
> +   /* Figure out what tiling and address mode to use.
> +* Textures are TILED or LINEAR. If LINEAR_TEXTURE_SUPPORT capability is
> +* available LINEAR gets prefered.
>  * Buffers always have LINEAR layout.
>  */
> unsigned layout = ETNA_LAYOUT_LINEAR;
> @@ -334,6 +334,10 @@ etna_resource_create(struct pipe_screen *pscreen,
>  
>    if (util_format_is_compressed(templat->format))
>   layout = ETNA_LAYOUT_LINEAR;
> +  else if (VIV_FEATURE(screen, chipMinorFeatures1, 
> LINEAR_TEXTURE_SUPPORT)) {
> + layout = ETNA_LAYOUT_LINEAR;
> + mode = ETNA_ADDRESSING_MODE_LINEAR;
> +  }
> } else if (templat->target != PIPE_BUFFER) {
>    bool want_multitiled = false;
>    bool want_supertiled = screen->specs.can_supertile;
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_texture.c 
> b/src/gallium/drivers/etnaviv/etnaviv_texture.c
> index 3993e31cec1..b06f20531fd 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_texture.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_texture.c
> @@ -172,7 +172,9 @@ etna_resource_sampler_compatible(struct etna_resource 
> *res)
> if (res->layout == ETNA_LAYOUT_SUPER_TILED && VIV_FEATURE(screen, 
> chipMinorFeatures2, SUPERTILED_TEXTURE))
>    return true;
>  
> -   /* TODO: LINEAR_TEXTURE_SUPPORT */
> +   /* This GPU supports texturing from linear textures? */
> +   if (res->layout == ETNA_LAYOUT_LINEAR && VIV_FEATURE(screen, 
> chipMinorFeatures1, LINEAR_TEXTURE_SUPPORT))
> +  return true;

In order to avoid this sitting on the ML for too long while mostly
looking good: how would you feel about squashing this last hunk into
the previous patch and dropping the other parts of this patch for now?

This way we could have most of the stuff in master and do further
experiments about the performance implications later on. If you want to
do this, patches 1-3 are:

Reviewed-by: Lucas Stach 

Regards,
Lucas
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 2/4] nir: Add a new ALU nir_op_imad

2019-01-25 Thread Ilia Mirkin
On Fri, Jan 25, 2019 at 10:58 AM Ilia Mirkin  wrote:
>
> IMAD_S24 isn't src0 * src1 + src2 though. I think this could be called
> imad24, which I suspect exits on many GPUs (nv50-era NVIDIA definitely
> had this, and I think maxwell+ has a variant of this implemented by
> XMAD):
>
> (src0 * src1) & 0xff + src2

And of course even that's wrong... the 24th bit has to get
sign-extended on that. Can express it with shifts.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 2/4] nir: Add a new ALU nir_op_imad

2019-01-25 Thread Ilia Mirkin
IMAD_S24 isn't src0 * src1 + src2 though. I think this could be called
imad24, which I suspect exits on many GPUs (nv50-era NVIDIA definitely
had this, and I think maxwell+ has a variant of this implemented by
XMAD):

(src0 * src1) & 0xff + src2

Cheers,

  -ilia

On Fri, Jan 25, 2019 at 10:49 AM Eduardo Lima Mitev  wrote:
>
> ir3 compiler has an integer multiply-add instruction (IMAD_S24)
> that is used for different offset calculations in the backend.
> Since we intend to move some of these calculations to NIR, we need
> a new ALU op that can represent it.
> ---
>  src/compiler/nir/nir_opcodes.py | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
> index d32005846a6..b61845fd514 100644
> --- a/src/compiler/nir/nir_opcodes.py
> +++ b/src/compiler/nir/nir_opcodes.py
> @@ -754,6 +754,7 @@ def triop_horiz(name, output_size, src1_size, src2_size, 
> src3_size, const_expr):
> [tuint, tuint, tuint], "", const_expr)
>
>  triop("ffma", tfloat, "src0 * src1 + src2")
> +triop("imad", tint, "src0 * src1 + src2")
>
>  triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2")
>
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 0/4] freedreno: Move some compiler offset computations to NIR

2019-01-25 Thread Eduardo Lima Mitev
There are a bunch of instructions emitted on ir3_compiler_nir related to
offset computations for IO opcodes (ssbo, image, etc). This small series
explores the possibility of moving these instructions to NIR, where we
have higher chances of optimizing them.

The series introduces a new, freedreno specific NIR pass,
'ir3_nir_lower_sampler_io' (final name not set). The pass is executed
early on ir3_optimize_nir(), and the goal is to centralize all these
computations there, hoping that later NIR passes will produce better
code than what is currently emitted.

So far, we have just implemented byte-offset computation for image store
and atomics. This seemed like a good first target given the amount of
instructions being emitted for it by the backend.

This is an RFC series because there are a few open questions, but we
wanted to gather feedback already now, in case this effort is something
not worth it; and also hoping that somebody else will give it a try
against other shaders and on other gens (we have just tried this on
a5xx).

* We have so far been unable to see any improvement in generated code
(not a penalty either). shader-db has not been specially useful. Few
shaders there exercise image store or image atomic ops, and of those
that do, most require higher versions of GLSL than what freedreno
supports, so they get skipped. The few that do actually run, don't
show any meaningful difference.

Then other shaders picked from tests suites are simple enough not to
produce any difference in code either.

There is still on-going work looking for cases where the pass helps
instruction stats, whether writing custom shaders or porting complex
shader from shader-db to run on GLES 310.

There is though an open question here as to whether moving backend
code to NIR is a benefit in and of itself.

* The series adds a nir_op_imad opcode that didn't exist before, and
perhaps not generally useful even for freedreno outside this pass,
because it maps to IR3_MAD_S24 which is probably not suitable for
generic integer multiply-add.

* The pass currently has 2 alternative code-paths to emit the
multiplication by the bytes-per-pixel of an image format. In one
case, since this value can be obtained at compile time, it is
emitted as an immediate by nir_imul_imm. The other alternative is
emitting an nir_imul with an SSA value that will map to
image_dims[0] at shader runtime.

The former case is uglier but produces better code (a single SHL
instruction), whereas the latter involves a generic imul, for which
the backend emits a lot of code to cover for overflow.

The doubt here is whether we should introduce a (lower precision)
version of imul that maps directly to IR3_IMUL_S.


A live (WIP) tree of the series can be found at:


We plan to continue moving computations to the pass if we see
good opportunities.

Feedback very welcome,

cheers,
Eduardo

Eduardo Lima Mitev (4):
  nir: Add a new intrinsic 'load_image_stride'
  nir: Add a new ALU nir_op_imad
  ir3/nir: Add a new pass 'ir3_nir_lower_sampler_io'
  ir3: Use ir3_nir_lower_sampler_io pass

 src/compiler/nir/nir_intrinsics.py   |   2 +
 src/compiler/nir/nir_opcodes.py  |   1 +
 src/freedreno/Makefile.sources   |   1 +
 src/freedreno/ir3/ir3_compiler_nir.c |  61 ++--
 src/freedreno/ir3/ir3_nir.c  |   1 +
 src/freedreno/ir3/ir3_nir.h  |   1 +
 src/freedreno/ir3/ir3_nir_lower_sampler_io.c | 349 +++
 7 files changed, 383 insertions(+), 33 deletions(-)
 create mode 100644 src/freedreno/ir3/ir3_nir_lower_sampler_io.c

-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 4/4] ir3: Use ir3_nir_lower_sampler_io pass

2019-01-25 Thread Eduardo Lima Mitev
This effectively removes all offset calculations in
ir3_compiler_nir::get_image_offset().

No regressions observed on affected tests from Khronos CTS and piglit
suites, compared to master.

Collecting useful stats on helps/hurts caused by this pass is WIP. Very
few shaders in shader-db data-base exercise image store or image
atomic ops, and of those that do, most require higher versions of
GLSL than what freedreno supports, so they get skipped.

There is on-going work writing/porting shaders to collect useful
stats. So far, all tested show no meaningful difference compared
to master.
---
 src/freedreno/ir3/ir3_compiler_nir.c | 61 +---
 src/freedreno/ir3/ir3_nir.c  |  1 +
 2 files changed, 29 insertions(+), 33 deletions(-)

diff --git a/src/freedreno/ir3/ir3_compiler_nir.c 
b/src/freedreno/ir3/ir3_compiler_nir.c
index fd641735620..fe329db658c 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -548,6 +548,9 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
ir3_MADSH_M16(b, src[0], 0, src[1], 0,
ir3_MULL_U(b, src[0], 0, 
src[1], 0), 0), 0);
break;
+   case nir_op_imad:
+   dst[0] = ir3_MAD_S24(b, src[0], 0, src[1], 0, src[2], 0);
+   break;
case nir_op_ineg:
dst[0] = ir3_ABSNEG_S(b, src[0], IR3_REG_SNEG);
break;
@@ -1172,44 +1175,19 @@ get_image_type(const nir_variable *var)
 
 static struct ir3_instruction *
 get_image_offset(struct ir3_context *ctx, const nir_variable *var,
-   struct ir3_instruction * const *coords, bool byteoff)
+   struct ir3_instruction * const *coords)
 {
struct ir3_block *b = ctx->block;
-   struct ir3_instruction *offset;
-   unsigned ncoords = get_image_coords(var, NULL);
-
-   /* to calculate the byte offset (yes, uggg) we need (up to) three
-* const values to know the bytes per pixel, and y and z stride:
-*/
-   unsigned cb = regid(ctx->so->constbase.image_dims, 0) +
-   ctx->so->const_layout.image_dims.off[var->data.driver_location];
 
debug_assert(ctx->so->const_layout.image_dims.mask &
(1 << var->data.driver_location));
 
-   /* offset = coords.x * bytes_per_pixel: */
-   offset = ir3_MUL_S(b, coords[0], 0, create_uniform(b, cb + 0), 0);
-   if (ncoords > 1) {
-   /* offset += coords.y * y_pitch: */
-   offset = ir3_MAD_S24(b, create_uniform(b, cb + 1), 0,
-   coords[1], 0, offset, 0);
-   }
-   if (ncoords > 2) {
-   /* offset += coords.z * z_pitch: */
-   offset = ir3_MAD_S24(b, create_uniform(b, cb + 2), 0,
-   coords[2], 0, offset, 0);
-   }
-
-   if (!byteoff) {
-   /* Some cases, like atomics, seem to use dword offset instead
-* of byte offsets.. blob just puts an extra shr.b in there
-* in those cases:
-*/
-   offset = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0);
-   }
-
+   /* ir3_nir_lower_sampler_io pass should have placed the final
+* byte-offset (or dword offset for atomics) at the 4th component
+* of the coordinate vector.
+*/
return ir3_create_collect(ctx, (struct ir3_instruction*[]){
-   offset,
+   coords[3],
create_immed(b, 0),
}, 2);
 }
@@ -1341,7 +1319,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, 
nir_intrinsic_instr *intr)
 * src2 is 64b byte offset
 */
 
-   offset = get_image_offset(ctx, var, coords, true);
+   offset = get_image_offset(ctx, var, coords);
 
/* NOTE: stib seems to take byte offset, but stgb.typed can be used
 * too and takes a dword offset.. not quite sure yet why blob uses
@@ -1443,7 +1421,7 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, 
nir_intrinsic_instr *intr)
 */
src0 = ir3_get_src(ctx, >src[3])[0];
src1 = ir3_create_collect(ctx, coords, ncoords);
-   src2 = get_image_offset(ctx, var, coords, false);
+   src2 = get_image_offset(ctx, var, coords);
 
switch (intr->intrinsic) {
case nir_intrinsic_image_deref_atomic_add:
@@ -1612,6 +1590,23 @@ emit_intrinsic(struct ir3_context *ctx, 
nir_intrinsic_instr *intr)
}
 
switch (intr->intrinsic) {
+   case nir_intrinsic_load_image_stride: {
+   idx = intr->const_index[0];
+
+   /* this is the index into image_dims offsets, which can take
+* values 0, 1 or 2 (bpp, y-stride, z-stride respectively).
+*/
+   uint8_t off = intr->const_index[1];
+   debug_assert(off <= 2);
+
+   unsigned cb = regid(ctx->so->constbase.image_dims, 0) +
+  

[Mesa-dev] [RFC 1/4] nir: Add a new intrinsic 'load_image_stride'

2019-01-25 Thread Eduardo Lima Mitev
This is an internal intrinsic intended to be injected by a
(freedreno-specific) 'lower_sampler_io' pass that will be introduced
later in this series; and consumed by ir3_compiler_nir.

The intrinsic will load in SSA values for various constants
for images (image_dims), namely the format's bytes-per-pixel,
y-stride and z-stride; for which the backend compiler will emit
the corresponding uniforms.

const_index[0] is the image index, and const_index[1] is the index
into image_dims' register file for a given image: 0 for bpp, 1 to
y-stride and 2 for z-stride.
---
 src/compiler/nir/nir_intrinsics.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/compiler/nir/nir_intrinsics.py 
b/src/compiler/nir/nir_intrinsics.py
index a5cc3f7401c..169c835d746 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -590,6 +590,8 @@ load("shared", 1, [BASE, ALIGN_MUL, ALIGN_OFFSET], 
[CAN_ELIMINATE])
 load("push_constant", 1, [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER])
 # src[] = { offset }. const_index[] = { base, range }
 load("constant", 1, [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER])
+# src[] = { offset }. const_index[] = { image_idx, dim_idx }
+load("image_stride", 1, [], [CAN_REORDER])
 
 # Stores work the same way as loads, except now the first source is the value
 # to store and the second (and possibly third) source specify where to store
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 3/4] ir3/nir: Add a new pass 'ir3_nir_lower_sampler_io'

2019-01-25 Thread Eduardo Lima Mitev
This pass moves to NIR some offset calculations that are currently
implemented on the backend compiler, to allow NIR to possibly
optimize them.

For now, only coordinate byte-offset calculations for imageStore
and image atomic operations are implemented.
---
 src/freedreno/Makefile.sources   |   1 +
 src/freedreno/ir3/ir3_nir.h  |   1 +
 src/freedreno/ir3/ir3_nir_lower_sampler_io.c | 349 +++
 3 files changed, 351 insertions(+)
 create mode 100644 src/freedreno/ir3/ir3_nir_lower_sampler_io.c

diff --git a/src/freedreno/Makefile.sources b/src/freedreno/Makefile.sources
index 7fea9de39ef..fd4f7f294cd 100644
--- a/src/freedreno/Makefile.sources
+++ b/src/freedreno/Makefile.sources
@@ -31,6 +31,7 @@ ir3_SOURCES := \
ir3/ir3_legalize.c \
ir3/ir3_nir.c \
ir3/ir3_nir.h \
+   ir3/ir3_nir_lower_sampler_io.c \
ir3/ir3_nir_lower_tg4_to_tex.c \
ir3/ir3_print.c \
ir3/ir3_ra.c \
diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h
index 74201d34160..52809ba099e 100644
--- a/src/freedreno/ir3/ir3_nir.h
+++ b/src/freedreno/ir3/ir3_nir.h
@@ -36,6 +36,7 @@ void ir3_nir_scan_driver_consts(nir_shader *shader, struct 
ir3_driver_const_layo
 
 bool ir3_nir_apply_trig_workarounds(nir_shader *shader);
 bool ir3_nir_lower_tg4_to_tex(nir_shader *shader);
+bool ir3_nir_lower_sampler_io(nir_shader *shader);
 
 const nir_shader_compiler_options * ir3_get_compiler_options(struct 
ir3_compiler *compiler);
 bool ir3_key_lowers_nir(const struct ir3_shader_key *key);
diff --git a/src/freedreno/ir3/ir3_nir_lower_sampler_io.c 
b/src/freedreno/ir3/ir3_nir_lower_sampler_io.c
new file mode 100644
index 000..e2910d8906d
--- /dev/null
+++ b/src/freedreno/ir3/ir3_nir_lower_sampler_io.c
@@ -0,0 +1,349 @@
+/*
+ * Copyright © 2018 Igalia S.L.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "ir3_nir.h"
+#include "compiler/nir/nir_builder.h"
+
+/**
+ * The goal of this pass is to move to NIR some offset calculations for
+ * different I/O that are currently implemented on the backend compiler,
+ * to allow NIR to possibly optimize them.
+ *
+ * Currently, only offset calculations for image store and image
+ * atomic operations are implemented.
+ */
+
+
+/* This flag enables/disables a code-path where the bytes-per-pixel of
+ * an image is obtained directly from the format, which is known
+ * at shader compile time; as opposed to using image_dims[0] constant
+ * available only at shader runtime.
+ *
+ * Inlining the bytes-per-pixel here as an immediate has the advantage
+ * that it gets converted to a single (SHL) instruction (because all
+ * possible values are powers of two); while loading it as a uniform and
+ * emitting an IMUL will cause the backend to expand it to quite a few
+ * instructions (see ir3_compiler_nir for imul), thus ultimately hurting
+ * instruction count.
+ */
+#define INLINE_BPP 1
+
+
+static bool
+intrinsic_is_image_atomic(unsigned intrinsic)
+{
+   switch (intrinsic) {
+   case nir_intrinsic_image_deref_atomic_add:
+   case nir_intrinsic_image_deref_atomic_min:
+   case nir_intrinsic_image_deref_atomic_max:
+   case nir_intrinsic_image_deref_atomic_and:
+   case nir_intrinsic_image_deref_atomic_or:
+   case nir_intrinsic_image_deref_atomic_xor:
+   case nir_intrinsic_image_deref_atomic_exchange:
+   case nir_intrinsic_image_deref_atomic_comp_swap:
+   return true;
+   default:
+   break;
+   }
+
+   return false;
+}
+
+static bool
+intrinsic_is_image_store_or_atomic(unsigned intrinsic)
+{
+   if (intrinsic == nir_intrinsic_image_deref_store)
+   return true;
+   else
+   return intrinsic_is_image_atomic(intrinsic);
+}
+
+/*
+ * FIXME: shamelessly copied from ir3_compiler_nir until it gets factorized
+ * out at some point.
+ */

[Mesa-dev] [RFC 2/4] nir: Add a new ALU nir_op_imad

2019-01-25 Thread Eduardo Lima Mitev
ir3 compiler has an integer multiply-add instruction (IMAD_S24)
that is used for different offset calculations in the backend.
Since we intend to move some of these calculations to NIR, we need
a new ALU op that can represent it.
---
 src/compiler/nir/nir_opcodes.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index d32005846a6..b61845fd514 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -754,6 +754,7 @@ def triop_horiz(name, output_size, src1_size, src2_size, 
src3_size, const_expr):
[tuint, tuint, tuint], "", const_expr)
 
 triop("ffma", tfloat, "src0 * src1 + src2")
+triop("imad", tint, "src0 * src1 + src2")
 
 triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2")
 
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [AppVeyor] mesa master #9850 completed

2019-01-25 Thread AppVeyor


Build mesa 9850 completed



Commit 65b8d723fd by Jose Fonseca on 1/25/2019 2:07 PM:

appveyor: Revert commits adding Cygwin support.\n\nThis reverts commits 00ad77b9f683e561b1ac45fbb89eb2bafe45c8c6 and\n5334dafee265d78abdfcf30e2c693e0791bfecf5.\n\nThis avoids Appveyor build breakage due to Cygwin, but more importantly,\nthere are several problems with these patches, as highlighted to my\nrecent mesa-dev mail.  So better to revert for now, and pursue Cygwin\nsupport after these have been address.


Configure your notification preferences

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] appveyor: Add a Cygwin build script

2019-01-25 Thread Jose Fonseca
I just noticed these patches, because Appveyor build is broken, and to 
my surprise, on Cygwin.


There are several problems with these patches:

- First of all, there were commited with no reviewed by.  Not by me (a 
quick `git log appveyor.yml` would tell you I pretty much wrote and 
maitain it).  But worse of all, there's no review-by from anybody!  This 
is not a trivial change neither.  Honestly, I'm disappointed by these 
stealth commits.  I don't have time to keep up with mesa-dev threads, 
but I'm not MIA, and I've been responsive to all emails I've been CCed.


- Are you responsible for fixing the cygwin build? Somebody needs to 
ensure Cygwin build stays in good health.  Systematic failures of Cygwin 
will cause the mesa-dev list to be spammed, and other developers to 
become numb to failures. So we can't allow to happen.  On  the other 
hand, I'm not familiar with how mesa is supposed to build work on 
Cygwin, so I can't take than upon myself.


- The batch files are broken.  Cmd.exe will not stop processing a batch 
file when a error happens (appveyor does for comands started from 
.yml.)  so the scripts\appveyor_msvc.bat batch can fail one of the early 
commands, and the exit code won't propagate to appveyor.)


As it stands I'm about to revert these two changes.  We can bring cygwin 
support back once the above issues have been addresed.


Jose


On 14/12/2018 19:20, Jon Turney wrote:

Signed-off-by: Jon Turney 
---
  appveyor.yml| 19 +-
  scripts/appveyor_cygwin.bat | 40 +
  2 files changed, 54 insertions(+), 5 deletions(-)
  create mode 100644 scripts/appveyor_cygwin.bat

diff --git a/appveyor.yml b/appveyor.yml
index 0ec3a1e7bfe..9c6e5acd370 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -38,6 +38,9 @@ cache:
  - '%LOCALAPPDATA%\pip\Cache -> appveyor.yml'
  - win_flex_bison-2.5.15.zip
  - llvm-5.0.1-msvc2017-mtd.7z
+- C:\pkgcache
+- 'C:\cygwin64\home\%USERNAME%\.ccache'
+- 'C:\cygwin\home\%USERNAME%\.ccache'
  
  os: Visual Studio 2017
  
@@ -47,17 +50,23 @@ init:

  - git config --global core.autocrlf true
  
  environment:

-  WINFLEXBISON_VERSION: 2.5.15
-  LLVM_ARCHIVE: llvm-5.0.1-msvc2017-mtd.7z
+  matrix:
+  - compiler: msvc
+WINFLEXBISON_VERSION: 2.5.15
+LLVM_ARCHIVE: llvm-5.0.1-msvc2017-mtd.7z
+  - compiler: cygwin
+arch: x64
  
  install:

-- call scripts\appveyor_msvc.bat install
+- call scripts\appveyor_%compiler%.bat install
  
  build_script:

-- call scripts\appveyor_msvc.bat build_script
+- call scripts\appveyor_%compiler%.bat build_script
  
  after_build:

-- call scripts\appveyor_msvc.bat after_build
+- call scripts\appveyor_%compiler%.bat after_build
+
+test: off
  
  # It's possible to setup notification here, as described in

  # 
https://na01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fwww.appveyor.com%2Fdocs%2Fnotifications%23appveyor-yml-configurationdata=02%7C01%7Cjfonseca%40vmware.com%7C7e9e0b630a3e44c0101708d661f97a40%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C0%7C0%7C636804121479341361sdata=0H2uZbwA9%2BFlkyDQtcsjXjbEQtthAOGfxh%2BfcCS%2Bd8M%3Dreserved=0
 , but
diff --git a/scripts/appveyor_cygwin.bat b/scripts/appveyor_cygwin.bat
new file mode 100644
index 000..831270b8cd3
--- /dev/null
+++ b/scripts/appveyor_cygwin.bat
@@ -0,0 +1,40 @@
+set PKGCACHE=C:\pkgcache
+set CYGWIN_MIRROR=http://cygwin.mirror.constant.com
+
+if _%arch%_ == _x64_ set SETUP=setup-x86_64.exe && set CYGWIN_ROOT=C:\cygwin64
+if _%arch%_ == _x86_ set SETUP=setup-x86.exe && set CYGWIN_ROOT=C:\cygwin
+
+set PATH=%CYGWIN_ROOT%\bin;%SYSTEMROOT%\system32
+
+goto %1
+
+:install
+echo Updating Cygwin and installing build prerequsites
+%CYGWIN_ROOT%\%SETUP% -qnNdO -R "%CYGWIN_ROOT%" -s "%CYGWIN_MIRROR%" -l 
"%PKGCACHE%" -g -P ^
+bison,^
+ccache,^
+flex,^
+glproto,^
+libX11-devel,^
+libX11-xcb-devel,^
+libXdamage-devel,^
+libXext-devel,^
+libXfixes-devel,^
+libexpat-devel,^
+libllvm-devel,^
+libxcb-dri2-devel,^
+libxcb-glx-devel,^
+libxcb-xfixes-devel,^
+meson,^
+ninja,^
+python3-mako,^
+zlib-devel
+goto :eof
+
+:build_script
+bash -lc "cd $APPVEYOR_BUILD_FOLDER; meson _build -Degl=false --wrap-mode=nofallback 
&& ninja -C _build"
+goto :eof
+
+:after_build
+bash -lc "cd $APPVEYOR_BUILD_FOLDER; ninja -C _build test"
+goto :eof



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/4] kmsro: Add etnaviv renderonly support

2019-01-25 Thread Rob Herring
On Thu, Jan 24, 2019 at 7:21 PM Eric Anholt  wrote:
>
> Rob Herring  writes:
>
> > Enable using etnaviv for KMS renderonly. This still needs KMS driver
> > name mapping to kmsro to be used automatically.
> >
> > Signed-off-by: Rob Herring 
>
> > diff --git a/src/gallium/winsys/kmsro/drm/kmsro_drm_winsys.c 
> > b/src/gallium/winsys/kmsro/drm/kmsro_drm_winsys.c
> > index 4448150cc0c6..e086c0858f05 100644
> > --- a/src/gallium/winsys/kmsro/drm/kmsro_drm_winsys.c
> > +++ b/src/gallium/winsys/kmsro/drm/kmsro_drm_winsys.c
> > @@ -27,6 +27,7 @@
> >
> >  #include "kmsro_drm_public.h"
> >  #include "vc4/drm/vc4_drm_public.h"
> > +#include "etnaviv/drm/etnaviv_drm_public.h"
> >  #include "xf86drm.h"
> >
> >  #include "pipe/p_screen.h"
> > @@ -34,22 +35,39 @@
> >
> >  struct pipe_screen *kmsro_drm_screen_create(int fd)
> >  {
> > +   struct pipe_screen *screen = NULL;
> > struct renderonly ro = {
> > +  .kms_fd = fd,
> > +  .gpu_fd = -1,
> > +   };
> > +
> > +#if defined(GALLIUM_VC4)
> > +   ro.gpu_fd = drmOpenWithType("vc4", NULL, DRM_NODE_RENDER);
> > +   if (ro.gpu_fd >= 0) {
> >/* Passes the vc4-allocated BO through to the KMS-only DRM device 
> > using
> > * PRIME buffer sharing.  The VC4 BO must be linear, which the 
> > SCANOUT
> > * flag on allocation will have ensured.
> > */
> > -  .create_for_resource = renderonly_create_gpu_import_for_resource,
> > -  .kms_fd = fd,
> > -  .gpu_fd = drmOpenWithType("vc4", NULL, DRM_NODE_RENDER),
> > -   };
> > +  ro.create_for_resource = renderonly_create_gpu_import_for_resource,
> > +  screen = vc4_drm_screen_create_renderonly();
> > +  if (!screen)
> > + close(ro.gpu_fd);
> > +
> > +  return screen;
> > +   }
> > +#endif
> >
> > -   if (ro.gpu_fd < 0)
> > -  return NULL;
> > +#if defined(GALLIUM_ETNAVIV)
> > +   ro.gpu_fd = drmOpenWithType("etnaviv", NULL, DRM_NODE_RENDER);
> > +   if (ro.gpu_fd >= 0) {
> > +  ro.create_for_resource = 
> > renderonly_create_kms_dumb_buffer_for_resource,
> > +  screen = etna_drm_screen_create_renderonly();
> > +  if (!screen)
> > + close(ro.gpu_fd);
> >
> > -   struct pipe_screen *screen = vc4_drm_screen_create_renderonly();
> > -   if (!screen)
> > -  close(ro.gpu_fd);
> > +  return screen;
> > +   }
> > +#endif
>
> Would it make more sense to open the first render node once, then check
> if its name matches any of the drivers we support and calling their
> setup function?

That would be more efficient. The downside is if you did have more
than one render node, then which device you get is less predictable as
it would be depending on probe order. As-is, we can handle at least
simple prioritization of drivers to use.

Rob
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [AppVeyor] mesa master #9849 failed

2019-01-25 Thread AppVeyor



Build mesa 9849 failed


Commit 540939ecee by Tapani Pälli on 1/25/2019 7:51 AM:

android: fix build issues with libmesa_anv_gen* libraries\n\nWe need this include path to find nir/nir_xfb_info.h.\n\nSigned-off-by: Tapani Pälli \nReviewed-by: Eric Engestrom 


Configure your notification preferences

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109183] GPU Hangs randomly with GTA V

2019-01-25 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109183

--- Comment #3 from Alexander  ---
I can't confirm that it's mesa who is doing this and can only provide spongy
info about it, but this only happens in online mode and just anyway proton has
a hard time with the online mode.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/compiler: Add a file-level description of brw_eu_validate.c

2019-01-25 Thread Chema Casanova
Acked-by: Jose Maria Casanova Crespo 

Matt, I'll include in my TODO list creating tests for sends using grf127
as destination restriction and the one about byte_raw_moves and
execution size and stride once I receive your feedback.

Chema

El 24/1/19 a las 20:53, Matt Turner escribió:
> ---
>  src/intel/compiler/brw_eu_validate.c | 14 +-
>  1 file changed, 13 insertions(+), 1 deletion(-)
> 
> diff --git a/src/intel/compiler/brw_eu_validate.c 
> b/src/intel/compiler/brw_eu_validate.c
> index a25010b225c..7f1580a5bb3 100644
> --- a/src/intel/compiler/brw_eu_validate.c
> +++ b/src/intel/compiler/brw_eu_validate.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright © 2015 Intel Corporation
> + * Copyright © 2015-2019 Intel Corporation
>   *
>   * Permission is hereby granted, free of charge, to any person obtaining a
>   * copy of this software and associated documentation files (the "Software"),
> @@ -24,6 +24,18 @@
>  /** @file brw_eu_validate.c
>   *
>   * This file implements a pass that validates shader assembly.
> + *
> + * The restrictions implemented herein are intended to verify that 
> instructions
> + * in shader assembly do not violate restrictions documented in the graphics
> + * programming reference manuals.
> + *
> + * The restrictions are difficult for humans to quickly verify due to their
> + * complexity and abundance.
> + *
> + * It is critical that this code is thoroughly unit tested because false
> + * results it will lead developers astray, which is worse than having no
> + * validator at all. Patches to this file without corresponding unit tests 
> (in
> + * test_eu_validate.cpp) will be rejected.
>   */
>  
>  #include "brw_eu.h"
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Thoughts on fp64 for GLES?

2019-01-25 Thread Gert Wollny
Am Donnerstag, den 24.01.2019, 22:25 -0800 schrieb Stéphane Marchesin:
> 
> Yes, it's for running virgl on top of GLES. To emulate fp64 in GL on
> the guest side, we need fp64 on the host...

BTW: we could also get it emulated from the guest side. When Elie (in
CC)  initially proposed the fp64 emulation series it was for r600 and
TGSI was emitted. The created shaders are horribly long and it is
certainly not performant, but if it's just for getting OpenGL 4.0
exposed it should be good enough.

I'm not sure though how much work it would be to add this to the soft
fp64 as it has now landed for NIR, though. 

Best, 
Gert 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] meson: add toggle for TLS support in GLX

2019-01-25 Thread Patrick Steinhardt
The musl libc library does not have any support for the
"initial-exec" TLS model and is unlikely to ever implement it.
Thus, TLS support in GLX has been turned off in musl-based
distributions to work around problems when dlopen'ing drivers.
While this is easily possible using the autoconf build system by
passing `--disable-glx-tls`, meson does not yet have such an
option.

Add a new toggle "glx-tls" that defaults to `true` to gain parity
with autoconf. If disabled, `GLX_USE_TLS` will not be defined and
thus mesa will be built without TLS support.

Signed-off-by: Patrick Steinhardt 
---
 meson.build   | 6 +-
 meson_options.txt | 6 ++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/meson.build b/meson.build
index 34e2a03254..3981bf1a67 100644
--- a/meson.build
+++ b/meson.build
@@ -50,6 +50,7 @@ with_tests = get_option('build-tests')
 with_valgrind = get_option('valgrind')
 with_libunwind = get_option('libunwind')
 with_asm = get_option('asm')
+with_glx_tls = get_option('glx-tls')
 with_glx_read_only_text = get_option('glx-read-only-text')
 with_glx_direct = get_option('glx-direct')
 with_osmesa = get_option('osmesa')
@@ -332,7 +333,10 @@ if with_egl and not (with_platform_drm or 
with_platform_surfaceless)
   endif
 endif
 
-pre_args += '-DGLX_USE_TLS'
+if with_glx_tls
+  pre_args += '-DGLX_USE_TLS'
+endif
+
 if with_glx != 'disabled'
   if not (with_platform_x11 and with_any_opengl)
 error('Cannot build GLX support without X11 platform support and at least 
one OpenGL API')
diff --git a/meson_options.txt b/meson_options.txt
index bfb06c4dd4..8ff63f20b4 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -224,6 +224,12 @@ option(
   value : true,
   description : 'Build assembly code if possible'
 )
+option(
+  'glx-tls',
+  type : 'boolean',
+  value : true,
+  description : 'Enable TLS support in GLX'
+)
 option(
'glx-read-only-text',
type : 'boolean',
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv: fix computing number of user SGPRs for streamout buffers

2019-01-25 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Wed, Jan 23, 2019 at 10:26 AM Samuel Pitoiset
 wrote:
>
> Streamout buffers are emitted like push constants.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_nir_to_llvm.c | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
> b/src/amd/vulkan/radv_nir_to_llvm.c
> index 40812fa7ffb..7f1aa17b0d5 100644
> --- a/src/amd/vulkan/radv_nir_to_llvm.c
> +++ b/src/amd/vulkan/radv_nir_to_llvm.c
> @@ -701,6 +701,9 @@ static void allocate_user_sgprs(struct 
> radv_shader_context *ctx,
> if (ctx->shader_info->info.loads_push_constants)
> user_sgpr_count++;
>
> +   if (ctx->streamout_buffers)
> +   user_sgpr_count++;
> +
> uint32_t available_sgprs = ctx->options->chip_class >= GFX9 && stage 
> != MESA_SHADER_COMPUTE ? 32 : 16;
> uint32_t remaining_sgprs = available_sgprs - user_sgpr_count;
> uint32_t num_desc_set =
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/compiler: Add a file-level description of brw_eu_validate.c

2019-01-25 Thread Iago Toral
On Thu, 2019-01-24 at 12:16 -0800, Francisco Jerez wrote:
> Matt Turner  writes:
> 
> > ---
> >  src/intel/compiler/brw_eu_validate.c | 14 +-
> >  1 file changed, 13 insertions(+), 1 deletion(-)
> > 
> > diff --git a/src/intel/compiler/brw_eu_validate.c
> > b/src/intel/compiler/brw_eu_validate.c
> > index a25010b225c..7f1580a5bb3 100644
> > --- a/src/intel/compiler/brw_eu_validate.c
> > +++ b/src/intel/compiler/brw_eu_validate.c
> > @@ -1,5 +1,5 @@
> >  /*
> > - * Copyright © 2015 Intel Corporation
> > + * Copyright © 2015-2019 Intel Corporation
> >   *
> >   * Permission is hereby granted, free of charge, to any person
> > obtaining a
> >   * copy of this software and associated documentation files (the
> > "Software"),
> > @@ -24,6 +24,18 @@
> >  /** @file brw_eu_validate.c
> >   *
> >   * This file implements a pass that validates shader assembly.
> > + *
> > + * The restrictions implemented herein are intended to verify that
> > instructions
> > + * in shader assembly do not violate restrictions documented in
> > the graphics
> > + * programming reference manuals.
> > + *
> > + * The restrictions are difficult for humans to quickly verify due
> > to their
> > + * complexity and abundance.
> > + *
> > + * It is critical that this code is thoroughly unit tested because
> > false
> > + * results it will lead developers astray, which is worse than
> > having no
> 
> Redundant "it".
> 
> > + * validator at all. Patches to this file without corresponding
> > unit tests (in
> > + * test_eu_validate.cpp) will be rejected.
> 
> Strictly by that rule this patch should be rejected ;).  Maybe say
> "functional changes" instead of "patches"?  Other than that:
> 
> Reviewed-by: Francisco Jerez 

Reviewed-by: Iago Toral Quiroga 

> >   */
> >  
> >  #include "brw_eu.h"
> > -- 
> > 2.19.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/compiler: update validator to account for half-float exec type promotion

2019-01-25 Thread Iago Toral
On Thu, 2019-01-24 at 10:22 -0800, Matt Turner wrote:
> On Wed, Jan 23, 2019 at 4:18 AM Iago Toral Quiroga  > wrote:
> > 
> > Commit c84ec70b3a72 implemented execution type promotion to 32-bit
> > for
> > conversions involving half-float registers, which empirical testing
> > suggested
> > was required, but it did not incorporate this change into the
> > assembly validator
> > logic. This commits adds that, preventing validation errors like
> > this:
> > 
> > mov(16)  g9<4>B   g3<16,8,2>HF { align1 1H };
> > ERROR: Destination stride must be equal to the ratio of the sizes
> > of the
> >execution data type to the destination type
> > 
> > Fixes: c84ec70b3a72 "intel/fs: Promote execution type to 32-bit
> > when any half-float conversion is needed."
> > ---
> >  src/intel/compiler/brw_eu_validate.c | 27 ++
> > -
> 
> New rule: New restrictions (or relaxations) may not be added to
> brw_eu_validate.c without accompanying unit tests. I'll send a patch
> to add a comment to brw_eu_validate.c saying as much.
> 
> Rationale: the reason I wrote brw_eu_validate.c was because I wasted
> a
> week debugging an issue where fulsim not only failed to inform me
> that
> one instruction was invalid but also incorrectly told me that one
> correct instruction *was* invalid. I would have been better off
> without such a tool.
> 
> If the EU validator loses people's trust, then it's useless, but if
> it
> is incorrect it's worse than useless.


Makes sense to me.

Iago

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/compiler: update validator to account for half-float exec type promotion

2019-01-25 Thread Iago Toral
On Thu, 2019-01-24 at 11:45 -0800, Francisco Jerez wrote:
> Iago Toral  writes:
> 
> > On Wed, 2019-01-23 at 06:03 -0800, Francisco Jerez wrote:
> > > Iago Toral Quiroga  writes:
> > > 
> > > > Commit c84ec70b3a72 implemented execution type promotion to 32-
> > > > bit
> > > > for
> > > > conversions involving half-float registers, which empirical
> > > > testing
> > > > suggested
> > > > was required, but it did not incorporate this change into the
> > > > assembly validator
> > > > logic. This commits adds that, preventing validation errors
> > > > like
> > > > this:
> > > > 
> > > 
> > > I don't think we should be validating empirical assumptions in
> > > the EU
> > > validator.
> > 
> > I am not sure I get your point, isn't c84ec70b3a72 also based on
> > empirical testing after all?
> > 
> 
> To some extent, but it doesn't attempt to enforce ISA restrictions
> based
> on information obtained empirically.
> 
> > 
> > > > mov(16)  g9<4>B   g3<16,8,2>HF { align1 1H };
> > > > ERROR: Destination stride must be equal to the ratio of the
> > > > sizes
> > > > of the
> > > >execution data type to the destination type
> > > > 
> > > > Fixes: c84ec70b3a72 "intel/fs: Promote execution type to 32-bit
> > > > when any half-float conversion is needed."
> > > 
> > > I don't think this "fixes" anything that ever worked.
> > 
> > It is true that the code in that trace above is not something we
> > can
> > produce right now, because it is a conversion from HF to B and that
> > should only happen within the context of
> > VK_KHR_shader_float16_int8,
> > however, this is a consequence of the fact that since c84ec70b3a72
> > there is an inconsistency between what we do at the IR level
> > regarding
> > execution size of HF conversions and what the EU validator is
> > doing,
> > and from that perspective this is really fixing an inconsistency
> > that
> > didn't exist before, and I thought we would want to address that
> > sooner
> > rather than later and track it down to the original change that
> > introduced that inconsistency so we know where this is coming from.
> > 
> 
> The "inconsistency" between the IR's get_exec_type() and the EU
> validator's execution_type() has existed ever since a05b6f25bf4bfad7
> removed the HF assert from get_exec_type() without actually
> implementing
> the code required to handle HF operands (which is what my commit
> c84ec70b3a72 did).

I agree with the fact that since a05b6f25bf4bfad7 the validator could
reject valid code and that had nothing to do with your patch, but the
inconsistency I am talking about here, that this patch fixes, is the
one about get_exec_type() in the IR and execution_type() in the
validator doing different things for HF instructions, which only exists
since your patch and which you discuss below.

> > Anyway, that was my rationale for the Fixes tag, but if you think
> > this
> > is not useful I am happy to drop this patch and just include it as
> > part
> > of my series without the tag.
> > 
> 
> I'd like to see the actual regioning restrictions for HF types
> implemented in the EU validator as part of your series.

Ok, let's see if we can agree on what restrictions should we implement
then. I can implement this restriction as documented:

"Conversion between Integer and HF (Half Float) must be DWord-aligned
and strided by a DWord on the destination"

Instead of trying to apply the general one that is currently breaking.
That will fix the assertion issue. I guess my issue with it was that
going by your analysis this restriction is not telling the full
picture, this is what you had to say about this restriction:

"I have a feeling that the reason for this may be that the 16-bit
pipeline lacks the ability to handle conversions from or to half-float, 
so the execution type is implicitly promoted to the matching (integer
or floating-point) 32-bit type where any HF conversion would be
needed.  And on those the usual alignment restriction of the
destination to a larger execution type applies."

But I guess your point is that we can ignore these details at the
validator level and just focus on validating strictly what the PRM
says. Fair enough.

Unfortunatley, you also mentioned that this restriction *seems* to be
overriden by this one on all platforms but BDW (even though the both
are listed, at least I see both in my SKL docs):

"There is a relaxed alignment rule for word destinations. When the
destination type is word (UW, W, HF), destination data types can be
aligned to either the lowest word or the second lowest word of the
execution channel. This means the destination data words can be either
all in the even word locations or all in the odd word locations."

Which you discussed didn't make sense to you and I agreed, if only
because my own experiments suggested that the implications that one
would derive from a strict interpretation of this (that packed 16-bit
data is not supported) are not quite true going by the fact that we are
emitting packed 

[Mesa-dev] [PATCH 3/3] radv: set noalias/dereferenceable LLVM attributes based on param types

2019-01-25 Thread Samuel Pitoiset
Instead of using this useless array_params_mask variable.
This should set these two attributes to streamout buffers too.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_nir_to_llvm.c | 20 +++-
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
b/src/amd/vulkan/radv_nir_to_llvm.c
index 8058a1dfd9c..de89cd2d880 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -433,7 +433,6 @@ get_tcs_out_current_patch_data_offset(struct 
radv_shader_context *ctx)
 struct arg_info {
LLVMTypeRef types[MAX_ARGS];
LLVMValueRef *assign[MAX_ARGS];
-   unsigned array_params_mask;
uint8_t count;
uint8_t sgpr_count;
uint8_t num_sgprs_used;
@@ -464,13 +463,6 @@ add_arg(struct arg_info *info, enum ac_arg_regfile 
regfile, LLVMTypeRef type,
}
 }
 
-static inline void
-add_array_arg(struct arg_info *info, LLVMTypeRef type, LLVMValueRef *param_ptr)
-{
-   info->array_params_mask |= (1 << info->count);
-   add_arg(info, ARG_SGPR, type, param_ptr);
-}
-
 static void assign_arguments(LLVMValueRef main_function,
 struct arg_info *info)
 {
@@ -509,10 +501,11 @@ create_llvm_function(LLVMContextRef ctx, LLVMModuleRef 
module,
 
LLVMSetFunctionCallConv(main_function, RADEON_LLVM_AMDGPU_CS);
for (unsigned i = 0; i < args->sgpr_count; ++i) {
+   LLVMValueRef P = LLVMGetParam(main_function, i);
+
ac_add_function_attr(ctx, main_function, i + 1, 
AC_FUNC_ATTR_INREG);
 
-   if (args->array_params_mask & (1 << i)) {
-   LLVMValueRef P = LLVMGetParam(main_function, i);
+   if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) {
ac_add_function_attr(ctx, main_function, i + 1, 
AC_FUNC_ATTR_NOALIAS);
ac_add_attr_dereferenceable(P, UINT64_MAX);
}
@@ -723,15 +716,16 @@ declare_global_input_sgprs(struct radv_shader_context 
*ctx,
while (mask) {
int i = u_bit_scan();
 
-   add_array_arg(args, type, >descriptor_sets[i]);
+   add_arg(args, ARG_SGPR, type, >descriptor_sets[i]);
}
} else {
-   add_array_arg(args, ac_array_in_const32_addr_space(type), 
desc_sets);
+   add_arg(args, ARG_SGPR, ac_array_in_const32_addr_space(type),
+   desc_sets);
}
 
if (ctx->shader_info->info.loads_push_constants) {
/* 1 for push constants and dynamic descriptors */
-   add_array_arg(args, type, >abi.push_constants);
+   add_arg(args, ARG_SGPR, type, >abi.push_constants);
}
 
if (ctx->shader_info->info.so.num_outputs) {
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] radv: simplify allocating user SGPRS for descriptor sets

2019-01-25 Thread Samuel Pitoiset
Unnecesary to check the current stages if desc_set_used_mask
is used.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_nir_to_llvm.c | 102 ++
 1 file changed, 34 insertions(+), 68 deletions(-)

diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
b/src/amd/vulkan/radv_nir_to_llvm.c
index 86b51bbf7e2..8058a1dfd9c 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -710,29 +710,20 @@ static void allocate_user_sgprs(struct 
radv_shader_context *ctx,
 
 static void
 declare_global_input_sgprs(struct radv_shader_context *ctx,
-  gl_shader_stage stage,
-  bool has_previous_stage,
-  gl_shader_stage previous_stage,
   const struct user_sgpr_info *user_sgpr_info,
   struct arg_info *args,
   LLVMValueRef *desc_sets)
 {
LLVMTypeRef type = ac_array_in_const32_addr_space(ctx->ac.i8);
-   unsigned num_sets = ctx->options->layout ?
-   ctx->options->layout->num_sets : 0;
-   unsigned stage_mask = 1 << stage;
-
-   if (has_previous_stage)
-   stage_mask |= 1 << previous_stage;
 
/* 1 for each descriptor set */
if (!user_sgpr_info->indirect_all_descriptor_sets) {
-   for (unsigned i = 0; i < num_sets; ++i) {
-   if ((ctx->shader_info->info.desc_set_used_mask & (1 << 
i)) &&
-   ctx->options->layout->set[i].layout->shader_stages 
& stage_mask) {
-   add_array_arg(args, type,
- >descriptor_sets[i]);
-   }
+   uint32_t mask = ctx->shader_info->info.desc_set_used_mask;
+
+   while (mask) {
+   int i = u_bit_scan();
+
+   add_array_arg(args, type, >descriptor_sets[i]);
}
} else {
add_array_arg(args, ac_array_in_const32_addr_space(type), 
desc_sets);
@@ -829,41 +820,31 @@ declare_tes_input_vgprs(struct radv_shader_context *ctx, 
struct arg_info *args)
 }
 
 static void
-set_global_input_locs(struct radv_shader_context *ctx, gl_shader_stage stage,
- bool has_previous_stage, gl_shader_stage previous_stage,
+set_global_input_locs(struct radv_shader_context *ctx,
  const struct user_sgpr_info *user_sgpr_info,
  LLVMValueRef desc_sets, uint8_t *user_sgpr_idx)
 {
-   unsigned num_sets = ctx->options->layout ?
-   ctx->options->layout->num_sets : 0;
-   unsigned stage_mask = 1 << stage;
-
-   if (has_previous_stage)
-   stage_mask |= 1 << previous_stage;
+   uint32_t mask = ctx->shader_info->info.desc_set_used_mask;
 
if (!user_sgpr_info->indirect_all_descriptor_sets) {
-   for (unsigned i = 0; i < num_sets; ++i) {
-   if ((ctx->shader_info->info.desc_set_used_mask & (1 << 
i)) &&
-   ctx->options->layout->set[i].layout->shader_stages 
& stage_mask) {
-   set_loc_desc(ctx, i, user_sgpr_idx);
-   } else
-   ctx->descriptor_sets[i] = NULL;
+   while (mask) {
+   int i = u_bit_scan();
+
+   set_loc_desc(ctx, i, user_sgpr_idx);
}
} else {
set_loc_shader_ptr(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS,
   user_sgpr_idx);
 
-   for (unsigned i = 0; i < num_sets; ++i) {
-   if ((ctx->shader_info->info.desc_set_used_mask & (1 << 
i)) &&
-   ctx->options->layout->set[i].layout->shader_stages 
& stage_mask) {
-   ctx->descriptor_sets[i] =
-   ac_build_load_to_sgpr(>ac,
- desc_sets,
- 
LLVMConstInt(ctx->ac.i32, i, false));
+   while (mask) {
+   int i = u_bit_scan();
+
+   ctx->descriptor_sets[i] =
+   ac_build_load_to_sgpr(>ac, desc_sets,
+ LLVMConstInt(ctx->ac.i32, 
i, false));
 
-   } else
-   ctx->descriptor_sets[i] = NULL;
}
+
ctx->shader_info->need_indirect_descriptor_sets = true;
}
 
@@ -949,9 +930,8 @@ static void create_function(struct radv_shader_context *ctx,
 
switch (stage) {
case MESA_SHADER_COMPUTE:
-   declare_global_input_sgprs(ctx, stage, has_previous_stage,
-  previous_stage, _sgpr_info,
-   

[Mesa-dev] [PATCH 1/3] radv: remove radv_userdata_info::indirect field

2019-01-25 Thread Samuel Pitoiset
Always false.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_cmd_buffer.c  |  2 --
 src/amd/vulkan/radv_nir_to_llvm.c | 15 ++-
 src/amd/vulkan/radv_shader.h  |  1 -
 3 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 2b17fbb339c..aae90290841 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -589,7 +589,6 @@ radv_emit_userdata_address(struct radv_cmd_buffer 
*cmd_buffer,
return;
 
assert(loc->num_sgprs == 1);
-   assert(!loc->indirect);
 
radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
 base_reg + loc->sgpr_idx * 4, va, false);
@@ -4158,7 +4157,6 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer 
*cmd_buffer,
}
 
if (loc->sgpr_idx != -1) {
-   assert(!loc->indirect);
assert(loc->num_sgprs == 3);
 
radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 +
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
b/src/amd/vulkan/radv_nir_to_llvm.c
index 40812fa7ffb..86b51bbf7e2 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -553,11 +553,10 @@ create_llvm_function(LLVMContextRef ctx, LLVMModuleRef 
module,
 
 static void
 set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx,
-   uint8_t num_sgprs, bool indirect)
+   uint8_t num_sgprs)
 {
ud_info->sgpr_idx = *sgpr_idx;
ud_info->num_sgprs = num_sgprs;
-   ud_info->indirect = indirect;
*sgpr_idx += num_sgprs;
 }
 
@@ -569,7 +568,7 @@ set_loc_shader(struct radv_shader_context *ctx, int idx, 
uint8_t *sgpr_idx,
>shader_info->user_sgprs_locs.shader_data[idx];
assert(ud_info);
 
-   set_loc(ud_info, sgpr_idx, num_sgprs, false);
+   set_loc(ud_info, sgpr_idx, num_sgprs);
 }
 
 static void
@@ -581,18 +580,16 @@ set_loc_shader_ptr(struct radv_shader_context *ctx, int 
idx, uint8_t *sgpr_idx)
 }
 
 static void
-set_loc_desc(struct radv_shader_context *ctx, int idx,  uint8_t *sgpr_idx,
-bool indirect)
+set_loc_desc(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx)
 {
struct radv_userdata_locations *locs =
>shader_info->user_sgprs_locs;
struct radv_userdata_info *ud_info = >descriptor_sets[idx];
assert(ud_info);
 
-   set_loc(ud_info, sgpr_idx, 1, indirect);
+   set_loc(ud_info, sgpr_idx, 1);
 
-   if (!indirect)
-   locs->descriptor_sets_enabled |= 1 << idx;
+   locs->descriptor_sets_enabled |= 1 << idx;
 }
 
 struct user_sgpr_info {
@@ -848,7 +845,7 @@ set_global_input_locs(struct radv_shader_context *ctx, 
gl_shader_stage stage,
for (unsigned i = 0; i < num_sets; ++i) {
if ((ctx->shader_info->info.desc_set_used_mask & (1 << 
i)) &&
ctx->options->layout->set[i].layout->shader_stages 
& stage_mask) {
-   set_loc_desc(ctx, i, user_sgpr_idx, false);
+   set_loc_desc(ctx, i, user_sgpr_idx);
} else
ctx->descriptor_sets[i] = NULL;
}
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index a1d38b3ce12..3652a811e80 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -213,7 +213,6 @@ struct radv_shader_info {
 struct radv_userdata_info {
int8_t sgpr_idx;
uint8_t num_sgprs;
-   bool indirect;
 };
 
 struct radv_userdata_locations {
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev