[Mesa-dev] [PATCH 2/4] freedreno: Use shared drm_find_modifier util

2019-03-13 Thread Alyssa Rosenzweig
Signed-off-by: Alyssa Rosenzweig 
Cc: Rob Clark 
---
 .../drivers/freedreno/freedreno_resource.c| 20 ---
 1 file changed, 4 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c 
b/src/gallium/drivers/freedreno/freedreno_resource.c
index 36d61d715ef..620ed4cad41 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -32,6 +32,7 @@
 #include "util/u_string.h"
 #include "util/u_surface.h"
 #include "util/set.h"
+#include "util/u_drm.h"
 
 #include "freedreno_resource.h"
 #include "freedreno_batch_cache.h"
@@ -830,19 +831,6 @@ has_depth(enum pipe_format format)
}
 }
 
-static bool
-find_modifier(uint64_t needle, const uint64_t *haystack, int count)
-{
-   int i;
-
-   for (i = 0; i < count; i++) {
-   if (haystack[i] == needle)
-   return true;
-   }
-
-   return false;
-}
-
 /**
  * Create a new texture object, using the given template info.
  */
@@ -906,7 +894,7 @@ fd_resource_create_with_modifiers(struct pipe_screen 
*pscreen,
 PIPE_BIND_LINEAR  | \
 PIPE_BIND_DISPLAY_TARGET)
 
-   bool linear = find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count);
+   bool linear = drm_find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, 
count);
if (tmpl->bind & LINEAR)
linear = true;
 
@@ -918,9 +906,9 @@ fd_resource_create_with_modifiers(struct pipe_screen 
*pscreen,
 * except we don't have a format modifier for tiled.  (We probably
 * should.)
 */
-   bool allow_ubwc = find_modifier(DRM_FORMAT_MOD_INVALID, modifiers, 
count);
+   bool allow_ubwc = drm_find_modifier(DRM_FORMAT_MOD_INVALID, modifiers, 
count);
if (tmpl->bind & PIPE_BIND_SHARED)
-   allow_ubwc = find_modifier(DRM_FORMAT_MOD_QCOM_COMPRESSED, 
modifiers, count);
+   allow_ubwc = drm_find_modifier(DRM_FORMAT_MOD_QCOM_COMPRESSED, 
modifiers, count);
 
/* TODO turn on UBWC for all internal buffers
 * Manhattan benchmark shows artifacts when enabled.  Once this
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/4] v3d: Use shared drm_find_modifier util

2019-03-13 Thread Alyssa Rosenzweig
Signed-off-by: Alyssa Rosenzweig 
Cc: Eric Anholt 
---
 src/gallium/drivers/v3d/v3d_resource.c | 18 +++---
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/v3d/v3d_resource.c 
b/src/gallium/drivers/v3d/v3d_resource.c
index ddecafe2ec9..71248e9ca1e 100644
--- a/src/gallium/drivers/v3d/v3d_resource.c
+++ b/src/gallium/drivers/v3d/v3d_resource.c
@@ -31,6 +31,7 @@
 #include "util/u_transfer_helper.h"
 #include "util/u_upload_mgr.h"
 #include "util/u_format_zs.h"
+#include "util/u_drm.h"
 
 #include "drm-uapi/drm_fourcc.h"
 #include "v3d_screen.h"
@@ -685,19 +686,6 @@ v3d_resource_setup(struct pipe_screen *pscreen,
 return rsc;
 }
 
-static bool
-find_modifier(uint64_t needle, const uint64_t *haystack, int count)
-{
-int i;
-
-for (i = 0; i < count; i++) {
-if (haystack[i] == needle)
-return true;
-}
-
-return false;
-}
-
 static struct pipe_resource *
 v3d_resource_create_with_modifiers(struct pipe_screen *pscreen,
const struct pipe_resource *tmpl,
@@ -741,7 +729,7 @@ v3d_resource_create_with_modifiers(struct pipe_screen 
*pscreen,
 return prsc;
 }
 
-bool linear_ok = find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, 
count);
+bool linear_ok = drm_find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, 
count);
 struct v3d_resource *rsc = v3d_resource_setup(pscreen, tmpl);
 struct pipe_resource *prsc = &rsc->base;
 /* Use a tiled layout if we can, for better 3D performance. */
@@ -773,7 +761,7 @@ v3d_resource_create_with_modifiers(struct pipe_screen 
*pscreen,
 linear_ok = true;
 rsc->tiled = should_tile;
 } else if (should_tile &&
-   find_modifier(DRM_FORMAT_MOD_BROADCOM_UIF,
+   drm_find_modifier(DRM_FORMAT_MOD_BROADCOM_UIF,
  modifiers, count)) {
 rsc->tiled = true;
 } else if (linear_ok) {
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/4] vc4: Use shared drm_find_modifier util

2019-03-13 Thread Alyssa Rosenzweig
Signed-off-by: Alyssa Rosenzweig 
Cc: Eric Anholt 
---
 src/gallium/drivers/vc4/vc4_resource.c | 18 +++---
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_resource.c 
b/src/gallium/drivers/vc4/vc4_resource.c
index c12187d7872..93688cde998 100644
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -30,6 +30,7 @@
 #include "util/u_surface.h"
 #include "util/u_transfer_helper.h"
 #include "util/u_upload_mgr.h"
+#include "util/u_drm.h"
 
 #include "drm-uapi/drm_fourcc.h"
 #include "drm-uapi/vc4_drm.h"
@@ -478,19 +479,6 @@ get_resource_texture_format(struct pipe_resource *prsc)
 return format;
 }
 
-static bool
-find_modifier(uint64_t needle, const uint64_t *haystack, int count)
-{
-int i;
-
-for (i = 0; i < count; i++) {
-if (haystack[i] == needle)
-return true;
-}
-
-return false;
-}
-
 static struct pipe_resource *
 vc4_resource_create_with_modifiers(struct pipe_screen *pscreen,
const struct pipe_resource *tmpl,
@@ -500,7 +488,7 @@ vc4_resource_create_with_modifiers(struct pipe_screen 
*pscreen,
 struct vc4_screen *screen = vc4_screen(pscreen);
 struct vc4_resource *rsc = vc4_resource_setup(pscreen, tmpl);
 struct pipe_resource *prsc = &rsc->base;
-bool linear_ok = find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, 
count);
+bool linear_ok = drm_find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, 
count);
 /* Use a tiled layout if we can, for better 3D performance. */
 bool should_tile = true;
 
@@ -541,7 +529,7 @@ vc4_resource_create_with_modifiers(struct pipe_screen 
*pscreen,
 linear_ok = true;
 rsc->tiled = should_tile;
 } else if (should_tile &&
-   find_modifier(DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
+   drm_find_modifier(DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
  modifiers, count)) {
 rsc->tiled = true;
 } else if (linear_ok) {
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/4] util: Add a drm_find_modifier helper

2019-03-13 Thread Alyssa Rosenzweig
This function is replicated across vc4/v3d/freedreno and is needed in
Panfrost; let's make this shared code.

Signed-off-by: Alyssa Rosenzweig 
---
 src/util/u_drm.h | 46 ++
 1 file changed, 46 insertions(+)
 create mode 100644 src/util/u_drm.h

diff --git a/src/util/u_drm.h b/src/util/u_drm.h
new file mode 100644
index 00000000000..d543c9a7543
--- /dev/null
+++ b/src/util/u_drm.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright © 2014 Broadcom
+ * Copyright (C) 2012 Rob Clark 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef U_DRM_H
+#define U_DRM_H
+
+#include <stdbool.h>
+
+/* Given a list of `count` DRM modifiers `haystack` and a desired modifier
+ * `needle`, returns whether the modifier is found */
+
+static bool
+drm_find_modifier(uint64_t needle, const uint64_t *haystack, unsigned count)
+{
+unsigned i;
+
+for (i = 0; i < count; i++) {
+if (haystack[i] == needle)
+return true;
+}
+
+return false;
+}
+
+#endif
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/3] radeonsi/nir: call some more var optimisation passes

2019-03-13 Thread Timothy Arceri
shader-db results (VEGA64):

Totals from affected shaders:
SGPRS: 5328912 -> 5329680 (0.01 %)
VGPRS: 2969308 -> 2969164 (-0.00 %)
Spilled SGPRs: 37921 -> 37917 (-0.01 %)
Spilled VGPRs: 32882 -> 29024 (-11.73 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 1400 -> 1200 (-14.29 %) dwords per thread
Code Size: 121126000 -> 121282784 (0.13 %) bytes
LDS: 1501 -> 1501 (0.00 %) blocks
Max Waves: 933188 -> 933229 (0.00 %)
Wait states: 0 -> 0 (0.00 %)
---
 src/gallium/drivers/radeonsi/si_shader_nir.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c 
b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 25429fdeab7..5ac18e2ebc8 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -841,9 +841,6 @@ si_lower_nir(struct si_shader_selector* sel)
 * - ensure constant offsets for texture instructions are folded
 *   and copy-propagated
 */
-   NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa);
-   NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar);
-   NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);
 
static const struct nir_lower_tex_options lower_tex_options = {
.lower_txp = ~0u,
@@ -866,6 +863,14 @@ si_lower_nir(struct si_shader_selector* sel)
do {
progress = false;
 
+   NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa);
+
+   NIR_PASS(progress, sel->nir, nir_opt_copy_prop_vars);
+   NIR_PASS(progress, sel->nir, nir_opt_dead_write_vars);
+
+   NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar);
+   NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);
+
/* (Constant) copy propagation is needed for txf with offsets. 
*/
NIR_PASS(progress, sel->nir, nir_copy_prop);
NIR_PASS(progress, sel->nir, nir_opt_remove_phis);
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/3] radv: call nir_opt_combine_stores()

2019-03-13 Thread Timothy Arceri
No change in my vkpipeline-db collection, but given the same change
in radeonsi resulted in all positive results it seems worthwhile.
---
 src/amd/vulkan/radv_shader.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index bd045a0b92f..c1047562207 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -144,6 +144,7 @@ radv_optimize_nir(struct nir_shader *shader, bool 
optimize_conservatively,
 
NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
NIR_PASS(progress, shader, nir_opt_dead_write_vars);
+   NIR_PASS(progress, shader, nir_opt_combine_stores, nir_var_all);
 
 NIR_PASS_V(shader, nir_lower_alu_to_scalar);
 NIR_PASS_V(shader, nir_lower_phis_to_scalar);
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/3] radeonsi/nir: call nir_opt_combine_stores()

2019-03-13 Thread Timothy Arceri
shader-db results (VEGA64):

Totals from affected shaders:
SGPRS: 4992920 -> 4992920 (0.00 %)
VGPRS: 2787092 -> 2786916 (-0.01 %)
Spilled SGPRs: 35987 -> 35987 (0.00 %)
Spilled VGPRs: 29024 -> 29024 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 1200 -> 1200 (0.00 %) dwords per thread
Code Size: 115681712 -> 115675472 (-0.01 %) bytes
LDS: 1522 -> 1522 (0.00 %) blocks
Max Waves: 866585 -> 866595 (0.00 %)
Wait states: 0 -> 0 (0.00 %)
---
 src/gallium/drivers/radeonsi/si_shader_nir.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c 
b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 5ac18e2ebc8..bef2a0d0d66 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -867,6 +867,7 @@ si_lower_nir(struct si_shader_selector* sel)
 
NIR_PASS(progress, sel->nir, nir_opt_copy_prop_vars);
NIR_PASS(progress, sel->nir, nir_opt_dead_write_vars);
+   NIR_PASS(progress, sel->nir, nir_opt_combine_stores, 
nir_var_all);
 
NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar);
NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] anv: Only set 3DSTATE_PS::VectorMaskEnable on gen8+

2019-03-13 Thread Kenneth Graunke
On Wednesday, March 13, 2019 4:25:24 PM PDT Jason Ekstrand wrote:
> We don't set it on HSW and earlier in i965 and disabling it appears to
> make derivatives somewhat more reliable.
> 
> Cc: Kenneth Graunke 
> ---
>  src/intel/vulkan/genX_pipeline.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/intel/vulkan/genX_pipeline.c 
> b/src/intel/vulkan/genX_pipeline.c
> index 975052deb79..3767e5033c5 100644
> --- a/src/intel/vulkan/genX_pipeline.c
> +++ b/src/intel/vulkan/genX_pipeline.c
> @@ -1761,7 +1761,7 @@ emit_3dstate_ps(struct anv_pipeline *pipeline,
> brw_wm_prog_data_prog_offset(wm_prog_data, 
> ps, 2);
>  
>ps.SingleProgramFlow  = false;
> -  ps.VectorMaskEnable   = true;
> +  ps.VectorMaskEnable   = GEN_GEN >= 8;
>/* WA_1606682166 */
>ps.SamplerCount   = GEN_GEN == 11 ? 0 : 
> get_sampler_count(fs_bin);
>/* Gen 11 workarounds table #2056 WABTPPrefetchDisable */
> 

I have no idea why it would behave any differently, but something
definitely changed with Broadwell.  If we don't set it on Gen8+,
derivatives go horribly wrong - basic Piglit tests fail.  It seems
like setting it should be the right thing.  But I never observed it
to matter at all on pre-Gen8.  Not sure why.

Acked-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] anv: Only set 3DSTATE_PS::VectorMaskEnable on gen8+

2019-03-13 Thread Jason Ekstrand
We don't set it on HSW and earlier in i965 and disabling it appears to
make derivatives somewhat more reliable.

Cc: Kenneth Graunke 
---
 src/intel/vulkan/genX_pipeline.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index 975052deb79..3767e5033c5 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -1761,7 +1761,7 @@ emit_3dstate_ps(struct anv_pipeline *pipeline,
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 
2);
 
   ps.SingleProgramFlow  = false;
-  ps.VectorMaskEnable   = true;
+  ps.VectorMaskEnable   = GEN_GEN >= 8;
   /* WA_1606682166 */
   ps.SamplerCount   = GEN_GEN == 11 ? 0 : 
get_sampler_count(fs_bin);
   /* Gen 11 workarounds table #2056 WABTPPrefetchDisable */
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Is EVOC real or fake?

2019-03-13 Thread Rob Clark
Thanks Alex.  Adding evoc list.

And I'd also like to point out that the mentors (and xorg board
members, etc) are all volunteers who are also busy with their existing
work and flooded inboxes (and in general finding people with bandwidth
to be a mentor is the hardest thing).  Students who show some
initiative and dig in to whatever upstream project under the xorg
umbrella (mesa/xorg/drm/etc) usually get a better response.

BR,
-R

On Wed, Mar 13, 2019 at 11:56 AM Alex Deucher  wrote:
>
> I don't remember seeing your message, but you can find details about EVoC 
> here:
> https://www.x.org/wiki/XorgEVoC/
>
> Basically, it's similar to GSoC.  The prospective student picks a
> project, gets involved in the project (to learn the code and make some
> simple changes to show you understand it) and then picks something to
> tackle, a new feature, etc.  You need to come up with a plan and find
> a mentor.  If this is something you want to pursue, please pick a
> project (mesa, xorg, etc.) and get to know the code base.  Ask
> questions and decide what area you are interested in and what you want
> to work on for your project.  Many projects have GSoC ideas pages that
> you can review to see some possible ideas.
>
> Alex
>
> On Wed, Mar 13, 2019 at 10:50 AM Adarsh Khubchandani  
> wrote:
> >
> > Hello. I sent a message regarding guidance that I needed for EVOC to some 
> > mentors, but no one seems to care or respond. How is someone, who is new to 
> > X.org, supposed to get started learn and contribute to the development of 
> > the community?
> > Please guide me, if anyone is interested.
> >
> > ---
> >
> > Adarsh Khubchandani.
> > askhubchandani.github.io
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] gallium/docs: clarify set_sampler_views

2019-03-13 Thread Rob Clark
On Wed, Mar 13, 2019 at 11:37 AM Roland Scheidegger  wrote:
>
> Am 12.03.19 um 22:48 schrieb Rob Clark:
> > On Tue, Mar 12, 2019 at 1:59 PM Roland Scheidegger  
> > wrote:
> >>
> >> Am 12.03.19 um 16:16 schrieb Rob Clark:
> >>> This previously was not called out clearly, but based on a survey of the
> >>> code, it seems the expected behavior is to release the reference to any
> >>> sampler views beyond the new range being bound.
> >>
> >> That isn't really true. This was designed to work like d3d10, where
> >> other views are unmodified.
> >> The cso code will actually unset all views which previously were set and
> >> are above the num_views in the call (this wouldn't be necessary if the
> >> pipe function itself would work like this).
> >> However, it will only do this for fragment textures, and pass through
> >> the parameters unmodified otherwise. Which means behavior might not be
> >> very consistent for the different stages...
> >
> > hmm, I did notice w/ deqp tests (which aren't so good at
> > resetting/clearing state between tests), that I ended up w/ different
> > # of sampler views bound (without changing freedreno to match the
> > behavior of most of the other drivers).. I didn't really dig in that
> > closely but it seemed like mesa/st wasn't clearing the additional
> > previously bound textures.  Maybe I overlooked something, but that
> > seemed wrong.
> >
> > One way or another, I guess we should clarify and change the various
> > drivers to have a common behavior because right now there are two
> > different behaviors and I guess it is at least confusing for new
> > gallium driver writers (as it was for me and I've been at it for a
> > while)
>
> Yes, I agree with that, the current state there doesn't help anyone.

I guess the best thing is that I should put together a patchset that
documents the opposite behavior of what this patch suggests, followed
by patches for the other drivers to change them to match the docs.

BR,
-R

> Roland
>
>
> > BR,
> > -R
> >
> >>
> >>
> >>>
> >>> I think radeonsi and freedreno were the only ones not doing this.  Which
> >>> could probably temporarily leak a bit of memory by holding on to the
> >>> sampler view reference.
> >> Not sure about other drivers, but llvmpipe will not do this either.
> >>
> >> Roland
> >>
> >>
> >>>
> >>> Signed-off-by: Rob Clark 
> >>> ---
> >>>  src/gallium/docs/source/context.rst | 3 +++
> >>>  1 file changed, 3 insertions(+)
> >>>
> >>> diff --git a/src/gallium/docs/source/context.rst 
> >>> b/src/gallium/docs/source/context.rst
> >>> index f89d9e1005e..199d335f8f4 100644
> >>> --- a/src/gallium/docs/source/context.rst
> >>> +++ b/src/gallium/docs/source/context.rst
> >>> @@ -143,6 +143,9 @@ to the array index which is used for sampling.
> >>>to a respective sampler view and releases a reference to the previous
> >>>sampler view.
> >>>
> >>> +  Previously bound samplers with index ``>= num_views`` are unbound 
> >>> rather
> >>> +  than unmodified.
> >>> +
> >>>  * ``create_sampler_view`` creates a new sampler view. ``texture`` is 
> >>> associated
> >>>with the sampler view which results in sampler view holding a reference
> >>>to the texture. Format specified in template must be compatible
> >>>
> >>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [Mesa-stable] [PATCH 2/3] glsl: TCS outputs can not be transform feedback candidates on GLES

2019-03-13 Thread Emil Velikov
Hi Jose,

On Wed, 21 Nov 2018 at 18:45, Jose Maria Casanova Crespo
 wrote:
>
> Fixes: 
> KHR-GLES*.core.tessellation_shader.single.xfb_captures_data_from_correct_stage
>
This and the follow-up patch "glsl: fix recording of variables for XFB
in TCS shaders" are explicitly marked as 19.0 only.
As such I've omitted them from 18.3, let me know if you prefer to include them.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] anv/pass: Flag the need for a depth flush for resolve attachments

2019-03-13 Thread Jason Ekstrand
On Wed, Mar 13, 2019 at 3:56 PM Nanley Chery  wrote:

> On Tue, Mar 12, 2019 at 10:56:27PM -0500, Jason Ekstrand wrote:
> > Cc: mesa-sta...@lists.freedesktop.org
> > Cc: Nanley Chery 
> > ---
> >  src/intel/vulkan/anv_pass.c | 18 +-
> >  1 file changed, 17 insertions(+), 1 deletion(-)
> >
> > diff --git a/src/intel/vulkan/anv_pass.c b/src/intel/vulkan/anv_pass.c
> > index 5fac5bbb31c..ec217abfda0 100644
> > --- a/src/intel/vulkan/anv_pass.c
> > +++ b/src/intel/vulkan/anv_pass.c
> > @@ -178,12 +178,28 @@ anv_render_pass_compile(struct anv_render_pass
> *pass)
> >  * subpasses and checking to see if any of them don't have an
> external
> >  * dependency.  Or, we could just be lazy and add a couple extra
> flushes.
> >  * We choose to be lazy.
> > +*
> > +* From the documentation for vkCmdNextSubpass:
> > +*
> > +*"Moving to the next subpass automatically performs any
> multisample
> > +*resolve operations in the subpass being ended. End-of-subpass
> > +*multisample resolves are treated as color attachment writes
> for the
> > +*purposes of synchronization. This applies to resolve
> operations for
> > +*both color and depth/stencil attachments. That is, they are
> > +*considered to execute in the
> > +*VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT pipeline stage
> and
> > +*their writes are synchronized with
> > +*VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT."
> > +*
> > +* Therefore, the above flags concerning color attachments also
> apply to
> > +* color and depth/stencil resolve attachments.
> >  */
> > if (all_usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) {
> >pass->subpass_flushes[0] |=
> >   ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
> > }
> > -   if (all_usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
> > +   if (all_usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
> > +VK_IMAGE_USAGE_TRANSFER_DST_BIT)) {
> >pass->subpass_flushes[pass->subpass_count] |=
> >   ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
>
> I'm assuming you meant to s/depth/color/ in the title of the patch?
>

Right.  I'll fix that.


> If so and with that change, this patch is
>
Reviewed-by: Nanley Chery 
>

Thanks!


>
> > }
> > --
> > 2.20.1
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] anv/pass: Flag the need for a depth flush for resolve attachments

2019-03-13 Thread Nanley Chery
On Tue, Mar 12, 2019 at 10:56:27PM -0500, Jason Ekstrand wrote:
> Cc: mesa-sta...@lists.freedesktop.org
> Cc: Nanley Chery 
> ---
>  src/intel/vulkan/anv_pass.c | 18 +-
>  1 file changed, 17 insertions(+), 1 deletion(-)
> 
> diff --git a/src/intel/vulkan/anv_pass.c b/src/intel/vulkan/anv_pass.c
> index 5fac5bbb31c..ec217abfda0 100644
> --- a/src/intel/vulkan/anv_pass.c
> +++ b/src/intel/vulkan/anv_pass.c
> @@ -178,12 +178,28 @@ anv_render_pass_compile(struct anv_render_pass *pass)
>  * subpasses and checking to see if any of them don't have an external
>  * dependency.  Or, we could just be lazy and add a couple extra flushes.
>  * We choose to be lazy.
> +*
> +* From the documentation for vkCmdNextSubpass:
> +*
> +*"Moving to the next subpass automatically performs any multisample
> +*resolve operations in the subpass being ended. End-of-subpass
> +*multisample resolves are treated as color attachment writes for the
> +*purposes of synchronization. This applies to resolve operations for
> +*both color and depth/stencil attachments. That is, they are
> +*considered to execute in the
> +*VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT pipeline stage and
> +*their writes are synchronized with
> +*VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT."
> +*
> +* Therefore, the above flags concerning color attachments also apply to
> +* color and depth/stencil resolve attachments.
>  */
> if (all_usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) {
>pass->subpass_flushes[0] |=
>   ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
> }
> -   if (all_usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
> +   if (all_usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
> +VK_IMAGE_USAGE_TRANSFER_DST_BIT)) {
>pass->subpass_flushes[pass->subpass_count] |=
>   ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;

I'm assuming you meant to s/depth/color/ in the title of the patch?

If so and with that change, this patch is
Reviewed-by: Nanley Chery 

> }
> -- 
> 2.20.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 99553] Tracker bug for runnning OpenCL applications on Clover

2019-03-13 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=99553

Jan Vesely  changed:

   What|Removed |Added

 Depends on||110045


Referenced Bugs:

https://bugs.freedesktop.org/show_bug.cgi?id=110045
[Bug 110045] [radeonsi][clover][regression][bisected]
cl-api-enqueue-copy-buffer hangs on radeonsi
-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [ANNOUNCE] mesa 19.0.0

2019-03-13 Thread Dylan Baker
Hi List,

I'm pleased to announce the general availability of mesa 19.0.0. We've had a
slightly long rc process with 7 RCs (there should have been 6, but there was a
bug in the script for pulling patches resulting in two back to back RCs). In
general this release has shaped up rather nicely, and I look forward to the
stable release cycle.

Of note is that autotools support is deprecated in 19.0.0, and you must now add
--enable-autotools to autogen.sh and configure. If you haven't already, **now**
is the time to try meson; if all goes according to plan, autotools will be
removed before the 19.1 release.

Dylan

shortlog:
Brian Paul (1):
  svga: remove SVGA_RELOC_READ flag in SVGA3D_BindGBSurface()

Danylo Piliaiev (1):
  anv: Fix destroying descriptor sets when pool gets reset

Dylan Baker (4):
  cherry-ignore: Update the cherry-ignore file
  VERSION: bump for 19.0.0 release
  docs: Add release notes for 19.0.0
  docs: Add SHA256 sums for 19.0.0

Eric Anholt (1):
  st/dri: Set the PIPE_BIND_SHARED flag on create_image_with_modifiers.

Erik Faye-Lund (1):
  virgl: remove unused variable

Ian Romanick (2):
  intel/fs: nir_op_extract_i8 extracts a byte, not a word
  intel/fs: Fix extract_u8 of an odd byte from a 64-bit integer

Jason Ekstrand (5):
  spirv: Pull offset/stride from the pointer for OpArrayLength
  anv: Refactor descriptor pushing a bit
  anv: Take references to push descriptor set layouts
  nir: Add a pass for lowering IO back to vector when possible
  intel/nir: Vectorize all IO

Juan A. Suarez Romero (1):
  anv: destroy descriptor sets when pool gets reset

Samuel Pitoiset (1):
  radv: fix pointSizeRange limits

Tapani Pälli (3):
  anv: release memory allocated by glsl types during spirv_to_nir
  anv: revert "anv: release memory allocated by glsl types during 
spirv_to_nir"
  anv: destroy descriptor sets when pool gets destroyed

pal1000 (1):
  scons: Compatibility with Scons development version string


git tag: mesa-19.0.0

https://mesa.freedesktop.org/archive/mesa-19.0.0.tar.gz
MD5:  9a7d36f589a6a1f832d032e5fea18c98  mesa-19.0.0.tar.gz
SHA1: c637ad5644b5da0a400dfca9ed3f8bbbe41be5bc  mesa-19.0.0.tar.gz
SHA256: 4c5b9c5227d37c1f6bdc786a6fa7ee7fbce40b2e8a87340c7d3234534ece3304  
mesa-19.0.0.tar.gz
SHA512: 
febe74d20255d2999dc9fbd228b87bfb53575641f15a9cdfa0a58f288a342dea1963a66b5c36e860dd6876fe1960e9771c59e5510c0b8a752dd744eda4da1580
  mesa-19.0.0.tar.gz
PGP:  https://mesa.freedesktop.org/archive/mesa-19.0.0.tar.gz.sig

https://mesa.freedesktop.org/archive/mesa-19.0.0.tar.xz
MD5:  d0d76cd8b00bc1308e37985d4a45d3c5  mesa-19.0.0.tar.xz
SHA1: d6353eab5e8949d14f15138fb726f5e298c70485  mesa-19.0.0.tar.xz
SHA256: 5a549dfb40ec31e5c36c47aadac04554cb2e2a8d144a046a378fc16da57e38f8  
mesa-19.0.0.tar.xz
SHA512: 
5759b85275bcd145513cf14a9ef7505595766fb33b82c53738f74ede462e5850580d48ab4af326b41209e7f4b05aab75539f2bfebf67c3098a4680ea95c37591
  mesa-19.0.0.tar.xz
PGP:  https://mesa.freedesktop.org/archive/mesa-19.0.0.tar.xz.sig



signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v4 34/40] intel/compiler: validate region restrictions for half-float conversions

2019-03-13 Thread Francisco Jerez
Iago Toral  writes:

> On Tue, 2019-03-12 at 15:44 -0700, Francisco Jerez wrote:
>> Iago Toral  writes:
>> 
>> > On Tue, 2019-03-05 at 07:35 +0100, Iago Toral wrote:
>> > > On Mon, 2019-03-04 at 15:36 -0800, Francisco Jerez wrote:
>> > > > Iago Toral  writes:
>> > > > 
>> > > > > On Fri, 2019-03-01 at 19:04 -0800, Francisco Jerez wrote:
>> > > > > > Iago Toral  writes:
>> > > > > > 
>> > > > > > > On Thu, 2019-02-28 at 09:54 -0800, Francisco Jerez wrote:
>> > > > > > > > Iago Toral  writes:
>> > > > > > > > 
>> > > > > > > > > On Wed, 2019-02-27 at 13:47 -0800, Francisco Jerez
>> > > > > > > > > wrote:
>> > > > > > > > > > Iago Toral  writes:
>> > > > > > > > > > 
>> > > > > > > > > > > On Tue, 2019-02-26 at 14:54 -0800, Francisco
>> > > > > > > > > > > Jerez
>> > > > > > > > > > > wrote:
>> > > > > > > > > > > > Iago Toral Quiroga  writes:
>> > > > > > > > > > > > 
>> > > > > > > > > > > > > ---
>> > > > > > > > > > > > >  src/intel/compiler/brw_eu_validate.c|  6
>> > > > > > > > > > > > > 4
>> > > > > > > > > > > > > -
>> > > > > > > > > > > > >  src/intel/compiler/test_eu_validate.cpp |
>> > > > > > > > > > > > > 122
>> > > > > > > > > > > > > 
>> > > > > > > > > > > > >  2 files changed, 185 insertions(+), 1
>> > > > > > > > > > > > > deletion(-
>> > > > > > > > > > > > > )
>> > > > > > > > > > > > > 
>> > > > > > > > > > > > > diff --git
>> > > > > > > > > > > > > a/src/intel/compiler/brw_eu_validate.c
>> > > > > > > > > > > > > b/src/intel/compiler/brw_eu_validate.c
>> > > > > > > > > > > > > index 000a05cb6ac..203641fecb9 100644
>> > > > > > > > > > > > > --- a/src/intel/compiler/brw_eu_validate.c
>> > > > > > > > > > > > > +++ b/src/intel/compiler/brw_eu_validate.c
>> > > > > > > > > > > > > @@ -531,7 +531,69 @@
>> > > > > > > > > > > > > general_restrictions_based_on_operand_types(c
>> > > > > > > > > > > > > onst
>> > > > > > > > > > > > > struct
>> > > > > > > > > > > > > gen_device_info *devinf
>> > > > > > > > > > > > > exec_type_size == 8 && dst_type_size
>> > > > > > > > > > > > > ==
>> > > > > > > > > > > > > 4)
>> > > > > > > > > > > > >dst_type_size = 8;
>> > > > > > > > > > > > >  
>> > > > > > > > > > > > > -   if (exec_type_size > dst_type_size) {
>> > > > > > > > > > > > > +   /* From the BDW+ PRM:
>> > > > > > > > > > > > > +*
>> > > > > > > > > > > > > +*"There is no direct conversion from
>> > > > > > > > > > > > > HF
>> > > > > > > > > > > > > to
>> > > > > > > > > > > > > DF
>> > > > > > > > > > > > > or
>> > > > > > > > > > > > > DF to
>> > > > > > > > > > > > > HF.
>> > > > > > > > > > > > > +* There is no direct conversion from
>> > > > > > > > > > > > > HF
>> > > > > > > > > > > > > to
>> > > > > > > > > > > > > Q/UQ or
>> > > > > > > > > > > > > Q/UQ to
>> > > > > > > > > > > > > HF."
>> > > > > > > > > > > > > +*/
>> > > > > > > > > > > > > +   enum brw_reg_type src0_type =
>> > > > > > > > > > > > > brw_inst_src0_type(devinfo,
>> > > > > > > > > > > > > inst);
>> > > > > > > > > > > > > +   ERROR_IF(brw_inst_opcode(devinfo, inst)
>> > > > > > > > > > > > > ==
>> > > > > > > > > > > > > BRW_OPCODE_MOV
>> > > > > > > > > > > > > &&
>> > > > > > > > > > > > 
>> > > > > > > > > > > > Why is only the MOV instruction handled here
>> > > > > > > > > > > > and
>> > > > > > > > > > > > below?  Aren't
>> > > > > > > > > > > > other
>> > > > > > > > > > > > instructions able to do implicit
>> > > > > > > > > > > > conversions?  Probably
>> > > > > > > > > > > > means
>> > > > > > > > > > > > you
>> > > > > > > > > > > > need
>> > > > > > > > > > > > to deal with two sources rather than one.
>> > > > > > > > > > > 
>> > > > > > > > > > > This comes from the programming notes of the MOV
>> > > > > > > > > > > instruction
>> > > > > > > > > > > (Volume
>> > > > > > > > > > > 2a, Command Reference - Instructions - MOV), so
>> > > > > > > > > > > it is
>> > > > > > > > > > > described
>> > > > > > > > > > > specifically for the MOV instruction. I should
>> > > > > > > > > > > probably
>> > > > > > > > > > > have
>> > > > > > > > > > > made
>> > > > > > > > > > > this
>> > > > > > > > > > > clear in the comment.
>> > > > > > > > > > > 
>> > > > > > > > > > 
>> > > > > > > > > > Maybe the one above is specified in the MOV page
>> > > > > > > > > > only,
>> > > > > > > > > > probably
>> > > > > > > > > > due
>> > > > > > > > > > to
>> > > > > > > > > > an oversight (If these restrictions were really
>> > > > > > > > > > specific
>> > > > > > > > > > to
>> > > > > > > > > > the
>> > > > > > > > > > MOV
>> > > > > > > > > > instruction, what would prevent you from
>> > > > > > > > > > implementing
>> > > > > > > > > > such
>> > > > > > > > > > conversions
>> > > > > > > > > > through a different instruction?  E.g. "ADD dst:df,
>> > > > > > > > > > src:hf,
>> > > > > > > > > > 0"
>> > > > > > > > > > which
>> > > > > > > > > > would be substantially more efficient than what
>> > > > > > > > > > you're
>> > > > > > 

[Mesa-dev] [PATCH v3 05/11] ac/nir: use new LLVM 8 intrinsics for SSBO atomic operations

2019-03-13 Thread Samuel Pitoiset
Use the raw version (ie. IDXEN=0) because vindex is unused.

v2: - use raw version

Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_nir_to_llvm.c | 66 +
 1 file changed, 42 insertions(+), 24 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 247bd20a7ea..7f63b506b93 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1626,57 +1626,75 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
 static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
   const nir_intrinsic_instr *instr)
 {
-   const char *name;
-   LLVMValueRef params[6];
+   const char *atomic_name;
+   char intrinsic_name[64];
+   LLVMValueRef params[7];
int arg_count = 0;
-
-   if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
-   params[arg_count++] = ac_llvm_extract_elem(>ac, 
get_src(ctx, instr->src[3]), 0);
-   }
-   params[arg_count++] = ac_llvm_extract_elem(>ac, get_src(ctx, 
instr->src[2]), 0);
-   params[arg_count++] = ctx->abi->load_ssbo(ctx->abi,
-get_src(ctx, instr->src[0]),
-true);
-   params[arg_count++] = ctx->ac.i32_0; /* vindex */
-   params[arg_count++] = get_src(ctx, instr->src[1]);  /* voffset */
-   params[arg_count++] = ctx->ac.i1false;  /* slc */
+   int length;
 
switch (instr->intrinsic) {
case nir_intrinsic_ssbo_atomic_add:
-   name = "llvm.amdgcn.buffer.atomic.add";
+   atomic_name = "add";
break;
case nir_intrinsic_ssbo_atomic_imin:
-   name = "llvm.amdgcn.buffer.atomic.smin";
+   atomic_name = "smin";
break;
case nir_intrinsic_ssbo_atomic_umin:
-   name = "llvm.amdgcn.buffer.atomic.umin";
+   atomic_name = "umin";
break;
case nir_intrinsic_ssbo_atomic_imax:
-   name = "llvm.amdgcn.buffer.atomic.smax";
+   atomic_name = "smax";
break;
case nir_intrinsic_ssbo_atomic_umax:
-   name = "llvm.amdgcn.buffer.atomic.umax";
+   atomic_name = "umax";
break;
case nir_intrinsic_ssbo_atomic_and:
-   name = "llvm.amdgcn.buffer.atomic.and";
+   atomic_name = "and";
break;
case nir_intrinsic_ssbo_atomic_or:
-   name = "llvm.amdgcn.buffer.atomic.or";
+   atomic_name = "or";
break;
case nir_intrinsic_ssbo_atomic_xor:
-   name = "llvm.amdgcn.buffer.atomic.xor";
+   atomic_name = "xor";
break;
case nir_intrinsic_ssbo_atomic_exchange:
-   name = "llvm.amdgcn.buffer.atomic.swap";
+   atomic_name = "swap";
break;
case nir_intrinsic_ssbo_atomic_comp_swap:
-   name = "llvm.amdgcn.buffer.atomic.cmpswap";
+   atomic_name = "cmpswap";
break;
default:
abort();
}
 
-   return ac_build_intrinsic(>ac, name, ctx->ac.i32, params, 
arg_count, 0);
+   if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
+   params[arg_count++] = ac_llvm_extract_elem(>ac, 
get_src(ctx, instr->src[3]), 0);
+   }
+   params[arg_count++] = ac_llvm_extract_elem(>ac, get_src(ctx, 
instr->src[2]), 0);
+   params[arg_count++] = ctx->abi->load_ssbo(ctx->abi,
+get_src(ctx, instr->src[0]),
+true);
+
+   if (HAVE_LLVM >= 0x0800) {
+   params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
+   params[arg_count++] = ctx->ac.i32_0; /* soffset */
+   params[arg_count++] = ctx->ac.i32_0; /* slc */
+
+   length = snprintf(intrinsic_name, sizeof(intrinsic_name),
+ "llvm.amdgcn.raw.buffer.atomic.%s.i32",
+ atomic_name);
+   } else {
+   params[arg_count++] = ctx->ac.i32_0; /* vindex */
+   params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
+   params[arg_count++] = ctx->ac.i1false; /* slc */
+
+   length = snprintf(intrinsic_name, sizeof(intrinsic_name),
+ "llvm.amdgcn.buffer.atomic.%s", atomic_name);
+   }
+
+   assert(length < sizeof(intrinsic_name));
+   return ac_build_intrinsic(>ac, intrinsic_name, ctx->ac.i32,
+ params, arg_count, 0);
 }
 
 static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org

[Mesa-dev] [PATCH v3 02/11] ac/nir: set attrib flags for SSBO and image store operations

2019-03-13 Thread Samuel Pitoiset
For consistency with the other store operations.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_nir_to_llvm.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 18297ed99b1..947dc359d49 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1591,7 +1591,8 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
ctx->ac.i1false,
};
ac_build_intrinsic(>ac, store_name,
-  ctx->ac.voidt, tbuffer_params, 10, 
0);
+  ctx->ac.voidt, tbuffer_params, 10,
+  
ac_get_store_intr_attribs(writeonly_memory));
} else {
switch (num_bytes) {
case 16: /* v4f32 */
@@ -1619,7 +1620,8 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
ctx->ac.i1false,  /* slc */
};
ac_build_intrinsic(>ac, store_name,
-  ctx->ac.voidt, params, 6, 0);
+  ctx->ac.voidt, params, 6,
+  
ac_get_store_intr_attribs(writeonly_memory));
}
}
 }
@@ -2546,7 +2548,8 @@ static void visit_image_store(struct ac_nir_context *ctx,
params[4] = LLVMConstInt(ctx->ac.i1, 
!!(args.cache_policy & ac_glc), 0);
params[5] = ctx->ac.i1false;  /* slc */
}
-   ac_build_intrinsic(>ac, name, ctx->ac.voidt, params, 6, 0);
+   ac_build_intrinsic(>ac, name, ctx->ac.voidt, params, 6,
+  ac_get_store_intr_attribs(writeonly_memory));
} else {
args.opcode = ac_image_store;
args.data[0] = ac_to_float(>ac, get_src(ctx, 
instr->src[3]));
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 09/11] ac: add ac_build_{struct, raw}_tbuffer_store() helpers

2019-03-13 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c | 127 +
 src/amd/common/ac_llvm_build.h |  29 
 2 files changed, 156 insertions(+)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 1843bbbee5f..99fb1ecc327 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1614,6 +1614,133 @@ ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
return LLVMBuildTrunc(ctx->builder, res, ctx->i16, "");
 }
 
+static void
+ac_build_llvm8_tbuffer_store(struct ac_llvm_context *ctx,
+LLVMValueRef rsrc,
+LLVMValueRef vdata,
+LLVMValueRef vindex,
+LLVMValueRef voffset,
+LLVMValueRef soffset,
+unsigned num_channels,
+unsigned dfmt,
+unsigned nfmt,
+bool glc,
+bool slc,
+bool writeonly_memory,
+bool structurized)
+{
+   LLVMValueRef args[7];
+   int idx = 0;
+   args[idx++] = vdata;
+   args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
+   if (structurized)
+   args[idx++] = vindex ? vindex : ctx->i32_0;
+   args[idx++] = voffset ? voffset : ctx->i32_0;
+   args[idx++] = soffset ? soffset : ctx->i32_0;
+   args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
+   args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
+   unsigned func = CLAMP(num_channels, 1, 3) - 1;
+
+   const char *type_names[] = {"i32", "v2i32", "v4i32"};
+   const char *indexing_kind = structurized ? "struct" : "raw";
+   char name[256];
+
+   snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.store.%s",
+indexing_kind, type_names[func]);
+
+   ac_build_intrinsic(ctx, name, ctx->voidt, args, idx,
+  ac_get_store_intr_attribs(writeonly_memory));
+}
+
+static void
+ac_build_tbuffer_store(struct ac_llvm_context *ctx,
+  LLVMValueRef rsrc,
+  LLVMValueRef vdata,
+  LLVMValueRef vindex,
+  LLVMValueRef voffset,
+  LLVMValueRef soffset,
+  LLVMValueRef immoffset,
+  unsigned num_channels,
+  unsigned dfmt,
+  unsigned nfmt,
+  bool glc,
+  bool slc,
+  bool writeonly_memory,
+  bool structurized) /* only matters for LLVM 8+ */
+{
+   if (HAVE_LLVM >= 0x800) {
+   voffset = LLVMBuildAdd(ctx->builder,
+  voffset ? voffset : ctx->i32_0,
+  immoffset, "");
+
+   ac_build_llvm8_tbuffer_store(ctx, rsrc, vdata, vindex, voffset,
+soffset, num_channels, dfmt, nfmt,
+glc, slc, writeonly_memory,
+structurized);
+   } else {
+   LLVMValueRef params[] = {
+   vdata,
+   rsrc,
+   vindex ? vindex : ctx->i32_0,
+   voffset ? voffset : ctx->i32_0,
+   soffset ? soffset : ctx->i32_0,
+   immoffset,
+   LLVMConstInt(ctx->i32, dfmt, false),
+   LLVMConstInt(ctx->i32, nfmt, false),
+   LLVMConstInt(ctx->i32, glc, false),
+   LLVMConstInt(ctx->i32, slc, false),
+   };
+   unsigned func = CLAMP(num_channels, 1, 3) - 1;
+   const char *type_names[] = {"i32", "v2i32", "v4i32"};
+   char name[256];
+
+   snprintf(name, sizeof(name), "llvm.amdgcn.tbuffer.store.%s",
+type_names[func]);
+
+   ac_build_intrinsic(ctx, name, ctx->voidt, params, 10,
+  ac_get_store_intr_attribs(writeonly_memory));
+   }
+}
+
+void
+ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vdata,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ unsigned num_channels,
+ unsigned dfmt,
+ unsigned nfmt,
+ bool glc,
+ bool slc,
+ bool writeonly_memory)
+{
+   

[Mesa-dev] [PATCH v3 04/11] ac/nir: remove one useless check in visit_store_ssbo()

2019-03-13 Thread Samuel Pitoiset
Trivial.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_nir_to_llvm.c | 9 +++--
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index c77547c319b..247bd20a7ea 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1567,12 +1567,9 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
}
data = extract_vector_range(>ac, base_data, start, count);
 
-   if (start == 0) {
-   offset = base_offset;
-   } else {
-   offset = LLVMBuildAdd(ctx->ac.builder, base_offset,
- LLVMConstInt(ctx->ac.i32, start * 
elem_size_bytes, false), "");
-   }
+   offset = LLVMBuildAdd(ctx->ac.builder, base_offset,
+ LLVMConstInt(ctx->ac.i32, start * 
elem_size_bytes, false), "");
+
if (num_bytes == 2) {
store_name = "llvm.amdgcn.tbuffer.store.i32";
data_type = ctx->ac.i32;
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 07/11] ac/nir: use ac_build_buffer_store_dword() for SSBO store operations

2019-03-13 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_nir_to_llvm.c | 23 +--
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 949a7a74834..5aad5807bb5 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1591,34 +1591,29 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
   ctx->ac.voidt, tbuffer_params, 10,
   
ac_get_store_intr_attribs(writeonly_memory));
} else {
+   int num_channels = num_bytes / 4;
+
switch (num_bytes) {
case 16: /* v4f32 */
-   store_name = "llvm.amdgcn.buffer.store.v4f32";
data_type = ctx->ac.v4f32;
break;
case 8: /* v2f32 */
-   store_name = "llvm.amdgcn.buffer.store.v2f32";
data_type = ctx->ac.v2f32;
break;
case 4: /* f32 */
-   store_name = "llvm.amdgcn.buffer.store.f32";
data_type = ctx->ac.f32;
break;
default:
unreachable("Malformed vector store.");
}
data = LLVMBuildBitCast(ctx->ac.builder, data, 
data_type, "");
-   LLVMValueRef params[] = {
-   data,
-   rsrc,
-   ctx->ac.i32_0, /* vindex */
-   offset,
-   glc,
-   ctx->ac.i1false,  /* slc */
-   };
-   ac_build_intrinsic(>ac, store_name,
-  ctx->ac.voidt, params, 6,
-  
ac_get_store_intr_attribs(writeonly_memory));
+
+   ac_build_buffer_store_dword(>ac, rsrc, data,
+   num_channels, offset,
+   ctx->ac.i32_0, 0,
+   cache_policy & ac_glc,
+   false, writeonly_memory,
+   false);
}
}
 }
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 03/11] ac: add ac_build_buffer_store_format() helper

2019-03-13 Thread Samuel Pitoiset
Similar to ac_build_buffer_load_format().

v2: - fix out of bounds access

Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c  | 100 
 src/amd/common/ac_llvm_build.h  |  11 
 src/amd/common/ac_nir_to_llvm.c |  29 +++--
 3 files changed, 119 insertions(+), 21 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index d21cd7e0e28..cd7f73aa0d4 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1082,6 +1082,106 @@ LLVMValueRef 
ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
return ac_build_load_custom(ctx, base_ptr, index, true, true, false);
 }
 
+static void
+ac_build_buffer_store_common(struct ac_llvm_context *ctx,
+LLVMValueRef rsrc,
+LLVMValueRef data,
+LLVMValueRef vindex,
+LLVMValueRef voffset,
+unsigned num_channels,
+bool glc,
+bool slc,
+bool writeonly_memory,
+bool use_format)
+{
+   LLVMValueRef args[] = {
+   data,
+   LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
+   vindex ? vindex : ctx->i32_0,
+   voffset,
+   LLVMConstInt(ctx->i1, glc, 0),
+   LLVMConstInt(ctx->i1, slc, 0)
+   };
+   unsigned func = CLAMP(num_channels, 1, 3) - 1;
+
+   const char *type_names[] = {"f32", "v2f32", "v4f32"};
+   char name[256];
+
+   if (use_format) {
+   snprintf(name, sizeof(name), 
"llvm.amdgcn.buffer.store.format.%s",
+type_names[func]);
+   } else {
+   snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.%s",
+type_names[func]);
+   }
+
+   ac_build_intrinsic(ctx, name, ctx->voidt, args, ARRAY_SIZE(args),
+  ac_get_store_intr_attribs(writeonly_memory));
+}
+
+static void
+ac_build_llvm8_buffer_store_common(struct ac_llvm_context *ctx,
+  LLVMValueRef rsrc,
+  LLVMValueRef data,
+  LLVMValueRef vindex,
+  LLVMValueRef voffset,
+  LLVMValueRef soffset,
+  unsigned num_channels,
+  bool glc,
+  bool slc,
+  bool writeonly_memory,
+  bool use_format,
+  bool structurized)
+{
+   LLVMValueRef args[6];
+   int idx = 0;
+   args[idx++] = data;
+   args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
+   if (structurized)
+   args[idx++] = vindex ? vindex : ctx->i32_0;
+   args[idx++] = voffset ? voffset : ctx->i32_0;
+   args[idx++] = soffset ? soffset : ctx->i32_0;
+   args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
+   unsigned func = CLAMP(num_channels, 1, 3) - 1;
+
+   const char *type_names[] = {"f32", "v2f32", "v4f32"};
+   const char *indexing_kind = structurized ? "struct" : "raw";
+   char name[256];
+
+   if (use_format) {
+   snprintf(name, sizeof(name), 
"llvm.amdgcn.%s.buffer.store.format.%s",
+indexing_kind, type_names[func]);
+   } else {
+   snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.%s",
+indexing_kind, type_names[func]);
+   }
+
+   ac_build_intrinsic(ctx, name, ctx->voidt, args, idx,
+  ac_get_store_intr_attribs(writeonly_memory));
+}
+
+void
+ac_build_buffer_store_format(struct ac_llvm_context *ctx,
+LLVMValueRef rsrc,
+LLVMValueRef data,
+LLVMValueRef vindex,
+LLVMValueRef voffset,
+unsigned num_channels,
+bool glc,
+bool writeonly_memory)
+{
+   if (HAVE_LLVM >= 0x800) {
+   ac_build_llvm8_buffer_store_common(ctx, rsrc, data, vindex,
+  voffset, NULL, num_channels,
+  glc, false, writeonly_memory,
+  true, true);
+   } else {
+   ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset,
+num_channels, glc, false,
+writeonly_memory, true);
+   }
+}
+
 /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by 
num_channels=1..4.
  * The type of 

[Mesa-dev] [PATCH v3 06/11] ac/nir: use ac_build_buffer_load() for SSBO load operations

2019-03-13 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_nir_to_llvm.c | 35 ++---
 1 file changed, 6 insertions(+), 29 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 7f63b506b93..949a7a74834 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1704,7 +1704,6 @@ static LLVMValueRef visit_load_buffer(struct 
ac_nir_context *ctx,
int num_components = instr->num_components;
enum gl_access_qualifier access = nir_intrinsic_access(instr);
unsigned cache_policy = get_cache_policy(ctx, access, false, false);
-   LLVMValueRef glc = (cache_policy & ac_glc) ? ctx->ac.i1true : 
ctx->ac.i1false;
 
LLVMValueRef offset = get_src(ctx, instr->src[1]);
LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
@@ -1734,34 +1733,12 @@ static LLVMValueRef visit_load_buffer(struct 
ac_nir_context *ctx,
  immoffset,
  cache_policy & 
ac_glc);
} else {
-   const char *load_name;
-   LLVMTypeRef data_type;
-   switch (load_bytes) {
-   case 16:
-   case 12:
-   load_name = "llvm.amdgcn.buffer.load.v4f32";
-   data_type = ctx->ac.v4f32;
-   break;
-   case 8:
-   case 6:
-   load_name = "llvm.amdgcn.buffer.load.v2f32";
-   data_type = ctx->ac.v2f32;
-   break;
-   case 4:
-   load_name = "llvm.amdgcn.buffer.load.f32";
-   data_type = ctx->ac.f32;
-   break;
-   default:
-   unreachable("Malformed load buffer.");
-   }
-   LLVMValueRef params[] = {
-   rsrc,
-   vindex,
-   LLVMBuildAdd(ctx->ac.builder, offset, 
immoffset, ""),
-   glc,
-   ctx->ac.i1false,
-   };
-   ret = ac_build_intrinsic(>ac, load_name, 
data_type, params, 5, 0);
+   int num_channels = util_next_power_of_two(load_bytes) / 
4;
+
+   ret = ac_build_buffer_load(>ac, rsrc, num_channels,
+  vindex, offset, immoffset, 0,
+  cache_policy & ac_glc, false,
+  false, false);
}
 
LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, 
ac_get_type_size(LLVMTypeOf(ret)));
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 11/11] ac: use new LLVM 8 intrinsics in ac_build_buffer_store_dword()

2019-03-13 Thread Samuel Pitoiset
New buffer intrinsics have a separate soffset parameter.

v3: - use ac_build_raw_tbuffer_store()
v2: - use the raw version as vindex is not used

Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c | 66 ++
 1 file changed, 26 insertions(+), 40 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index c86a4f98864..541ad75c877 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1227,59 +1227,45 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
if (!swizzle_enable_hint) {
LLVMValueRef offset = soffset;
 
-   static const char *types[] = {"f32", "v2f32", "v4f32"};
-
if (inst_offset)
offset = LLVMBuildAdd(ctx->builder, offset,
  LLVMConstInt(ctx->i32, 
inst_offset, 0), "");
-   if (voffset)
-   offset = LLVMBuildAdd(ctx->builder, offset, voffset, 
"");
-
-   LLVMValueRef args[] = {
-   ac_to_float(ctx, vdata),
-   LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
-   ctx->i32_0,
-   offset,
-   LLVMConstInt(ctx->i1, glc, 0),
-   LLVMConstInt(ctx->i1, slc, 0),
-   };
-
-   char name[256];
-   snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.%s",
-types[CLAMP(num_channels, 1, 3) - 1]);
 
-   ac_build_intrinsic(ctx, name, ctx->voidt,
-  args, ARRAY_SIZE(args),
-  ac_get_store_intr_attribs(writeonly_memory));
+   if (HAVE_LLVM >= 0x800) {
+   ac_build_llvm8_buffer_store_common(ctx, rsrc,
+  ac_to_float(ctx, 
vdata),
+  ctx->i32_0,
+  voffset, offset,
+  num_channels,
+  glc, slc,
+  writeonly_memory,
+  false, false);
+   } else {
+   if (voffset)
+   offset = LLVMBuildAdd(ctx->builder, offset, 
voffset, "");
+
+   ac_build_buffer_store_common(ctx, rsrc,
+ac_to_float(ctx, vdata),
+ctx->i32_0, offset,
+num_channels, glc, slc,
+writeonly_memory, false);
+   }
return;
}
 
-   static const unsigned dfmt[] = {
+   static const unsigned dfmts[] = {
V_008F0C_BUF_DATA_FORMAT_32,
V_008F0C_BUF_DATA_FORMAT_32_32,
V_008F0C_BUF_DATA_FORMAT_32_32_32,
V_008F0C_BUF_DATA_FORMAT_32_32_32_32
};
-   static const char *types[] = {"i32", "v2i32", "v4i32"};
-   LLVMValueRef args[] = {
-   vdata,
-   LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
-   ctx->i32_0,
-   voffset ? voffset : ctx->i32_0,
-   soffset,
-   LLVMConstInt(ctx->i32, inst_offset, 0),
-   LLVMConstInt(ctx->i32, dfmt[num_channels - 1], 0),
-   LLVMConstInt(ctx->i32, V_008F0C_BUF_NUM_FORMAT_UINT, 0),
-   LLVMConstInt(ctx->i1, glc, 0),
-   LLVMConstInt(ctx->i1, slc, 0),
-   };
-   char name[256];
-   snprintf(name, sizeof(name), "llvm.amdgcn.tbuffer.store.%s",
-types[CLAMP(num_channels, 1, 3) - 1]);
+   unsigned dfmt = dfmts[num_channels - 1];
+   unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
+   LLVMValueRef immoffset = LLVMConstInt(ctx->i32, inst_offset, 0);
 
-   ac_build_intrinsic(ctx, name, ctx->voidt,
-  args, ARRAY_SIZE(args),
-  ac_get_store_intr_attribs(writeonly_memory));
+   ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset,
+  immoffset, num_channels, dfmt, nfmt, glc,
+  slc, writeonly_memory);
 }
 
 static LLVMValueRef
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 10/11] ac: use new LLVM 8 intrinsic when storing 16-bit values

2019-03-13 Thread Samuel Pitoiset
vindex is always 0.

v3: use the raw version
v2: do not force enable IDXEN when unnecessary

Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c  | 20 
 src/amd/common/ac_llvm_build.h  |  9 +
 src/amd/common/ac_nir_to_llvm.c | 25 -
 3 files changed, 33 insertions(+), 21 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 99fb1ecc327..c86a4f98864 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1741,6 +1741,26 @@ ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx,
   writeonly_memory, false);
 }
 
+void
+ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
+LLVMValueRef rsrc,
+LLVMValueRef vdata,
+LLVMValueRef voffset,
+LLVMValueRef soffset,
+bool glc,
+bool writeonly_memory)
+{
+   unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16;
+   unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
+
+   vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i16, "");
+   vdata = LLVMBuildZExt(ctx->builder, vdata, ctx->i32, "");
+
+   ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset,
+  ctx->i32_0, 1, dfmt, nfmt, glc, false,
+  writeonly_memory);
+}
+
 /**
  * Set range metadata on an instruction.  This can only be used on load and
  * call instructions.  If you know an instruction can only produce the values
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index ae0bdbcfd1c..723aec53cbe 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -343,6 +343,15 @@ ac_build_raw_tbuffer_load(struct ac_llvm_context *ctx,
  bool slc,
  bool can_speculate);
 
+void
+ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
+LLVMValueRef rsrc,
+LLVMValueRef vdata,
+LLVMValueRef voffset,
+LLVMValueRef soffset,
+bool glc,
+bool writeonly_memory);
+
 void
 ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx,
  LLVMValueRef rsrc,
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 5aad5807bb5..7a03e9c15b2 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1521,14 +1521,12 @@ static unsigned get_cache_policy(struct ac_nir_context 
*ctx,
 static void visit_store_ssbo(struct ac_nir_context *ctx,
  nir_intrinsic_instr *instr)
 {
-   const char *store_name;
LLVMValueRef src_data = get_src(ctx, instr->src[0]);
int elem_size_bytes = ac_get_elem_bits(>ac, LLVMTypeOf(src_data)) 
/ 8;
unsigned writemask = nir_intrinsic_write_mask(instr);
enum gl_access_qualifier access = nir_intrinsic_access(instr);
bool writeonly_memory = access & ACCESS_NON_READABLE;
unsigned cache_policy = get_cache_policy(ctx, access, false, 
writeonly_memory);
-   LLVMValueRef glc = (cache_policy & ac_glc) ? ctx->ac.i1true : 
ctx->ac.i1false;
 
LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
get_src(ctx, instr->src[1]), true);
@@ -1571,25 +1569,10 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
  LLVMConstInt(ctx->ac.i32, start * 
elem_size_bytes, false), "");
 
if (num_bytes == 2) {
-   store_name = "llvm.amdgcn.tbuffer.store.i32";
-   data_type = ctx->ac.i32;
-   data = LLVMBuildBitCast(ctx->ac.builder, data, 
ctx->ac.i16, "");
-   data = LLVMBuildZExt(ctx->ac.builder, data, data_type, 
"");
-   LLVMValueRef tbuffer_params[] = {
-   data,
-   rsrc,
-   ctx->ac.i32_0, /* vindex */
-   offset,/* voffset */
-   ctx->ac.i32_0,
-   ctx->ac.i32_0,
-   LLVMConstInt(ctx->ac.i32, 2, false), // dfmt (= 
16bit)
-   LLVMConstInt(ctx->ac.i32, 4, false), // nfmt (= 
uint)
-   glc,
-   ctx->ac.i1false,
-   };
-   ac_build_intrinsic(>ac, store_name,
-  ctx->ac.voidt, tbuffer_params, 10,
-  
ac_get_store_intr_attribs(writeonly_memory));
+   ac_build_tbuffer_store_short(>ac, 

[Mesa-dev] [PATCH v3 01/11] ac: make use of ac_get_store_intr_attribs() where possible

2019-03-13 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 0cab4d4a9b5..d21cd7e0e28 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1150,9 +1150,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
 
ac_build_intrinsic(ctx, name, ctx->voidt,
   args, ARRAY_SIZE(args),
-  writeonly_memory ?
-  AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY :
-  AC_FUNC_ATTR_WRITEONLY);
+  ac_get_store_intr_attribs(writeonly_memory));
return;
}
 
@@ -1181,9 +1179,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
 
ac_build_intrinsic(ctx, name, ctx->voidt,
   args, ARRAY_SIZE(args),
-  writeonly_memory ?
-  AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY :
-  AC_FUNC_ATTR_WRITEONLY);
+  ac_get_store_intr_attribs(writeonly_memory));
 }
 
 static LLVMValueRef
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 08/11] ac: use new LLVM 8 intrinsics in ac_build_buffer_load()

2019-03-13 Thread Samuel Pitoiset
v3: - always use the raw version as vindex is unused
v2: - do not force enable IDXEN when unnecessary

Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index cd7f73aa0d4..1843bbbee5f 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1412,6 +1412,14 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
return ac_build_gather_values(ctx, result, num_channels);
}
 
+   if (HAVE_LLVM >= 0x0800) {
+   return ac_build_llvm8_buffer_load_common(ctx, rsrc, vindex,
+offset, ctx->i32_0,
+num_channels, glc, slc,
+can_speculate, false,
+false);
+   }
+
return ac_build_buffer_load_common(ctx, rsrc, vindex, offset,
   num_channels, glc, slc,
   can_speculate, false);
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 00/11] ac: use LLVM 8 buffer intrinsics everywhere

2019-03-13 Thread Samuel Pitoiset
Hi,

This small series makes use of new LLVM 8 buffer intrinsics.
No CTS regressions on GFX8 with LLVM 7, 8 and master.

v3: use different names (ie. struct vs raw) for IDXEN
v2: fix use of IDXEN for GFX9

Please review,
Thanks!

Samuel Pitoiset (11):
  ac: make use of ac_get_store_intr_attribs() where possible
  ac/nir: set attrib flags for SSBO and image store operations
  ac: add ac_build_buffer_store_format() helper
  ac/nir: remove one useless check in visit_store_ssbo()
  ac/nir: use new LLVM 8 intrinsics for SSBO atomic operations
  ac/nir: use ac_build_buffer_load() for SSBO load operations
  ac/nir: use ac_build_buffer_store_dword() for SSBO store operations
  ac: use new LLVM 8 intrinsics in ac_build_buffer_load()
  ac: add ac_build_{struct,raw}_tbuffer_store() helpers
  ac: use new LLVM 8 intrinsic when storing 16-bit values
  ac: use new LLVM 8 intrinsics in ac_build_buffer_store_dword()

 src/amd/common/ac_llvm_build.c  | 325 +++-
 src/amd/common/ac_llvm_build.h  |  49 +
 src/amd/common/ac_nir_to_llvm.c | 184 +++---
 3 files changed, 402 insertions(+), 156 deletions(-)

-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radv: fix the NUM_RECORDS field for vertex bindings on GFX6/GFX7

2019-03-13 Thread Samuel Pitoiset
Since the driver now uses typed buffer loads, we don't have to
account for the format.

This fixes a few CTS regressions on SI.

Fixes: a66b186bebf ("radv: use typed buffer loads for vertex input fetches")
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_cmd_buffer.c |  3 +--
 src/amd/vulkan/radv_pipeline.c   | 12 
 src/amd/vulkan/radv_private.h|  6 --
 3 files changed, 1 insertion(+), 20 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 06806ed6fce..d14bb1093c5 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1990,7 +1990,6 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer 
*cmd_buffer,
(cmd_buffer->state.dirty & RADV_CMD_DIRTY_VERTEX_BUFFER)) &&
cmd_buffer->state.pipeline->num_vertex_bindings &&
radv_get_shader(cmd_buffer->state.pipeline, 
MESA_SHADER_VERTEX)->info.info.vs.has_vertex_buffers) {
-   struct radv_vertex_elements_info *velems = 
_buffer->state.pipeline->vertex_elements;
unsigned vb_offset;
void *vb_ptr;
uint32_t i = 0;
@@ -2018,7 +2017,7 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer 
*cmd_buffer,
desc[0] = va;
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | 
S_008F04_STRIDE(stride);
if 
(cmd_buffer->device->physical_device->rad_info.chip_class <= CIK && stride)
-   desc[2] = (buffer->size - offset - 
velems->format_size[i]) / stride + 1;
+   desc[2] = (buffer->size - offset) / stride + 1;
else
desc[2] = buffer->size - offset;
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 7f2f96c540a..793508d15d6 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3531,18 +3531,6 @@ radv_compute_vertex_input_state(struct radv_pipeline 
*pipeline,
 {
const VkPipelineVertexInputStateCreateInfo *vi_info =
pCreateInfo->pVertexInputState;
-   struct radv_vertex_elements_info *velems = >vertex_elements;
-
-   for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) 
{
-   const VkVertexInputAttributeDescription *desc =
-   _info->pVertexAttributeDescriptions[i];
-   unsigned loc = desc->location;
-   const struct vk_format_description *format_desc;
-
-   format_desc = vk_format_description(desc->format);
-
-   velems->format_size[loc] = format_desc->block.bits / 8;
-   }
 
for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
const VkVertexInputBindingDescription *desc =
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 39fa6110fde..5c6258a2952 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1341,10 +1341,6 @@ struct radv_prim_vertex_count {
uint8_t incr;
 };
 
-struct radv_vertex_elements_info {
-   uint32_t format_size[MAX_VERTEX_ATTRIBS];
-};
-
 struct radv_ia_multi_vgt_param_helpers {
uint32_t base;
bool partial_es_wave;
@@ -1371,8 +1367,6 @@ struct radv_pipeline {
uint32_t  ctx_cs_hash;
struct radeon_cmdbuf  ctx_cs;
 
-   struct radv_vertex_elements_info vertex_elements;
-
uint32_t binding_stride[MAX_VBS];
uint8_t  num_vertex_bindings;
 
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Is EVOC real or fake?

2019-03-13 Thread Alex Deucher
I don't remember seeing your message, but you can find details about EVoC here:
https://www.x.org/wiki/XorgEVoC/

Basically, it's similar to GSoC.  The prospective student picks a
project, gets involved in the project (to learn the code and make some
simple changes to show you understand it) and then pick something to
tackle, a new feature, etc.  You need to come up with a plan and find
a mentor.  If this is something you want to pursue, please pick a
project (mesa, xorg, etc.) and get to know the code base.  Ask
questions and decide what area you are interested in and what you want
to work on for your project.  Many projects have GSoC ideas pages that
you can review to see some possible ideas.

Alex

On Wed, Mar 13, 2019 at 10:50 AM Adarsh Khubchandani  wrote:
>
> Hello. I sent a message regarding guidance that I needed for EVOC to some 
> mentors, but no one seems to care or respond. How is someone, who is new to 
> X.org, supposed to get started, learn, and contribute to the development of the 
> community?
> Please guide me, if anyone is interested.
>
> ---
>
> Adarsh Khubchandani.
> askhubchandani.github.io
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] gallium/docs: clarify set_sampler_views

2019-03-13 Thread Roland Scheidegger
Am 12.03.19 um 22:48 schrieb Rob Clark:
> On Tue, Mar 12, 2019 at 1:59 PM Roland Scheidegger  wrote:
>>
>> Am 12.03.19 um 16:16 schrieb Rob Clark:
>>> This previously was not called out clearly, but based on a survey of the
>>> code, it seems the expected behavior is to release the reference to any
>>> sampler views beyond the new range being bound.
>>
>> That isn't really true. This was designed to work like d3d10, where
>> other views are unmodified.
>> The cso code will actually unset all views which previously were set and
>> are above the num_views in the call (this wouldn't be necessary if the
>> pipe function itself would work like this).
>> However, it will only do this for fragment textures, and pass through
>> the parameters unmodified otherwise. Which means behavior might not be
>> very consistent for the different stages...
> 
> hmm, I did notice w/ deqp tests (which aren't so good at
> resetting/clearing state between tests), that I ended up w/ different
> # of sampler views bound (without changing freedreno to match the
> behavior of most of the other drivers).. I didn't really dig in that
> closely but it seemed like mesa/st wasn't clearing the additional
> previously bound textures.  Maybe I overlooked something, but that
> seemed wrong.
> 
> One way or another, I guess we should clarify and change the various
> drivers to have a common behavior because right now there are two
> different behaviors and I guess it is at least confusing for new
> gallium driver writers (as it was for me and I've been at it for a
> while)

Yes, I agree with that, the current state there doesn't help anyone.

Roland


> BR,
> -R
> 
>>
>>
>>>
>>> I think radeonsi and freedreno were the only ones not doing this.  Which
>>> could probably temporarily leak a bit of memory by holding on to the
>>> sampler view reference.
>> Not sure about other drivers, but llvmpipe will not do this either.
>>
>> Roland
>>
>>
>>>
>>> Signed-off-by: Rob Clark 
>>> ---
>>>  src/gallium/docs/source/context.rst | 3 +++
>>>  1 file changed, 3 insertions(+)
>>>
>>> diff --git a/src/gallium/docs/source/context.rst 
>>> b/src/gallium/docs/source/context.rst
>>> index f89d9e1005e..199d335f8f4 100644
>>> --- a/src/gallium/docs/source/context.rst
>>> +++ b/src/gallium/docs/source/context.rst
>>> @@ -143,6 +143,9 @@ to the array index which is used for sampling.
>>>to a respective sampler view and releases a reference to the previous
>>>sampler view.
>>>
>>> +  Previously bound samplers with index ``>= num_views`` are unbound rather
>>> +  than unmodified.
>>> +
>>>  * ``create_sampler_view`` creates a new sampler view. ``texture`` is 
>>> associated
>>>with the sampler view which results in sampler view holding a reference
>>>to the texture. Format specified in template must be compatible
>>>
>>

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 109599] small shadows are not drawn in Heroes of the Storm

2019-03-13 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109599

--- Comment #14 from tempel.jul...@gmail.com ---
Sure, no problem. Upload is complete now.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 109599] small shadows are not drawn in Heroes of the Storm

2019-03-13 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109599

--- Comment #13 from tempel.jul...@gmail.com ---
Created attachment 143652
  --> https://bugs.freedesktop.org/attachment.cgi?id=143652&action=edit
new renderdoc capture part 3

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 109599] small shadows are not drawn in Heroes of the Storm

2019-03-13 Thread bugzilla-daemon


[Mesa-dev] [Bug 109599] small shadows are not drawn in Heroes of the Storm

2019-03-13 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109599

--- Comment #11 from tempel.jul...@gmail.com ---
Created attachment 143650
  --> https://bugs.freedesktop.org/attachment.cgi?id=143650&action=edit
new renderdoc capture part 1

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 110035] AMDGPU and AMDGPU-PRO driver installation fails on Ubuntu 18.04.2 (Radeon VII)

2019-03-13 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110035

Michel Dänzer  changed:

   What|Removed |Added

   Assignee|mesa-dev@lists.freedesktop. |dri-devel@lists.freedesktop
   |org |.org
Product|Mesa|DRI
 QA Contact|mesa-dev@lists.freedesktop. |
   |org |
  Component|Other   |DRM/AMDgpu-pro

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/9] anv: Added support for non-dynamic sample locations on Gen8+

2019-03-13 Thread Jason Ekstrand
On Wed, Mar 13, 2019 at 8:43 AM Eleni Maria Stea  wrote:

> On Wed, 13 Mar 2019 08:16:10 -0500
> Jason Ekstrand  wrote:
>
> > On Mon, Mar 11, 2019 at 10:05 AM Eleni Maria Stea 
> > wrote:
> >
> > > Allowing the user to set custom sample locations non-dynamically, by
> > > filling the extension structs and chaining them to the pipeline
> > > structs according to the Vulkan specification section [26.5. Custom
> > > Sample Locations]
>
> [...]
>
> > > +void
> > > +anv_calc_sample_locations(struct anv_sample *samples,
> > > +  uint32_t num_samples,
> > > +  const VkSampleLocationsInfoEXT *info)
> > > +{
> > > +   int i;
> > > +
> > > +   for(i = 0; i < num_samples; i++) {
> > > +  float dx, dy;
> > > +
> > > +  /* this is because the grid is 1x1, in case that
> > > +   * we support different grid sizes in the future
> > > +   * this must be changed.
> > > +   */
> > > +  samples[i].offs_x = info->pSampleLocations[i].x;
> > > +  samples[i].offs_y = info->pSampleLocations[i].y;
> > > +
> > > +  /* distance from the center */
> > > +  dx = samples[i].offs_x - 0.5;
> > > +  dy = samples[i].offs_y - 0.5;
> > > +
> > > +  samples[i].radius = dx * dx + dy * dy;
> > > +   }
> > > +
> > > +   qsort(samples, num_samples, sizeof *samples, compare_samples);
> > >
> >
> > Are we allowed to re-order the samples like this?  The spec says:
> >
> > The sample location for sample i at the pixel grid location (x,y) is
> > taken from pSampleLocations[(x + y * sampleLocationGridSize.width) *
> > sampleLocationsPerPixel + i]
> >
> > Which leads me to think that they expect the ordering of samples to be
> > respected.  Yes, I know the HW docs say we're supposed to order them
> > from nearest to furthest.  However, AFAIK, that's only so we get nice
> > centroids and I don't know that it's actually required.
> >
> > --Jason
>
> I wasn't sure about this to be honest. I could remove the qsort and
> explain why we decided to ignore the PRM in a comment for the case that
> someone decides to put this back in the future.
>

I think we're better off ignoring the hardware and adding a comment
something like this:

The Skylake PRM Vol. 2a "3DSTATE_SAMPLE_PATTERN" says:

   "When programming the sample offsets (for NUMSAMPLES_4 or _8 and
MSRASTMODE_xxx_PATTERN), the
   order of the samples 0 to 3 (or 7 for 8X, or 15 for 16X) must have
monotonically increasing distance from the
   pixel center. This is required to get the correct centroid computation
in the device."

However, the Vulkan spec seems to require that the samples occur in the
order provided through the API.  The standard sample patterns have the
above property that they have monotonically increasing distances from the
center but client-provided ones do not.  As long as this only affects
centroid calculations as the docs say, we should be ok because OpenGL and
Vulkan only require that the centroid be some lit sample and that it's the
same for all samples in a pixel; they have no requirement that it be the
one closest to center.

--Jason
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/3] gallium/auxiliary/vl: Change weave compute shader implementation

2019-03-13 Thread Liu, Leo

On 3/12/19 11:12 AM, Zhu, James wrote:
> Use 2D_ARRAY instead of RECT to fetch texels for weave compute
> shader.
>
> Problem 2,3: Fixed interpolation issue with weave de-interlace
>
> Fixes: 9364d66cb7f7 (Add video compositor compute shader render)
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109646

The series are:

Acked-by: Leo Liu 



> Signed-off-by: James Zhu 
> Tested-by: Bruno Milreu 
> ---
>   src/gallium/auxiliary/vl/vl_compositor_cs.c | 79 
> ++---
>   1 file changed, 62 insertions(+), 17 deletions(-)
>
> diff --git a/src/gallium/auxiliary/vl/vl_compositor_cs.c 
> b/src/gallium/auxiliary/vl/vl_compositor_cs.c
> index de0a3c7..bad7d5f 100644
> --- a/src/gallium/auxiliary/vl/vl_compositor_cs.c
> +++ b/src/gallium/auxiliary/vl/vl_compositor_cs.c
> @@ -113,15 +113,16 @@ const char *compute_shader_weave =
> "DCL SV[1], BLOCK_ID\n"
>   
> "DCL CONST[0..5]\n"
> -  "DCL SVIEW[0..2], RECT, FLOAT\n"
> +  "DCL SVIEW[0..2], 2D_ARRAY, FLOAT\n"
> "DCL SAMP[0..2]\n"
>   
> "DCL IMAGE[0], 2D, WR\n"
> -  "DCL TEMP[0..9]\n"
> +  "DCL TEMP[0..15]\n"
>   
> "IMM[0] UINT32 { 8, 8, 1, 0}\n"
> "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"
> "IMM[2] UINT32 { 1, 2, 4, 0}\n"
> +  "IMM[3] FLT32 { 0.25, 0.5, 0.125, 0.125}\n"
>   
> "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"
>   
> @@ -137,26 +138,70 @@ const char *compute_shader_weave =
>/* Translate */
>"UADD TEMP[2].xy, TEMP[2], -CONST[5].xyxy\n"
>   
> - /* Texture layer */
> - "UMOD TEMP[2].z, TEMP[2]., IMM[2].\n"
> - "UMOD TEMP[3].z, TEMP[2]., IMM[2].\n"
> - "USHR TEMP[3].z, TEMP[3]., IMM[2].\n"
> + /* Top Y */
> + "U2F TEMP[2], TEMP[2]\n"
> + "DIV TEMP[2].y, TEMP[2]., IMM[1].\n"
> + /* Down Y */
> + "MOV TEMP[12], TEMP[2]\n"
> +
> + /* Top UV */
> + "MOV TEMP[3], TEMP[2]\n"
> + "DIV TEMP[3].xy, TEMP[3], IMM[1].\n"
> + /* Down UV */
> + "MOV TEMP[13], TEMP[3]\n"
> +
> + /* Texture offset */
> + "ADD TEMP[2].x, TEMP[2]., IMM[3].\n"
> + "ADD TEMP[2].y, TEMP[2]., IMM[3].\n"
> + "ADD TEMP[12].x, TEMP[12]., IMM[3].\n"
> + "ADD TEMP[12].y, TEMP[12]., IMM[3].\n"
> +
> + "ADD TEMP[3].x, TEMP[3]., IMM[3].\n"
> + "ADD TEMP[3].y, TEMP[3]., IMM[3].\n"
> + "ADD TEMP[13].x, TEMP[13]., IMM[3].\n"
> + "ADD TEMP[13].y, TEMP[13]., IMM[3].\n"
>   
> - "USHR TEMP[2].y, TEMP[2], IMM[2].\n"
> - "USHR TEMP[3].xy, TEMP[2], IMM[2].\n"
> + /* Scale */
> + "DIV TEMP[2].xy, TEMP[2], CONST[3].zwzw\n"
> + "DIV TEMP[12].xy, TEMP[12], CONST[3].zwzw\n"
> + "DIV TEMP[3].xy, TEMP[3], CONST[3].zwzw\n"
> + "DIV TEMP[13].xy, TEMP[13], CONST[3].zwzw\n"
>   
> - "U2F TEMP[4], TEMP[2]\n"
> - "U2F TEMP[5], TEMP[3]\n"
> + /* Weave offset */
> + "ADD TEMP[2].y, TEMP[2]., IMM[3].\n"
> + "ADD TEMP[12].y, TEMP[12]., -IMM[3].\n"
> + "ADD TEMP[3].y, TEMP[3]., IMM[3].\n"
> + "ADD TEMP[13].y, TEMP[13]., -IMM[3].\n"
>   
> - /* Scale */
> - "DIV TEMP[4], TEMP[4], CONST[3].zwzw\n"
> - "DIV TEMP[5], TEMP[5], CONST[3].zwzw\n"
> + /* Texture layer */
> + "MOV TEMP[14].x, TEMP[2].\n"
> + "MOV TEMP[14].yz, TEMP[3].\n"
> + "ROUND TEMP[15], TEMP[14]\n"
> + "ADD TEMP[14], TEMP[14], -TEMP[15]\n"
> + "MOV TEMP[14], |TEMP[14]|\n"
> + "MUL TEMP[14], TEMP[14], IMM[1].\n"
> +
> + /* Normalize */
> + "DIV TEMP[2].xy, TEMP[2], CONST[5].zwzw\n"
> + "DIV TEMP[12].xy, TEMP[12], CONST[5].zwzw\n"
> + "DIV TEMP[15].xy, CONST[5].zwzw, IMM[1].\n"
> + "DIV TEMP[3].xy, TEMP[3], TEMP[15].xyxy\n"
> + "DIV TEMP[13].xy, TEMP[13], TEMP[15].xyxy\n"
>   
>/* Fetch texels */
> - "TEX_LZ TEMP[6].x, TEMP[4], SAMP[0], RECT\n"
> - "TEX_LZ TEMP[6].y, TEMP[5], SAMP[1], RECT\n"
> - "TEX_LZ TEMP[6].z, TEMP[5], SAMP[2], RECT\n"
> -
> + "MOV TEMP[2].z, IMM[1].\n"
> + "MOV TEMP[3].z, IMM[1].\n"
> + "TEX_LZ TEMP[10].x, TEMP[2], SAMP[0], 2D_ARRAY\n"
> + "TEX_LZ TEMP[10].y, TEMP[3], SAMP[1], 2D_ARRAY\n"
> + "TEX_LZ TEMP[10].z, TEMP[3], SAMP[2], 2D_ARRAY\n"
> +
> + "MOV TEMP[12].z, IMM[1].\n"
> + "MOV TEMP[13].z, IMM[1].\n"
> + "TEX_LZ TEMP[11].x, TEMP[12], SAMP[0], 2D_ARRAY\n"
> + "TEX_LZ TEMP[11].y, TEMP[13], SAMP[1], 2D_ARRAY\n"
> + "TEX_LZ TEMP[11].z, TEMP[13], SAMP[2], 2D_ARRAY\n"
> +
> + "LRP TEMP[6], TEMP[14], TEMP[11], TEMP[10]\n"
>"MOV TEMP[6].w, IMM[1].\n"
>   
>/* Color Space Conversion */

[Mesa-dev] [Bug 110035] AMDGPU and AMDGPU-PRO driver installation fails on Ubuntu 18.04.2 (Radeon VII)

2019-03-13 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110035

Bug ID: 110035
   Summary: AMDGPU and AMDGPU-PRO driver installation fails on
Ubuntu 18.04.2 (Radeon VII)
   Product: Mesa
   Version: unspecified
  Hardware: x86-64 (AMD64)
OS: Linux (All)
Status: NEW
  Severity: normal
  Priority: medium
 Component: Other
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: hrushev...@gmail.com
QA Contact: mesa-dev@lists.freedesktop.org

I have a freshly-installed Ubuntu 18.04.2 on my machine with a Radeon VII and I
want to install AMDGPU-PRO drivers on it.

The driver installation completes without problems, but after I reboot the
system, I can't log in : the system performs cyclic filesystem checks without
proceeding to the login screen.

I have verified that it is the AMDGPU-PRO driver which causes the issue: when I
go to the second console (with no graphics) and run amdgpu-pro-uninstall and
then reboot, the system works fine again.

I have tried Linux kernels 4.18.15-generic and 4.15.46 generic - the behavior
is the same.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] Is EVOC real or fake?

2019-03-13 Thread Adarsh Khubchandani
Hello. I sent a message regarding guidance that I needed for EVOC to some 
mentors, but no one seems to care or respond. How is someone, who is new to 
X.org, supposed to get started, learn, and contribute to the development of the 
community?
Please guide me, if anyone is interested.


---


Adarsh Khubchandani.

askhubchandani.github.io
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] RFC: Workaround for pthread_setaffinity_np() seccomp filtering

2019-03-13 Thread Daniel P . Berrangé
On Wed, Mar 13, 2019 at 02:29:28PM +0100, Marc-André Lureau wrote:
> Hi
> 
> On Wed, Mar 13, 2019 at 8:53 AM Mathias Fröhlich
>  wrote:
> >
> > Marek, Marc-Andre,
> >
> > On Wednesday, 13 March 2019 00:03:26 CET Marek Olšák wrote:
> > > The env var workaround is fine.
> > >
> > > Thread affinity is used for cache topology related optimizations. I think
> > > it's a mistake to treat it only as a resource allocation tool.
> >
> > For a shorter term solution to the problem.
> > One Idea that comes into my mind:
> >
> > Can we check the currently set thread affinity mask if it still contains the
> > cpu we are aiming for and narrow the mask down to our cpu if we can do
> > that by narrowing. If we would need to assign our thread to a cpu that
> > we are not bound anymore just do nothing.
> >
> 
> getaffinity() is also blocked by current qemu policy.

I think we could consider that a bug. Blocking this syscall while still
allowing read of /proc/self/status achieves little from a security pov
as the affinity is still visible. It is just protecting against a bug
in the impl of getaffinity in the kernel which is unlikely to be worth
caring about & a bug in /proc impl is probably more likely! 

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 109599] small shadows are not drawn in Heroes of the Storm

2019-03-13 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109599

--- Comment #10 from Samuel Pitoiset  ---
Can you record a fresh renderdoc capture please?

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [MR] anv: fix use after free when copying nir_xfb_info

2019-03-13 Thread apinheiro

https://gitlab.freedesktop.org/mesa/mesa/merge_requests/442

After adding varyings to nir_xfb_info, I added varyings as a pointer, 
and replaced outputs[0] for also a pointer, so now both needed to be 
allocated. But anv was copying such xfb info as: *xfb_info = *xfb_info_in


So after my changes, that line was assigning the outputs pointer, 
instead of copying. Then xfb_info_in was freed, and that included their 
outputs, that now are xfb_info outputs too. Unfourtunately, as as with 
other use after free crashes, the crash didn't happen always, just with 
some configurations. And that included not crashing on Intel CI.


Thanks @jasuarez  for pinging 
me with this issue.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 109599] small shadows are not drawn in Heroes of the Storm

2019-03-13 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109599

--- Comment #9 from tempel.jul...@gmail.com ---
Updated screenshots with the latest game update including its content change:

radv-git (some small shadows missing):
https://abload.de/img/screenshot_20190313_1esjw0.png

amdvlk (no shadows missing):
https://abload.de/img/screenshot_20190313_18djlz.png

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/9] anv: Added support for non-dynamic sample locations on Gen8+

2019-03-13 Thread Eleni Maria Stea
On Wed, 13 Mar 2019 08:16:10 -0500
Jason Ekstrand  wrote:

> On Mon, Mar 11, 2019 at 10:05 AM Eleni Maria Stea 
> wrote:
> 
> > Allowing the user to set custom sample locations non-dynamically, by
> > filling the extension structs and chaining them to the pipeline
> > structs according to the Vulkan specification section [26.5. Custom
> > Sample Locations]

[...]

> > +void
> > +anv_calc_sample_locations(struct anv_sample *samples,
> > +  uint32_t num_samples,
> > +  const VkSampleLocationsInfoEXT *info)
> > +{
> > +   int i;
> > +
> > +   for(i = 0; i < num_samples; i++) {
> > +  float dx, dy;
> > +
> > +  /* this is because the grid is 1x1, in case that
> > +   * we support different grid sizes in the future
> > +   * this must be changed.
> > +   */
> > +  samples[i].offs_x = info->pSampleLocations[i].x;
> > +  samples[i].offs_y = info->pSampleLocations[i].y;
> > +
> > +  /* distance from the center */
> > +  dx = samples[i].offs_x - 0.5;
> > +  dy = samples[i].offs_y - 0.5;
> > +
> > +  samples[i].radius = dx * dx + dy * dy;
> > +   }
> > +
> > +   qsort(samples, num_samples, sizeof *samples, compare_samples);
> >  
> 
> Are we allowed to re-order the samples like this?  The spec says:
> 
> The sample location for sample i at the pixel grid location (x,y) is
> taken from pSampleLocations[(x + y * sampleLocationGridSize.width) *
> sampleLocationsPerPixel + i]
> 
> Which leads me to think that they expect the ordering of samples to be
> respected.  Yes, I know the HW docs say we're supposed to order them
> from nearest to furthest.  However, AFAIK, that's only so we get nice
> centroids and I don't know that it's actually required.
> 
> --Jason

I wasn't sure about this to be honest. I could remove the qsort and
explain why we decided to ignore the PRM in a comment for the case that
someone decides to put this back in the future.

Thanks a lot for reviewing the series, BTW. I am working on the
changes for all patches.

Eleni

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] RFC: Workaround for pthread_setaffinity_np() seccomp filtering

2019-03-13 Thread Marc-André Lureau
Hi

On Wed, Mar 13, 2019 at 8:53 AM Mathias Fröhlich
 wrote:
>
> Marek, Marc-Andre,
>
> On Wednesday, 13 March 2019 00:03:26 CET Marek Olšák wrote:
> > The env var workaround is fine.
> >
> > Thread affinity is used for cache topology related optimizations. I think
> > it's a mistake to treat it only as a resource allocation tool.
>
> For a shorter term solution to the problem.
> One Idea that comes into my mind:
>
> Can we check the currently set thread affinity mask if it still contains the
> cpu we are aiming for and narrow the mask down to our cpu if we can do
> that by narrowing. If we would need to assign our thread to a cpu that
> we are not bound anymore just do nothing.
>

getaffinity() is also blocked by current qemu policy.

It should be possible to allow a narrower setaffinity(), with some complex rule.

> That would obviously require that we can still call into 
> pthread_setaffinity_np
> without being just killed straight away because we touch something that
> somebody else wants to control. And that we even succeed in just narrowing
> down the allowed set of cpus.
> Marc-Andre, would that still work with qemu then?

For now, Daniel proposed "seccomp: don't kill process for resource
control syscalls": the resource control syscalls will return -1/EPERM.

>
> Of course this still leaves a small race condition open if somebody changes 
> the
> affinity mask of the current thread in between our call to 
> pthread_getaffinity_np
> and pthread_setaffinity_np from the outside of our linux task. Then we may
> experience a non narrowing set affinity operation anymore because of an other 
> set
> operation that came in between and we may get killed then.
> ... which is an other argument against just killing. But ok ...
> IMO this condition happens sufficiently seldom to accept that.
>
> Could that solve our problem??
>
> best
> Mathias
>
>
> >
> > Marek
> >
> > On Tue, Mar 12, 2019, 1:59 AM Marc-André Lureau 
> > wrote:
> >
> > > Hi
> > >
> > > On Fri, Mar 1, 2019 at 12:13 PM Mathias Fröhlich
> > >  wrote:
> > > >
> > > > On Friday, 1 March 2019 12:15:08 CET Eero Tamminen wrote:
> > > > > Hi,
> > > > >
> > > > > On 1.3.2019 11.12, Michel Dänzer wrote:
> > > > > > On 2019-02-28 8:41 p.m., Marek Olšák wrote:
> > > > > >>> On Thu, Feb 28, 2019 at 1:37 PM Eero Tamminen <
> > > eero.t.tammi...@intel.com>
> > > > >  Why distro versions of Qemu filter sched_setaffinity() syscall?
> > > > > >>>
> > > > > >>> (https://bugs.launchpad.net/ubuntu/+source/qemu/+bug/1815889)
> > > > > >>>
> > > > > >>> Daniel Berrange (berrange) wrote on 2019-02-27: #19
> > > > > >>>
> > > > > >>> "IMHO that mesa change is not valid. It is settings its affinity 
> > > > > >>> to
> > > > > >>> run on all threads which is definitely *NOT* something we want to
> > > be
> > > > > >>> allowed. Management applications want to control which CPUs QEMU
> > > runs
> > > > > >>> on, and as such Mesa should honour the CPU placement that the QEMU
> > > > > >>> process has.
> > > > > >>>
> > > > > >>> This is a great example of why QEMU wants to use seccomp to block
> > > > > >>> affinity changes to prevent something silently trying to use more
> > > CPUs
> > > > > >>> than are assigned to this QEMU."
> > > > > >>>
> > > > > >>
> > > > > >> Mesa uses thread affinity to optimize memory access performance on
> > > some
> > > > > >> CPUs (see util_pin_thread_to_L3). Other places in Mesa need to
> > > restore the
> > > > > >> original thread affinity for some child threads. Additionally, if
> > > games
> > > > > >> limit the thread affinity, Mesa needs to restore the full thread
> > > affinity
> > > > > >> for some of its child threads.
> > > > > >
> > > > > > The last part sounds like Mesa clearly overstepping its authority.
> > > > > >
> > > > > >
> > > > > >> In essence, the thread affinity should only be considered a hint
> > > for the
> > > > > >> kernel for optimal performance. There is no reason to kill the
> > > process if
> > > > > >> it's disallowed. Just ignore the call or modify the thread mask to
> > > make it
> > > > > >> legal.
> > > > > >
> > > > > > The fundamental issue here is that Mesa is using the thread affinity
> > > API
> > > > > > for something else than it's intended for. If there was an API for
> > > what
> > > > > > Mesa wants (encouraging certain sets of threads to run on
> > > topologically
> > > > > > close cores), there should be no need to block that.
> > > > >
> > > > > Why such process needs to be killed instead the request being masked
> > > > > suitably, is there some program that breaks subtly if affinity request
> > > > > is masked (and that being worse than the program being killed)?
> > > >
> > > > But that is still a situation that could be nicely handled with a
> > > > EPERM error return. Way better than just kill a process.
> > > > That 'badly affected' program still can call abort then.
> > > > But nicely working programs don't get just killed then!!
> > >
> > >
> > > Returning an error seems less 

Re: [Mesa-dev] [PATCH 4/9] anv: Added support for non-dynamic sample locations on Gen8+

2019-03-13 Thread Jason Ekstrand
On Mon, Mar 11, 2019 at 10:05 AM Eleni Maria Stea  wrote:

> Allowing the user to set custom sample locations non-dynamically, by
> filling the extension structs and chaining them to the pipeline structs
> according to the Vulkan specification section [26.5. Custom Sample
> Locations]
> for the following structures:
>
> 'VkPipelineSampleLocationsStateCreateInfoEXT'
> 'VkSampleLocationsInfoEXT'
> 'VkSampleLocationEXT'
>
> Once custom locations are used, the default locations are lost and need to
> be
> re-emitted again in the next pipeline creation. For that, we emit the
> 3DSTATE_SAMPLE_PATTERN at every pipeline creation.
> ---
>  src/intel/common/gen_sample_positions.h | 53 
>  src/intel/vulkan/anv_genX.h |  5 ++
>  src/intel/vulkan/anv_private.h  |  9 +++
>  src/intel/vulkan/anv_sample_locations.c | 38 +++-
>  src/intel/vulkan/anv_sample_locations.h | 29 +
>  src/intel/vulkan/genX_pipeline.c| 80 +
>  src/intel/vulkan/genX_state.c   | 59 ++
>  7 files changed, 259 insertions(+), 14 deletions(-)
>  create mode 100644 src/intel/vulkan/anv_sample_locations.h
>
> diff --git a/src/intel/common/gen_sample_positions.h
> b/src/intel/common/gen_sample_positions.h
> index da48dcb5ed0..e8af2a552dc 100644
> --- a/src/intel/common/gen_sample_positions.h
> +++ b/src/intel/common/gen_sample_positions.h
> @@ -160,4 +160,57 @@ prefix##14YOffset  = 0.9375; \
>  prefix##15XOffset  = 0.0625; \
>  prefix##15YOffset  = 0.;
>
> +/* Examples:
> + * in case of GEN_GEN < 8:
> + * SET_SAMPLE_POS(ms.Sample, 0); expands to:
> + *ms.Sample0XOffset = anv_samples[0].offs_x;
> + *ms.Sample0YOffset = anv_samples[0].offs_y;
> + *
> + * in case of GEN_GEN >= 8:
> + * SET_SAMPLE_POS(sp._16xSample, 0); expands to:
> + *sp._16xSample0XOffset = anv_samples[0].offs_x;
> + *sp._16xSample0YOffset = anv_samples[0].offs_y;
> + */
> +#define SET_SAMPLE_POS(prefix, sample_idx) \
> +prefix##sample_idx##XOffset = anv_samples[sample_idx].offs_x; \
> +prefix##sample_idx##YOffset = anv_samples[sample_idx].offs_y;
> +
> +#define SET_SAMPLE_POS_2X(prefix) \
> +SET_SAMPLE_POS(prefix, 0); \
> +SET_SAMPLE_POS(prefix, 1);
> +
> +#define SET_SAMPLE_POS_4X(prefix) \
> +SET_SAMPLE_POS(prefix, 0); \
> +SET_SAMPLE_POS(prefix, 1); \
> +SET_SAMPLE_POS(prefix, 2); \
> +SET_SAMPLE_POS(prefix, 3);
> +
> +#define SET_SAMPLE_POS_8X(prefix) \
> +SET_SAMPLE_POS(prefix, 0); \
> +SET_SAMPLE_POS(prefix, 1); \
> +SET_SAMPLE_POS(prefix, 2); \
> +SET_SAMPLE_POS(prefix, 3); \
> +SET_SAMPLE_POS(prefix, 4); \
> +SET_SAMPLE_POS(prefix, 5); \
> +SET_SAMPLE_POS(prefix, 6); \
> +SET_SAMPLE_POS(prefix, 7);
> +
> +#define SET_SAMPLE_POS_16X(prefix) \
> +SET_SAMPLE_POS(prefix, 0); \
> +SET_SAMPLE_POS(prefix, 1); \
> +SET_SAMPLE_POS(prefix, 2); \
> +SET_SAMPLE_POS(prefix, 3); \
> +SET_SAMPLE_POS(prefix, 4); \
> +SET_SAMPLE_POS(prefix, 5); \
> +SET_SAMPLE_POS(prefix, 6); \
> +SET_SAMPLE_POS(prefix, 7); \
> +SET_SAMPLE_POS(prefix, 8); \
> +SET_SAMPLE_POS(prefix, 9); \
> +SET_SAMPLE_POS(prefix, 10); \
> +SET_SAMPLE_POS(prefix, 11); \
> +SET_SAMPLE_POS(prefix, 12); \
> +SET_SAMPLE_POS(prefix, 13); \
> +SET_SAMPLE_POS(prefix, 14); \
> +SET_SAMPLE_POS(prefix, 15);
> +
>  #endif /* GEN_SAMPLE_POSITIONS_H */
> diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
> index 8fd32cabf1e..52415c04a45 100644
> --- a/src/intel/vulkan/anv_genX.h
> +++ b/src/intel/vulkan/anv_genX.h
> @@ -88,3 +88,8 @@ void genX(cmd_buffer_mi_memset)(struct anv_cmd_buffer
> *cmd_buffer,
>
>  void genX(blorp_exec)(struct blorp_batch *batch,
>const struct blorp_params *params);
> +
> +void genX(emit_sample_locations)(struct anv_batch *batch,
> + uint32_t num_samples,
> + const VkSampleLocationsInfoEXT *sl,
> + bool custom_locations);
> diff --git a/src/intel/vulkan/anv_private.h
> b/src/intel/vulkan/anv_private.h
> index 5905299e59d..981956e5706 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -71,6 +71,7 @@ struct anv_buffer;
>  struct anv_buffer_view;
>  struct anv_image_view;
>  struct anv_instance;
> +struct anv_sample;
>
>  struct gen_l3_config;
>
> @@ -165,6 +166,7 @@ struct gen_l3_config;
>  #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
>  #define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096
>  #define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32
> +#define MAX_SAMPLE_LOCATIONS 16
>
>  /* The kernel relocation API has a limitation of a 32-bit delta value
>   * applied to the address before it is written which, in spite of it being
> @@ -2086,6 +2088,13 @@ struct anv_push_constants {
> struct brw_image_param images[MAX_GEN8_IMAGES];
>  };
>
> +struct
> +anv_sample {
> +   float offs_x;
> +   float offs_y;
> +   float radius;
> +};
> +
>  struct anv_dynamic_state {
> struct {
>uint32_t  

Re: [Mesa-dev] [PATCH 4/9] anv: Added support for non-dynamic sample locations on Gen8+

2019-03-13 Thread Jason Ekstrand
On Mon, Mar 11, 2019 at 10:05 AM Eleni Maria Stea  wrote:

> Allowing the user to set custom sample locations non-dynamically, by
> filling the extension structs and chaining them to the pipeline structs
> according to the Vulkan specification section [26.5. Custom Sample
> Locations]
> for the following structures:
>
> 'VkPipelineSampleLocationsStateCreateInfoEXT'
> 'VkSampleLocationsInfoEXT'
> 'VkSampleLocationEXT'
>
> Once custom locations are used, the default locations are lost and need to
> be
> re-emitted again in the next pipeline creation. For that, we emit the
> 3DSTATE_SAMPLE_PATTERN at every pipeline creation.
> ---
>  src/intel/common/gen_sample_positions.h | 53 
>  src/intel/vulkan/anv_genX.h |  5 ++
>  src/intel/vulkan/anv_private.h  |  9 +++
>  src/intel/vulkan/anv_sample_locations.c | 38 +++-
>  src/intel/vulkan/anv_sample_locations.h | 29 +
>  src/intel/vulkan/genX_pipeline.c| 80 +
>  src/intel/vulkan/genX_state.c   | 59 ++
>  7 files changed, 259 insertions(+), 14 deletions(-)
>  create mode 100644 src/intel/vulkan/anv_sample_locations.h
>
> diff --git a/src/intel/common/gen_sample_positions.h
> b/src/intel/common/gen_sample_positions.h
> index da48dcb5ed0..e8af2a552dc 100644
> --- a/src/intel/common/gen_sample_positions.h
> +++ b/src/intel/common/gen_sample_positions.h
> @@ -160,4 +160,57 @@ prefix##14YOffset  = 0.9375; \
>  prefix##15XOffset  = 0.0625; \
>  prefix##15YOffset  = 0.;
>
> +/* Examples:
> + * in case of GEN_GEN < 8:
> + * SET_SAMPLE_POS(ms.Sample, 0); expands to:
> + *ms.Sample0XOffset = anv_samples[0].offs_x;
> + *ms.Sample0YOffset = anv_samples[0].offs_y;
> + *
> + * in case of GEN_GEN >= 8:
> + * SET_SAMPLE_POS(sp._16xSample, 0); expands to:
> + *sp._16xSample0XOffset = anv_samples[0].offs_x;
> + *sp._16xSample0YOffset = anv_samples[0].offs_y;
> + */
> +#define SET_SAMPLE_POS(prefix, sample_idx) \
> +prefix##sample_idx##XOffset = anv_samples[sample_idx].offs_x; \
> +prefix##sample_idx##YOffset = anv_samples[sample_idx].offs_y;
>

I'm not a huge fan of having anv-specific stuff in gen_sample_positions.h
and I also don't think I really like having the array be implicit in the
macro.  Maybe the best thing to do here would be to have a

struct gen_sample_position {
   float x_offset;
   float y_offset;
}

And then

#define GEN_SAMPLE_POS_ELEM(prefix, arr, sample_idx) \
prefix##sample_idx##XOffset = arr[sample_idx].x_offset; \
prefix##sample_idx##YOffset = arr[sample_idx].y_offset;

And

#define GEN_SAMPLE_POS_1X_ARRAY(prefix, arr) \
   SET_SAMPLE_POS_ELEM(prefix, arr, 0)

#define GEN_SAMPLE_POS_2X_ARRAY(prefix, arr) \
   SET_SAMPLE_POS_ELEM(prefix, arr, 0) \
   SET_SAMPLE_POS_ELEM(prefix, arr, 1)

etc.

+
> +#define SET_SAMPLE_POS_2X(prefix) \
> +SET_SAMPLE_POS(prefix, 0); \
> +SET_SAMPLE_POS(prefix, 1);
> +
> +#define SET_SAMPLE_POS_4X(prefix) \
> +SET_SAMPLE_POS(prefix, 0); \
> +SET_SAMPLE_POS(prefix, 1); \
> +SET_SAMPLE_POS(prefix, 2); \
> +SET_SAMPLE_POS(prefix, 3);
> +
> +#define SET_SAMPLE_POS_8X(prefix) \
> +SET_SAMPLE_POS(prefix, 0); \
> +SET_SAMPLE_POS(prefix, 1); \
> +SET_SAMPLE_POS(prefix, 2); \
> +SET_SAMPLE_POS(prefix, 3); \
> +SET_SAMPLE_POS(prefix, 4); \
> +SET_SAMPLE_POS(prefix, 5); \
> +SET_SAMPLE_POS(prefix, 6); \
> +SET_SAMPLE_POS(prefix, 7);
> +
> +#define SET_SAMPLE_POS_16X(prefix) \
> +SET_SAMPLE_POS(prefix, 0); \
> +SET_SAMPLE_POS(prefix, 1); \
> +SET_SAMPLE_POS(prefix, 2); \
> +SET_SAMPLE_POS(prefix, 3); \
> +SET_SAMPLE_POS(prefix, 4); \
> +SET_SAMPLE_POS(prefix, 5); \
> +SET_SAMPLE_POS(prefix, 6); \
> +SET_SAMPLE_POS(prefix, 7); \
> +SET_SAMPLE_POS(prefix, 8); \
> +SET_SAMPLE_POS(prefix, 9); \
> +SET_SAMPLE_POS(prefix, 10); \
> +SET_SAMPLE_POS(prefix, 11); \
> +SET_SAMPLE_POS(prefix, 12); \
> +SET_SAMPLE_POS(prefix, 13); \
> +SET_SAMPLE_POS(prefix, 14); \
> +SET_SAMPLE_POS(prefix, 15);
> +
>  #endif /* GEN_SAMPLE_POSITIONS_H */
> diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
> index 8fd32cabf1e..52415c04a45 100644
> --- a/src/intel/vulkan/anv_genX.h
> +++ b/src/intel/vulkan/anv_genX.h
> @@ -88,3 +88,8 @@ void genX(cmd_buffer_mi_memset)(struct anv_cmd_buffer
> *cmd_buffer,
>
>  void genX(blorp_exec)(struct blorp_batch *batch,
>const struct blorp_params *params);
> +
> +void genX(emit_sample_locations)(struct anv_batch *batch,
> + uint32_t num_samples,
> + const VkSampleLocationsInfoEXT *sl,
> + bool custom_locations);
> diff --git a/src/intel/vulkan/anv_private.h
> b/src/intel/vulkan/anv_private.h
> index 5905299e59d..981956e5706 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -71,6 +71,7 @@ struct anv_buffer;
>  struct anv_buffer_view;
>  struct anv_image_view;
>  struct anv_instance;
> +struct anv_sample;
>
>  struct gen_l3_config;
>
> 

[Mesa-dev] [PATCH 2/2] ac: use the raw tbuffer version when for 16-bit SSBO loads

2019-03-13 Thread Samuel Pitoiset
vindex is always 0.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c  | 6 +++---
 src/amd/common/ac_llvm_build.h  | 1 -
 src/amd/common/ac_nir_to_llvm.c | 2 --
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 2d873d79102..0cab4d4a9b5 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1494,7 +1494,6 @@ ac_build_raw_tbuffer_load(struct ac_llvm_context *ctx,
 LLVMValueRef
 ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
-   LLVMValueRef vindex,
LLVMValueRef voffset,
LLVMValueRef soffset,
LLVMValueRef immoffset,
@@ -1504,8 +1503,9 @@ ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
LLVMValueRef res;
 
-   res = ac_build_tbuffer_load(ctx, rsrc, vindex, voffset, soffset,
-   immoffset, 1, dfmt, nfmt, glc, false, 
false, true);
+   res = ac_build_raw_tbuffer_load(ctx, rsrc, voffset, soffset,
+   immoffset, 1, dfmt, nfmt, glc, false,
+   false);
 
return LLVMBuildTrunc(ctx->builder, res, ctx->i16, "");
 }
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index c6475320990..55068169a34 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -300,7 +300,6 @@ LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct 
ac_llvm_context *ctx,
 LLVMValueRef
 ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
-   LLVMValueRef vindex,
LLVMValueRef voffset,
LLVMValueRef soffset,
LLVMValueRef immoffset,
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index a7b3fdf64aa..18297ed99b1 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1712,7 +1712,6 @@ static LLVMValueRef visit_load_buffer(struct 
ac_nir_context *ctx,
if (load_bytes == 2) {
ret = ac_build_tbuffer_load_short(>ac,
  rsrc,
- vindex,
  offset,
  ctx->ac.i32_0,
  immoffset,
@@ -1783,7 +1782,6 @@ static LLVMValueRef visit_load_ubo_buffer(struct 
ac_nir_context *ctx,
for (unsigned i = 0; i < num_components; ++i) {
results[i] = ac_build_tbuffer_load_short(>ac,
 rsrc,
-ctx->ac.i32_0,
 offset,
 ctx->ac.i32_0,
 
LLVMConstInt(ctx->ac.i32, 2 * i, 0),
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] ac: add ac_build_{struct, raw}_tbuffer_load() helpers

2019-03-13 Thread Samuel Pitoiset
The struct version sets IDXEN=1, while the raw version sets IDXEN=0.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c| 48 +++
 src/amd/common/ac_llvm_build.h| 37 
 src/amd/vulkan/radv_nir_to_llvm.c | 13 +
 3 files changed, 75 insertions(+), 23 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index b9eaf26f603..2d873d79102 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1407,7 +1407,7 @@ ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx,
  ac_get_load_intr_attribs(can_speculate));
 }
 
-LLVMValueRef
+static LLVMValueRef
 ac_build_tbuffer_load(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
LLVMValueRef vindex,
@@ -1419,7 +1419,8 @@ ac_build_tbuffer_load(struct ac_llvm_context *ctx,
unsigned nfmt,
bool glc,
bool slc,
-   bool can_speculate)
+   bool can_speculate,
+   bool structurized) /* only matters for LLVM 8+ */
 {
if (HAVE_LLVM >= 0x800) {
voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, "");
@@ -1427,12 +1428,12 @@ ac_build_tbuffer_load(struct ac_llvm_context *ctx,
return ac_build_llvm8_tbuffer_load(ctx, rsrc, vindex, voffset,
   soffset, num_channels,
   dfmt, nfmt, glc, slc,
-  can_speculate, true);
+  can_speculate, structurized);
}
 
LLVMValueRef args[] = {
rsrc,
-   vindex,
+   vindex ? vindex : ctx->i32_0,
voffset,
soffset,
immoffset,
@@ -1453,6 +1454,43 @@ ac_build_tbuffer_load(struct ac_llvm_context *ctx,
  ac_get_load_intr_attribs(can_speculate));
 }
 
+LLVMValueRef
+ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx,
+LLVMValueRef rsrc,
+LLVMValueRef vindex,
+LLVMValueRef voffset,
+LLVMValueRef soffset,
+LLVMValueRef immoffset,
+unsigned num_channels,
+unsigned dfmt,
+unsigned nfmt,
+bool glc,
+bool slc,
+bool can_speculate)
+{
+   return ac_build_tbuffer_load(ctx, rsrc, vindex, voffset, soffset,
+immoffset, num_channels, dfmt, nfmt, glc,
+slc, can_speculate, true);
+}
+
+LLVMValueRef
+ac_build_raw_tbuffer_load(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ unsigned num_channels,
+ unsigned dfmt,
+ unsigned nfmt,
+ bool glc,
+ bool slc,
+ bool can_speculate)
+{
+   return ac_build_tbuffer_load(ctx, rsrc, NULL, voffset, soffset,
+immoffset, num_channels, dfmt, nfmt, glc,
+slc, can_speculate, false);
+}
+
 LLVMValueRef
 ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
@@ -1467,7 +1505,7 @@ ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
LLVMValueRef res;
 
res = ac_build_tbuffer_load(ctx, rsrc, vindex, voffset, soffset,
-   immoffset, 1, dfmt, nfmt, glc, false, 
false);
+   immoffset, 1, dfmt, nfmt, glc, false, 
false, true);
 
return LLVMBuildTrunc(ctx->builder, res, ctx->i16, "");
 }
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index d746c864229..c6475320990 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -307,18 +307,31 @@ ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
bool glc);
 
 LLVMValueRef
-ac_build_tbuffer_load(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned num_channels,
- unsigned dfmt,
- unsigned nfmt,
-

Re: [Mesa-dev] [PATCH v2 2/9] anv: Set the values for the VkPhysicalDeviceSampleLocationsPropertiesEXT

2019-03-13 Thread Jason Ekstrand
On Tue, Mar 12, 2019 at 5:35 AM Eleni Maria Stea  wrote:

> The VkPhysicalDeviceSampleLocationPropertiesEXT struct is filled with
> implementation dependent values and according to the table from the
> Vulkan Specification section [36.1. Limit Requirements]:
>
> pname | max | min
> pname:sampleLocationSampleCounts   |-
> |ename:VK_SAMPLE_COUNT_4_BIT
> pname:maxSampleLocationGridSize|-|(1, 1)
> pname:sampleLocationCoordinateRange|(0.0, 0.9375)|(0.0, 0.9375)
> pname:sampleLocationSubPixelBits   |-|4
> pname:variableSampleLocations  | false   |implementation dependent
>
> The hardware only supports setting the same sample location for all the
> pixels, so we only support 1x1 grids.
>
> Also, variableSampleLocations is set to false because we don't support the
> feature.
>
> v2: 1- Replaced false with VK_FALSE for consistency. (Sagar Ghuge)
> 2- Used the isl_device_sample_count to take the number of samples
> per platform to avoid extra checks. (Sagar Ghuge)
>
> Reviewed-by: Sagar Ghuge 
> ---
>  src/intel/vulkan/anv_device.c  | 19 +++
>  src/intel/vulkan/anv_private.h |  3 +++
>  2 files changed, 22 insertions(+)
>
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index 729cceb3e32..bf6f03ebb1a 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -1401,6 +1401,25 @@ void anv_GetPhysicalDeviceProperties2(
>   break;
>}
>
> +  case
> VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
> + VkPhysicalDeviceSampleLocationsPropertiesEXT *props =
> +(VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
> +
> + props->sampleLocationSampleCounts =
> +isl_device_get_sample_counts(>isl_dev);
> +
> + props->maxSampleLocationGridSize.width = SAMPLE_LOC_GRID_W;
> + props->maxSampleLocationGridSize.height = SAMPLE_LOC_GRID_H;
>

Here you have #defines which you use for the min/max but in a later patch,
you just hard-code 1.  I'm not sure the #defines are gaining us anything.
Why not just put in 1 and add a quick comment:

/* See also anv_GetPhysicalDeviceMultisamplePropertiesEXT */


> +
> + props->sampleLocationCoordinateRange[0] = 0;
> + props->sampleLocationCoordinateRange[1] = 0.9375;
> + props->sampleLocationSubPixelBits = 4;
> +
> + props->variableSampleLocations = VK_FALSE;
>

I just sent out a patch series to kill VK_TRUE/FALSE.  Sagar, sorry to
contradict.


> +
> + break;
> +  }
> +
>default:
>   anv_debug_ignored_stype(ext->sType);
>   break;
> diff --git a/src/intel/vulkan/anv_private.h
> b/src/intel/vulkan/anv_private.h
> index eed282ff985..5905299e59d 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -195,6 +195,9 @@ struct gen_l3_config;
>
>  #define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
>
> +#define SAMPLE_LOC_GRID_W 1
> +#define SAMPLE_LOC_GRID_H 1
> +
>  static inline uint32_t
>  align_down_npot_u32(uint32_t v, uint32_t a)
>  {
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] RFC: Workaround for pthread_setaffinity_np() seccomp filtering

2019-03-13 Thread Eero Tamminen

Hi,

On 12.3.2019 10.59, Marc-André Lureau wrote:

On Fri, Mar 1, 2019 at 12:13 PM Mathias Fröhlich
 wrote:

On Friday, 1 March 2019 12:15:08 CET Eero Tamminen wrote:

On 1.3.2019 11.12, Michel Dänzer wrote:

On 2019-02-28 8:41 p.m., Marek Olšák wrote:

On Thu, Feb 28, 2019 at 1:37 PM Eero Tamminen 

Why do distro versions of Qemu filter the sched_setaffinity() syscall?


(https://bugs.launchpad.net/ubuntu/+source/qemu/+bug/1815889)

Daniel Berrange (berrange) wrote on 2019-02-27: #19

"IMHO that mesa change is not valid. It is settings its affinity to
run on all threads which is definitely *NOT* something we want to be
allowed. Management applications want to control which CPUs QEMU runs
on, and as such Mesa should honour the CPU placement that the QEMU
process has.

This is a great example of why QEMU wants to use seccomp to block
affinity changes to prevent something silently trying to use more CPUs
than are assigned to this QEMU."



Mesa uses thread affinity to optimize memory access performance on some
CPUs (see util_pin_thread_to_L3). Other places in Mesa need to restore the
original thread affinity for some child threads. Additionally, if games
limit the thread affinity, Mesa needs to restore the full thread affinity
for some of its child threads.


The last part sounds like Mesa clearly overstepping its authority.



In essence, the thread affinity should only be considered a hint for the
kernel for optimal performance. There is no reason to kill the process if
it's disallowed. Just ignore the call or modify the thread mask to make it
legal.


The fundamental issue here is that Mesa is using the thread affinity API
for something else than it's intended for. If there was an API for what
Mesa wants (encouraging certain sets of threads to run on topologically
close cores), there should be no need to block that.


Why does such a process need to be killed instead of the request being
masked suitably? Is there some program that breaks subtly if the affinity
request is masked (and is that worse than the program being killed)?


But that is still a situation that could be nicely handled with an
EPERM error return. Way better than just killing a process.
That 'badly affected' program still can call abort then.
But nicely working programs don't get just killed then!!



Returning an error seems less secure than prohibiting it completely.
And it may lead to subtle bugs in rarely tested code paths.

It's legitimate that QEMU and management layers want to prevent
arbitrary code from changing resource allocation etc.


They can do that by no-oping the system call, or masking the parts they 
don't want to be modified.  As that affects only (potentially) 
performance, not functionality, it seems to me better than outright 
killing a process.


(As with killing, there should probably be some way to log things that 
were ignored/masked.)




There is no easy way I can think of for mesa (and other libraries) to
probe the seccomp filters and associated action.

So we need a way to tell mesa not to call setaffinity() (and other
syscalls). MESA_NO_THREAD_AFFINITY or MESA_NO_SYSCALLS=setaffinity,...
seem like a relatively easy way to go.



- Eero

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v3 3/9] anv: Implemented the vkGetPhysicalDeviceMultisamplePropertiesEXT

2019-03-13 Thread Jason Ekstrand
This doesn't need to be in its own file. Just put it in anv_device.c
after all the other physical device queries.


On March 13, 2019 06:01:33 Eleni Maria Stea  wrote:


Implemented the vkGetPhysicalDeviceMultisamplePropertiesEXT according to
the Vulkan Specification section [36.2. Additional Multisampling
Capabilities].
---
src/intel/Makefile.sources  |  1 +
src/intel/vulkan/anv_sample_locations.c | 60 +
src/intel/vulkan/meson.build|  1 +
3 files changed, 62 insertions(+)
create mode 100644 src/intel/vulkan/anv_sample_locations.c


diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources
index a5c8828a6b6..a0873c7ccc2 100644
--- a/src/intel/Makefile.sources
+++ b/src/intel/Makefile.sources
@@ -251,6 +251,7 @@ VULKAN_FILES := \
 vulkan/anv_pipeline_cache.c \
 vulkan/anv_private.h \
 vulkan/anv_queue.c \
+ vulkan/anv_sample_locations.c \
 vulkan/anv_util.c \
 vulkan/anv_wsi.c \
 vulkan/vk_format_info.h
diff --git a/src/intel/vulkan/anv_sample_locations.c 
b/src/intel/vulkan/anv_sample_locations.c

new file mode 100644
index 000..1ebf280e05b
--- /dev/null
+++ b/src/intel/vulkan/anv_sample_locations.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
DEALINGS

+ * IN THE SOFTWARE.
+ */
+
+#include "anv_private.h"
+
+void
+anv_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,
+  VkSampleCountFlagBits samples,
+  VkMultisamplePropertiesEXT
+  *pMultisampleProperties)
+{
+   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
+   const struct gen_device_info *devinfo = _device->info;
+
+   VkExtent2D grid_size;
+   switch (samples) {
+   case VK_SAMPLE_COUNT_2_BIT:
+   case VK_SAMPLE_COUNT_4_BIT:
+   case VK_SAMPLE_COUNT_8_BIT:
+  grid_size.width = SAMPLE_LOC_GRID_W;
+  grid_size.height = SAMPLE_LOC_GRID_H;
+  break;
+
+   case VK_SAMPLE_COUNT_16_BIT:
+  if (devinfo->gen >= 9) {
+ grid_size.width = SAMPLE_LOC_GRID_W;
+ grid_size.height = SAMPLE_LOC_GRID_H;
+ break;
+  }


You could also just do

if (samples & isl_get_sample_counts(>isl_dev)) {
  grid_size.width = 1;
  grid_size.height = 1;
} else {
  grid_size.width = 0;
  grid_size.height = 0;
}


+   default:
+  grid_size.width = grid_size.height = 0;
+  break;
+   };
+
+   *pMultisampleProperties = (VkMultisamplePropertiesEXT) {
+  .sType = VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT,
+  .pNext = NULL,
+  .maxSampleLocationGridSize = grid_size



You can't just assign to the whole struct like this because it will mess up 
the pNext chain.  Speaking of the pNext chain, we should add a pNext 
walking loop at the end which calls ignored_stype on everything.




+   };
+}
diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build
index 7fa43a6ad79..3f78757c774 100644
--- a/src/intel/vulkan/meson.build
+++ b/src/intel/vulkan/meson.build
@@ -135,6 +135,7 @@ libanv_files = files(
  'anv_pipeline_cache.c',
  'anv_private.h',
  'anv_queue.c',
+  'anv_sample_locations.c',
  'anv_util.c',
  'anv_wsi.c',
  'vk_format_info.h',
--
2.20.1


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v3 8/9] anv: Removed unused header file

2019-03-13 Thread Jason Ekstrand

Rb

On March 13, 2019 06:01:53 Eleni Maria Stea  wrote:


In src/intel/vulkan/genX_blorp_exec.c we included the file:
common/gen_sample_positions.h but did not use it. Removed.

Reviewed-by: Sagar Ghuge 
---
src/intel/vulkan/genX_blorp_exec.c | 1 -
1 file changed, 1 deletion(-)

diff --git a/src/intel/vulkan/genX_blorp_exec.c 
b/src/intel/vulkan/genX_blorp_exec.c

index e9c85d56d5f..0eeefaaa9d6 100644
--- a/src/intel/vulkan/genX_blorp_exec.c
+++ b/src/intel/vulkan/genX_blorp_exec.c
@@ -31,7 +31,6 @@
#undef __gen_combine_address

#include "common/gen_l3_config.h"
-#include "common/gen_sample_positions.h"
#include "blorp/blorp_genX_exec.h"

static void *
--
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] anv: Stop using VK_TRUE/FALSE

2019-03-13 Thread Lionel Landwerlin

Reviewed-by: Lionel Landwerlin 

On 12/03/2019 20:24, Jason Ekstrand wrote:

We've been fairly inconsistent about this so we should really choose
whether we're going to use VK_TRUE/FALSE or the C boolean values.  The
Vulkan #defines are set to 1 and 0 respectively so it's the same value
as C gives you when you cast a boolean expression to an integer.  Since
there are several places where we set a VkBool32 to a C logical
expression, let's just embrace C booleans and stop using the VK defines.
---
  src/intel/vulkan/anv_device.c | 42 +--
  1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 729cceb3e32..83fa3936c19 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -833,7 +833,7 @@ VkResult anv_EnumeratePhysicalDeviceGroups(
memset(p->physicalDevices, 0, sizeof(p->physicalDevices));
p->physicalDevices[0] =
   anv_physical_device_to_handle(>physicalDevice);
-  p->subsetAllocation = VK_FALSE;
+  p->subsetAllocation = false;
  
vk_foreach_struct(ext, p->pNext)

   anv_debug_ignored_stype(ext->sType);
@@ -967,7 +967,7 @@ void anv_GetPhysicalDeviceFeatures2(
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
   VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
  (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
- features->depthClipEnable = VK_TRUE;
+ features->depthClipEnable = true;
   break;
}
  
@@ -990,7 +990,7 @@ void anv_GetPhysicalDeviceFeatures2(
  
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {

   VkPhysicalDeviceProtectedMemoryFeatures *features = (void *)ext;
- features->protectedMemory = VK_FALSE;
+ features->protectedMemory = false;
   break;
}
  
@@ -1024,23 +1024,23 @@ void anv_GetPhysicalDeviceFeatures2(

case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: 
{
   VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
  (VkPhysicalDeviceTransformFeedbackFeaturesEXT *)ext;
- features->transformFeedback = VK_TRUE;
- features->geometryStreams = VK_TRUE;
+ features->transformFeedback = true;
+ features->geometryStreams = true;
   break;
}
  
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {

   VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
  (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
- features->vertexAttributeInstanceRateDivisor = VK_TRUE;
- features->vertexAttributeInstanceRateZeroDivisor = VK_TRUE;
+ features->vertexAttributeInstanceRateDivisor = true;
+ features->vertexAttributeInstanceRateZeroDivisor = true;
   break;
}
  
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: {

   VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *features =
  (VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *)ext;
- features->ycbcrImageArrays = VK_TRUE;
+ features->ycbcrImageArrays = true;
   break;
}
  
@@ -1234,8 +1234,8 @@ void anv_GetPhysicalDeviceProperties2(

 VK_RESOLVE_MODE_MAX_BIT_KHR;
   }
  
- props->independentResolveNone = VK_TRUE;

- props->independentResolve = VK_TRUE;
+ props->independentResolveNone = true;
+ props->independentResolve = true;
   break;
}
  
@@ -1372,7 +1372,7 @@ void anv_GetPhysicalDeviceProperties2(

 
VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
 VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
 VK_SUBGROUP_FEATURE_QUAD_BIT;
- properties->quadOperationsInAllStages = VK_TRUE;
+ properties->quadOperationsInAllStages = true;
   break;
}
  
@@ -1386,10 +1386,10 @@ void anv_GetPhysicalDeviceProperties2(

   props->maxTransformFeedbackStreamDataSize = 128 * 4;
   props->maxTransformFeedbackBufferDataSize = 128 * 4;
   props->maxTransformFeedbackBufferDataStride = 2048;
- props->transformFeedbackQueries = VK_TRUE;
- props->transformFeedbackStreamsLinesTriangles = VK_FALSE;
- props->transformFeedbackRasterizationStreamSelect = VK_FALSE;
- props->transformFeedbackDraw = VK_TRUE;
+ props->transformFeedbackQueries = true;
+ props->transformFeedbackStreamsLinesTriangles = false;
+ props->transformFeedbackRasterizationStreamSelect = false;
+ props->transformFeedbackDraw = true;
   break;
}
  
@@ -2961,8 +2961,8 @@ void anv_GetBufferMemoryRequirements2(

switch (ext->sType) {
case 

[Mesa-dev] [PATCH v3 9/9] anv: Enabled the VK_EXT_sample_locations extension

2019-03-13 Thread Eleni Maria Stea
Enabled the VK_EXT_sample_locations for Intel Gen >= 7.

v2: Replaced device.info->gen >= 7 with True, as Anv doesn't support
anything below Gen7. (Lionel Landwerlin)

Reviewed-by: Sagar Ghuge 
---
 src/intel/vulkan/anv_extensions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/vulkan/anv_extensions.py 
b/src/intel/vulkan/anv_extensions.py
index 9e4e03e46df..5a30c733c5c 100644
--- a/src/intel/vulkan/anv_extensions.py
+++ b/src/intel/vulkan/anv_extensions.py
@@ -129,7 +129,7 @@ EXTENSIONS = [
 Extension('VK_EXT_inline_uniform_block',  1, True),
 Extension('VK_EXT_pci_bus_info',  2, True),
 Extension('VK_EXT_post_depth_coverage',   1, 'device->info.gen 
>= 9'),
-Extension('VK_EXT_sample_locations',  1, False),
+Extension('VK_EXT_sample_locations',  1, True),
 Extension('VK_EXT_sampler_filter_minmax', 1, 'device->info.gen 
>= 9'),
 Extension('VK_EXT_scalar_block_layout',   1, True),
 Extension('VK_EXT_shader_stencil_export', 1, 'device->info.gen 
>= 9'),
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 8/9] anv: Removed unused header file

2019-03-13 Thread Eleni Maria Stea
In src/intel/vulkan/genX_blorp_exec.c we included the file:
common/gen_sample_positions.h but did not use it. Removed.

Reviewed-by: Sagar Ghuge 
---
 src/intel/vulkan/genX_blorp_exec.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/intel/vulkan/genX_blorp_exec.c 
b/src/intel/vulkan/genX_blorp_exec.c
index e9c85d56d5f..0eeefaaa9d6 100644
--- a/src/intel/vulkan/genX_blorp_exec.c
+++ b/src/intel/vulkan/genX_blorp_exec.c
@@ -31,7 +31,6 @@
 #undef __gen_combine_address
 
 #include "common/gen_l3_config.h"
-#include "common/gen_sample_positions.h"
 #include "blorp/blorp_genX_exec.h"
 
 static void *
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 7/9] anv: Optimized the emission of the default locations on Gen8+

2019-03-13 Thread Eleni Maria Stea
We only emit sample locations when the extension is enabled by the user.
In all other cases the default locations are emitted once when the device
is initialized to increase performance.
---
 src/intel/vulkan/anv_genX.h|  3 ++-
 src/intel/vulkan/genX_cmd_buffer.c |  2 +-
 src/intel/vulkan/genX_pipeline.c   | 11 +++
 src/intel/vulkan/genX_state.c  |  8 +---
 4 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
index e82d83465ef..7f33a2b0a68 100644
--- a/src/intel/vulkan/anv_genX.h
+++ b/src/intel/vulkan/anv_genX.h
@@ -93,4 +93,5 @@ void genX(emit_ms_state)(struct anv_batch *batch,
  struct anv_sample *anv_samples,
  uint32_t num_samples,
  uint32_t log2_samples,
- bool custom_sample_locations);
+ bool custom_sample_locations,
+ bool sample_locations_ext_enabled);
diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 4752c66f350..ae7c5a80a3c 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -2654,7 +2654,7 @@ cmd_buffer_emit_sample_locations(struct anv_cmd_buffer 
*cmd_buffer)
anv_samples = cmd_buffer->state.gfx.dynamic.sample_locations.anv_samples;
 
genX(emit_ms_state)(_buffer->batch, anv_samples, samples,
-   log2_samples, true);
+   log2_samples, true, true);
 }
 
 void
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index 8afc08f0320..12adfa65da8 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -573,10 +573,12 @@ emit_sample_mask(struct anv_pipeline *pipeline,
 }
 
 static void
-emit_ms_state(struct anv_pipeline *pipeline,
+emit_ms_state(struct anv_device *device,
+  struct anv_pipeline *pipeline,
   const VkPipelineMultisampleStateCreateInfo *info,
   const VkPipelineDynamicStateCreateInfo *dinfo)
 {
+   bool sample_loc_enabled = device->enabled_extensions.EXT_sample_locations;
struct anv_sample anv_samples[MAX_SAMPLE_LOCATIONS];
VkSampleLocationsInfoEXT *sl;
bool custom_locations = false;
@@ -588,7 +590,7 @@ emit_ms_state(struct anv_pipeline *pipeline,
if (info) {
   samples = info->rasterizationSamples;
 
-  if (info->pNext) {
+  if (sample_loc_enabled && info->pNext) {
  VkPipelineSampleLocationsStateCreateInfoEXT *slinfo =
 (VkPipelineSampleLocationsStateCreateInfoEXT *)info->pNext;
 
@@ -617,7 +619,7 @@ emit_ms_state(struct anv_pipeline *pipeline,
}
 
genX(emit_ms_state)(>batch, anv_samples, samples, log2_samples,
-   custom_locations);
+   custom_locations, sample_loc_enabled);
 }
 
 static const uint32_t vk_to_gen_logic_op[] = {
@@ -1947,7 +1949,8 @@ genX(graphics_pipeline_create)(
assert(pCreateInfo->pRasterizationState);
emit_rs_state(pipeline, pCreateInfo->pRasterizationState,
  pCreateInfo->pMultisampleState, pass, subpass);
-   emit_ms_state(pipeline, pCreateInfo->pMultisampleState, 
pCreateInfo->pDynamicState);
+   emit_ms_state(device, pipeline, pCreateInfo->pMultisampleState,
+ pCreateInfo->pDynamicState);
emit_ds_state(pipeline, pCreateInfo->pDepthStencilState, pass, subpass);
emit_cb_state(pipeline, pCreateInfo->pColorBlendState,
pCreateInfo->pMultisampleState);
diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c
index 804cfab3a56..bc6b5870d8d 100644
--- a/src/intel/vulkan/genX_state.c
+++ b/src/intel/vulkan/genX_state.c
@@ -552,12 +552,14 @@ genX(emit_ms_state)(struct anv_batch *batch,
   struct anv_sample *anv_samples,
   uint32_t num_samples,
   uint32_t log2_samples,
-  bool custom_sample_locations)
+  bool custom_sample_locations,
+  bool sample_locations_ext_enabled)
 {
emit_multisample(batch, anv_samples, num_samples, log2_samples,
 custom_sample_locations);
 #if GEN_GEN >= 8
-   emit_sample_locations(batch, anv_samples, num_samples,
- custom_sample_locations);
+   if (sample_locations_ext_enabled)
+  emit_sample_locations(batch, anv_samples, num_samples,
+custom_sample_locations);
 #endif
 }
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 3/9] anv: Implemented the vkGetPhysicalDeviceMultisamplePropertiesEXT

2019-03-13 Thread Eleni Maria Stea
Implemented the vkGetPhysicalDeviceMultisamplePropertiesEXT according to
the Vulkan Specification section [36.2. Additional Multisampling
Capabilities].
---
 src/intel/Makefile.sources  |  1 +
 src/intel/vulkan/anv_sample_locations.c | 60 +
 src/intel/vulkan/meson.build|  1 +
 3 files changed, 62 insertions(+)
 create mode 100644 src/intel/vulkan/anv_sample_locations.c

diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources
index a5c8828a6b6..a0873c7ccc2 100644
--- a/src/intel/Makefile.sources
+++ b/src/intel/Makefile.sources
@@ -251,6 +251,7 @@ VULKAN_FILES := \
vulkan/anv_pipeline_cache.c \
vulkan/anv_private.h \
vulkan/anv_queue.c \
+   vulkan/anv_sample_locations.c \
vulkan/anv_util.c \
vulkan/anv_wsi.c \
vulkan/vk_format_info.h
diff --git a/src/intel/vulkan/anv_sample_locations.c 
b/src/intel/vulkan/anv_sample_locations.c
new file mode 100644
index 000..1ebf280e05b
--- /dev/null
+++ b/src/intel/vulkan/anv_sample_locations.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "anv_private.h"
+
+void
+anv_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,
+  VkSampleCountFlagBits samples,
+  VkMultisamplePropertiesEXT
+  *pMultisampleProperties)
+{
+   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
+   const struct gen_device_info *devinfo = &physical_device->info;
+
+   VkExtent2D grid_size;
+   switch (samples) {
+   case VK_SAMPLE_COUNT_2_BIT:
+   case VK_SAMPLE_COUNT_4_BIT:
+   case VK_SAMPLE_COUNT_8_BIT:
+  grid_size.width = SAMPLE_LOC_GRID_W;
+  grid_size.height = SAMPLE_LOC_GRID_H;
+  break;
+
+   case VK_SAMPLE_COUNT_16_BIT:
+  if (devinfo->gen >= 9) {
+ grid_size.width = SAMPLE_LOC_GRID_W;
+ grid_size.height = SAMPLE_LOC_GRID_H;
+ break;
+  }
+   default:
+  grid_size.width = grid_size.height = 0;
+  break;
+   };
+
+   *pMultisampleProperties = (VkMultisamplePropertiesEXT) {
+  .sType = VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT,
+  .pNext = NULL,
+  .maxSampleLocationGridSize = grid_size
+   };
+}
diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build
index 7fa43a6ad79..3f78757c774 100644
--- a/src/intel/vulkan/meson.build
+++ b/src/intel/vulkan/meson.build
@@ -135,6 +135,7 @@ libanv_files = files(
   'anv_pipeline_cache.c',
   'anv_private.h',
   'anv_queue.c',
+  'anv_sample_locations.c',
   'anv_util.c',
   'anv_wsi.c',
   'vk_format_info.h',
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 1/9] anv: Added the VK_EXT_sample_locations extension to the anv_extensions list

2019-03-13 Thread Eleni Maria Stea
Added the VK_EXT_sample_locations to the anv_extensions.py list to
generate the related entrypoints.

Reviewed-by: Sagar Ghuge 
---
 src/intel/vulkan/anv_extensions.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/intel/vulkan/anv_extensions.py 
b/src/intel/vulkan/anv_extensions.py
index 6fff293dee4..9e4e03e46df 100644
--- a/src/intel/vulkan/anv_extensions.py
+++ b/src/intel/vulkan/anv_extensions.py
@@ -129,6 +129,7 @@ EXTENSIONS = [
 Extension('VK_EXT_inline_uniform_block',  1, True),
 Extension('VK_EXT_pci_bus_info',  2, True),
 Extension('VK_EXT_post_depth_coverage',   1, 'device->info.gen 
>= 9'),
+Extension('VK_EXT_sample_locations',  1, False),
 Extension('VK_EXT_sampler_filter_minmax', 1, 'device->info.gen 
>= 9'),
 Extension('VK_EXT_scalar_block_layout',   1, True),
 Extension('VK_EXT_shader_stencil_export', 1, 'device->info.gen 
>= 9'),
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 6/9] anv: Added support for dynamic and non-dynamic sample locations on Gen7

2019-03-13 Thread Eleni Maria Stea
Allow setting dynamic and non-dynamic sample locations on Gen7.
---
 src/intel/vulkan/anv_genX.h| 13 ++---
 src/intel/vulkan/genX_cmd_buffer.c |  9 ++--
 src/intel/vulkan/genX_pipeline.c   | 13 +
 src/intel/vulkan/genX_state.c  | 86 +-
 4 files changed, 70 insertions(+), 51 deletions(-)

diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
index f84fe457152..e82d83465ef 100644
--- a/src/intel/vulkan/anv_genX.h
+++ b/src/intel/vulkan/anv_genX.h
@@ -89,11 +89,8 @@ void genX(cmd_buffer_mi_memset)(struct anv_cmd_buffer 
*cmd_buffer,
 void genX(blorp_exec)(struct blorp_batch *batch,
   const struct blorp_params *params);
 
-void genX(emit_multisample)(struct anv_batch *batch,
-uint32_t samples,
-uint32_t log2_samples);
-
-void genX(emit_sample_locations)(struct anv_batch *batch,
- const struct anv_sample *anv_samples,
- uint32_t num_samples,
- bool custom_locations);
+void genX(emit_ms_state)(struct anv_batch *batch,
+ struct anv_sample *anv_samples,
+ uint32_t num_samples,
+ uint32_t log2_samples,
+ bool custom_sample_locations);
diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 9229df84caa..4752c66f350 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -2643,8 +2643,7 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer 
*cmd_buffer,
 static void
 cmd_buffer_emit_sample_locations(struct anv_cmd_buffer *cmd_buffer)
 {
-#if GEN_GEN >= 8
-   const struct anv_sample *anv_samples;
+   struct anv_sample *anv_samples;
uint32_t log2_samples;
uint32_t samples;
 
@@ -2654,10 +2653,8 @@ cmd_buffer_emit_sample_locations(struct anv_cmd_buffer 
*cmd_buffer)
log2_samples = __builtin_ffs(samples) - 1;
anv_samples = cmd_buffer->state.gfx.dynamic.sample_locations.anv_samples;
 
-   genX(emit_multisample)(_buffer->batch, samples, log2_samples);
-   genX(emit_sample_locations)(_buffer->batch, anv_samples, samples,
-  true);
-#endif
+   genX(emit_ms_state)(_buffer->batch, anv_samples, samples,
+   log2_samples, true);
 }
 
 void
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index fa42e622077..8afc08f0320 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -577,12 +577,9 @@ emit_ms_state(struct anv_pipeline *pipeline,
   const VkPipelineMultisampleStateCreateInfo *info,
   const VkPipelineDynamicStateCreateInfo *dinfo)
 {
-#if GEN_GEN >= 8
struct anv_sample anv_samples[MAX_SAMPLE_LOCATIONS];
VkSampleLocationsInfoEXT *sl;
bool custom_locations = false;
-#endif
-
uint32_t samples = 1;
uint32_t log2_samples = 0;
 
@@ -591,7 +588,6 @@ emit_ms_state(struct anv_pipeline *pipeline,
if (info) {
   samples = info->rasterizationSamples;
 
-#if GEN_GEN >= 8
   if (info->pNext) {
  VkPipelineSampleLocationsStateCreateInfoEXT *slinfo =
 (VkPipelineSampleLocationsStateCreateInfoEXT *)info->pNext;
@@ -616,17 +612,12 @@ emit_ms_state(struct anv_pipeline *pipeline,
 }
  }
   }
-#endif
 
   log2_samples = __builtin_ffs(samples) - 1;
}
 
-   genX(emit_multisample(>batch, samples, log2_samples));
-
-#if GEN_GEN >= 8
-   genX(emit_sample_locations)(>batch, anv_samples, samples,
-   custom_locations);
-#endif
+   genX(emit_ms_state)(>batch, anv_samples, samples, log2_samples,
+   custom_locations);
 }
 
 static const uint32_t vk_to_gen_logic_op[] = {
diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c
index 44cfc925ed5..804cfab3a56 100644
--- a/src/intel/vulkan/genX_state.c
+++ b/src/intel/vulkan/genX_state.c
@@ -437,10 +437,12 @@ VkResult genX(CreateSampler)(
return VK_SUCCESS;
 }
 
-void
-genX(emit_multisample)(struct anv_batch *batch,
-   uint32_t samples,
-   uint32_t log2_samples)
+static void
+emit_multisample(struct anv_batch *batch,
+ const struct anv_sample *anv_samples,
+ uint32_t samples,
+ uint32_t log2_samples,
+ bool custom_locations)
 {
anv_batch_emit(batch, GENX(3DSTATE_MULTISAMPLE), ms) {
   ms.NumberofMultisamples = log2_samples;
@@ -453,34 +455,52 @@ genX(emit_multisample)(struct anv_batch *batch,
*/
   ms.PixelPositionOffsetEnable  = false;
 #else
-  switch (samples) {
-  case 1:
- GEN_SAMPLE_POS_1X(ms.Sample);
- break;
-  case 2:
- GEN_SAMPLE_POS_2X(ms.Sample);
- break;
-  case 4:
- GEN_SAMPLE_POS_4X(ms.Sample);
- break;
- 

[Mesa-dev] [PATCH v3 5/9] anv: Added support for dynamic sample locations on Gen8+

2019-03-13 Thread Eleni Maria Stea
Added support for setting the locations when the pipeline has been
created with the dynamic state bit enabled according to the Vulkan
Specification section [26.5. Custom Sample Locations] for the function:

'vkCmdSetSampleLocationsEXT'

We chose to store a boolean `valid` flag inside the sample locations
dynamic state struct instead of using a dirty bit
(ANV_CMD_DIRTY_SAMPLE_LOCATIONS, for example) because other functions
can modify the dirty bits, causing unexpected behavior.
---
 src/intel/vulkan/anv_cmd_buffer.c  | 19 
 src/intel/vulkan/anv_genX.h|  6 +++-
 src/intel/vulkan/anv_private.h |  6 
 src/intel/vulkan/genX_cmd_buffer.c | 27 ++
 src/intel/vulkan/genX_pipeline.c   | 46 --
 src/intel/vulkan/genX_state.c  | 41 +++---
 6 files changed, 99 insertions(+), 46 deletions(-)

diff --git a/src/intel/vulkan/anv_cmd_buffer.c 
b/src/intel/vulkan/anv_cmd_buffer.c
index 1b34644a434..101c1375430 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -28,6 +28,7 @@
 #include 
 
 #include "anv_private.h"
+#include "anv_sample_locations.h"
 
 #include "vk_format_info.h"
 #include "vk_util.h"
@@ -558,6 +559,24 @@ void anv_CmdSetStencilReference(
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
 }
 
+void
+anv_CmdSetSampleLocationsEXT(VkCommandBuffer commandBuffer,
+ const VkSampleLocationsInfoEXT 
*pSampleLocationsInfo)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   assert(pSampleLocationsInfo);
+
+   struct anv_dynamic_state *dyn_state = &cmd_buffer->state.gfx.dynamic;
+   dyn_state->sample_locations.num_samples =
+  pSampleLocationsInfo->sampleLocationsPerPixel;
+
+   anv_calc_sample_locations(dyn_state->sample_locations.anv_samples,
+ dyn_state->sample_locations.num_samples,
+ pSampleLocationsInfo);
+
+   cmd_buffer->state.gfx.dynamic.sample_locations.valid = true;
+}
+
 static void
 anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
VkPipelineBindPoint bind_point,
diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
index 52415c04a45..f84fe457152 100644
--- a/src/intel/vulkan/anv_genX.h
+++ b/src/intel/vulkan/anv_genX.h
@@ -89,7 +89,11 @@ void genX(cmd_buffer_mi_memset)(struct anv_cmd_buffer 
*cmd_buffer,
 void genX(blorp_exec)(struct blorp_batch *batch,
   const struct blorp_params *params);
 
+void genX(emit_multisample)(struct anv_batch *batch,
+uint32_t samples,
+uint32_t log2_samples);
+
 void genX(emit_sample_locations)(struct anv_batch *batch,
+ const struct anv_sample *anv_samples,
  uint32_t num_samples,
- const VkSampleLocationsInfoEXT *sl,
  bool custom_locations);
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 981956e5706..a2e1756cd99 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -2135,6 +2135,12 @@ struct anv_dynamic_state {
   uint32_t  front;
   uint32_t  back;
} stencil_reference;
+
+   struct {
+  struct anv_sample 
anv_samples[MAX_SAMPLE_LOCATIONS];
+  uint32_t  num_samples;
+  bool  valid;
+   } sample_locations;
 };
 
 extern const struct anv_dynamic_state default_dynamic_state;
diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 7687507e6b7..9229df84caa 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -25,11 +25,13 @@
 #include 
 
 #include "anv_private.h"
+#include "anv_sample_locations.h"
 #include "vk_format_info.h"
 #include "vk_util.h"
 #include "util/fast_idiv_by_const.h"
 
 #include "common/gen_l3_config.h"
+#include "common/gen_sample_positions.h"
 #include "genxml/gen_macros.h"
 #include "genxml/genX_pack.h"
 
@@ -2638,6 +2640,26 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer 
*cmd_buffer,
cmd_buffer->state.push_constants_dirty &= ~flushed;
 }
 
+static void
+cmd_buffer_emit_sample_locations(struct anv_cmd_buffer *cmd_buffer)
+{
+#if GEN_GEN >= 8
+   const struct anv_sample *anv_samples;
+   uint32_t log2_samples;
+   uint32_t samples;
+
+   samples = cmd_buffer->state.gfx.dynamic.sample_locations.num_samples;
+   assert(samples > 0);
+
+   log2_samples = __builtin_ffs(samples) - 1;
+   anv_samples = cmd_buffer->state.gfx.dynamic.sample_locations.anv_samples;
+
+   genX(emit_multisample)(_buffer->batch, samples, log2_samples);
+   genX(emit_sample_locations)(_buffer->batch, 

[Mesa-dev] [PATCH v3 2/9] anv: Set the values for the VkPhysicalDeviceSampleLocationsPropertiesEXT

2019-03-13 Thread Eleni Maria Stea
The VkPhysicalDeviceSampleLocationsPropertiesEXT struct is filled with
implementation-dependent values according to the table from the
Vulkan Specification section [36.1. Limit Requirements]:

pname | max | min
pname:sampleLocationSampleCounts   |-|ename:VK_SAMPLE_COUNT_4_BIT
pname:maxSampleLocationGridSize|-|(1, 1)
pname:sampleLocationCoordinateRange|(0.0, 0.9375)|(0.0, 0.9375)
pname:sampleLocationSubPixelBits   |-|4
pname:variableSampleLocations  | false   |implementation dependent

The hardware only supports setting the same sample location for all the
pixels, so we only support 1x1 grids.

Also, variableSampleLocations is set to false because we don't support the
feature.

v2: 1- Replaced false with VK_FALSE for consistency. (Sagar Ghuge)
2- Used the isl_device_sample_count to take the number of samples
per platform to avoid extra checks. (Sagar Ghuge)

Reviewed-by: Sagar Ghuge 
---
 src/intel/vulkan/anv_device.c  | 19 +++
 src/intel/vulkan/anv_private.h |  3 +++
 2 files changed, 22 insertions(+)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 729cceb3e32..bf6f03ebb1a 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -1401,6 +1401,25 @@ void anv_GetPhysicalDeviceProperties2(
  break;
   }
 
+  case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
+ VkPhysicalDeviceSampleLocationsPropertiesEXT *props =
+(VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
+
+ props->sampleLocationSampleCounts =
+isl_device_get_sample_counts(>isl_dev);
+
+ props->maxSampleLocationGridSize.width = SAMPLE_LOC_GRID_W;
+ props->maxSampleLocationGridSize.height = SAMPLE_LOC_GRID_H;
+
+ props->sampleLocationCoordinateRange[0] = 0;
+ props->sampleLocationCoordinateRange[1] = 0.9375;
+ props->sampleLocationSubPixelBits = 4;
+
+ props->variableSampleLocations = VK_FALSE;
+
+ break;
+  }
+
   default:
  anv_debug_ignored_stype(ext->sType);
  break;
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index eed282ff985..5905299e59d 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -195,6 +195,9 @@ struct gen_l3_config;
 
 #define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
 
+#define SAMPLE_LOC_GRID_W 1
+#define SAMPLE_LOC_GRID_H 1
+
 static inline uint32_t
 align_down_npot_u32(uint32_t v, uint32_t a)
 {
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 4/9] anv: Added support for non-dynamic sample locations on Gen8+

2019-03-13 Thread Eleni Maria Stea
Allow the user to set custom sample locations non-dynamically, by
filling the extension structs and chaining them to the pipeline structs
according to the Vulkan specification section [26.5. Custom Sample Locations]
for the following structures:

'VkPipelineSampleLocationsStateCreateInfoEXT'
'VkSampleLocationsInfoEXT'
'VkSampleLocationEXT'

Once custom locations are used, the default locations are lost and need to be
re-emitted again in the next pipeline creation. For that, we emit the
3DSTATE_SAMPLE_PATTERN at every pipeline creation.
---
 src/intel/common/gen_sample_positions.h | 53 
 src/intel/vulkan/anv_genX.h |  5 ++
 src/intel/vulkan/anv_private.h  |  9 +++
 src/intel/vulkan/anv_sample_locations.c | 38 +++-
 src/intel/vulkan/anv_sample_locations.h | 29 +
 src/intel/vulkan/genX_pipeline.c| 80 +
 src/intel/vulkan/genX_state.c   | 59 ++
 7 files changed, 259 insertions(+), 14 deletions(-)
 create mode 100644 src/intel/vulkan/anv_sample_locations.h

diff --git a/src/intel/common/gen_sample_positions.h 
b/src/intel/common/gen_sample_positions.h
index da48dcb5ed0..e8af2a552dc 100644
--- a/src/intel/common/gen_sample_positions.h
+++ b/src/intel/common/gen_sample_positions.h
@@ -160,4 +160,57 @@ prefix##14YOffset  = 0.9375; \
 prefix##15XOffset  = 0.0625; \
 prefix##15YOffset  = 0.;
 
+/* Examples:
+ * in case of GEN_GEN < 8:
+ * SET_SAMPLE_POS(ms.Sample, 0); expands to:
+ *ms.Sample0XOffset = anv_samples[0].offs_x;
+ *ms.Sample0YOffset = anv_samples[0].offs_y;
+ *
+ * in case of GEN_GEN >= 8:
+ * SET_SAMPLE_POS(sp._16xSample, 0); expands to:
+ *sp._16xSample0XOffset = anv_samples[0].offs_x;
+ *sp._16xSample0YOffset = anv_samples[0].offs_y;
+ */
+#define SET_SAMPLE_POS(prefix, sample_idx) \
+prefix##sample_idx##XOffset = anv_samples[sample_idx].offs_x; \
+prefix##sample_idx##YOffset = anv_samples[sample_idx].offs_y;
+
+#define SET_SAMPLE_POS_2X(prefix) \
+SET_SAMPLE_POS(prefix, 0); \
+SET_SAMPLE_POS(prefix, 1);
+
+#define SET_SAMPLE_POS_4X(prefix) \
+SET_SAMPLE_POS(prefix, 0); \
+SET_SAMPLE_POS(prefix, 1); \
+SET_SAMPLE_POS(prefix, 2); \
+SET_SAMPLE_POS(prefix, 3);
+
+#define SET_SAMPLE_POS_8X(prefix) \
+SET_SAMPLE_POS(prefix, 0); \
+SET_SAMPLE_POS(prefix, 1); \
+SET_SAMPLE_POS(prefix, 2); \
+SET_SAMPLE_POS(prefix, 3); \
+SET_SAMPLE_POS(prefix, 4); \
+SET_SAMPLE_POS(prefix, 5); \
+SET_SAMPLE_POS(prefix, 6); \
+SET_SAMPLE_POS(prefix, 7);
+
+#define SET_SAMPLE_POS_16X(prefix) \
+SET_SAMPLE_POS(prefix, 0); \
+SET_SAMPLE_POS(prefix, 1); \
+SET_SAMPLE_POS(prefix, 2); \
+SET_SAMPLE_POS(prefix, 3); \
+SET_SAMPLE_POS(prefix, 4); \
+SET_SAMPLE_POS(prefix, 5); \
+SET_SAMPLE_POS(prefix, 6); \
+SET_SAMPLE_POS(prefix, 7); \
+SET_SAMPLE_POS(prefix, 8); \
+SET_SAMPLE_POS(prefix, 9); \
+SET_SAMPLE_POS(prefix, 10); \
+SET_SAMPLE_POS(prefix, 11); \
+SET_SAMPLE_POS(prefix, 12); \
+SET_SAMPLE_POS(prefix, 13); \
+SET_SAMPLE_POS(prefix, 14); \
+SET_SAMPLE_POS(prefix, 15);
+
 #endif /* GEN_SAMPLE_POSITIONS_H */
diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
index 8fd32cabf1e..52415c04a45 100644
--- a/src/intel/vulkan/anv_genX.h
+++ b/src/intel/vulkan/anv_genX.h
@@ -88,3 +88,8 @@ void genX(cmd_buffer_mi_memset)(struct anv_cmd_buffer 
*cmd_buffer,
 
 void genX(blorp_exec)(struct blorp_batch *batch,
   const struct blorp_params *params);
+
+void genX(emit_sample_locations)(struct anv_batch *batch,
+ uint32_t num_samples,
+ const VkSampleLocationsInfoEXT *sl,
+ bool custom_locations);
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 5905299e59d..981956e5706 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -71,6 +71,7 @@ struct anv_buffer;
 struct anv_buffer_view;
 struct anv_image_view;
 struct anv_instance;
+struct anv_sample;
 
 struct gen_l3_config;
 
@@ -165,6 +166,7 @@ struct gen_l3_config;
 #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
 #define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096
 #define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32
+#define MAX_SAMPLE_LOCATIONS 16
 
 /* The kernel relocation API has a limitation of a 32-bit delta value
  * applied to the address before it is written which, in spite of it being
@@ -2086,6 +2088,13 @@ struct anv_push_constants {
struct brw_image_param images[MAX_GEN8_IMAGES];
 };
 
+struct
+anv_sample {
+   float offs_x;
+   float offs_y;
+   float radius;
+};
+
 struct anv_dynamic_state {
struct {
   uint32_t  count;
diff --git a/src/intel/vulkan/anv_sample_locations.c 
b/src/intel/vulkan/anv_sample_locations.c
index 1ebf280e05b..c660cb5ae84 100644
--- a/src/intel/vulkan/anv_sample_locations.c
+++ b/src/intel/vulkan/anv_sample_locations.c
@@ -21,7 +21,7 @@
  * IN THE SOFTWARE.
  */
 
-#include "anv_private.h"

[Mesa-dev] [PATCH v3 0/9] Implementation of the VK_EXT_sample_locations

2019-03-13 Thread Eleni Maria Stea
Implemented the requirements from the VK_EXT_sample_locations extension
specification to allow setting custom sample locations on Intel Gen >= 7.

Some decisions explained:

The grid size was set to 1x1 because the hardware only supports a single
set of sample locations for the whole framebuffer.

The user can only set custom sample locations per pipeline, either by
filling the extension-provided structs or dynamically, as described in
sections 26.5, 36.1 and 36.2 of the Vulkan specification.

Sections 6.7.3 and 7.4 describe how to use sample locations with images
when a layout transition is about to take place. These sections were
ignored as currently we aren't using sample locations with images in the
driver.

Variable sample locations aren't required and have not been implemented.

We have 754 vk-gl-cts tests for this extension:
690 of them pass on Gen >= 9 (where we can support 16 samples).
The remaining 64 tests aren't supported because they test variable
sample locations.

Eleni Maria Stea (9):
  anv: Added the VK_EXT_sample_locations extension to the anv_extensions
list
  anv: Set the values for the
VkPhysicalDeviceSampleLocationsPropertiesEXT
  anv: Implemented the vkGetPhysicalDeviceMultisamplePropertiesEXT
  anv: Added support for non-dynamic sample locations on Gen8+
  anv: Added support for dynamic sample locations on Gen8+
  anv: Added support for dynamic and non-dynamic sample locations on
Gen7
  anv: Optimized the emission of the default locations on Gen8+
  anv: Removed unused header file
  anv: Enabled the VK_EXT_sample_locations extension

 src/intel/Makefile.sources  |   1 +
 src/intel/common/gen_sample_positions.h |  53 ++
 src/intel/vulkan/anv_cmd_buffer.c   |  19 
 src/intel/vulkan/anv_device.c   |  21 
 src/intel/vulkan/anv_extensions.py  |   1 +
 src/intel/vulkan/anv_genX.h |   7 ++
 src/intel/vulkan/anv_private.h  |  18 
 src/intel/vulkan/anv_sample_locations.c |  96 ++
 src/intel/vulkan/anv_sample_locations.h |  29 ++
 src/intel/vulkan/genX_blorp_exec.c  |   1 -
 src/intel/vulkan/genX_cmd_buffer.c  |  24 +
 src/intel/vulkan/genX_pipeline.c|  92 +
 src/intel/vulkan/genX_state.c   | 128 
 src/intel/vulkan/meson.build|   1 +
 14 files changed, 450 insertions(+), 41 deletions(-)
 create mode 100644 src/intel/vulkan/anv_sample_locations.c
 create mode 100644 src/intel/vulkan/anv_sample_locations.h

-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 01/12] ac: do not force enable IDXEN for 16-bit SSBO loads

2019-03-13 Thread Bas Nieuwenhuizen
NAK.  The entire thing about an index being used and possibly still
constant 0 (and hence the index being constant 0 is not a sign to use
the raw intrinsics) is why we now have both structurized and raw
intrinsics. Don't just introduce that mistake again 

On Wed, Mar 13, 2019 at 11:47 AM Samuel Pitoiset
 wrote:
>
> The struct version enables IDXEN, while the raw one disables it.
> When vindex is unused, the raw version is enough.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_llvm_build.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index bc64f0bb7e3..8960b5ad4ff 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -1383,11 +1383,13 @@ ac_build_tbuffer_load_short(struct ac_llvm_context 
> *ctx,
> LLVMValueRef res;
>
> if (HAVE_LLVM >= 0x0800) {
> +   bool structurized = vindex && vindex != ctx->i32_0;
> +
> voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, "");
>
> res = ac_build_llvm8_tbuffer_load(ctx, rsrc, vindex, voffset,
>   soffset, 1, dfmt, nfmt, glc,
> - false, true, true);
> + false, true, structurized);
> } else {
> const char *name = "llvm.amdgcn.tbuffer.load.i32";
> LLVMTypeRef type = ctx->i32;
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 07/12] ac/nir: use new LLVM 8 intrinsics for SSBO atomic operations

2019-03-13 Thread Samuel Pitoiset
Use the raw version (ie. IDXEN=0) because vindex is unused.

v2: - use raw version

Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_nir_to_llvm.c | 66 +
 1 file changed, 42 insertions(+), 24 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 55d3ce90ce4..a25e5388bce 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1626,57 +1626,75 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
 static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
   const nir_intrinsic_instr *instr)
 {
-   const char *name;
-   LLVMValueRef params[6];
+   const char *atomic_name;
+   char intrinsic_name[64];
+   LLVMValueRef params[7];
int arg_count = 0;
-
-   if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
-   params[arg_count++] = ac_llvm_extract_elem(>ac, 
get_src(ctx, instr->src[3]), 0);
-   }
-   params[arg_count++] = ac_llvm_extract_elem(>ac, get_src(ctx, 
instr->src[2]), 0);
-   params[arg_count++] = ctx->abi->load_ssbo(ctx->abi,
-get_src(ctx, instr->src[0]),
-true);
-   params[arg_count++] = ctx->ac.i32_0; /* vindex */
-   params[arg_count++] = get_src(ctx, instr->src[1]);  /* voffset */
-   params[arg_count++] = ctx->ac.i1false;  /* slc */
+   int length;
 
switch (instr->intrinsic) {
case nir_intrinsic_ssbo_atomic_add:
-   name = "llvm.amdgcn.buffer.atomic.add";
+   atomic_name = "add";
break;
case nir_intrinsic_ssbo_atomic_imin:
-   name = "llvm.amdgcn.buffer.atomic.smin";
+   atomic_name = "smin";
break;
case nir_intrinsic_ssbo_atomic_umin:
-   name = "llvm.amdgcn.buffer.atomic.umin";
+   atomic_name = "umin";
break;
case nir_intrinsic_ssbo_atomic_imax:
-   name = "llvm.amdgcn.buffer.atomic.smax";
+   atomic_name = "smax";
break;
case nir_intrinsic_ssbo_atomic_umax:
-   name = "llvm.amdgcn.buffer.atomic.umax";
+   atomic_name = "umax";
break;
case nir_intrinsic_ssbo_atomic_and:
-   name = "llvm.amdgcn.buffer.atomic.and";
+   atomic_name = "and";
break;
case nir_intrinsic_ssbo_atomic_or:
-   name = "llvm.amdgcn.buffer.atomic.or";
+   atomic_name = "or";
break;
case nir_intrinsic_ssbo_atomic_xor:
-   name = "llvm.amdgcn.buffer.atomic.xor";
+   atomic_name = "xor";
break;
case nir_intrinsic_ssbo_atomic_exchange:
-   name = "llvm.amdgcn.buffer.atomic.swap";
+   atomic_name = "swap";
break;
case nir_intrinsic_ssbo_atomic_comp_swap:
-   name = "llvm.amdgcn.buffer.atomic.cmpswap";
+   atomic_name = "cmpswap";
break;
default:
abort();
}
 
-   return ac_build_intrinsic(>ac, name, ctx->ac.i32, params, 
arg_count, 0);
+   if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
+   params[arg_count++] = ac_llvm_extract_elem(>ac, 
get_src(ctx, instr->src[3]), 0);
+   }
+   params[arg_count++] = ac_llvm_extract_elem(>ac, get_src(ctx, 
instr->src[2]), 0);
+   params[arg_count++] = ctx->abi->load_ssbo(ctx->abi,
+get_src(ctx, instr->src[0]),
+true);
+
+   if (HAVE_LLVM >= 0x0800) {
+   params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
+   params[arg_count++] = ctx->ac.i32_0; /* soffset */
+   params[arg_count++] = ctx->ac.i32_0; /* slc */
+
+   length = snprintf(intrinsic_name, sizeof(intrinsic_name),
+ "llvm.amdgcn.raw.buffer.atomic.%s.i32",
+ atomic_name);
+   } else {
+   params[arg_count++] = ctx->ac.i32_0; /* vindex */
+   params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
+   params[arg_count++] = ctx->ac.i1false; /* slc */
+
+   length = snprintf(intrinsic_name, sizeof(intrinsic_name),
+ "llvm.amdgcn.buffer.atomic.%s", atomic_name);
+   }
+
+   assert(length < sizeof(intrinsic_name));
+   return ac_build_intrinsic(>ac, intrinsic_name, ctx->ac.i32,
+ params, arg_count, 0);
 }
 
 static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org

[Mesa-dev] [PATCH v2 12/12] ac: use new LLVM 8 intrinsics in ac_build_buffer_store_dword()

2019-03-13 Thread Samuel Pitoiset
New buffer intrinsics have a separate soffset parameter.

v2: - use the raw version as vindex is not used

Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c | 66 ++
 1 file changed, 26 insertions(+), 40 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 06dc1383121..1742df24c10 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1227,59 +1227,45 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
if (!swizzle_enable_hint) {
LLVMValueRef offset = soffset;
 
-   static const char *types[] = {"f32", "v2f32", "v4f32"};
-
if (inst_offset)
offset = LLVMBuildAdd(ctx->builder, offset,
  LLVMConstInt(ctx->i32, 
inst_offset, 0), "");
-   if (voffset)
-   offset = LLVMBuildAdd(ctx->builder, offset, voffset, 
"");
-
-   LLVMValueRef args[] = {
-   ac_to_float(ctx, vdata),
-   LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
-   ctx->i32_0,
-   offset,
-   LLVMConstInt(ctx->i1, glc, 0),
-   LLVMConstInt(ctx->i1, slc, 0),
-   };
-
-   char name[256];
-   snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.%s",
-types[CLAMP(num_channels, 1, 3) - 1]);
 
-   ac_build_intrinsic(ctx, name, ctx->voidt,
-  args, ARRAY_SIZE(args),
-  ac_get_store_intr_attribs(writeonly_memory));
+   if (HAVE_LLVM >= 0x800) {
+   ac_build_llvm8_buffer_store_common(ctx, rsrc,
+  ac_to_float(ctx, 
vdata),
+  ctx->i32_0,
+  voffset, offset,
+  num_channels,
+  glc, slc,
+  writeonly_memory,
+  false, false);
+   } else {
+   if (voffset)
+   offset = LLVMBuildAdd(ctx->builder, offset, 
voffset, "");
+
+   ac_build_buffer_store_common(ctx, rsrc,
+ac_to_float(ctx, vdata),
+ctx->i32_0, offset,
+num_channels, glc, slc,
+writeonly_memory, false);
+   }
return;
}
 
-   static const unsigned dfmt[] = {
+   static const unsigned dfmts[] = {
V_008F0C_BUF_DATA_FORMAT_32,
V_008F0C_BUF_DATA_FORMAT_32_32,
V_008F0C_BUF_DATA_FORMAT_32_32_32,
V_008F0C_BUF_DATA_FORMAT_32_32_32_32
};
-   static const char *types[] = {"i32", "v2i32", "v4i32"};
-   LLVMValueRef args[] = {
-   vdata,
-   LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
-   ctx->i32_0,
-   voffset ? voffset : ctx->i32_0,
-   soffset,
-   LLVMConstInt(ctx->i32, inst_offset, 0),
-   LLVMConstInt(ctx->i32, dfmt[num_channels - 1], 0),
-   LLVMConstInt(ctx->i32, V_008F0C_BUF_NUM_FORMAT_UINT, 0),
-   LLVMConstInt(ctx->i1, glc, 0),
-   LLVMConstInt(ctx->i1, slc, 0),
-   };
-   char name[256];
-   snprintf(name, sizeof(name), "llvm.amdgcn.tbuffer.store.%s",
-types[CLAMP(num_channels, 1, 3) - 1]);
+   unsigned dfmt = dfmts[num_channels - 1];
+   unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
+   LLVMValueRef immoffset = LLVMConstInt(ctx->i32, inst_offset, 0);
 
-   ac_build_intrinsic(ctx, name, ctx->voidt,
-  args, ARRAY_SIZE(args),
-  ac_get_store_intr_attribs(writeonly_memory));
+   ac_build_tbuffer_store(ctx, rsrc, vdata, ctx->i32_0, voffset, soffset,
+  immoffset, num_channels, dfmt, nfmt, glc, slc,
+  writeonly_memory);
 }
 
 static LLVMValueRef
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 02/12] ac: fix glc parameter use for new LLVM 8 typed buffer intrinsics

2019-03-13 Thread Samuel Pitoiset
ac_build_llvm8_tbuffer_load() expects a boolean for glc.

Fixes: 2cf5433b99f ("ac: use new LLVM 8 intrinsic when loading 16-bit values")
Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c  | 4 ++--
 src/amd/common/ac_llvm_build.h  | 2 +-
 src/amd/common/ac_nir_to_llvm.c | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 8960b5ad4ff..75cebf427ad 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1376,7 +1376,7 @@ ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
LLVMValueRef voffset,
LLVMValueRef soffset,
LLVMValueRef immoffset,
-   LLVMValueRef glc)
+   bool glc)
 {
unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16;
unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
@@ -1401,7 +1401,7 @@ ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
immoffset,
LLVMConstInt(ctx->i32, dfmt, false),
LLVMConstInt(ctx->i32, nfmt, false),
-   glc,
+   LLVMConstInt(ctx->i32, glc, false),
ctx->i1false,
};
res = ac_build_intrinsic(ctx, name, type, params, 9, 0);
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index fd5c4295abf..0fb3eb52f05 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -304,7 +304,7 @@ ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
LLVMValueRef voffset,
LLVMValueRef soffset,
LLVMValueRef immoffset,
-   LLVMValueRef glc);
+   bool glc);
 
 LLVMValueRef
 ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx,
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 5fb5c8da609..a7b3fdf64aa 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1716,7 +1716,7 @@ static LLVMValueRef visit_load_buffer(struct 
ac_nir_context *ctx,
  offset,
  ctx->ac.i32_0,
  immoffset,
- glc);
+ cache_policy & 
ac_glc);
} else {
const char *load_name;
LLVMTypeRef data_type;
@@ -1787,7 +1787,7 @@ static LLVMValueRef visit_load_ubo_buffer(struct 
ac_nir_context *ctx,
 offset,
 ctx->ac.i32_0,
 
LLVMConstInt(ctx->ac.i32, 2 * i, 0),
-
ctx->ac.i1false);
+false);
}
 ret = ac_build_gather_values(&ctx->ac, results, num_components);
} else {
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 05/12] ac: add ac_build_buffer_store_format() helper

2019-03-13 Thread Samuel Pitoiset
Similar to ac_build_buffer_load_format().

v2: - fix out of bounds access

Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c  | 100 
 src/amd/common/ac_llvm_build.h  |  11 
 src/amd/common/ac_nir_to_llvm.c |  29 +++--
 3 files changed, 119 insertions(+), 21 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 2f535259f78..5d52d93d25a 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1082,6 +1082,106 @@ LLVMValueRef 
ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
return ac_build_load_custom(ctx, base_ptr, index, true, true, false);
 }
 
+static void
+ac_build_buffer_store_common(struct ac_llvm_context *ctx,
+LLVMValueRef rsrc,
+LLVMValueRef data,
+LLVMValueRef vindex,
+LLVMValueRef voffset,
+unsigned num_channels,
+bool glc,
+bool slc,
+bool writeonly_memory,
+bool use_format)
+{
+   LLVMValueRef args[] = {
+   data,
+   LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
+   vindex ? vindex : ctx->i32_0,
+   voffset,
+   LLVMConstInt(ctx->i1, glc, 0),
+   LLVMConstInt(ctx->i1, slc, 0)
+   };
+   unsigned func = CLAMP(num_channels, 1, 3) - 1;
+
+   const char *type_names[] = {"f32", "v2f32", "v4f32"};
+   char name[256];
+
+   if (use_format) {
+   snprintf(name, sizeof(name), 
"llvm.amdgcn.buffer.store.format.%s",
+type_names[func]);
+   } else {
+   snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.%s",
+type_names[func]);
+   }
+
+   ac_build_intrinsic(ctx, name, ctx->voidt, args, ARRAY_SIZE(args),
+  ac_get_store_intr_attribs(writeonly_memory));
+}
+
+static void
+ac_build_llvm8_buffer_store_common(struct ac_llvm_context *ctx,
+  LLVMValueRef rsrc,
+  LLVMValueRef data,
+  LLVMValueRef vindex,
+  LLVMValueRef voffset,
+  LLVMValueRef soffset,
+  unsigned num_channels,
+  bool glc,
+  bool slc,
+  bool writeonly_memory,
+  bool use_format,
+  bool structurized)
+{
+   LLVMValueRef args[6];
+   int idx = 0;
+   args[idx++] = data;
+   args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
+   if (structurized)
+   args[idx++] = vindex ? vindex : ctx->i32_0;
+   args[idx++] = voffset ? voffset : ctx->i32_0;
+   args[idx++] = soffset ? soffset : ctx->i32_0;
+   args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
+   unsigned func = CLAMP(num_channels, 1, 3) - 1;
+
+   const char *type_names[] = {"f32", "v2f32", "v4f32"};
+   const char *indexing_kind = structurized ? "struct" : "raw";
+   char name[256];
+
+   if (use_format) {
+   snprintf(name, sizeof(name), 
"llvm.amdgcn.%s.buffer.store.format.%s",
+indexing_kind, type_names[func]);
+   } else {
+   snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.%s",
+indexing_kind, type_names[func]);
+   }
+
+   ac_build_intrinsic(ctx, name, ctx->voidt, args, idx,
+  ac_get_store_intr_attribs(writeonly_memory));
+}
+
+void
+ac_build_buffer_store_format(struct ac_llvm_context *ctx,
+LLVMValueRef rsrc,
+LLVMValueRef data,
+LLVMValueRef vindex,
+LLVMValueRef voffset,
+unsigned num_channels,
+bool glc,
+bool writeonly_memory)
+{
+   if (HAVE_LLVM >= 0x800) {
+   ac_build_llvm8_buffer_store_common(ctx, rsrc, data, vindex,
+  voffset, NULL, num_channels,
+  glc, false, writeonly_memory,
+  true, true);
+   } else {
+   ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset,
+num_channels, glc, false,
+writeonly_memory, true);
+   }
+}
+
 /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by 
num_channels=1..4.
  * The type of 

[Mesa-dev] [PATCH v2 03/12] ac: make use of ac_get_store_intr_attribs() where possible

2019-03-13 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 75cebf427ad..2f535259f78 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1150,9 +1150,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
 
ac_build_intrinsic(ctx, name, ctx->voidt,
   args, ARRAY_SIZE(args),
-  writeonly_memory ?
-  AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY :
-  AC_FUNC_ATTR_WRITEONLY);
+  ac_get_store_intr_attribs(writeonly_memory));
return;
}
 
@@ -1181,9 +1179,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
 
ac_build_intrinsic(ctx, name, ctx->voidt,
   args, ARRAY_SIZE(args),
-  writeonly_memory ?
-  AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY :
-  AC_FUNC_ATTR_WRITEONLY);
+  ac_get_store_intr_attribs(writeonly_memory));
 }
 
 static LLVMValueRef
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 09/12] ac/nir: use ac_build_buffer_store_dword() for SSBO store operations

2019-03-13 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_nir_to_llvm.c | 23 +--
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index af0c3cc679d..f4d408cd587 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1591,34 +1591,29 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
   ctx->ac.voidt, tbuffer_params, 10,
   
ac_get_store_intr_attribs(writeonly_memory));
} else {
+   int num_channels = num_bytes / 4;
+
switch (num_bytes) {
case 16: /* v4f32 */
-   store_name = "llvm.amdgcn.buffer.store.v4f32";
data_type = ctx->ac.v4f32;
break;
case 8: /* v2f32 */
-   store_name = "llvm.amdgcn.buffer.store.v2f32";
data_type = ctx->ac.v2f32;
break;
case 4: /* f32 */
-   store_name = "llvm.amdgcn.buffer.store.f32";
data_type = ctx->ac.f32;
break;
default:
unreachable("Malformed vector store.");
}
data = LLVMBuildBitCast(ctx->ac.builder, data, 
data_type, "");
-   LLVMValueRef params[] = {
-   data,
-   rsrc,
-   ctx->ac.i32_0, /* vindex */
-   offset,
-   glc,
-   ctx->ac.i1false,  /* slc */
-   };
-   ac_build_intrinsic(&ctx->ac, store_name,
-  ctx->ac.voidt, params, 6,
-  
ac_get_store_intr_attribs(writeonly_memory));
+
+   ac_build_buffer_store_dword(&ctx->ac, rsrc, data,
+   num_channels, offset,
+   ctx->ac.i32_0, 0,
+   cache_policy & ac_glc,
+   false, writeonly_memory,
+   false);
}
}
 }
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 08/12] ac/nir: use ac_build_buffer_load() for SSBO load operations

2019-03-13 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_nir_to_llvm.c | 35 ++---
 1 file changed, 6 insertions(+), 29 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index a25e5388bce..af0c3cc679d 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1704,7 +1704,6 @@ static LLVMValueRef visit_load_buffer(struct 
ac_nir_context *ctx,
int num_components = instr->num_components;
enum gl_access_qualifier access = nir_intrinsic_access(instr);
unsigned cache_policy = get_cache_policy(ctx, access, false, false);
-   LLVMValueRef glc = (cache_policy & ac_glc) ? ctx->ac.i1true : 
ctx->ac.i1false;
 
LLVMValueRef offset = get_src(ctx, instr->src[1]);
LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
@@ -1735,34 +1734,12 @@ static LLVMValueRef visit_load_buffer(struct 
ac_nir_context *ctx,
  immoffset,
  cache_policy & 
ac_glc);
} else {
-   const char *load_name;
-   LLVMTypeRef data_type;
-   switch (load_bytes) {
-   case 16:
-   case 12:
-   load_name = "llvm.amdgcn.buffer.load.v4f32";
-   data_type = ctx->ac.v4f32;
-   break;
-   case 8:
-   case 6:
-   load_name = "llvm.amdgcn.buffer.load.v2f32";
-   data_type = ctx->ac.v2f32;
-   break;
-   case 4:
-   load_name = "llvm.amdgcn.buffer.load.f32";
-   data_type = ctx->ac.f32;
-   break;
-   default:
-   unreachable("Malformed load buffer.");
-   }
-   LLVMValueRef params[] = {
-   rsrc,
-   vindex,
-   LLVMBuildAdd(ctx->ac.builder, offset, 
immoffset, ""),
-   glc,
-   ctx->ac.i1false,
-   };
-   ret = ac_build_intrinsic(&ctx->ac, load_name, 
data_type, params, 5, 0);
+   int num_channels = util_next_power_of_two(load_bytes) / 
4;
+
+   ret = ac_build_buffer_load(&ctx->ac, rsrc, num_channels,
+  vindex, offset, immoffset, 0,
+  cache_policy & ac_glc, false,
+  false, false);
}
 
LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, 
ac_get_type_size(LLVMTypeOf(ret)));
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 04/12] ac/nir: set attrib flags for SSBO and image store operations

2019-03-13 Thread Samuel Pitoiset
For consistency regarding other store operations.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_nir_to_llvm.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index a7b3fdf64aa..ff29345ffe5 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1591,7 +1591,8 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
ctx->ac.i1false,
};
 ac_build_intrinsic(&ctx->ac, store_name,
-  ctx->ac.voidt, tbuffer_params, 10, 
0);
+  ctx->ac.voidt, tbuffer_params, 10,
+  
ac_get_store_intr_attribs(writeonly_memory));
} else {
switch (num_bytes) {
case 16: /* v4f32 */
@@ -1619,7 +1620,8 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
ctx->ac.i1false,  /* slc */
};
 ac_build_intrinsic(&ctx->ac, store_name,
-  ctx->ac.voidt, params, 6, 0);
+  ctx->ac.voidt, params, 6,
+  
ac_get_store_intr_attribs(writeonly_memory));
}
}
 }
@@ -2548,7 +2550,8 @@ static void visit_image_store(struct ac_nir_context *ctx,
params[4] = LLVMConstInt(ctx->ac.i1, 
!!(args.cache_policy & ac_glc), 0);
params[5] = ctx->ac.i1false;  /* slc */
}
-   ac_build_intrinsic(&ctx->ac, name, ctx->ac.voidt, params, 6, 0);
+   ac_build_intrinsic(&ctx->ac, name, ctx->ac.voidt, params, 6,
+  ac_get_store_intr_attribs(writeonly_memory));
} else {
args.opcode = ac_image_store;
 args.data[0] = ac_to_float(&ctx->ac, get_src(ctx, 
instr->src[3]));
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 11/12] ac: use new LLVM 8 intrinsic when storing 16-bit values

2019-03-13 Thread Samuel Pitoiset
v2: do not force enable IDXEN when unnecessary

Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c  | 111 
 src/amd/common/ac_llvm_build.h  |  26 
 src/amd/common/ac_nir_to_llvm.c |  26 ++--
 3 files changed, 142 insertions(+), 21 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 8d5682f6f7a..06dc1383121 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1554,6 +1554,117 @@ ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx,
  ac_get_load_intr_attribs(can_speculate));
 }
 
+static void
+ac_build_llvm8_tbuffer_store(struct ac_llvm_context *ctx,
+LLVMValueRef rsrc,
+LLVMValueRef vdata,
+LLVMValueRef vindex,
+LLVMValueRef voffset,
+LLVMValueRef soffset,
+unsigned num_channels,
+unsigned dfmt,
+unsigned nfmt,
+bool glc,
+bool slc,
+bool writeonly_memory,
+bool structurized)
+{
+   LLVMValueRef args[7];
+   int idx = 0;
+   args[idx++] = vdata;
+   args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
+   if (structurized)
+   args[idx++] = vindex ? vindex : ctx->i32_0;
+   args[idx++] = voffset ? voffset : ctx->i32_0;
+   args[idx++] = soffset ? soffset : ctx->i32_0;
+   args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
+   args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
+   unsigned func = CLAMP(num_channels, 1, 3) - 1;
+
+   const char *type_names[] = {"i32", "v2i32", "v4i32"};
+   const char *indexing_kind = structurized ? "struct" : "raw";
+   char name[256];
+
+   snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.store.%s",
+indexing_kind, type_names[func]);
+
+   ac_build_intrinsic(ctx, name, ctx->voidt, args, idx,
+  ac_get_store_intr_attribs(writeonly_memory));
+}
+
+void
+ac_build_tbuffer_store(struct ac_llvm_context *ctx,
+  LLVMValueRef rsrc,
+  LLVMValueRef vdata,
+  LLVMValueRef vindex,
+  LLVMValueRef voffset,
+  LLVMValueRef soffset,
+  LLVMValueRef immoffset,
+  unsigned num_channels,
+  unsigned dfmt,
+  unsigned nfmt,
+  bool glc,
+  bool slc,
+  bool writeonly_memory)
+{
+   if (HAVE_LLVM >= 0x800) {
+   bool structurized = vindex && vindex != ctx->i32_0;
+
+   voffset = LLVMBuildAdd(ctx->builder,
+  voffset ? voffset : ctx->i32_0,
+  immoffset, "");
+
+   ac_build_llvm8_tbuffer_store(ctx, rsrc, vdata, vindex, voffset,
+soffset, num_channels, dfmt, nfmt,
+glc, slc, writeonly_memory,
+structurized);
+   } else {
+   LLVMValueRef params[] = {
+   vdata,
+   rsrc,
+   vindex,
+   voffset ? voffset : ctx->i32_0,
+   soffset ? soffset : ctx->i32_0,
+   immoffset,
+   LLVMConstInt(ctx->i32, dfmt, false),
+   LLVMConstInt(ctx->i32, nfmt, false),
+   LLVMConstInt(ctx->i32, glc, false),
+   LLVMConstInt(ctx->i32, slc, false),
+   };
+   unsigned func = CLAMP(num_channels, 1, 3) - 1;
+   const char *type_names[] = {"i32", "v2i32", "v4i32"};
+   char name[256];
+
+   snprintf(name, sizeof(name), "llvm.amdgcn.tbuffer.store.%s",
+type_names[func]);
+
+   ac_build_intrinsic(ctx, name, ctx->voidt, params, 10,
+  ac_get_store_intr_attribs(writeonly_memory));
+   }
+}
+
+void
+ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
+LLVMValueRef rsrc,
+LLVMValueRef vdata,
+LLVMValueRef vindex,
+LLVMValueRef voffset,
+LLVMValueRef soffset,
+bool glc,
+bool slc,
+bool writeonly_memory)
+{
+   unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16;
+   unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
+
+   

[Mesa-dev] [PATCH v2 06/12] ac/nir: remove one useless check in visit_store_ssbo()

2019-03-13 Thread Samuel Pitoiset
Trivial.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_nir_to_llvm.c | 9 +++--
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index c10a0cce16f..55d3ce90ce4 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1567,12 +1567,9 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
}
 data = extract_vector_range(&ctx->ac, base_data, start, count);
 
-   if (start == 0) {
-   offset = base_offset;
-   } else {
-   offset = LLVMBuildAdd(ctx->ac.builder, base_offset,
- LLVMConstInt(ctx->ac.i32, start * 
elem_size_bytes, false), "");
-   }
+   offset = LLVMBuildAdd(ctx->ac.builder, base_offset,
+ LLVMConstInt(ctx->ac.i32, start * 
elem_size_bytes, false), "");
+
if (num_bytes == 2) {
store_name = "llvm.amdgcn.tbuffer.store.i32";
data_type = ctx->ac.i32;
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 10/12] ac: use new LLVM 8 intrinsics in ac_build_buffer_load()

2019-03-13 Thread Samuel Pitoiset
v2: - do not force enable IDXEN when unnecessary

Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 5d52d93d25a..8d5682f6f7a 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1412,6 +1412,16 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
return ac_build_gather_values(ctx, result, num_channels);
}
 
+   if (HAVE_LLVM >= 0x0800) {
+   bool structurized = vindex && vindex != ctx->i32_0;
+
+   return ac_build_llvm8_buffer_load_common(ctx, rsrc, vindex,
+offset, ctx->i32_0,
+num_channels, glc, slc,
+can_speculate, false,
+structurized);
+   }
+
return ac_build_buffer_load_common(ctx, rsrc, vindex, offset,
   num_channels, glc, slc,
   can_speculate, false);
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 00/12] ac: use LLVM 8 buffer intrinsics everywhere

2019-03-13 Thread Samuel Pitoiset
Hi,

This small series makes use of new LLVM 8 buffer intrinsics.
No CTS regressions on GFX8 with LLVM 7, 8 and master.

V2: fix use of IDXEN for GFX9

Please review,
Thanks!

Samuel Pitoiset (12):
  ac: do not force enable IDXEN for 16-bit SSBO loads
  ac: fix glc parameter use for new LLVM 8 typed buffer intrinsics
  ac: make use of ac_get_store_intr_attribs() where possible
  ac/nir: set attrib flags for SSBO and image store operations
  ac: add ac_build_buffer_store_format() helper
  ac/nir: remove one useless check in visit_store_ssbo()
  ac/nir: use new LLVM 8 intrinsics for SSBO atomic operations
  ac/nir: use ac_build_buffer_load() for SSBO load operations
  ac/nir: use ac_build_buffer_store_dword() for SSBO store operations
  ac: use new LLVM 8 intrinsics in ac_build_buffer_load()
  ac: use new LLVM 8 intrinsic when storing 16-bit values
  ac: use new LLVM 8 intrinsics in ac_build_buffer_store_dword()

 src/amd/common/ac_llvm_build.c  | 299 +++-
 src/amd/common/ac_llvm_build.h  |  39 -
 src/amd/common/ac_nir_to_llvm.c | 189 
 3 files changed, 365 insertions(+), 162 deletions(-)

-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 01/12] ac: do not force enable IDXEN for 16-bit SSBO loads

2019-03-13 Thread Samuel Pitoiset
The struct version enables IDXEN, while the raw one disables it.
When vindex is unused, the raw version is enough.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index bc64f0bb7e3..8960b5ad4ff 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1383,11 +1383,13 @@ ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
LLVMValueRef res;
 
if (HAVE_LLVM >= 0x0800) {
+   bool structurized = vindex && vindex != ctx->i32_0;
+
voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, "");
 
res = ac_build_llvm8_tbuffer_load(ctx, rsrc, vindex, voffset,
  soffset, 1, dfmt, nfmt, glc,
- false, true, true);
+ false, true, structurized);
} else {
const char *name = "llvm.amdgcn.tbuffer.load.i32";
LLVMTypeRef type = ctx->i32;
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] ac: do not force enable IDXEN for 16-bit SSBO loads

2019-03-13 Thread Samuel Pitoiset
The struct version enables IDXEN, while the raw one disables it.
When vindex is unused, the raw version is enough.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index bc64f0bb7e3..8960b5ad4ff 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1383,11 +1383,13 @@ ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
LLVMValueRef res;
 
if (HAVE_LLVM >= 0x0800) {
+   bool structurized = vindex && vindex != ctx->i32_0;
+
voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, "");
 
res = ac_build_llvm8_tbuffer_load(ctx, rsrc, vindex, voffset,
  soffset, 1, dfmt, nfmt, glc,
- false, true, true);
+ false, true, structurized);
} else {
const char *name = "llvm.amdgcn.tbuffer.load.i32";
LLVMTypeRef type = ctx->i32;
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v4 34/40] intel/compiler: validate region restrictions for half-float conversions

2019-03-13 Thread Iago Toral
On Tue, 2019-03-12 at 15:46 -0700, Francisco Jerez wrote:
> Iago Toral  writes:
> 
> > On Wed, 2019-03-06 at 09:21 +0100, Iago Toral wrote:
> > > On Tue, 2019-03-05 at 07:35 +0100, Iago Toral wrote:
> > > > On Mon, 2019-03-04 at 15:36 -0800, Francisco Jerez wrote:
> > > > > Iago Toral  writes:
> > > > > 
> > > > > > On Fri, 2019-03-01 at 19:04 -0800, Francisco Jerez wrote:
> > > > > > > Iago Toral  writes:
> > > > > > > 
> > > > > > > > On Thu, 2019-02-28 at 09:54 -0800, Francisco Jerez
> > > > > > > > wrote:
> > > > > > > > > Iago Toral  writes:
> > > > > > > > > 
> > > > > > > > > > On Wed, 2019-02-27 at 13:47 -0800, Francisco Jerez
> > > > > > > > > > wrote:
> > > > > > > > > > > Iago Toral  writes:
> > > > > > > > > > > 
> > > > > > > > > > > > On Tue, 2019-02-26 at 14:54 -0800, Francisco
> > > > > > > > > > > > Jerez
> > > > > > > > > > > > wrote:
> > > > > > > > > > > > > Iago Toral Quiroga 
> > > > > > > > > > > > > writes:
> > > > > > > > > > > > > 
> > > > > > > > > > > > > > ---
> > > > > > > > > > > > > >  src/intel/compiler/brw_eu_validate.c| 
> > > > > > > > > > > > > >  64
> > > > > > > > > > > > > > -
> > > > > > > > > > > > > >  src/intel/compiler/test_eu_validate.cpp |
> > > > > > > > > > > > > > 122
> > > > > > > > > > > > > > 
> > > > > > > > > > > > > >  2 files changed, 185 insertions(+), 1
> > > > > > > > > > > > > > deletion(-
> > > > > > > > > > > > > > )
> > > > > > > > > > > > > > 
> > > > > > > > > > > > > > diff --git
> > > > > > > > > > > > > > a/src/intel/compiler/brw_eu_validate.c
> > > > > > > > > > > > > > b/src/intel/compiler/brw_eu_validate.c
> > > > > > > > > > > > > > index 000a05cb6ac..203641fecb9 100644
> > > > > > > > > > > > > > --- a/src/intel/compiler/brw_eu_validate.c
> > > > > > > > > > > > > > +++ b/src/intel/compiler/brw_eu_validate.c
> > > > > > > > > > > > > > @@ -531,7 +531,69 @@
> > > > > > > > > > > > > > general_restrictions_based_on_operand_types
> > > > > > > > > > > > > > (con
> > > > > > > > > > > > > > st
> > > > > > > > > > > > > > struct
> > > > > > > > > > > > > > gen_device_info *devinf
> > > > > > > > > > > > > > exec_type_size == 8 &&
> > > > > > > > > > > > > > dst_type_size ==
> > > > > > > > > > > > > > 4)
> > > > > > > > > > > > > >dst_type_size = 8;
> > > > > > > > > > > > > >  
> > > > > > > > > > > > > > -   if (exec_type_size > dst_type_size) {
> > > > > > > > > > > > > > +   /* From the BDW+ PRM:
> > > > > > > > > > > > > > +*
> > > > > > > > > > > > > > +*"There is no direct conversion
> > > > > > > > > > > > > > from
> > > > > > > > > > > > > > HF
> > > > > > > > > > > > > > to
> > > > > > > > > > > > > > DF
> > > > > > > > > > > > > > or
> > > > > > > > > > > > > > DF to
> > > > > > > > > > > > > > HF.
> > > > > > > > > > > > > > +* There is no direct conversion
> > > > > > > > > > > > > > from
> > > > > > > > > > > > > > HF
> > > > > > > > > > > > > > to
> > > > > > > > > > > > > > Q/UQ or
> > > > > > > > > > > > > > Q/UQ to
> > > > > > > > > > > > > > HF."
> > > > > > > > > > > > > > +*/
> > > > > > > > > > > > > > +   enum brw_reg_type src0_type =
> > > > > > > > > > > > > > brw_inst_src0_type(devinfo,
> > > > > > > > > > > > > > inst);
> > > > > > > > > > > > > > +   ERROR_IF(brw_inst_opcode(devinfo, inst)
> > > > > > > > > > > > > > ==
> > > > > > > > > > > > > > BRW_OPCODE_MOV
> > > > > > > > > > > > > > &&
> > > > > > > > > > > > > 
> > > > > > > > > > > > > Why is only the MOV instruction handled here
> > > > > > > > > > > > > and
> > > > > > > > > > > > > below?  Aren't
> > > > > > > > > > > > > other
> > > > > > > > > > > > > instructions able to do implicit
> > > > > > > > > > > > > conversions?  Probably
> > > > > > > > > > > > > means
> > > > > > > > > > > > > you
> > > > > > > > > > > > > need
> > > > > > > > > > > > > to deal with two sources rather than one.
> > > > > > > > > > > > 
> > > > > > > > > > > > This comes from the programming notes of the
> > > > > > > > > > > > MOV
> > > > > > > > > > > > instruction
> > > > > > > > > > > > (Volume
> > > > > > > > > > > > 2a, Command Reference - Instructions - MOV), so
> > > > > > > > > > > > it
> > > > > > > > > > > > is
> > > > > > > > > > > > described
> > > > > > > > > > > > specifically for the MOV instruction. I should
> > > > > > > > > > > > probably
> > > > > > > > > > > > have
> > > > > > > > > > > > made
> > > > > > > > > > > > this
> > > > > > > > > > > > clear in the comment.
> > > > > > > > > > > > 
> > > > > > > > > > > 
> > > > > > > > > > > Maybe the one above is specified in the MOV page
> > > > > > > > > > > only,
> > > > > > > > > > > probably
> > > > > > > > > > > due
> > > > > > > > > > > to
> > > > > > > > > > > an oversight (If these restrictions were really
> > > > > > > > > > > specific
> > > > > > > > > > > to
> > > > > > > > > > > the
> > > > > > > > > > > MOV
> > > > > > > > > > > instruction, what would prevent you from
> > > > > > > > > > > implementing
> > > > 

Re: [Mesa-dev] [PATCH v4 34/40] intel/compiler: validate region restrictions for half-float conversions

2019-03-13 Thread Iago Toral
On Tue, 2019-03-12 at 15:44 -0700, Francisco Jerez wrote:
> Iago Toral  writes:
> 
> > On Tue, 2019-03-05 at 07:35 +0100, Iago Toral wrote:
> > > On Mon, 2019-03-04 at 15:36 -0800, Francisco Jerez wrote:
> > > > Iago Toral  writes:
> > > > 
> > > > > On Fri, 2019-03-01 at 19:04 -0800, Francisco Jerez wrote:
> > > > > > Iago Toral  writes:
> > > > > > 
> > > > > > > On Thu, 2019-02-28 at 09:54 -0800, Francisco Jerez wrote:
> > > > > > > > Iago Toral  writes:
> > > > > > > > 
> > > > > > > > > On Wed, 2019-02-27 at 13:47 -0800, Francisco Jerez
> > > > > > > > > wrote:
> > > > > > > > > > Iago Toral  writes:
> > > > > > > > > > 
> > > > > > > > > > > On Tue, 2019-02-26 at 14:54 -0800, Francisco
> > > > > > > > > > > Jerez
> > > > > > > > > > > wrote:
> > > > > > > > > > > > Iago Toral Quiroga  writes:
> > > > > > > > > > > > 
> > > > > > > > > > > > > ---
> > > > > > > > > > > > >  src/intel/compiler/brw_eu_validate.c|  6
> > > > > > > > > > > > > 4
> > > > > > > > > > > > > -
> > > > > > > > > > > > >  src/intel/compiler/test_eu_validate.cpp |
> > > > > > > > > > > > > 122
> > > > > > > > > > > > > 
> > > > > > > > > > > > >  2 files changed, 185 insertions(+), 1
> > > > > > > > > > > > > deletion(-
> > > > > > > > > > > > > )
> > > > > > > > > > > > > 
> > > > > > > > > > > > > diff --git
> > > > > > > > > > > > > a/src/intel/compiler/brw_eu_validate.c
> > > > > > > > > > > > > b/src/intel/compiler/brw_eu_validate.c
> > > > > > > > > > > > > index 000a05cb6ac..203641fecb9 100644
> > > > > > > > > > > > > --- a/src/intel/compiler/brw_eu_validate.c
> > > > > > > > > > > > > +++ b/src/intel/compiler/brw_eu_validate.c
> > > > > > > > > > > > > @@ -531,7 +531,69 @@
> > > > > > > > > > > > > general_restrictions_based_on_operand_types(c
> > > > > > > > > > > > > onst
> > > > > > > > > > > > > struct
> > > > > > > > > > > > > gen_device_info *devinf
> > > > > > > > > > > > > exec_type_size == 8 && dst_type_size
> > > > > > > > > > > > > ==
> > > > > > > > > > > > > 4)
> > > > > > > > > > > > >dst_type_size = 8;
> > > > > > > > > > > > >  
> > > > > > > > > > > > > -   if (exec_type_size > dst_type_size) {
> > > > > > > > > > > > > +   /* From the BDW+ PRM:
> > > > > > > > > > > > > +*
> > > > > > > > > > > > > +*"There is no direct conversion from
> > > > > > > > > > > > > HF
> > > > > > > > > > > > > to
> > > > > > > > > > > > > DF
> > > > > > > > > > > > > or
> > > > > > > > > > > > > DF to
> > > > > > > > > > > > > HF.
> > > > > > > > > > > > > +* There is no direct conversion from
> > > > > > > > > > > > > HF
> > > > > > > > > > > > > to
> > > > > > > > > > > > > Q/UQ or
> > > > > > > > > > > > > Q/UQ to
> > > > > > > > > > > > > HF."
> > > > > > > > > > > > > +*/
> > > > > > > > > > > > > +   enum brw_reg_type src0_type =
> > > > > > > > > > > > > brw_inst_src0_type(devinfo,
> > > > > > > > > > > > > inst);
> > > > > > > > > > > > > +   ERROR_IF(brw_inst_opcode(devinfo, inst)
> > > > > > > > > > > > > ==
> > > > > > > > > > > > > BRW_OPCODE_MOV
> > > > > > > > > > > > > &&
> > > > > > > > > > > > 
> > > > > > > > > > > > Why is only the MOV instruction handled here
> > > > > > > > > > > > and
> > > > > > > > > > > > below?  Aren't
> > > > > > > > > > > > other
> > > > > > > > > > > > instructions able to do implicit
> > > > > > > > > > > > conversions?  Probably
> > > > > > > > > > > > means
> > > > > > > > > > > > you
> > > > > > > > > > > > need
> > > > > > > > > > > > to deal with two sources rather than one.
> > > > > > > > > > > 
> > > > > > > > > > > This comes from the programming notes of the MOV
> > > > > > > > > > > instruction
> > > > > > > > > > > (Volume
> > > > > > > > > > > 2a, Command Reference - Instructions - MOV), so
> > > > > > > > > > > it is
> > > > > > > > > > > described
> > > > > > > > > > > specifically for the MOV instruction. I should
> > > > > > > > > > > probably
> > > > > > > > > > > have
> > > > > > > > > > > made
> > > > > > > > > > > this
> > > > > > > > > > > clear in the comment.
> > > > > > > > > > > 
> > > > > > > > > > 
> > > > > > > > > > Maybe the one above is specified in the MOV page
> > > > > > > > > > only,
> > > > > > > > > > probably
> > > > > > > > > > due
> > > > > > > > > > to
> > > > > > > > > > an oversight (If these restrictions were really
> > > > > > > > > > specific
> > > > > > > > > > to
> > > > > > > > > > the
> > > > > > > > > > MOV
> > > > > > > > > > instruction, what would prevent you from
> > > > > > > > > > implementing
> > > > > > > > > > such
> > > > > > > > > > conversions
> > > > > > > > > > through a different instruction?  E.g. "ADD dst:df,
> > > > > > > > > > src:hf,
> > > > > > > > > > 0"
> > > > > > > > > > which
> > > > > > > > > > would be substantially more efficient than what
> > > > > > > > > > you're
> > > > > > > > > > doing
> > > > > > > > > > in
> > > > > > > > > > PATCH
> > > > > > > > > > 02)
> > > > > > > > > 
> > > > > > > > > Instructions that 

Re: [Mesa-dev] [PATCH 11/11] ac: use new LLVM 8 intrinsics in ac_build_buffer_store_dword()

2019-03-13 Thread Timothy Arceri

On 13/3/19 7:07 pm, Samuel Pitoiset wrote:


On 3/13/19 9:01 AM, Samuel Pitoiset wrote:


On 3/13/19 1:26 AM, Timothy Arceri wrote:

This one causes 2000+ piglit tests to fail on radeonsi. For example:

./bin/shader_runner 
generated_tests/spec/arb_gpu_shader_fp64/execution/conversion/geom-conversion-explicit-bool-double.shader_test 
-auto -fbo

What chip? what LLVM?

GFX9 I assume.


VEGA64 llvm 9



On 13/3/19 3:19 am, Samuel Pitoiset wrote:

New buffer intrinsics have a separate soffset parameter.

Signed-off-by: Samuel Pitoiset 
---
  src/amd/common/ac_llvm_build.c | 66 
++

  1 file changed, 26 insertions(+), 40 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c 
b/src/amd/common/ac_llvm_build.c

index ce6639d49bf..8ed5199da55 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1227,59 +1227,45 @@ ac_build_buffer_store_dword(struct 
ac_llvm_context *ctx,

  if (!swizzle_enable_hint) {
  LLVMValueRef offset = soffset;
  -    static const char *types[] = {"f32", "v2f32", "v4f32"};
-
  if (inst_offset)
  offset = LLVMBuildAdd(ctx->builder, offset,
    LLVMConstInt(ctx->i32, inst_offset, 0), 
"");

-    if (voffset)
-    offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");
-
-    LLVMValueRef args[] = {
-    ac_to_float(ctx, vdata),
-    LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
-    ctx->i32_0,
-    offset,
-    LLVMConstInt(ctx->i1, glc, 0),
-    LLVMConstInt(ctx->i1, slc, 0),
-    };
-
-    char name[256];
-    snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.%s",
- types[CLAMP(num_channels, 1, 3) - 1]);
  -    ac_build_intrinsic(ctx, name, ctx->voidt,
-   args, ARRAY_SIZE(args),
- ac_get_store_intr_attribs(writeonly_memory));
+    if (HAVE_LLVM >= 0x800) {
+    ac_build_llvm8_buffer_store_common(ctx, rsrc,
+   ac_to_float(ctx, vdata),
+   ctx->i32_0,
+   voffset, offset,
+   num_channels,
+   glc, slc,
+   writeonly_memory,
+   false, true);
+    } else {
+    if (voffset)
+    offset = LLVMBuildAdd(ctx->builder, offset, 
voffset, "");

+
+    ac_build_buffer_store_common(ctx, rsrc,
+ ac_to_float(ctx, vdata),
+ ctx->i32_0, offset,
+ num_channels, glc, slc,
+ writeonly_memory, false);
+    }
  return;
  }
  -    static const unsigned dfmt[] = {
+    static const unsigned dfmts[] = {
  V_008F0C_BUF_DATA_FORMAT_32,
  V_008F0C_BUF_DATA_FORMAT_32_32,
  V_008F0C_BUF_DATA_FORMAT_32_32_32,
  V_008F0C_BUF_DATA_FORMAT_32_32_32_32
  };
-    static const char *types[] = {"i32", "v2i32", "v4i32"};
-    LLVMValueRef args[] = {
-    vdata,
-    LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
-    ctx->i32_0,
-    voffset ? voffset : ctx->i32_0,
-    soffset,
-    LLVMConstInt(ctx->i32, inst_offset, 0),
-    LLVMConstInt(ctx->i32, dfmt[num_channels - 1], 0),
-    LLVMConstInt(ctx->i32, V_008F0C_BUF_NUM_FORMAT_UINT, 0),
-    LLVMConstInt(ctx->i1, glc, 0),
-    LLVMConstInt(ctx->i1, slc, 0),
-    };
-    char name[256];
-    snprintf(name, sizeof(name), "llvm.amdgcn.tbuffer.store.%s",
- types[CLAMP(num_channels, 1, 3) - 1]);
+    unsigned dfmt = dfmts[num_channels - 1];
+    unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
+    LLVMValueRef immoffset = LLVMConstInt(ctx->i32, inst_offset, 0);
  -    ac_build_intrinsic(ctx, name, ctx->voidt,
-   args, ARRAY_SIZE(args),
-   ac_get_store_intr_attribs(writeonly_memory));
+    ac_build_tbuffer_store(ctx, rsrc, vdata, ctx->i32_0, voffset, 
soffset,

+   immoffset, num_channels, dfmt, nfmt, glc, slc,
+   writeonly_memory);
  }
    static LLVMValueRef


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 11/11] ac: use new LLVM 8 intrinsics in ac_build_buffer_store_dword()

2019-03-13 Thread Samuel Pitoiset


On 3/13/19 9:01 AM, Samuel Pitoiset wrote:


On 3/13/19 1:26 AM, Timothy Arceri wrote:

This one causes 2000+ piglit tests to fail on radeonsi. For example:

./bin/shader_runner 
generated_tests/spec/arb_gpu_shader_fp64/execution/conversion/geom-conversion-explicit-bool-double.shader_test 
-auto -fbo

What chip? what LLVM?

GFX9 I assume.


On 13/3/19 3:19 am, Samuel Pitoiset wrote:

New buffer intrinsics have a separate soffset parameter.

Signed-off-by: Samuel Pitoiset 
---
  src/amd/common/ac_llvm_build.c | 66 
++

  1 file changed, 26 insertions(+), 40 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c 
b/src/amd/common/ac_llvm_build.c

index ce6639d49bf..8ed5199da55 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1227,59 +1227,45 @@ ac_build_buffer_store_dword(struct 
ac_llvm_context *ctx,

  if (!swizzle_enable_hint) {
  LLVMValueRef offset = soffset;
  -    static const char *types[] = {"f32", "v2f32", "v4f32"};
-
  if (inst_offset)
  offset = LLVMBuildAdd(ctx->builder, offset,
    LLVMConstInt(ctx->i32, inst_offset, 0), 
"");

-    if (voffset)
-    offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");
-
-    LLVMValueRef args[] = {
-    ac_to_float(ctx, vdata),
-    LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
-    ctx->i32_0,
-    offset,
-    LLVMConstInt(ctx->i1, glc, 0),
-    LLVMConstInt(ctx->i1, slc, 0),
-    };
-
-    char name[256];
-    snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.%s",
- types[CLAMP(num_channels, 1, 3) - 1]);
  -    ac_build_intrinsic(ctx, name, ctx->voidt,
-   args, ARRAY_SIZE(args),
- ac_get_store_intr_attribs(writeonly_memory));
+    if (HAVE_LLVM >= 0x800) {
+    ac_build_llvm8_buffer_store_common(ctx, rsrc,
+   ac_to_float(ctx, vdata),
+   ctx->i32_0,
+   voffset, offset,
+   num_channels,
+   glc, slc,
+   writeonly_memory,
+   false, true);
+    } else {
+    if (voffset)
+    offset = LLVMBuildAdd(ctx->builder, offset, 
voffset, "");

+
+    ac_build_buffer_store_common(ctx, rsrc,
+ ac_to_float(ctx, vdata),
+ ctx->i32_0, offset,
+ num_channels, glc, slc,
+ writeonly_memory, false);
+    }
  return;
  }
  -    static const unsigned dfmt[] = {
+    static const unsigned dfmts[] = {
  V_008F0C_BUF_DATA_FORMAT_32,
  V_008F0C_BUF_DATA_FORMAT_32_32,
  V_008F0C_BUF_DATA_FORMAT_32_32_32,
  V_008F0C_BUF_DATA_FORMAT_32_32_32_32
  };
-    static const char *types[] = {"i32", "v2i32", "v4i32"};
-    LLVMValueRef args[] = {
-    vdata,
-    LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
-    ctx->i32_0,
-    voffset ? voffset : ctx->i32_0,
-    soffset,
-    LLVMConstInt(ctx->i32, inst_offset, 0),
-    LLVMConstInt(ctx->i32, dfmt[num_channels - 1], 0),
-    LLVMConstInt(ctx->i32, V_008F0C_BUF_NUM_FORMAT_UINT, 0),
-    LLVMConstInt(ctx->i1, glc, 0),
-    LLVMConstInt(ctx->i1, slc, 0),
-    };
-    char name[256];
-    snprintf(name, sizeof(name), "llvm.amdgcn.tbuffer.store.%s",
- types[CLAMP(num_channels, 1, 3) - 1]);
+    unsigned dfmt = dfmts[num_channels - 1];
+    unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
+    LLVMValueRef immoffset = LLVMConstInt(ctx->i32, inst_offset, 0);
  -    ac_build_intrinsic(ctx, name, ctx->voidt,
-   args, ARRAY_SIZE(args),
-   ac_get_store_intr_attribs(writeonly_memory));
+    ac_build_tbuffer_store(ctx, rsrc, vdata, ctx->i32_0, voffset, 
soffset,

+   immoffset, num_channels, dfmt, nfmt, glc, slc,
+   writeonly_memory);
  }
    static LLVMValueRef


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 11/11] ac: use new LLVM 8 intrinsics in ac_build_buffer_store_dword()

2019-03-13 Thread Samuel Pitoiset


On 3/13/19 1:26 AM, Timothy Arceri wrote:

This one causes 2000+ piglit tests to fail on radeonsi. For example:

./bin/shader_runner 
generated_tests/spec/arb_gpu_shader_fp64/execution/conversion/geom-conversion-explicit-bool-double.shader_test 
-auto -fbo

What chip? what LLVM?


On 13/3/19 3:19 am, Samuel Pitoiset wrote:

New buffer intrinsics have a separate soffset parameter.

Signed-off-by: Samuel Pitoiset 
---
  src/amd/common/ac_llvm_build.c | 66 ++
  1 file changed, 26 insertions(+), 40 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c 
b/src/amd/common/ac_llvm_build.c

index ce6639d49bf..8ed5199da55 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1227,59 +1227,45 @@ ac_build_buffer_store_dword(struct 
ac_llvm_context *ctx,

  if (!swizzle_enable_hint) {
  LLVMValueRef offset = soffset;
  -    static const char *types[] = {"f32", "v2f32", "v4f32"};
-
  if (inst_offset)
  offset = LLVMBuildAdd(ctx->builder, offset,
    LLVMConstInt(ctx->i32, inst_offset, 0), "");
-    if (voffset)
-    offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");
-
-    LLVMValueRef args[] = {
-    ac_to_float(ctx, vdata),
-    LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
-    ctx->i32_0,
-    offset,
-    LLVMConstInt(ctx->i1, glc, 0),
-    LLVMConstInt(ctx->i1, slc, 0),
-    };
-
-    char name[256];
-    snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.%s",
- types[CLAMP(num_channels, 1, 3) - 1]);
  -    ac_build_intrinsic(ctx, name, ctx->voidt,
-   args, ARRAY_SIZE(args),
- ac_get_store_intr_attribs(writeonly_memory));
+    if (HAVE_LLVM >= 0x800) {
+    ac_build_llvm8_buffer_store_common(ctx, rsrc,
+   ac_to_float(ctx, vdata),
+   ctx->i32_0,
+   voffset, offset,
+   num_channels,
+   glc, slc,
+   writeonly_memory,
+   false, true);
+    } else {
+    if (voffset)
+    offset = LLVMBuildAdd(ctx->builder, offset, voffset, 
"");

+
+    ac_build_buffer_store_common(ctx, rsrc,
+ ac_to_float(ctx, vdata),
+ ctx->i32_0, offset,
+ num_channels, glc, slc,
+ writeonly_memory, false);
+    }
  return;
  }
  -    static const unsigned dfmt[] = {
+    static const unsigned dfmts[] = {
  V_008F0C_BUF_DATA_FORMAT_32,
  V_008F0C_BUF_DATA_FORMAT_32_32,
  V_008F0C_BUF_DATA_FORMAT_32_32_32,
  V_008F0C_BUF_DATA_FORMAT_32_32_32_32
  };
-    static const char *types[] = {"i32", "v2i32", "v4i32"};
-    LLVMValueRef args[] = {
-    vdata,
-    LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
-    ctx->i32_0,
-    voffset ? voffset : ctx->i32_0,
-    soffset,
-    LLVMConstInt(ctx->i32, inst_offset, 0),
-    LLVMConstInt(ctx->i32, dfmt[num_channels - 1], 0),
-    LLVMConstInt(ctx->i32, V_008F0C_BUF_NUM_FORMAT_UINT, 0),
-    LLVMConstInt(ctx->i1, glc, 0),
-    LLVMConstInt(ctx->i1, slc, 0),
-    };
-    char name[256];
-    snprintf(name, sizeof(name), "llvm.amdgcn.tbuffer.store.%s",
- types[CLAMP(num_channels, 1, 3) - 1]);
+    unsigned dfmt = dfmts[num_channels - 1];
+    unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
+    LLVMValueRef immoffset = LLVMConstInt(ctx->i32, inst_offset, 0);
  -    ac_build_intrinsic(ctx, name, ctx->voidt,
-   args, ARRAY_SIZE(args),
-   ac_get_store_intr_attribs(writeonly_memory));
+    ac_build_tbuffer_store(ctx, rsrc, vdata, ctx->i32_0, voffset, 
soffset,

+   immoffset, num_channels, dfmt, nfmt, glc, slc,
+   writeonly_memory);
  }
    static LLVMValueRef


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 109898] to much time take to load

2019-03-13 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109898

Daniel Stone  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
Product|a big freedesktop.org fly   |Spam
   |ribbon  |
 Resolution|--- |INVALID
  Component|/dev/null   |Two

-- 
You are receiving this mail because:
You are on the CC list for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] RFC: Workaround for pthread_setaffinity_np() seccomp filtering

2019-03-13 Thread Mathias Fröhlich
Marek, Marc-Andre,

On Wednesday, 13 March 2019 00:03:26 CET Marek Olšák wrote:
> The env var workaround is fine.
> 
> Thread affinity is used for cache topology related optimizations. I think
> it's a mistake to treat it only as a resource allocation tool.

As a shorter-term solution to the problem, one idea that comes to my mind:

Can we check whether the currently set thread affinity mask still contains the
cpu we are aiming for, and narrow the mask down to our cpu if we can do
that purely by narrowing? If we would need to assign our thread to a cpu that
we are no longer bound to, we just do nothing.

That would obviously require that we can still call into pthread_setaffinity_np
without being just killed straight away because we touch something that
somebody else wants to control. And that we even succeed in just narrowing
down the allowed set of cpus.
Marc-Andre, would that still work with qemu then?

Of course this still leaves a small race condition open if somebody changes the
affinity mask of the current thread from outside of our linux task, in between
our calls to pthread_getaffinity_np and pthread_setaffinity_np. Then our set
affinity operation may no longer be a narrowing one, because of another set
operation that came in between, and we may get killed then.
... which is another argument against just killing. But ok ...
IMO this condition happens sufficiently seldom to accept that.

Could that solve our problem??

best
Mathias


> 
> Marek
> 
> On Tue, Mar 12, 2019, 1:59 AM Marc-André Lureau 
> wrote:
> 
> > Hi
> >
> > On Fri, Mar 1, 2019 at 12:13 PM Mathias Fröhlich
> >  wrote:
> > >
> > > On Friday, 1 March 2019 12:15:08 CET Eero Tamminen wrote:
> > > > Hi,
> > > >
> > > > On 1.3.2019 11.12, Michel Dänzer wrote:
> > > > > On 2019-02-28 8:41 p.m., Marek Olšák wrote:
> > > > >>> On Thu, Feb 28, 2019 at 1:37 PM Eero Tamminen <
> > eero.t.tammi...@intel.com>
> > > >  Why distro versions of Qemu filter sched_setaffinity() syscall?
> > > > >>>
> > > > >>> (https://bugs.launchpad.net/ubuntu/+source/qemu/+bug/1815889)
> > > > >>>
> > > > >>> Daniel Berrange (berrange) wrote on 2019-02-27: #19
> > > > >>>
> > > > >>> "IMHO that mesa change is not valid. It is settings its affinity to
> > > > >>> run on all threads which is definitely *NOT* something we want to
> > be
> > > > >>> allowed. Management applications want to control which CPUs QEMU
> > runs
> > > > >>> on, and as such Mesa should honour the CPU placement that the QEMU
> > > > >>> process has.
> > > > >>>
> > > > >>> This is a great example of why QEMU wants to use seccomp to block
> > > > >>> affinity changes to prevent something silently trying to use more
> > CPUs
> > > > >>> than are assigned to this QEMU."
> > > > >>>
> > > > >>
> > > > >> Mesa uses thread affinity to optimize memory access performance on
> > some
> > > > >> CPUs (see util_pin_thread_to_L3). Other places in Mesa need to
> > restore the
> > > > >> original thread affinity for some child threads. Additionally, if
> > games
> > > > >> limit the thread affinity, Mesa needs to restore the full thread
> > affinity
> > > > >> for some of its child threads.
> > > > >
> > > > > The last part sounds like Mesa clearly overstepping its authority.
> > > > >
> > > > >
> > > > >> In essence, the thread affinity should only be considered a hint
> > for the
> > > > >> kernel for optimal performance. There is no reason to kill the
> > process if
> > > > >> it's disallowed. Just ignore the call or modify the thread mask to
> > make it
> > > > >> legal.
> > > > >
> > > > > The fundamental issue here is that Mesa is using the thread affinity
> > API
> > > > > for something else than it's intended for. If there was an API for
> > what
> > > > > Mesa wants (encouraging certain sets of threads to run on
> > topologically
> > > > > close cores), there should be no need to block that.
> > > >
> > > > Why such process needs to be killed instead the request being masked
> > > > suitably, is there some program that breaks subtly if affinity request
> > > > is masked (and that being worse than the program being killed)?
> > >
> > > But that is still a situation that could be nicely handled with a
> > > EPERM error return. Way better than just kill a process.
> > > That 'badly affected' program still can call abort then.
> > > But nicely working programs don't get just killed then!!
> >
> >
> > Returning an error seems less secure than prohibiting it completely.
> > And it may lead to subtle bugs in rarely tested code paths.
> >
> > It's legitimate that QEMU and management layers want to prevent
> > arbitrary code from changing resource allocation etc.
> >
> > There is no easy way I can think of for mesa (and other libraries) to
> > probe the seccomp filters and associated action.
> >
> > So we need a way to tell mesa not to call setaffinity() (and other
> > syscalls). MESA_NO_THREAD_AFFINITY or MESA_NO_SYSCALLS=setaffinity,...
> > seem like a relatively easy way to go.
> >
> > thanks
> >
> >
> 

Re: [Mesa-dev] [PATCH] RFC: Workaround for pthread_setaffinity_np() seccomp filtering

2019-03-13 Thread Mathias Fröhlich
Hi,

On Tuesday, 12 March 2019 09:59:17 CET Marc-André Lureau wrote:
> Hi
> 
> On Fri, Mar 1, 2019 at 12:13 PM Mathias Fröhlich
>  wrote:
> >
> > On Friday, 1 March 2019 12:15:08 CET Eero Tamminen wrote:
> > > Hi,
> > >
> > > On 1.3.2019 11.12, Michel Dänzer wrote:
> > > > On 2019-02-28 8:41 p.m., Marek Olšák wrote:
> > > >>> On Thu, Feb 28, 2019 at 1:37 PM Eero Tamminen 
> > > >>> 
> > >  Why distro versions of Qemu filter sched_setaffinity() syscall?
> > > >>>
> > > >>> (https://bugs.launchpad.net/ubuntu/+source/qemu/+bug/1815889)
> > > >>>
> > > >>> Daniel Berrange (berrange) wrote on 2019-02-27: #19
> > > >>>
> > > >>> "IMHO that mesa change is not valid. It is settings its affinity to
> > > >>> run on all threads which is definitely *NOT* something we want to be
> > > >>> allowed. Management applications want to control which CPUs QEMU runs
> > > >>> on, and as such Mesa should honour the CPU placement that the QEMU
> > > >>> process has.
> > > >>>
> > > >>> This is a great example of why QEMU wants to use seccomp to block
> > > >>> affinity changes to prevent something silently trying to use more CPUs
> > > >>> than are assigned to this QEMU."
> > > >>>
> > > >>
> > > >> Mesa uses thread affinity to optimize memory access performance on some
> > > >> CPUs (see util_pin_thread_to_L3). Other places in Mesa need to restore 
> > > >> the
> > > >> original thread affinity for some child threads. Additionally, if games
> > > >> limit the thread affinity, Mesa needs to restore the full thread 
> > > >> affinity
> > > >> for some of its child threads.
> > > >
> > > > The last part sounds like Mesa clearly overstepping its authority.
> > > >
> > > >
> > > >> In essence, the thread affinity should only be considered a hint for 
> > > >> the
> > > >> kernel for optimal performance. There is no reason to kill the process 
> > > >> if
> > > >> it's disallowed. Just ignore the call or modify the thread mask to 
> > > >> make it
> > > >> legal.
> > > >
> > > > The fundamental issue here is that Mesa is using the thread affinity API
> > > > for something else than it's intended for. If there was an API for what
> > > > Mesa wants (encouraging certain sets of threads to run on topologically
> > > > close cores), there should be no need to block that.
> > >
> > > Why such process needs to be killed instead the request being masked
> > > suitably, is there some program that breaks subtly if affinity request
> > > is masked (and that being worse than the program being killed)?
> >
> > But that is still a situation that could be nicely handled with a
> > EPERM error return. Way better than just kill a process.
> > That 'badly affected' program still can call abort then.
> > But nicely working programs don't get just killed then!!
> 
> 
> Returning an error seems less secure than prohibiting it completely.
> And it may lead to subtle bugs in rarely tested code paths.
> 
> It's legitimate that QEMU and management layers want to prevent
> arbitrary code from changing resource allocation etc.

I *never* saw this api as resource allocation.

Such a call ultimately dates back to IRIX threads (the api *before* pthreads
on those very old OpenGL dinosaurs from Silicon Graphics), where it was used
in pretty much exactly the kind of context Marek wants it for:
to make use of hardware topology, so that you can access specific hardware
faster from specific cpus, or to aim for cache locality between threads that
exchange lots of data between each other. Think of high performance computing
applications for cache locality, or VR application middle end libraries for
hardware OpenGL access.
That api was replaced on that ancient operating system by pthreads, which
contained the equivalent api call. And later on the linux pthread implementation
gained the equivalent pthread_setaffinity_np call when SMP linux systems got
used more often.
This means that if you just kill an application that tries to optimize for valid
uses, using an API that used to work for that purpose for years, you will just
break existing APIs.

Besides breaking existing behavior, just think of what you are doing from an
application's point of view. Say that, as an application writer, I want to
change this property. Now I know that if I touch that specific property I may
just be dead. So I want to know whether I can touch this property without being
dead. But there is no way to find that out.
Well, this basically means the api is effectively unusable, because I can't
tolerate that I am just dead when trying to change something for the better!!!
What you want as an application writer is to change that value and, if that does
not work, handle that accordingly, where 'handle that' can include, but is not
limited to:
Either silently handle that internally in the application, maybe using another
algorithm that fits that case.
Present the user some user interface message that you need to bind threads to 
cpus and the
operating system does not allow that and that re 

[Mesa-dev] [Bug 109939] After upgrade mesa to 19.0.0 stop working the game Rise of the Tomb Raider

2019-03-13 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109939

Raja Massab  changed:

   What|Removed |Added

 CC||massabnaeem...@gmail.com

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 109927] Xorg segfault when a web browser is opened

2019-03-13 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109927

Raja Massab  changed:

   What|Removed |Added

 CC||massabnaeem...@gmail.com

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] anv: Stop using VK_TRUE/FALSE

2019-03-13 Thread Samuel Iglesias Gonsálvez
Reviewed-by: Samuel Iglesias Gonsálvez 

Sam

On Tue, 2019-03-12 at 15:24 -0500, Jason Ekstrand wrote:
> We've been fairly inconsistent about this so we should really choose
> whether we're going to use VK_TRUE/FALSE or the C boolean
> values.  The
> Vulkan #defines are set to 1 and 0 respectively so it's the same
> value
> as C gives you when you cast a boolean expression to an
> integer.  Since
> there are several places where we set a VkBool32 to a C logical
> expression, let's just embrace C booleans and stop using the VK
> defines.
> ---
>  src/intel/vulkan/anv_device.c | 42 +--
> 
>  1 file changed, 21 insertions(+), 21 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_device.c
> b/src/intel/vulkan/anv_device.c
> index 729cceb3e32..83fa3936c19 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -833,7 +833,7 @@ VkResult anv_EnumeratePhysicalDeviceGroups(
>memset(p->physicalDevices, 0, sizeof(p->physicalDevices));
>p->physicalDevices[0] =
>   anv_physical_device_to_handle(&instance->physicalDevice);
> -  p->subsetAllocation = VK_FALSE;
> +  p->subsetAllocation = false;
>  
>vk_foreach_struct(ext, p->pNext)
>   anv_debug_ignored_stype(ext->sType);
> @@ -967,7 +967,7 @@ void anv_GetPhysicalDeviceFeatures2(
>case
> VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
>   VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
>  (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
> - features->depthClipEnable = VK_TRUE;
> + features->depthClipEnable = true;
>   break;
>}
>  
> @@ -990,7 +990,7 @@ void anv_GetPhysicalDeviceFeatures2(
>  
>case
> VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
>   VkPhysicalDeviceProtectedMemoryFeatures *features = (void
> *)ext;
> - features->protectedMemory = VK_FALSE;
> + features->protectedMemory = false;
>   break;
>}
>  
> @@ -1024,23 +1024,23 @@ void anv_GetPhysicalDeviceFeatures2(
>case
> VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
>   VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
>  (VkPhysicalDeviceTransformFeedbackFeaturesEXT *)ext;
> - features->transformFeedback = VK_TRUE;
> - features->geometryStreams = VK_TRUE;
> + features->transformFeedback = true;
> + features->geometryStreams = true;
>   break;
>}
>  
>case
> VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_E
> XT: {
>   VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features
> =
>  (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT
> *)ext;
> - features->vertexAttributeInstanceRateDivisor = VK_TRUE;
> - features->vertexAttributeInstanceRateZeroDivisor = VK_TRUE;
> + features->vertexAttributeInstanceRateDivisor = true;
> + features->vertexAttributeInstanceRateZeroDivisor = true;
>   break;
>}
>  
>case
> VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: {
>   VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *features =
>  (VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *)ext;
> - features->ycbcrImageArrays = VK_TRUE;
> + features->ycbcrImageArrays = true;
>   break;
>}
>  
> @@ -1234,8 +1234,8 @@ void anv_GetPhysicalDeviceProperties2(
> VK_RESOLVE_MODE_MAX_BIT_KHR;
>   }
>  
> - props->independentResolveNone = VK_TRUE;
> - props->independentResolve = VK_TRUE;
> + props->independentResolveNone = true;
> + props->independentResolve = true;
>   break;
>}
>  
> @@ -1372,7 +1372,7 @@ void anv_GetPhysicalDeviceProperties2(
> VK_SUBGROUP_FEATURE_SHUFF
> LE_RELATIVE_BIT |
> VK_SUBGROUP_FEATURE_CLUST
> ERED_BIT |
> VK_SUBGROUP_FEATURE_QUAD_
> BIT;
> - properties->quadOperationsInAllStages = VK_TRUE;
> + properties->quadOperationsInAllStages = true;
>   break;
>}
>  
> @@ -1386,10 +1386,10 @@ void anv_GetPhysicalDeviceProperties2(
>   props->maxTransformFeedbackStreamDataSize = 128 * 4;
>   props->maxTransformFeedbackBufferDataSize = 128 * 4;
>   props->maxTransformFeedbackBufferDataStride = 2048;
> - props->transformFeedbackQueries = VK_TRUE;
> - props->transformFeedbackStreamsLinesTriangles = VK_FALSE;
> - props->transformFeedbackRasterizationStreamSelect =
> VK_FALSE;
> - props->transformFeedbackDraw = VK_TRUE;
> + props->transformFeedbackQueries = true;
> + props->transformFeedbackStreamsLinesTriangles = false;
> + props->transformFeedbackRasterizationStreamSelect = false;
> +