[Mesa-dev] [PATCH v2] glx: fix error code when there is no context bound

2016-05-27 Thread Tapani Pälli
From: Bernard Kilarski 

v2: change all related NULL checks to check against dummyContext

Signed-off-by: Bernard Kilarski 
Signed-off-by: Matt Roper 
Cc: "11.2" currentDpy != dpy ||
+  if (pdraw == NULL || gc == &dummyContext || gc->currentDpy != dpy ||
  (gc->currentDrawable != drawable &&
  gc->currentReadable != drawable)) {
  __glXSendError(dpy, GLXBadDrawable, drawable,
diff --git a/src/glx/glxcmds.c b/src/glx/glxcmds.c
index 589a87a..699a81f 100644
--- a/src/glx/glxcmds.c
+++ b/src/glx/glxcmds.c
@@ -456,7 +456,7 @@ glXDestroyContext(Display * dpy, GLXContext ctx)
 {
struct glx_context *gc = (struct glx_context *) ctx;
 
-   if (gc == NULL || gc->xid == None)
+   if (gc == &dummyContext || gc->xid == None)
   return;
 
__glXLock();
@@ -1581,7 +1581,7 @@ glXFreeContextEXT(Display *dpy, GLXContext ctx)
 {
struct glx_context *gc = (struct glx_context *) ctx;
 
-   if (gc == NULL || gc->xid == None)
+   if (gc == &dummyContext || gc->xid == None)
   return;
 
/* The GLX_EXT_import_context spec says:
@@ -1728,7 +1728,7 @@ __glXSwapIntervalSGI(int interval)
CARD32 *interval_ptr;
CARD8 opcode;
 
-   if (gc == NULL) {
+   if (gc == &dummyContext) {
   return GLX_BAD_CONTEXT;
}
 
@@ -1834,7 +1834,7 @@ __glXGetVideoSyncSGI(unsigned int *count)
__GLXDRIdrawable *pdraw;
 #endif
 
-   if (!gc)
+   if (gc == &dummyContext)
   return GLX_BAD_CONTEXT;
 
 #ifdef GLX_DIRECT_RENDERING
@@ -1876,7 +1876,7 @@ __glXWaitVideoSyncSGI(int divisor, int remainder, 
unsigned int *count)
if (divisor <= 0 || remainder < 0)
   return GLX_BAD_VALUE;
 
-   if (!gc)
+   if (gc == &dummyContext)
   return GLX_BAD_CONTEXT;
 
 #ifdef GLX_DIRECT_RENDERING
@@ -2189,7 +2189,7 @@ __glXSwapBuffersMscOML(Display * dpy, GLXDrawable 
drawable,
struct glx_screen *psc = pdraw ? pdraw->psc : NULL;
 #endif
 
-   if (!gc) /* no GLX for this */
+   if (gc == &dummyContext) /* no GLX for this */
   return -1;
 
 #ifdef GLX_DIRECT_RENDERING
@@ -2408,7 +2408,7 @@ __glXBindTexImageEXT(Display * dpy,
 {
struct glx_context *gc = __glXGetCurrentContext();
 
-   if (gc == NULL || gc->vtable->bind_tex_image == NULL)
+   if (gc == &dummyContext || gc->vtable->bind_tex_image == NULL)
   return;
 
gc->vtable->bind_tex_image(dpy, drawable, buffer, attrib_list);
@@ -2419,7 +2419,7 @@ __glXReleaseTexImageEXT(Display * dpy, GLXDrawable 
drawable, int buffer)
 {
struct glx_context *gc = __glXGetCurrentContext();
 
-   if (gc == NULL || gc->vtable->release_tex_image == NULL)
+   if (gc == &dummyContext || gc->vtable->release_tex_image == NULL)
   return;
 
gc->vtable->release_tex_image(dpy, drawable, buffer);
diff --git a/src/glx/query_renderer.c b/src/glx/query_renderer.c
index 9108ec2..4debf06 100644
--- a/src/glx/query_renderer.c
+++ b/src/glx/query_renderer.c
@@ -106,7 +106,7 @@ glXQueryCurrentRendererIntegerMESA(int attribute, unsigned 
int *value)
 {
struct glx_context *gc = __glXGetCurrentContext();
 
-   if (gc == NULL)
+   if (gc == &dummyContext)
   return False;
 
return __glXQueryRendererInteger(gc->psc, attribute, value);
@@ -166,7 +166,7 @@ glXQueryCurrentRendererStringMESA(int attribute)
 {
struct glx_context *gc = __glXGetCurrentContext();
 
-   if (gc == NULL)
+   if (gc == &dummyContext)
   return False;
 
return __glXQueryRendererString(gc->psc, attribute);
diff --git a/src/glx/tests/fake_glx_screen.cpp 
b/src/glx/tests/fake_glx_screen.cpp
index db20749..c288aa6 100644
--- a/src/glx/tests/fake_glx_screen.cpp
+++ b/src/glx/tests/fake_glx_screen.cpp
@@ -75,7 +75,11 @@ indirect_create_context_attribs(struct glx_screen *base,
return indirect_create_context(base, config_base, shareList, 0);
 }
 
-__thread void *__glX_tls_Context = NULL;
+/* This is necessary so that we don't have to link with glxcurrent.c
+ * which would require us to link with X libraries and what not.
+ */
+struct glx_context dummyContext;
+__thread void *__glX_tls_Context = &dummyContext;
 
 #if !defined(GLX_USE_TLS)
 extern "C" struct glx_context *
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Drivers/Gallium/swr is missing from the list of Mesa components when entering a new bug

2016-05-27 Thread
Hello.

Please add "Drivers/Gallium/swr" to the list "Component" at
https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa

Thanks.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 96238] swr fails to build outside of the main directory

2016-05-27 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=96238

Jan Ziak <0xe2.0x9a.0...@gmail.com> changed:

   What|Removed |Added

   Hardware|Other   |x86-64 (AMD64)

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 96238] swr fails to build outside of the main directory

2016-05-27 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=96238

Bug ID: 96238
   Summary: swr fails to build outside of the main directory
   Product: Mesa
   Version: git
  Hardware: Other
OS: All
Status: NEW
  Severity: normal
  Priority: medium
 Component: Other
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: 0xe2.0x9a.0...@gmail.com
QA Contact: mesa-dev@lists.freedesktop.org

Executing something like this fails:

$ cd mesa
$ mkdir ../build64
$ cd ../build64
$ ../mesa/configure ... --with-gallium-drivers=swr,...
$ make

Output:

gen_llvm_types.py: error: argument --output/-o: can't open
'rasterizer/jitter/state_llvm.h': [Errno 2] No such file or directory:
'rasterizer/jitter/state_llvm.h'
Makefile:2009: recipe for target 'rasterizer/jitter/state_llvm.h' failed
make[3]: *** [rasterizer/jitter/state_llvm.h] Error 2
make[3]: *** Waiting for unfinished jobs
usage: gen_llvm_ir_macros.py [-h] [--input INPUT] --output OUTPUT [--gen_h]
 [--gen_cpp] [--gen_x86_h] [--gen_x86_cpp]
gen_llvm_ir_macros.py: error: argument --output/-o: can't open
'rasterizer/jitter/builder_gen.h': [Errno 2] No such file or directory:
'rasterizer/jitter/builder_gen.h'
Makefile:2015: recipe for target 'rasterizer/jitter/builder_gen.h' failed

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeonsi: avoid divide by 0 in new TCS code.

2016-05-27 Thread Bas Nieuwenhuizen
I don't think this is the right approach as we shouldn't be getting 0
in the first place. At least for LDS the output size should be at
least 2 as we load the inner & outer tess factors while writing them
to the tessellation factor ring.

We could just do
num_tcs_patch_outputs = MAX2(num_tcs_patch_outputs, 2);

but I am curious what the CTS expects, as I suspect we are essentially
writing bogus tess factors.

 - Bas

On Fri, May 27, 2016 at 8:21 AM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> The CTS test:
> GL45-CTS.shader_image_load_store.multiple-uniforms
> regressed recently with the new offchip support.
>
> Signed-off-by: Dave Airlie 
> ---
>  src/gallium/drivers/radeonsi/si_state_draw.c | 10 ++
>  1 file changed, 6 insertions(+), 4 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
> b/src/gallium/drivers/radeonsi/si_state_draw.c
> index 788869e..bff2a26 100644
> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> @@ -143,12 +143,14 @@ static void si_emit_derived_tess_state(struct 
> si_context *sctx,
>  * use LDS for the inputs and outputs.
>  */
> hardware_lds_size = sctx->b.chip_class >= CIK ? 65536 : 32768;
> -   *num_patches = MIN2(*num_patches, hardware_lds_size / 
> (input_patch_size +
> -  
> output_patch_size));
> +   if (input_patch_size || output_patch_size)
> +   *num_patches = MIN2(*num_patches, hardware_lds_size / 
> (input_patch_size +
> + 
> output_patch_size));
>
> /* Make sure the output data fits in the offchip buffer */
> -   *num_patches = MIN2(*num_patches, SI_TESS_OFFCHIP_BLOCK_SIZE /
> - output_patch_size);
> +   if (output_patch_size)
> +   *num_patches = MIN2(*num_patches, SI_TESS_OFFCHIP_BLOCK_SIZE /
> +  output_patch_size);
>
> /* Not necessary for correctness, but improves performance. The
>  * specific value is taken from the proprietary driver.
> --
> 2.5.5
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeonsi: Always reserve space for tessellation factors.

2016-05-27 Thread Bas Nieuwenhuizen
Those are always read for writing to the TF ring.

Should fix CTS test
GL45-CTS.shader_image_load_store.multiple-uniforms
after a regression due to the new tessellation code.

Signed-off-by: Bas Nieuwenhuizen 
---

I have no CTS, so it actually is not tested whether it fixes
this test.


 src/gallium/drivers/radeonsi/si_state_draw.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
b/src/gallium/drivers/radeonsi/si_state_draw.c
index 788869e..40592d0 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -125,6 +125,9 @@ static void si_emit_derived_tess_state(struct si_context 
*sctx,
num_tcs_patch_outputs = 2; /* TESSINNER + TESSOUTER */
}
 
+   /* We always load TESSINNER and TESSOUTER to write to the tf ring. */
+   num_tcs_patch_outputs = MAX2(num_tcs_patch_outputs, 2);
+
input_vertex_size = num_tcs_inputs * 16;
output_vertex_size = num_tcs_outputs * 16;
 
-- 
2.8.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] nvc0: drop unused surfaces formats conversion builtins

2016-05-27 Thread Samuel Pitoiset
This codegen lib code is no longer used for Kepler since we convert
the formats directly in the lowering pass.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/codegen/lib/gk104.asm | 459 --
 src/gallium/drivers/nouveau/nvc0/nvc0_tex.c   |  52 ---
 2 files changed, 511 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/lib/gk104.asm 
b/src/gallium/drivers/nouveau/codegen/lib/gk104.asm
index cd65b54..ef51406 100644
--- a/src/gallium/drivers/nouveau/codegen/lib/gk104.asm
+++ b/src/gallium/drivers/nouveau/codegen/lib/gk104.asm
@@ -87,465 +87,6 @@ gk104_div_s32:
$p2 cvt s32 $r1 neg s32 $r1
long ret
 
-// SULDP [for each format]
-// $r4d: address
-// $r2: surface info (format)
-// $p0: access predicate
-// $p1, $p2: caching predicate (00: cv, 01: ca, 10: cg)
-//
-// RGBA32
-$p1 suldgb b128 $r0q ca zero u8 g[$r4d] $r2 $p0
-set $p1 0x1 $p1 xor not $p2
-$p2 suldgb b128 $r0q cg zero u8 g[$r4d] $r2 $p0
-$p1 suldgb b128 $r0q cv zero u8 g[$r4d] $r2 $p0
-long ret
-// RGBA16_UNORM
-sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
-$p1 suldgb b128 $r0q ca zero u8 g[$r4d] $r2 $p0
-set $p1 0x1 $p1 xor not $p2
-$p2 suldgb b128 $r0q cg zero u8 g[$r4d] $r2 $p0
-$p1 suldgb b128 $r0q cv zero u8 g[$r4d] $r2 $p0
-cvt rn f32 $r3 u16 1 $r1
-cvt rn f32 $r2 u16 0 $r1
-mul f32 $r3 $r3 0x37800074
-sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
-cvt rn f32 $r1 u16 1 $r0
-mul f32 $r2 $r2 0x37800074
-cvt rn f32 $r0 u16 0 $r0
-mul f32 $r1 $r1 0x37800074
-mul f32 $r0 $r0 0x37800074
-long ret
-// RGBA16_SNORM
-$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0
-sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
-set $p1 0x1 $p1 xor not $p2
-$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0
-$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0
-cvt rn f32 $r3 s16 1 $r1
-cvt rn f32 $r2 s16 0 $r1
-mul f32 $r3 $r3 0x38000187
-cvt rn f32 $r1 s16 1 $r0
-sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
-mul f32 $r2 $r2 0x38000187
-cvt rn f32 $r0 s16 0 $r0
-mul f32 $r1 $r1 0x38000187
-mul f32 $r0 $r0 0x38000187
-long ret
-// RGBA16_SINT
-$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0
-set $p1 0x1 $p1 xor not $p2
-sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
-$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0
-$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0
-cvt s32 $r3 s16 1 $r1
-cvt s32 $r2 s16 0 $r1
-cvt s32 $r1 s16 1 $r0
-cvt s32 $r0 s16 0 $r0
-long ret
-// RGBA16_UINT
-sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
-$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0
-set $p1 0x1 $p1 xor not $p2
-$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0
-$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0
-cvt u32 $r3 u16 1 $r1
-cvt u32 $r2 u16 0 $r1
-cvt u32 $r1 u16 1 $r0
-sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
-cvt u32 $r0 u16 0 $r0
-long ret
-// RGBA16_FLOAT
-$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0
-set $p1 0x1 $p1 xor not $p2
-$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0
-$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0
-cvt f32 $r3 f16 $r1 1
-sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
-cvt f32 $r2 f16 $r1 0
-cvt f32 $r1 f16 $r0 1
-cvt f32 $r0 f16 $r0 0
-long ret
-// RG32_FLOAT
-$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0
-set $p1 0x1 $p1 xor not $p2
-$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0
-sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
-$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0
-long mov b32 $r2 0x
-long mov b32 $r3 0x3f80
-long ret
-// RG32_xINT
-$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0
-set $p1 0x1 $p1 xor not $p2
-$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0
-sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
-$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0
-long mov b32 $r2 0x
-long mov b32 $r3 0x0001
-long ret
-// RGB10A2_UNORM
-$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0
-set $p1 0x1 $p1 xor not $p2
-$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0
-sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
-$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0
-ext u32 $r1 $r0 0x0a0a
-long mov b32 $r3 0x3f80
-ext u32 $r2 $r0 0x0a14
-long and b32 $r0 $r0 0x3ff
-cvt rn f32 $r2 u16 0 $r2
-cvt rn f32 $r1 u16 0 $r1
-sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
-mul f32 $r2 $r2 0x3a802007
-cvt rn f32 $r0 u16 0 $r0
-mul f32 $r1 $r1 0x3a802007
-mul f32 $r0 $r0 0x3a802007
-long ret
-// RGB10A2_UINT
-$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0
-set $p1 0x1 $p1 xor not $p2
-sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
-$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0
-$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0
-ext u32 $r1 $r0 0x0a0a
-long mov b32 $r3 0x0001
-ext u32 $r2 $r0 0x0a14
-long and b32 $r0 $r0 0x3ff
-long ret
-// RGBA8_UNORM
-sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
-$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0
-set $p1 0x1 $p1 xor not $p2
-$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0
-$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0
-cvt rn f32 $r3 u8 3 $r0
-cvt rn f32 $r2 u8 2 $r0
-mul f32 $r3 $r3 0x3b808081
-sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
-cvt rn f32 $r1 u8 1 $r0
-mul f32 $r2 $r2 0x3b808081
-cvt rn f32 $r0 u8 0 $r0
-mul f32 $r1 $

[Mesa-dev] [PATCH 2/2] nvc0: remove outdated surfaces validation code for GK104

2016-05-27 Thread Samuel Pitoiset
This code was used for validating surfaces with compute but now we use
pipe_image_view instead. Anyway, surfaces support should be
re-introduced properly once OpenCL happens.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 70 -
 1 file changed, 70 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c 
b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index 7334c5f..2e3554d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -223,76 +223,6 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
}
 }
 
-/* Will be removed once images are completely done. */
-#if 0
-static void
-nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
-{
-   struct nvc0_screen *screen = nvc0->screen;
-   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
-   struct nv50_surface *sf;
-   struct nv04_resource *res;
-   uint32_t mask;
-   unsigned i;
-   const unsigned t = 1;
-   uint64_t address;
-
-   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);
-
-   mask = nvc0->surfaces_dirty[t];
-   while (mask) {
-  i = ffs(mask) - 1;
-  mask &= ~(1 << i);
-
-  /*
-   * NVE4's surface load/store instructions receive all the information
-   * directly instead of via binding points, so we have to supply them.
-   */
-  BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
-  PUSH_DATAh(push, address + NVC0_CB_AUX_BUF_INFO(i));
-  PUSH_DATA (push, address + NVC0_CB_AUX_BUF_INFO(i));
-  BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
-  PUSH_DATA (push, 64);
-  PUSH_DATA (push, 1);
-  BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 17);
-  PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
-
-  nve4_set_surface_info(push, nvc0->surfaces[t][i], screen);
-
-  sf = nv50_surface(nvc0->surfaces[t][i]);
-  if (sf) {
- res = nv04_resource(sf->base.texture);
-
- if (sf->base.writable)
-BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
- else
-BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RD);
-  }
-   }
-   if (nvc0->surfaces_dirty[t]) {
-  BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
-  PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
-   }
-
-   /* re-reference non-dirty surfaces */
-   mask = nvc0->surfaces_valid[t] & ~nvc0->surfaces_dirty[t];
-   while (mask) {
-  i = ffs(mask) - 1;
-  mask &= ~(1 << i);
-
-  sf = nv50_surface(nvc0->surfaces[t][i]);
-  res = nv04_resource(sf->base.texture);
-
-  if (sf->base.writable)
- BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
-  else
- BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RD);
-   }
-
-   nvc0->surfaces_dirty[t] = 0;
-}
-#endif
-
 /* Thankfully, textures with samplers follow the normal rules. */
 static void
 nve4_compute_validate_samplers(struct nvc0_context *nvc0)
-- 
2.8.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] i965/blorp: Preparing for flat inputs + small optimizations

2016-05-27 Thread Kenneth Graunke
On Wednesday, May 25, 2016 7:08:35 PM PDT Topi Pohjolainen wrote:
> First three clarify a little how vertex buffers are configured in the
> command stream. There is a subtle difference between gen8+ and earlier,
> and I thought it would be clearer to have it documented with bspec
> quotes.
> 
> Next four drop some hardcoding in upload logic, making the emitters
> consult compiled fragment programs for correct settings.
> 
> Patches seven and eight allow subsequent uploads to avoid
> reconfiguration of urb if the settings used by blorp are applicable.
> 
> Patches 8-12 build support for fast color clears and resolves to use
> repclear without any color payload setup.
> 
> Final patch really prepares for flat inputs but might save us a few
> electrons even with current setup.
> 
> Topi Pohjolainen (14):
>   i965/draw: Expose vertex buffer state setup
>   i965/gen8: Fix the vertex buffer size
>   i965/blorp: Use core vertex buffer state setup
>   i965/blorp: Use prog data counters to guide sf/sbe setup
>   i965/blorp: Use prog data counters to guide wm/ps setup
>   i965/blorp: Let program data tell if push constants are needed
>   i965/urb: Allow blorp to record current settings
>   i965/blorp: Fix the size requirement for vertex elements
>   i965/fs: Provide compiler options using a flags argument
>   i965/fs: Add support for repclear without color setup
>   i965/blorp: Ignore color setup in fast clears and resolves
>   i965/blorp/gen7+: Skip disabling already disabled stages
>   i965: Do not flush texture and data port caches for clears
>   i965/blorp: Disable vertex element swizzling
> 
>  src/intel/vulkan/anv_pipeline.c   |   6 +-
>  src/mesa/drivers/dri/i965/brw_blorp.c |  17 +---
>  src/mesa/drivers/dri/i965/brw_blorp.h |  14 ++-
>  src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  |  11 ++-
>  src/mesa/drivers/dri/i965/brw_blorp_clear.cpp |  47 +++---
>  src/mesa/drivers/dri/i965/brw_compiler.h  |   9 +-
>  src/mesa/drivers/dri/i965/brw_context.h   |  12 +--
>  src/mesa/drivers/dri/i965/brw_draw.h  |  13 +++
>  src/mesa/drivers/dri/i965/brw_draw_upload.c   |  74 ++-
>  src/mesa/drivers/dri/i965/brw_fs.cpp  |  23 ++---
>  src/mesa/drivers/dri/i965/brw_fs.h|   4 +-
>  src/mesa/drivers/dri/i965/brw_wm.c|   6 +-
>  src/mesa/drivers/dri/i965/gen6_blorp.c| 124 
> +++---
>  src/mesa/drivers/dri/i965/gen7_blorp.c| 113 ++-
>  src/mesa/drivers/dri/i965/gen7_urb.c  |  90 ++-
>  src/mesa/drivers/dri/i965/gen8_blorp.c|  69 ++
>  src/mesa/drivers/dri/i965/gen8_draw_upload.c  |  41 -
>  17 files changed, 403 insertions(+), 270 deletions(-)

Patches 4-11 and 14 are:
Reviewed-by: Kenneth Graunke 

I'm not sure whether patch 11 is worth doing, though...it'd be nice to
get some more data or a second opinion there.


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/14] i965/blorp/gen7+: Skip disabling already disabled stages

2016-05-27 Thread Kenneth Graunke
On Wednesday, May 25, 2016 7:08:47 PM PDT Topi Pohjolainen wrote:
> Signed-off-by: Topi Pohjolainen 
> ---
>  src/mesa/drivers/dri/i965/gen7_blorp.c | 16 
>  src/mesa/drivers/dri/i965/gen8_blorp.c | 12 
>  2 files changed, 28 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.c 
> b/src/mesa/drivers/dri/i965/gen7_blorp.c
> index 3a12bb5..2e57d6b 100644
> --- a/src/mesa/drivers/dri/i965/gen7_blorp.c
> +++ b/src/mesa/drivers/dri/i965/gen7_blorp.c
> @@ -273,6 +273,10 @@ gen7_blorp_emit_vs_disable(struct brw_context *brw)
>  static void
>  gen7_blorp_emit_hs_disable(struct brw_context *brw)
>  {
> +   if (!brw->tess_eval_program &&
> +   !(gen7_hs_state.dirty.brw & brw->ctx.NewDriverState))
> +  return;
> +

I'm not crazy about inspecting the dirty flags.  It seems like we might
want to instead track whether a stage was previously disabled via some
booleans.

Jason noticed that the main drawing code re-emits 3DSTATE_URB whenever
we switch tess and geometry shaders, even if it's just going from
active->active, which is kind of lame.  Maybe we could use something
for both...


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nvc0: make use of NVC0_CB_AUX_XXX_SIZE as much as possible

2016-05-27 Thread Samuel Pitoiset
This avoids using magic numbers for the driver constant buffer areas
and might also prevent using wrong sizes and offsets.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_compute.c|  2 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c|  4 ++--
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c |  2 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c |  6 +++---
 src/gallium/drivers/nouveau/nvc0/nvc0_tex.c|  2 +-
 src/gallium/drivers/nouveau/nvc0/nve4_compute.c| 14 +++---
 6 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
index 832c085..7574a95 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
@@ -235,7 +235,7 @@ nvc0_compute_validate_buffers(struct nvc0_context *nvc0)
PUSH_DATA (push, 2048);
PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
-   BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
+   BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + (NVC0_CB_AUX_BUF_SIZE / 4));
PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0));
 
for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index 27cbbc4..bb7fa7f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -1830,7 +1830,7 @@ nvc0_hw_sm_upload_input(struct nvc0_context *nvc0, struct 
nvc0_hw_query *hq)
   PUSH_DATAh(push, address + NVC0_CB_AUX_MP_INFO);
   PUSH_DATA (push, address + NVC0_CB_AUX_MP_INFO);
   BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
-  PUSH_DATA (push, 3 * 4);
+  PUSH_DATA (push, NVC0_CB_AUX_MP_SIZE);
   PUSH_DATA (push, 0x1);
   BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 3);
   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
@@ -1839,7 +1839,7 @@ nvc0_hw_sm_upload_input(struct nvc0_context *nvc0, struct 
nvc0_hw_query *hq)
   PUSH_DATA (push, 2048);
   PUSH_DATAh(push, address);
   PUSH_DATA (push, address);
-  BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 3);
+  BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + (NVC0_CB_AUX_MP_SIZE / 4));
   PUSH_DATA (push, NVC0_CB_AUX_MP_INFO);
}
PUSH_DATA (push, (hq->bo->offset + hq->base_offset));
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 6541241..de28fb0 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -964,7 +964,7 @@ nvc0_screen_create(struct nouveau_device *dev)
   PUSH_DATA (push, (15 << 4) | 1);
   if (screen->eng3d->oclass >= NVE4_3D_CLASS) {
  unsigned j;
- BEGIN_1IC0(push, NVC0_3D(CB_POS), 9);
+ BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + (NVC0_CB_AUX_UNK_SIZE / 4));
  PUSH_DATA (push, NVC0_CB_AUX_UNK_INFO);
  for (j = 0; j < 8; ++j)
 PUSH_DATA(push, j);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index a77486d..09f0862 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -336,9 +336,9 @@ nvc0_upload_uclip_planes(struct nvc0_context *nvc0, 
unsigned s)
PUSH_DATA (push, 2048);
PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
-   BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1);
+   BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + (NVC0_CB_AUX_UCP_SIZE / 4));
PUSH_DATA (push, NVC0_CB_AUX_UCP_INFO);
-   PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
+   PUSH_DATAp(push, &nvc0->clip.ucp[0][0], (NVC0_CB_AUX_UCP_SIZE / 4));
 }
 
 static inline void
@@ -506,7 +506,7 @@ nvc0_validate_buffers(struct nvc0_context *nvc0)
   PUSH_DATA (push, 2048);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
-  BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
+  BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + (NVC0_CB_AUX_BUF_SIZE / 4));
   PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0));
   for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
  if (nvc0->buffers[s][i].buffer) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
index 14a34d2..6d6fcae 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -1094,7 +1094,7 @@ nve4_update_surface_bindings(struct nvc0_context *nvc0)
   PUSH_DATA (push, 2048);
   PUSH_DATAh

Re: [Mesa-dev] Drivers/Gallium/swr is missing from the list of Mesa components when entering a new bug

2016-05-27 Thread Michel Dänzer
On 27.05.2016 16:37, ⚛ wrote:
> Hello.
> 
> Please add "Drivers/Gallium/swr" to the list "Component" at
> https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa

Such requests should be filed in bugzilla itself:

https://bugs.freedesktop.org/enter_bug.cgi?product=freedesktop.org&component=Bugzilla


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl: fix xfb_offset unsized array validation

2016-05-27 Thread Timothy Arceri
This partially fixes CTS test:
GL44-CTS.enhanced_layouts.xfb_get_program_resource_api

The test now fails at a tess evaluation shader with unsized output arrays.

The ARB_enhanced_layouts spec says:

   "It is a compile-time error to apply xfb_offset to the declaration of an
   unsized array."

So this seems like a bug in the CTS.
---
 src/compiler/glsl/ast_to_hir.cpp| 23 +++
 src/compiler/glsl/ir.cpp| 23 +++
 src/compiler/glsl/ir.h  |  3 +++
 src/compiler/glsl/link_varyings.cpp | 23 ---
 4 files changed, 41 insertions(+), 31 deletions(-)

diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index 7b464e0..b6db669 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -3422,11 +3422,11 @@ apply_layout_qualifier_to_variable(const struct 
ast_type_qualifier *qual,
if (qual->flags.q.explicit_xfb_offset) {
   unsigned qual_xfb_offset;
   unsigned component_size = var->type->contains_double() ? 8 : 4;
-
+  const glsl_type *t = get_varying_type(var, state->stage);
   if (process_qualifier_constant(state, loc, "xfb_offset",
  qual->offset, &qual_xfb_offset) &&
   validate_xfb_offset_qualifier(loc, state, (int) qual_xfb_offset,
-var->type, component_size)) {
+t, component_size)) {
  var->data.offset = qual_xfb_offset;
  var->data.explicit_xfb_offset = true;
   }
@@ -7309,12 +7309,6 @@ ast_interface_block::hir(exec_list *instructions,
 packing,
 this->block_name);
 
-   unsigned component_size = block_type->contains_double() ? 8 : 4;
-   int xfb_offset =
-  layout.flags.q.explicit_xfb_offset ? (int) qual_xfb_offset : -1;
-   validate_xfb_offset_qualifier(&loc, state, xfb_offset, block_type,
- component_size);
-
if (!state->symbols->add_interface(block_type->name, block_type, var_mode)) 
{
   YYLTYPE loc = this->get_location();
   _mesa_glsl_error(&loc, state, "interface block `%s' with type `%s' "
@@ -7453,6 +7447,13 @@ ast_interface_block::hir(exec_list *instructions,
   var_mode);
   }
 
+  unsigned component_size = block_type->contains_double() ? 8 : 4;
+  int xfb_offset =
+ layout.flags.q.explicit_xfb_offset ? (int) qual_xfb_offset : -1;
+  const glsl_type *t = get_varying_type(var, state->stage);
+  validate_xfb_offset_qualifier(&loc, state, xfb_offset, t,
+component_size);
+
   var->data.matrix_layout = matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED
  ? GLSL_MATRIX_LAYOUT_COLUMN_MAJOR : matrix_layout;
 
@@ -7528,6 +7529,12 @@ ast_interface_block::hir(exec_list *instructions,
*/
   assert(this->array_specifier == NULL);
 
+  unsigned component_size = block_type->contains_double() ? 8 : 4;
+  int xfb_offset =
+ layout.flags.q.explicit_xfb_offset ? (int) qual_xfb_offset : -1;
+  validate_xfb_offset_qualifier(&loc, state, xfb_offset, block_type,
+component_size);
+
   for (unsigned i = 0; i < num_variables; i++) {
  ir_variable *var =
 new(state) ir_variable(fields[i].type,
diff --git a/src/compiler/glsl/ir.cpp b/src/compiler/glsl/ir.cpp
index 5bb3ac3..70859a7 100644
--- a/src/compiler/glsl/ir.cpp
+++ b/src/compiler/glsl/ir.cpp
@@ -2021,3 +2021,26 @@ mode_string(const ir_variable *var)
assert(!"Should not get here.");
return "invalid variable";
 }
+
+/**
+ * Get the varying type stripped of the outermost array if we're processing
+ * a stage whose varyings are arrays indexed by a vertex number (such as
+ * geometry shader inputs).
+ */
+const glsl_type *
+get_varying_type(const ir_variable *var, gl_shader_stage stage)
+{
+   const glsl_type *type = var->type;
+
+   if (!var->data.patch &&
+   ((var->data.mode == ir_var_shader_out &&
+ stage == MESA_SHADER_TESS_CTRL) ||
+(var->data.mode == ir_var_shader_in &&
+ (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL ||
+  stage == MESA_SHADER_GEOMETRY)))) {
+  assert(type->is_array());
+  type = type->fields.array;
+   }
+
+   return type;
+}
diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h
index 6236245..eee7bf9 100644
--- a/src/compiler/glsl/ir.h
+++ b/src/compiler/glsl/ir.h
@@ -2625,6 +2625,9 @@ is_gl_identifier(const char *s)
return s && s[0] == 'g' && s[1] == 'l' && s[2] == '_';
 }
 
+const glsl_type *
+get_varying_type(const ir_variable *var, gl_shader_stage stage);
+
 extern "C" {
 #endif /* __cplusplus */
 
diff --git a/src/compiler/glsl/link_varyings.cpp 
b/src/compiler/glsl/link_varyings.cpp
index 4b2a672..49e9157 100644
--- a/src/compiler/glsl/li

Re: [Mesa-dev] [PATCH 2/2] winsys/amdgpu: disable CE preamble until the kernel is fixed

2016-05-27 Thread Christian König

Am 27.05.2016 um 00:19 schrieb Marek Olšák:

On Fri, May 27, 2016 at 12:13 AM, Alex Deucher  wrote:

On Thu, May 26, 2016 at 5:51 PM, Marek Olšák  wrote:

From: Marek Olšák 

SDMA submission somehow interacts with the skipping CE preamble logic.
This is a workaround for current kernels which have the bug.

Sadly, I can't see what's wrong with the kernel driver. The CE preamble
handling there looks good to me.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95545

What exactly is going wrong?  Is the kernel not scheduling the
preamble or is the ordering wrong or something else?

There are VM faults for addresses that are too far away from memory
allocated by the process.

It looks like the contents of CE RAM come from a different process,
which suggests that the CE preamble IB was skipped.

Disabling the CE preamble IB means that the preamble packets are added
to the main CE IB instead, which makes the problem go away.


Just a guess, but does commit 9f8fb5a2b339ba83493991ca8f1173a939a696d3 
in Alex's drm-next-4.7 tree help with the problem?


commit 9f8fb5a2b339ba83493991ca8f1173a939a696d3
Author: Christian König 
Date:   Fri May 6 14:52:57 2016 +0200

drm/amdgpu: move preamble IB handling into common code

This fixes the handling which was completely broken when you
add more than one preamble IB.


Regards,
Christian.



Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl/linker: call link_uniform blocks on linked shader.

2016-05-27 Thread Timothy Arceri
On Fri, 2016-05-27 at 13:24 +1000, Dave Airlie wrote:
> From: Dave Airlie 
> 
> The old code called this on the prelinked shader list,
> but at this point we have the linked shader, so we should
> call the interface on that alone.
> 
> This fixes a regression in:
> dEQP-GLES31.functional.ssbo.layout.random.all_per_block_buffers.13
> introduced in
> 5b2675093e863a52b610f112884ae12d42513770
> glsl: handle implicit sized arrays in ssbo
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96228
> Reported-by: Mark James
> Signed-off-by: Dave Airlie 

Reviewed-by: Timothy Arceri 

> ---
>  src/compiler/glsl/linker.cpp | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/compiler/glsl/linker.cpp
> b/src/compiler/glsl/linker.cpp
> index 1d55529..e712ee3 100644
> --- a/src/compiler/glsl/linker.cpp
> +++ b/src/compiler/glsl/linker.cpp
> @@ -2318,7 +2318,7 @@ link_intrastage_shaders(void *mem_ctx,
> v.fixup_unnamed_interface_types();
>  
> /* Link up uniform blocks defined within this stage. */
> -   link_uniform_blocks(mem_ctx, ctx, prog, shader_list, num_shaders,
> +   link_uniform_blocks(mem_ctx, ctx, prog, &linked, 1,
> &ubo_blocks, &num_ubo_blocks, &ssbo_blocks,
> &num_ssbo_blocks);
>  
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa/program_interface_query: fix transform feedback varyings.

2016-05-27 Thread Timothy Arceri
On Wed, 2016-05-25 at 09:10 +1000, Dave Airlie wrote:
> From: Dave Airlie 
> 
> The spec says gl_NextBuffer and gl_SkipComponents need to be
> returned to userspace in the program interface queries.
> 
> We currently throw those away, this requires a complete piglit
> run to make sure no drivers fall over due to the extra varyings.
> 
> This fixes:
> GL45-CTS.program_interface_query.transform-feedback-built-in

Looks good to me.

Reviewed-by: Timothy Arceri 

> 
> Signed-off-by: Dave Airlie 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Drivers/Gallium/swr is missing from the list of Mesa components when entering a new bug

2016-05-27 Thread Emil Velikov
On 27 May 2016 at 09:44, Michel Dänzer  wrote:
> On 27.05.2016 16:37, ⚛ wrote:
>> Hello.
>>
>> Please add "Drivers/Gallium/swr" to the list "Component" at
>> https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa
>
> Such requests should be filed in bugzilla itself:
>
> https://bugs.freedesktop.org/enter_bug.cgi?product=freedesktop.org&component=Bugzilla
>
Indeed. Furthermore the SWR devs did not see any benefit last time
I've asked them.

Tim, what's your view on this request ?

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 96242] Latest mesa from git doesn't build - Python3 not found

2016-05-27 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=96242

Bug ID: 96242
   Summary: Latest mesa from git doesn't build - Python3 not found
   Product: Mesa
   Version: git
  Hardware: PowerPC
OS: Linux (All)
Status: NEW
  Severity: normal
  Priority: medium
 Component: Mesa core
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: joro-2...@t-online.de
QA Contact: mesa-dev@lists.freedesktop.org

Latest mesa from git doesn't build, fails while configure-ing,

error message:

checking for indent... cat
checking if module mako in python is installed... yes
configure: error: Python3 not found - unable to generate sources

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 96235] st_nir.h:34: error: redefinition of typedef ‘nir_shader’

2016-05-27 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=96235

--- Comment #1 from Emil Velikov  ---
Using the forward declaration allows us to avoid including the nir.h header,
thus keeping headers include chain shorter and less things get rebuild as a
result.

On the other hand, typedef redefinition is a C11 feature (iirc) which we cannot
enforce atm. There was a lengthy discussion about killing off the typedefs in
NIR, as they made sense in the C++ GLSL but not the C based NIR.

Don't recall the conclusion of the thread, but fwiw I'm for killing the
typedefs thus being able to build mesa with GCC 4.x series and (in due time)
killing off the unneeded includes of nir*.h and replacing them with fwd
declarations.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/7] nir/search: Use bitfields to shrink struct sizes

2016-05-27 Thread Jose Fonseca

On 27/05/16 12:06, Jason Ekstrand wrote:


On May 26, 2016 7:06 PM, "Ian Romanick" <i...@freedesktop.org> wrote:
 >
 > On 05/26/2016 06:30 PM, Jason Ekstrand wrote:
 > > This shrinks the .text section of nir_opt_algebraic.o by 30.5 KB:
 > >
 > >    text    data     bss     dec     hex  filename
 > >   48703   64592       0  113295   1ba8f  nir_opt_algebraic.o
 > >   17951   64584       0   82535   14267  nir_opt_algebraic.o
 > > ---
 > >  src/compiler/nir/nir_search.h | 12 ++--
 > >  1 file changed, 6 insertions(+), 6 deletions(-)
 > >
 > > diff --git a/src/compiler/nir/nir_search.h
b/src/compiler/nir/nir_search.h
 > > index 888a2a3..b97522a 100644
 > > --- a/src/compiler/nir/nir_search.h
 > > +++ b/src/compiler/nir/nir_search.h
 > > @@ -39,23 +39,23 @@ typedef enum {
 > >  } nir_search_value_type;
 > >
 > >  typedef struct {
 > > -   nir_search_value_type type;
 > > +   uint8_t type; /* enum nir_search_value_type */
 >
 > Do we lose any checking by having this not be an enum?  Places where the
 > compiler would warning about missing cases, etc.  Would telling GCC to
 > pack the enum be just as good?  I've gotten similar feedback on similar
 > kinds of patches.

The C99 spec states that bit-field elements must be an integer type or
_Bool.  Everything I find indicates that enums aren't allowed.  That
said, GCC does allow them and we did use an enum in a bit-field for
nir_variable.data.mode for a while.  IIRC, it was making MSVC grumpy
which is why we stopped.

I'd personally rather keep it as an enum for the sake of type safety as
you say.  Since this is never included from C++ we may be able to get
away with it but I'm not actually sure that makes a difference.  I added
Jose to the Cc; maybe he can shed some light on it.


I recall seeing warnings about bit structs with enums, but I don't 
recall if it was MSVC or `GCC -Wall`, or what.


A grep of Mesa source tree reveals a precedent of code that's being 
compiled by MSVC:


$ git grep '\<enum\>.*:\s*[0-9]\+\s*;'
src/gallium/auxiliary/tgsi/tgsi_info.h:   enum tgsi_output_mode 
output_mode:3;


It was added in 2012.  So whatever was the limitation, it might have 
disappeared long time ago.



Jose

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/29] Make more use of bitmasks

2016-05-27 Thread Mathias Fröhlich
On Wednesday, May 25, 2016 09:36:41 Ian Romanick wrote:
> On 05/23/2016 11:41 PM, mathias.froehl...@gmx.net wrote:
> > From: Mathias Fröhlich 
> > 
> > Hi all,
> > 
> > following a series with performance improvements
> > for cpu/draw bound applications. This part makes
> > more use of the bitmask/ffs technique for iterating
> > a set of enabled items. The gains are not huge
> > but they are noticeable for some of my favourite
> > workloads.
> 
> Performance enhancements aren't generally accepted without any
> performance data.  What workloads?  What system?

Ian,
lspci | grep VGA | grep Intel
00:02.0 VGA compatible controller: Intel Corporation 3rd Gen Core processor 
Graphics Controller (rev 09)
lspci -n -s 00:02.0
00:02.0 0300: 8086:0166 (rev 09)

osgviewer is hand compiled from the OpenSceneGraph-3.4 branch. The default
rendering uses display lists and fixed function.

For more details for the whole picture see the other mail I just sent and CC to 
you.

> 
> That said, the first subseries definitely makes the code better.  I sent
> a couple comments, but with those fixed, patches 1 through 8 are
> 
> Reviewed-by: Ian Romanick 

Thanks!

Mathias___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/29] mesa: Use bitmask/ffs to iterate enabled lights for building ff shader keys.

2016-05-27 Thread Mathias Fröhlich
Hi,

On Wednesday, May 25, 2016 12:06:02 you wrote:
> On Tue, May 24, 2016 at 8:42 AM,   wrote:
> > From: Mathias Fröhlich 
> >
> > Replaces a loop that iterates all lights and test
> > which of them is enabled by a loop only iterating over
> > the bits set in the enabled bitmask.
> 
> This takes the code from something very obvious and easy to follow to
> something you'll have to think twice about the correctness of...

IMO that is a matter of if you are used to that or not.
When I saw this first now some years ago I had to look twice also,
but I tend to take something like that as an opportunity to learn a
new pattern that I can apply where appropriate.

> Since MAX_LIGHTS is 8, this seems a bit like a premature optimization
> to me. Does this patch yield any measurable improvement in speed in
> any real-world applications?

Depends on what you call real world. I did mostly look at osgviewer with
some of the models I have here around. So, no nobody uses this model
just purely with osgviewer in its daywork. But this is part of what you see
in for example flightgear or closed applications for similar purposes I know of.
Now, I get an improvement of about 750 to 800 frames with at least
one model in osgviewer - well kind of representative favorite model I
often use.
Do you call this real world if I care about 750+50 fps when I can't see all
the pictures being drawn because of the display frequency?
I would say 'kind of'. Because this shows that for cpu bound models, 
we can save some time on the cpu.
And the final real-world application, where you typically display plenty
of such models, may just be able to display more of these models in a
single scene without taking the risk of frame drops at stable frame rate.

Now you may argue that this is just a bad application or a bad model in a
weakly optimizing application or both.
Yes, kind of, I agree, but you can't always change the applications.

If I look at the draw and gpu times in OpenSceneGraph based applications
over the years, I observe that the closed source nvidia driver needs
incredible few cpu time to schedule the draws for the gpu. The same model
on a comparable machine with a comparable amd/ati card is usually
2-3 times slower in terms of draw time on the cpu with the amd closed
source driver. And then there are mesa based drivers usually far
off of that picture.

Over the past few years, the intel driver backend have gained some
speed not limited to but also in this regard. So that has really
improved most in mesa among the machines I can look at every now and
then (Great work guys Thanks!). If you now look at the profiles,
you can see today a fair amount of mesa core functions beside
the driver backend functions. Well I currently talk about zooming
into the application of interest and then the i965_dri.so
using perf. Having applied this series you see less
of the mesa core functions that high in the profiles.
Does that result in huge performance gains? No, not huge. For i965_dri
I would claim that the ball is then back in the driver backends ballpark.
But when the intel guys are playing that ball we get finally more improvements.

Then there are other drivers too. Some of them will not see any measurable
improvement because the backend still eats up most of the cpu time. I don't
know which of them do what amount.

I cannot place improvement tags on each of the individual changes.
This is more like: If I do them all, I do observe improvements. So this pattern 
seems
beneficial and I apply that to places where I can potentially see
that they may help.

The series as such is part of more fight against O(#max possible numbers)
loops in the fast path of draws in core mesa. But what I have there still
needs cleanup and a proper split into a series that can be reviewed. With that
additional unpublished proof of concept hackery here I get an other frame rate
improvement of 50-100fps at already 800fps then. Or if you want to
put that in more positive words, then you get another 10% faster cpu
side draw times.

So, why the patch series:
I do today observe on my private notebook profile results that encourage
looking again at core mesa functions. For a fair amount of the mesa
functions being visible in these profiles, I have an idea how to push
them down. This idea is already used a lot in gallium (I have learned
that a mail ago) and somehow in core mesa, but it contains a pattern that
is not that widely known but not too bad.
That pattern enables loop complexities to go down
from O(#max possible number) to O(#actual number) where typically
'#actual number' is much smaller than '#max possible number'. And I can see
the result in terms of improvement in the profiles. And even this appears
visibly in an application.

This sums up in: I have an opportunity for an improvement where
I can just see that theory matches practical observations.
For me this is a safe bet.

Thanks

Mathias___
mesa-dev mailing list
mesa-dev@li

Re: [Mesa-dev] [PATCH 10/29] mesa: Use bitmask/ffs to iterate enabled lights

2016-05-27 Thread Mathias Fröhlich
Hi,

On Wednesday, May 25, 2016 09:11:19 Ian Romanick wrote:
> Each variable should be declared on its own line.  Also, we can mix code
> and data now, so you can declare mask at the assignment.

In general, thanks for the comments: I will incorporate.

For mixing declarations and code:
This is meant treewide now?
Or is this still limited to specific subtrees?

Thanks!

Mathias___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] winsys/amdgpu: disable CE preamble until the kernel is fixed

2016-05-27 Thread Marek Olšák
On Fri, May 27, 2016 at 11:12 AM, Christian König
 wrote:
> Am 27.05.2016 um 00:19 schrieb Marek Olšák:
>>
>> On Fri, May 27, 2016 at 12:13 AM, Alex Deucher 
>> wrote:
>>>
>>> On Thu, May 26, 2016 at 5:51 PM, Marek Olšák  wrote:

 From: Marek Olšák 

 SDMA submission somehow interacts with the skipping CE preamble logic.
 This is a workaround for current kernels which have the bug.

 Sadly, I can't see what's wrong with the kernel driver. The CE preamble
 handling there looks good to me.

 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95545
>>>
>>> What exactly is going wrong?  Is the kernel not scheduling the
>>> preamble or is the ordering wrong or something else?
>>
>> There are VM faults for addresses that are too far away from memory
>> allocated by the process.
>>
>> It looks like the contents of CE RAM come from a different process,
>> which suggests that the CE preamble IB was skipped.
>>
>> Disabling the CE preamble IB means that the preamble packets are added
>> to the main CE IB instead, which makes the problem go away.
>
>
> Just a guess, but does commit 9f8fb5a2b339ba83493991ca8f1173a939a696d3 in
> Alex's drm-next-4.7 tree help with the problem?
>
> commit 9f8fb5a2b339ba83493991ca8f1173a939a696d3
> Author: Christian König 
> Date:   Fri May 6 14:52:57 2016 +0200
>
> drm/amdgpu: move preamble IB handling into common code
>
> This fixes the handling which was completely broken when you
> add more than one preamble IB.

I'm using amd-staging-4.5, which has that. Apparently it doesn't help.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/29] mesa: Use bitmask/ffs to iterate enabled lights for building ff shader keys.

2016-05-27 Thread Mathias Fröhlich
Hi,

On Wednesday, May 25, 2016 09:13:46 Ian Romanick wrote:
> On 05/25/2016 03:06 AM, Erik Faye-Lund wrote:
> > On Tue, May 24, 2016 at 8:42 AM,   wrote:
> >> From: Mathias Fröhlich 
> >>
> >> Replaces a loop that iterates all lights and test
> >> which of them is enabled by a loop only iterating over
> >> the bits set in the enabled bitmask.
> > 
> > This takes the code from something very obvious and easy to follow to
> > something you'll have to think twice about the correctness of...
> > 
> > Since MAX_LIGHTS is 8, this seems a bit like a premature optimization
> > to me. Does this patch yield any measurable improvement in speed in
> > any real-world applications?
> 
> Right... because now the compiler will likely not unroll the loop.  I'd
> be curious to compare the before / after code.  My guess is that it
> doesn't make much difference either way.

As already told in the longer mail, I cannot place improvement tags on
individual changes. Apart from what I get presented with perf top, I was also 
not
looking explicitly at the assembly of each particular patch.

But while working with that series, the overall impression is that the bare
iteration already hurts. Most of the improvement seems like the comparison
between O(#max something) and O(#actual something).

Loop unrolling or not, the cpu has to look at all these places and check for 
the enabled.
May be this is also showing cache line effects where we can now leave unused 
lights
just alone in memory without ever looking at them - only guessing, I have
not checked struct layouts if they get pulled either way.

What I did check in the assembly is that the ffs* translate into something 
simple
so that we do not hugely pessimize loops going over a bigger amount of 
lights/whatnot.

Greetings

Mathias___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeon/uvd: fix the H264 level for Tonga

2016-05-27 Thread Christian König

Am 26.05.2016 um 11:27 schrieb Andy Furniss:

Alex Deucher wrote:

On Wed, May 25, 2016 at 10:57 AM, Christian König
 wrote:

From: Christian König 

We support 5.1 for a while now.


Resend as the last one didn't have the CCs.

I know (well think) vdpau doesn't really mention 5.2 anywhere, but for
ffmpeg I've been making this change for some time to say 5.2.

Tonga can easily do 5.2, players don't seem to look at this field, but
ffmpeg cli now does and will refuse to use uvd for 5.2 vids.



5.2 requires the hardware to handle more than twice as many macroblocks 
per second than 5.1. So the decoder needs to handle 4k at 66fps.


I'm not sure about the absolute numbers, but I think that could be too 
much even for a Tonga.



In the past ffmpeg cli also didn't look at this, but they merged
something in from libav which changed things.

I have a trac open, but the dev who replied said fix the driver - he
didn't reply further when I said I didn't think vdpau went as high as
5.2 ...


VDPAU actually doesn't have an enumeration for the level, so you can 
even return something like 9.9 without a problem.


Regards,
Christian.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeonsi: avoid divide by 0 in new TCS code.

2016-05-27 Thread Marek Olšák
On Fri, May 27, 2016 at 9:57 AM, Bas Nieuwenhuizen
 wrote:
> I don't think this is the right approach as we shouldn't be getting 0
> in the first place. At least for LDS the output size should be at
> least 2 as we load the inner & outer tess factors while writing them
> to the tessellation factor ring.
>
> We could just do
> num_tcs_patch_outputs = MAX2(num_tcs_patch_outputs, 2);
>
> but I am curious what the CTS expects, as I suspect we are essentially
> writing bogus tess factors.

He said it's a division by zero. It looks like the shader doesn't have
any inputs or outputs.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] configure.ac: Add support for Android builds

2016-05-27 Thread Nicolas Boichat
Hi Emil,

Took us some time to clean things up, but we got an ebuild and repo to
share with you.

On Tue, May 24, 2016 at 10:52 PM, Emil Velikov  wrote:
[snip]
>> We also set PKGCONFIG="false", because, well, we do not have .pc files
>> for Android libs. We _could_ create them manually, though,
> Arr... it seems like there's more 'hacks' than expected. I would
> kindly urge that if you're using the autoconf build to use .pc files,
> please ?
>
> There's no need to manually create any of them - just throw the
> template and wire it up in the build system.

Not quite sure how that'd work out, I guess I'll see ,-)

>> but I'm not
>> 100% convinced it's any better than specifying them in the mesa ebuild
>> (knowing that mesa is the only package we build this way, the
>> dependencies are prebuilts that we pull from Android builders).
>>
> Is adding such workarounds encouraged/widespread in the ebuild ?
> Last time I've looked at the Gentoo ones, there weren't many such
> cases.

No, it's not usual, at all. The issue here is that we have a chroot
that is meant to be a "normal" Linux (that is, Chromium OS), for which
we build all the libraries. But, in the same chroot, we also switch to
a different toolchain and vastly different system (Android), using
prebuilt libraries, to build the second copy of mesa. I guess we are
bound to have a number of hacks...

>> So we replace them with LIBXYZ_[CFLAGS/LIBS], and configure is happy with 
>> that.
>>
>> One thing that I wonder about is how we could specify
>> libEGL_la_LIBADD += -lhardware -lcutils -lsync
>> without hardcoding it in the Makefile.am.
>>
>> Any idea how we could do that? Or do you think it's ok to hardcode the libs?
>>
> The proposed solution will handle these. If you guys feel that it's
> too much/annoying to deal with, show me a repo and I'll send you the
> patches ;-) Please ?

Alright, so the ebuild is here:
https://chromium-review.googlesource.com/#/c/347700/ (if you have a
Chromium OS chroot, it should just work).

And the patches are here:
https://chromium.googlesource.com/chromiumos/third_party/mesa/+log/arc-11.3.0-pre1

They are still based on a slightly older version of mesa. Tomasz is
working on rebasing to the latest mesa master (it looks like someone
implemented similar changes to ours to add support for PRIME FD).

Let us know if you hit any issue, and looking forward to your patches,
thanks a lot!

Best,

Nicolas
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/29] mesa: Use bitmask/ffs to iterate enabled lights for building ff shader keys.

2016-05-27 Thread Erik Faye-Lund
On Fri, May 27, 2016 at 11:59 AM, Mathias Fröhlich
 wrote:
>
> Hi,
>
> On Wednesday, May 25, 2016 12:06:02 you wrote:
>> On Tue, May 24, 2016 at 8:42 AM,  wrote:
>> > From: Mathias Fröhlich 
>> >
>> > Replaces a loop that iterates all lights and test
>> > which of them is enabled by a loop only iterating over
>> > the bits set in the enabled bitmask.
>
>> This takes the code from something very obvious and easy to follow to
>> something you'll have to think twice about the correctness of...
>
> IMO that is a matter of if you are used to that or not.
> When I saw this first now some years ago I had to look twice also,
> but I tend to take something like that as an opportunity to learn a
> new pattern that I can apply where appropriate.

I tend to disagree. There's more steps involved, and more things that
could be wrong, no matter how experienced you are. We're all human,
and mistakes do happen.

Besides, It's not like I haven't seen the pattern before; I've been
doing high-performance programming for the better part of 20 years.
But in that time, I've also learned that clever tricks come at a cost,
and you need to be careful in evaluating when that cost is worthwhile.

I'm not saying your changes aren't worthwhile, but without having some
(reproducible) numbers to back the claims up, it *might* be safer to
be conservative and not apply the patches.

>> Since MAX_LIGHTS is 8, this seems a bit like a premature optimization
>> to me. Does this patch yield any measurable improvement in speed in
>> any real-world applications?
>
> Depends on what you call real world. I did mostly look at osgviewer with
> some of the models I have here around. So, no nobody uses this model
> just purely with osgviewer in its daywork. But this is part of what you see
> in for example flightgear or closed applications for similar purposes I know
> of.
>
> Now, I get an improvement of about 750 to 800 frames with at least
> one model in osgviewer - well kind of representative favorite model I
> often use.
>
> Do you call this real world if I care about 750+50 fps when I cant see all
> the pictures being drawn because of the display frequency?
> I would say 'kind of'. Because this shows that for cpu bound models,
> we can save some time on the cpu.

If you're saying this patch alone takes a known work-load from ~750
fps to ~800 fps for viewing some model, I'd say that's worth it.
That's going from 1.33 ms to 1.25 ms per frame, a saving of about 6-7%
of CPU time. That's a big gain.

But if you're saying this whole patch-series gives that gain, then I'd
like to see performance break-down of the whole series; which of the
patches yield the biggest gain, and look into cherry-picking only the
ones with the most bang for the bucks.

> And the final real word application, where you typically display plenty
> of such models, may just be able to display more of these models in a
> single scene without taking the risk of frame drops at stable frame rate.
>
> Now you may argue that this is just a bad application or a bad model in a
> weakly optimizing application or both.

I'm not.

> Over the past few years, the intel driver backend have gained some
> speed not limited to but also in this regard. So that has really
> improved most in mesa among the machines I can look at every now and
> then (Great work guys Thanks!). If you now look at the profiles,
> you can see today a fair amount of mesa core functions beside
> the driver backend functions. Well I currently talk about zooming
> into the application of interest and then the i965_dri.so
> using perf. Having applied this series you see less
> of the mesa core functions that high in the profiles.
> Does that result in huge performance gains? No, not huge. For i965_dri
> I would claim that the ball is then back in the driver backends ballpark.
> But when the intel guys are playing that ball we get finally more
> improvements.
>
> Then there are other drivers too. Some of them will not see any measurable
> improvement because the backend still eats up most of the cpu time. I don't
> know which of them do what amount.
>
> I cannot place improvement tags on each of the individual changes.
> This is more like: If I do them all, I do observe improvements.

That sounds strange. Why can't you? Does really *every* one of the
patches need to be in place for there to be an improvement? This
sounds unlikely to me...

I can understand if you don't want to do 29 separate measurements, but
there must be some possible middle ground, no? Perhaps grouping them a
bit logically and try to measure each groups impact?

You say above that you have perf-measurements that shows numbers going
down, how about sharing those?

> So this pattern seems beneficial and I apply that to places where I can
> potentially see that they may help.
>
> The series as such is part of more fight against O(#max possible numbers)
> loops in the fast path of draws in core mesa. But what I have there still
> needs cleanup and a proper spli

Re: [Mesa-dev] [PATCH] radeonsi: Always reserve space for tessellation factors.

2016-05-27 Thread Marek Olšák
And how about the attached patch?

Marek

On Fri, May 27, 2016 at 10:08 AM, Bas Nieuwenhuizen
 wrote:
> Those are always read for writing to the TF ring.
>
> Should fix CTS test
> GL45-CTS.shader_image_load_store.multiple-uniforms
> after a regression due to the new tessellation code.
>
> Signed-off-by: Bas Nieuwenhuizen 
> ---
>
> I have no CTS, so it actually is not tested whether it fixes
> this test.
>
>
>  src/gallium/drivers/radeonsi/si_state_draw.c | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
> b/src/gallium/drivers/radeonsi/si_state_draw.c
> index 788869e..40592d0 100644
> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> @@ -125,6 +125,9 @@ static void si_emit_derived_tess_state(struct si_context 
> *sctx,
> num_tcs_patch_outputs = 2; /* TESSINNER + TESSOUTER */
> }
>
> +   /* We always load TESSINNER and TESSOUTER to write to the tf ring. */
> +   num_tcs_patch_outputs = MAX2(num_tcs_patch_outputs, 2);
> +
> input_vertex_size = num_tcs_inputs * 16;
> output_vertex_size = num_tcs_outputs * 16;
>
> --
> 2.8.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
From f49301767300e98fb293f4e5d800410a0015143b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= 
Date: Fri, 27 May 2016 12:39:30 +0200
Subject: [PATCH] radeonsi: always reserve output space for tess factors

---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index c6f51ea..8ec0470 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1104,8 +1104,13 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
 			u_vertices_per_prim(sel->info.properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
 		break;
 
-	case PIPE_SHADER_VERTEX:
 	case PIPE_SHADER_TESS_CTRL:
+		/* Always reserve space for these. */
+		sel->patch_outputs_written |=
+			(1llu << si_shader_io_get_unique_index(TGSI_SEMANTIC_TESSINNER, 0)) |
+			(1llu << si_shader_io_get_unique_index(TGSI_SEMANTIC_TESSOUTER, 0));
+		/* fall through */
+	case PIPE_SHADER_VERTEX:
 	case PIPE_SHADER_TESS_EVAL:
 		for (i = 0; i < sel->info.num_outputs; i++) {
 			unsigned name = sel->info.output_semantic_name[i];
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/2] vl/vdpau: implement luma keyring

2016-05-27 Thread Nayan Deshmukh
This is my first time contributing to mesa. I have implemented
luma keying in vdpau. I used vl_matrix_filter.c as a guideline
for lumakey filter. I have tested the patches but more testing 
is required.


Nayan Deshmukh (2):
  vl: add a luma key filter
  st/vdpau: use lumakey filter to apply luma keyring

 src/gallium/auxiliary/Makefile.sources   |   2 +
 src/gallium/auxiliary/vl/vl_lumakey_filter.c | 254 +++
 src/gallium/auxiliary/vl/vl_lumakey_filter.h |  60 ++
 src/gallium/state_trackers/vdpau/mixer.c |  76 +--
 src/gallium/state_trackers/vdpau/vdpau_private.h |   8 +-
 5 files changed, 384 insertions(+), 16 deletions(-)
 create mode 100644 src/gallium/auxiliary/vl/vl_lumakey_filter.c
 create mode 100644 src/gallium/auxiliary/vl/vl_lumakey_filter.h

-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] st/vdpau: use lumakey filter to apply luma keyring

2016-05-27 Thread Nayan Deshmukh
Signed-off-by: Nayan Deshmukh 
---
 src/gallium/state_trackers/vdpau/mixer.c | 76 +++-
 src/gallium/state_trackers/vdpau/vdpau_private.h |  8 ++-
 2 files changed, 68 insertions(+), 16 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/mixer.c 
b/src/gallium/state_trackers/vdpau/mixer.c
index dec79ff..f44d5c9 100644
--- a/src/gallium/state_trackers/vdpau/mixer.c
+++ b/src/gallium/state_trackers/vdpau/mixer.c
@@ -92,7 +92,6 @@ vlVdpVideoMixerCreate(VdpDevice device,
   case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L8:
   case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L9:
   case VDP_VIDEO_MIXER_FEATURE_INVERSE_TELECINE:
-  case VDP_VIDEO_MIXER_FEATURE_LUMA_KEY:
  break;
 
   case VDP_VIDEO_MIXER_FEATURE_DEINTERLACE_TEMPORAL:
@@ -107,6 +106,10 @@ vlVdpVideoMixerCreate(VdpDevice device,
  vmixer->noise_reduction.supported = true;
  break;
 
+  case VDP_VIDEO_MIXER_FEATURE_LUMA_KEY:
+ vmixer->luma_key.supported = true;
+ break;
+
   default: goto no_params;
   }
}
@@ -148,8 +151,8 @@ vlVdpVideoMixerCreate(VdpDevice device,
 vmixer->video_height, max_size);
   goto no_params;
}
-   vmixer->luma_key_min = 0.f;
-   vmixer->luma_key_max = 1.f;
+   vmixer->luma_key.min_luma = 0.f;
+   vmixer->luma_key.max_luma = 1.f;
pipe_mutex_unlock(dev->mutex);
 
return VDP_STATUS_OK;
@@ -199,6 +202,12 @@ vlVdpVideoMixerDestroy(VdpVideoMixer mixer)
   vl_matrix_filter_cleanup(vmixer->sharpness.filter);
   FREE(vmixer->sharpness.filter);
}
+
+   if (vmixer->luma_key.filter) {
+  vl_lumakey_filter_cleanup(vmixer->luma_key.filter);
+  FREE(vmixer->luma_key.filter);
+   }
+
pipe_mutex_unlock(vmixer->device->mutex);
DeviceReference(&vmixer->device, NULL);
 
@@ -341,7 +350,7 @@ VdpStatus vlVdpVideoMixerRender(VdpVideoMixer mixer,
}
 
vl_compositor_set_dst_clip(&vmixer->cstate, RectToPipe(destination_rect, 
&clip));
-   if (!vmixer->noise_reduction.filter && !vmixer->sharpness.filter)
+   if (!vmixer->noise_reduction.filter && !vmixer->sharpness.filter && 
!vmixer->luma_key.filter)
   vlVdpSave4DelayedRendering(vmixer->device, destination_surface, 
&vmixer->cstate);
else {
   vl_compositor_render(&vmixer->cstate, compositor, dst->surface, 
&dst->dirty_area, true);
@@ -356,6 +365,11 @@ VdpStatus vlVdpVideoMixerRender(VdpVideoMixer mixer,
   if (vmixer->sharpness.filter)
  vl_matrix_filter_render(vmixer->sharpness.filter,
  dst->sampler_view, dst->surface);
+
+  if (vmixer->luma_key.filter)
+ vl_lumakey_filter_render(vmixer->luma_key.filter,
+ dst->sampler_view, dst->surface);
+
}
pipe_mutex_unlock(vmixer->device->mutex);
 
@@ -457,6 +471,27 @@ vlVdpVideoMixerUpdateSharpnessFilter(vlVdpVideoMixer 
*vmixer)
}
 }
 
+static void
+vlVdpVideoMixerUpdateLumaKeyFilter(vlVdpVideoMixer *vmixer)
+{
+   assert(vmixer);
+
+   /* if present remove the old filter first */
+   if (vmixer->luma_key.filter) {
+  vl_lumakey_filter_cleanup(vmixer->luma_key.filter);
+  FREE(vmixer->luma_key.filter);
+  vmixer->luma_key.filter = NULL;
+   }
+
+   /* and create a new filter as needed */
+   if (vmixer->luma_key.enabled) {
+  vmixer->luma_key.filter = MALLOC(sizeof(struct vl_lumakey_filter));
+  vl_lumakey_filter_init(vmixer->luma_key.filter, vmixer->device->context,
+vmixer->video_width, vmixer->video_height,
+vmixer->luma_key.min_luma, 
vmixer->luma_key.max_luma);
+   }
+}
+
 /**
  * Retrieve whether features were requested at creation time.
  */
@@ -490,7 +525,6 @@ vlVdpVideoMixerGetFeatureSupport(VdpVideoMixer mixer,
   case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L8:
   case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L9:
   case VDP_VIDEO_MIXER_FEATURE_INVERSE_TELECINE:
-  case VDP_VIDEO_MIXER_FEATURE_LUMA_KEY:
  feature_supports[i] = false;
  break;
 
@@ -506,6 +540,10 @@ vlVdpVideoMixerGetFeatureSupport(VdpVideoMixer mixer,
  feature_supports[i] = vmixer->noise_reduction.supported;
  break;
 
+  case VDP_VIDEO_MIXER_FEATURE_LUMA_KEY:
+ feature_supports[i] = vmixer->luma_key.supported;
+ break;
+
   default:
  return VDP_STATUS_INVALID_VIDEO_MIXER_FEATURE;
   }
@@ -548,7 +586,6 @@ vlVdpVideoMixerSetFeatureEnables(VdpVideoMixer mixer,
   case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L8:
   case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L9:
   case VDP_VIDEO_MIXER_FEATURE_INVERSE_TELECINE:
-  case VDP_VIDEO_MIXER_FEATURE_LUMA_KEY:
  break;
 
   case VDP_VIDEO_MIXER_FEATURE_DEINTERLACE_TEMPORAL:
@@ -566,6 +603,11 @@ vlVdpVideoMixerSetFeatureEnables(VdpVideoMixer mixer,
  vlVdpVideoMixerUpdateNoiseReductionFilter(vmixer);
  break;
 
+  case VDP_V

[Mesa-dev] [PATCH 1/2] vl: add a luma key filter

2016-05-27 Thread Nayan Deshmukh
Signed-off-by: Nayan Deshmukh 
---
 src/gallium/auxiliary/Makefile.sources   |   2 +
 src/gallium/auxiliary/vl/vl_lumakey_filter.c | 254 +++
 src/gallium/auxiliary/vl/vl_lumakey_filter.h |  60 +++
 3 files changed, 316 insertions(+)
 create mode 100644 src/gallium/auxiliary/vl/vl_lumakey_filter.c
 create mode 100644 src/gallium/auxiliary/vl/vl_lumakey_filter.h

diff --git a/src/gallium/auxiliary/Makefile.sources 
b/src/gallium/auxiliary/Makefile.sources
index 9b0c9a3..a6de370 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -328,6 +328,8 @@ VL_SOURCES := \
vl/vl_deint_filter.h \
vl/vl_idct.c \
vl/vl_idct.h \
+   vl/vl_lumakey_filter.c \
+   vl/vl_lumakey_filter.h \
vl/vl_matrix_filter.c \
vl/vl_matrix_filter.h \
vl/vl_mc.c \
diff --git a/src/gallium/auxiliary/vl/vl_lumakey_filter.c 
b/src/gallium/auxiliary/vl/vl_lumakey_filter.c
new file mode 100644
index 000..7767f16
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_lumakey_filter.c
@@ -0,0 +1,254 @@
+/**
+ *
+ * Copyright 2016 Nayan Deshmukh.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **/
+
+#include 
+
+#include "pipe/p_context.h"
+
+#include "tgsi/tgsi_ureg.h"
+
+#include "util/u_draw.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "vl_types.h"
+#include "vl_vertex_buffers.h"
+#include "vl_lumakey_filter.h"
+
+enum VS_OUTPUT
+{
+   VS_O_VPOS = 0,
+   VS_O_VTEX = 0
+};
+
+static void *
+create_vert_shader(struct vl_lumakey_filter *filter)
+{
+   struct ureg_program *shader;
+   struct ureg_src i_vpos;
+   struct ureg_dst o_vpos, o_vtex;
+
+   shader = ureg_create(PIPE_SHADER_VERTEX);
+   if (!shader)
+  return NULL;
+
+   i_vpos = ureg_DECL_vs_input(shader, 0);
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+   o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX);
+
+   ureg_MOV(shader, o_vpos, i_vpos);
+   ureg_MOV(shader, o_vtex, i_vpos);
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, filter->pipe);
+}
+
+static void *
+create_frag_shader(struct vl_lumakey_filter *filter, float min_luma,
+  float max_luma)
+{
+
+   struct ureg_program *shader;
+   struct ureg_src i_vtex;
+   struct ureg_src sampler;
+   struct ureg_dst t_tex[3];
+   struct ureg_dst o_fragment;
+   unsigned i;
+
+   shader = ureg_create(PIPE_SHADER_FRAGMENT);
+
+   i_vtex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, 
TGSI_INTERPOLATE_LINEAR);
+   sampler = ureg_DECL_sampler(shader, 0);
+
+   for (i = 0; i < 3; ++i) {
+  t_tex[i] = ureg_DECL_temporary(shader);
+   }
+
+   o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   ureg_MOV(shader, t_tex[0], i_vtex);
+
+   struct ureg_src src =  ureg_src(t_tex[0]);
+   ureg_TEX(shader, t_tex[0], TGSI_TEXTURE_2D, src, sampler);
+
+   ureg_MOV(shader, o_fragment, ureg_src(t_tex[0]));
+   ureg_DP3(shader, t_tex[0], ureg_src(t_tex[0]), ureg_imm3f(shader, 0.299, 
0.587, 0.144));
+   ureg_SLE(shader, t_tex[1], ureg_src(t_tex[0]), ureg_imm1f(shader, 
min_luma));
+   ureg_SGT(shader, t_tex[2], ureg_src(t_tex[0]), ureg_imm1f(shader, 
max_luma));
+   ureg_MAX(shader, ureg_writemask(o_fragment, TGSI_WRITEMASK_W), 
ureg_src(t_tex[1]), ureg_src(t_tex[2]));
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, filter->pipe);
+}
+
+bool
+vl_lumakey_filter_init(struct vl_lumakey_filter *filter, struct pipe_context 
*pipe,
+  unsigned video_width, unsigned video_height, float 
min_luma,
+  float max_luma)
+{
+   struct pipe_rasterizer_state rs_state;
+   struct pipe_blend_state bl

[Mesa-dev] [PATCH] gallium/ddebug: Add passthrough for query_memory_info.

2016-05-27 Thread Bas Nieuwenhuizen
Signed-off-by: Bas Nieuwenhuizen 
---
 src/gallium/drivers/ddebug/dd_screen.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/src/gallium/drivers/ddebug/dd_screen.c 
b/src/gallium/drivers/ddebug/dd_screen.c
index ebe090b..5a883bd 100644
--- a/src/gallium/drivers/ddebug/dd_screen.c
+++ b/src/gallium/drivers/ddebug/dd_screen.c
@@ -101,6 +101,14 @@ dd_screen_get_timestamp(struct pipe_screen *_screen)
return screen->get_timestamp(screen);
 }
 
+static void dd_screen_query_memory_info(struct pipe_screen *_screen,
+struct pipe_memory_info *info)
+{
+   struct pipe_screen *screen = dd_screen(_screen)->screen;
+
+   return screen->query_memory_info(screen, info);
+}
+
 static struct pipe_context *
 dd_screen_context_create(struct pipe_screen *_screen, void *priv,
  unsigned flags)
@@ -332,6 +340,7 @@ ddebug_screen_create(struct pipe_screen *screen)
dscreen->base.get_paramf = dd_screen_get_paramf;
dscreen->base.get_compute_param = dd_screen_get_compute_param;
dscreen->base.get_shader_param = dd_screen_get_shader_param;
+   dscreen->base.query_memory_info = dd_screen_query_memory_info;
/* get_video_param */
/* get_compute_param */
SCR_INIT(get_timestamp);
-- 
2.8.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Drivers/Gallium/swr is missing from the list of Mesa components when entering a new bug

2016-05-27 Thread Rob Clark
On Fri, May 27, 2016 at 5:39 AM, Emil Velikov  wrote:
> On 27 May 2016 at 09:44, Michel Dänzer  wrote:
>> On 27.05.2016 16:37, ⚛ wrote:
>>> Hello.
>>>
>>> Please add "Drivers/Gallium/swr" to the list "Component" at
>>> https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa
>>
>> Such requests should be filed in bugzilla itself:
>>
>> https://bugs.freedesktop.org/enter_bug.cgi?product=freedesktop.org&component=Bugzilla
>>
> Indeed. Furthermore the SWR devs did not see any benefit last time
> I've asked them.
>
> Tim, what's your view on this request ?

imho, I'd generally say it should be more up to the users than the
dev's (after all we tell them to file bugs at
https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa rather than
different instructions per driver)

BR,
-R

> Thanks
> Emil
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] scons: support 2.5.0

2016-05-27 Thread Jose Fonseca

Looks good to me.  Thanks.

Reviewed-by: Jose Fonseca 


On 24/05/16 21:13, Emil Velikov wrote:

[Adding Jose]

On 24 May 2016 at 09:43, Giuseppe Bilotta  wrote:

The get_implicit_deps changed in SCons 2.5, expecting a callable rather
than a path as third argument. Detect the SCons versions and set the
argument appropriately to support both 2.5 and earlier versions.

This closes #95211.

For the future please use the full URL. We also might want this in
stable (barring any objections from Jose/others) ?

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95211
Cc: mesa-sta...@lists.freedesktop.org
Acked-by: Emil Velikov 

If anyone else is pushing this, please add the above three lines.

Thanks
Emil


---
  scons/custom.py | 9 -
  1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/scons/custom.py b/scons/custom.py
index ff7a7a9..e66f496 100644
--- a/scons/custom.py
+++ b/scons/custom.py
@@ -43,6 +43,13 @@ import fixes

  import source_list

+# the get_implicit_deps() method changed between 2.4 and 2.5: now it expects
+# a callable that takes a scanner as argument and returns a path, rather than
+# a path directly. We want to support both, so we need to detect the SCons 
version,
+# for which no API is provided by SCons 8-P
+
+scons_version = tuple(map(int, SCons.__version__.split('.')))
+
  def quietCommandLines(env):
  # Quiet command lines
  # See also http://www.scons.org/wiki/HidingCommandLinesInOutput
@@ -129,7 +136,7 @@ def code_generate(env, script, target, source, command):

  # Explicitly mark that the generated code depends on the generator,
  # and on implicitly imported python modules
-path = (script_src.get_dir(),)
+path = (script_src.get_dir(),) if scons_version < (2, 5, 0) else lambda x: 
script_src
  deps = [script_src]
  deps += script_src.get_implicit_deps(env, python_scanner, path)
  env.Depends(code, deps)
--
2.8.1.372.g9612035

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeonsi: Always reserve space for tessellation factors.

2016-05-27 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Fri, May 27, 2016 at 12:40 PM, Marek Olšák  wrote:
> And how about the attached patch?
>
> Marek
>
> On Fri, May 27, 2016 at 10:08 AM, Bas Nieuwenhuizen
>  wrote:
>> Those are always read for writing to the TF ring.
>>
>> Should fix CTS test
>> GL45-CTS.shader_image_load_store.multiple-uniforms
>> after a regression due to the new tessellation code.
>>
>> Signed-off-by: Bas Nieuwenhuizen 
>> ---
>>
>> I have no CTS, so it actually is not tested whether it fixes
>> this test.
>>
>>
>>  src/gallium/drivers/radeonsi/si_state_draw.c | 3 +++
>>  1 file changed, 3 insertions(+)
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
>> b/src/gallium/drivers/radeonsi/si_state_draw.c
>> index 788869e..40592d0 100644
>> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
>> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
>> @@ -125,6 +125,9 @@ static void si_emit_derived_tess_state(struct si_context 
>> *sctx,
>> num_tcs_patch_outputs = 2; /* TESSINNER + TESSOUTER */
>> }
>>
>> +   /* We always load TESSINNER and TESSOUTER to write to the tf ring. */
>> +   num_tcs_patch_outputs = MAX2(num_tcs_patch_outputs, 2);
>> +
>> input_vertex_size = num_tcs_inputs * 16;
>> output_vertex_size = num_tcs_outputs * 16;
>>
>> --
>> 2.8.3
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeon/uvd: fix the H264 level for Tonga

2016-05-27 Thread Andy Furniss

Christian König wrote:

Am 26.05.2016 um 11:27 schrieb Andy Furniss:

Alex Deucher wrote:

On Wed, May 25, 2016 at 10:57 AM, Christian König
 wrote:

From: Christian König 

We support 5.1 for a while now.


Resend as the last one didn't have the CCs.

I know (well think) vdpau doesn't really mention 5.2 anywhere, but
for ffmpeg I've been making this change for some time to say 5.2.

Tonga can easily do 5.2, players don't seem to look at this field,
but ffmpeg cli now does and will refuse to use uvd for 5.2 vids.



5.2 requires the hardware to handle more than twice as many
macroblocks per second as 5.1. So the decoder needs to handle 4k at
66fps.

I'm not sure about the absolute numbers, but I think that could be too
 much even for a Tonga.


300 mbit 2160p60 works fine for me and I don't think an extra 6 fps will
make any difference - the reason being I can go a lot above 5.2
(4080x4096 60 fps) and it still decodes OK, with the caveat that it
seems to expose a powerplay/clocks issue as noted in my powerplay bug.
(issue = OK with clocks forced high, but distort/lock on auto, no lock
with pp=0 but distorted.

I don't need to force clocks for 4kp60 - which is all that really exists
"for real".

Review sites that tested tonga on windows list 4kp60 as a feature as
does wikipedia - and testing on windows it does work OK with dxva2.



In the past ffmpeg cli also didn't look at this, but they merged
something in from libav which changed things.

I have a trac open, but the dev who replied said fix the driver -
he didn't reply further when I said I didn't think vdpau went as
high as 5.2 ...


VDPAU actually doesn't have an enumeration for the level, so you can
 even return something like 9.9 without a problem.


Yea, it's a bit strange that they didn't do all the levels.

So the patch as is limits anything that actually obeys that info to
4kp30 rather than 4kp60, which is working.

Given players with more pragmatic devs than ffmpeg ignore it anyway it
doesn't really matter that much. Another example being Constrained
baseline being listed as not supported - also ignored due to being wrong.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeon/uvd: fix the H264 level for Tonga

2016-05-27 Thread Emil Velikov
On 27 May 2016 at 11:28, Christian König  wrote:
> Am 26.05.2016 um 11:27 schrieb Andy Furniss:
>>
>> Alex Deucher wrote:
>>>
>>> On Wed, May 25, 2016 at 10:57 AM, Christian König
>>>  wrote:

 From: Christian König 

 We support 5.1 for a while now.
>>
>>
>> Resend as the last one didn't have the CCs.
>>
>> I know (well think) vdpau doesn't really mention 5.2 anywhere, but for
>> ffmpeg I've been making this change for some time to say 5.2.
>>
>> Tonga can easily do 5.2, players don't seem to look at this field, but
>> ffmpeg cli now does and will refuse to use uvd for 5.2 vids.
>>
>
> 5.2 requires the hardware to handle more than twice as many macroblocks per
> second as 5.1. So the decoder needs to handle 4k at 66fps.
>
> I'm not sure about the absolute numbers, but I think that could be too much
> even for a Tonga.
>
>> In the past ffmpeg cli also didn't look at this, but they merged
>> something in from libav which changed things.
>>
>> I have a trac open, but the dev who replied said fix the driver - he
>> didn't reply further when I said I didn't think vdpau went as high as
>> 5.2 ...
>
>
> VDPAU actually doesn't have an enumeration for the level, so you can even
> return something like 9.9 without a problem.
>
One of us is getting confused here..

Are you saying that VDPAU users have no way of querying the said
numbers? The way I see it, it's the complete opposite.
It is the only API in which the user _can_ query such info. In
mesa/gallium we have PIPE_VIDEO_CAP_MAX_LEVEL explicitly for VDPAU ;-)

The odd thing is that VLC uses/used to? check that information before
feeding the video to the decoder, while other implementations (like
the original one in mplayer done by the Nvidia devs) do/did? not
bother.

Just clarifying some facts, there's nothing wrong with the patch obviously.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa/get: drop unused extension checks.

2016-05-27 Thread Emil Velikov
On 27 May 2016 at 06:12, Dave Airlie  wrote:
> From: Dave Airlie 
>
> These all show up as unused warnings here, so drop them for now.
>
> Signed-off-by: Dave Airlie 
Reviewed-by: Emil Velikov 

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH resend] pipe_loader_sw: Fix fd leak when instantiated via pipe_loader_sw_probe_kms

2016-05-27 Thread Hans de Goede
Make pipe_loader_sw_probe_kms take ownership of the passed in fd,
like pipe_loader_drm_probe_fd does.

The only caller is dri_kms_init_screen which passes in a dupped fd,
just like dri2_init_screen passes in a dupped fd to
pipe_loader_drm_probe_fd.

Signed-off-by: Hans de Goede 
---
 src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c 
b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
index c8e1f13..e7fa974 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
@@ -45,6 +45,7 @@ struct pipe_loader_sw_device {
struct util_dl_library *lib;
 #endif
struct sw_winsys *ws;
+   int fd;
 };
 
 #define pipe_loader_sw_device(dev) ((struct pipe_loader_sw_device *)dev)
@@ -92,6 +93,7 @@ pipe_loader_sw_probe_init_common(struct pipe_loader_sw_device 
*sdev)
sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE;
sdev->base.driver_name = "swrast";
sdev->base.ops = &pipe_loader_sw_ops;
+   sdev->fd = -1;
 
 #ifdef GALLIUM_STATIC_TARGETS
sdev->dd = &driver_descriptors;
@@ -169,6 +171,8 @@ pipe_loader_sw_probe_kms(struct pipe_loader_device **devs, 
int fd)
if (!pipe_loader_sw_probe_init_common(sdev))
   goto fail;
 
+   sdev->fd = fd;
+
for (i = 0; sdev->dd->winsys[i].name; i++) {
   if (strcmp(sdev->dd->winsys[i].name, "kms_dri") == 0) {
  sdev->ws = sdev->dd->winsys[i].create_winsys(fd);
@@ -273,6 +277,9 @@ pipe_loader_sw_release(struct pipe_loader_device **dev)
   util_dl_close(sdev->lib);
 #endif
 
+   if (sdev->fd != -1)
+  close(sdev->fd);
+
FREE(sdev);
*dev = NULL;
 }
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] clover: Pass unquoted compiler arguments to Clang

2016-05-27 Thread Vedran Miletić
OpenCL apps can quote arguments they pass to the OpenCL compiler, most
commonly include paths containing spaces. If the OpenCL compiler was
called via a shell, the shell would remove (single or double) quotes
before passing the argument to the compiler. Since we call Clang as a
library, we have to remove quotes before passing the argument.
---
 .../state_trackers/clover/llvm/invocation.cpp  | 41 +++---
 1 file changed, 36 insertions(+), 5 deletions(-)

diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp 
b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index e2cadda..d389a76 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -147,12 +147,43 @@ namespace {
 
   // Parse the compiler options:
   std::vector opts_array;
-  std::istringstream ss(opts);
+  std::ostringstream builder;
+
+  // OpenCL programs can pass a single or double quoted argument, most
+  // frequently include path. This is useful so that the path containing
+  // spaces is treated as a single argument, but we should anyhow unquote
+  // quoted arguments before passing them to the compiler.
+  // We want to avoid using std::string::replace here, as an include
+  // path can contain quotes in file names.
+  bool escape_next = false;
+  bool skip_space = false;
+  bool in_quote_double = false;
+  bool in_quote_single = false;
+  for (auto pos = std::begin(opts); pos != std::end(opts); ++pos) {
+ if (escape_next) {
+builder.put(*pos);
+escape_next = false;
+ } else if (*pos == '\\') {
+escape_next = true;
+ } else if (*pos == '"' && !in_quote_single) {
+in_quote_double = !in_quote_double;
+skip_space = !skip_space;
+ } else if (*pos == '\'' && !in_quote_double) {
+in_quote_single = !in_quote_single;
+skip_space = !skip_space;
+ } else if (*pos == ' ' && !skip_space && builder.tellp() > 0) {
+opts_array.emplace_back(builder.str());
+builder.str("");
+ } else if (*pos != ' ' || skip_space) {
+builder.put(*pos);
+ }
+  }
+  if (builder.tellp() > 0) {
+ opts_array.emplace_back(builder.str());
+  }
 
-  while (!ss.eof()) {
- std::string opt;
- getline(ss, opt, ' ');
- opts_array.push_back(opt);
+  if (in_quote_double || in_quote_single) {
+ throw error(CL_INVALID_COMPILER_OPTIONS);
   }
 
   opts_array.push_back(name);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH resend] pipe_loader_sw: Fix fd leak when instantiated via pipe_loader_sw_probe_kms

2016-05-27 Thread Emil Velikov
Hi Hans,

On 27 May 2016 at 15:06, Hans de Goede  wrote:
> Make pipe_loader_sw_probe_kms take ownership of the passed in fd,
> like pipe_loader_drm_probe_fd does.
>
> The only caller is dri_kms_init_screen which passes in a dupped fd,
> just like dri2_init_screen passes in a dupped fd to
> pipe_loader_drm_probe_fd.
>
My memory is failing ... I thought I replied to this.

The patch is correct, so
Reviewed-by: Emil Velikov 

I wonder when I'll get the chance to fold the
almost-but-not-quite-the-same sw and hw sides of the pipe loader. If
you're interested let me know.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/7] nir/search: Use bitfields to shrink struct sizes

2016-05-27 Thread Jason Ekstrand
On May 27, 2016 2:58 AM, "Jose Fonseca"  wrote:
>
> On 27/05/16 12:06, Jason Ekstrand wrote:
>>
>>
>> On May 26, 2016 7:06 PM, "Ian Romanick" > > wrote:
>>  >
>>  > On 05/26/2016 06:30 PM, Jason Ekstrand wrote:
>>  > > This shrinks the .text section of nir_opt_algebraic.o by 30.5 KB:
>>  > >
>>  > >text data  bss  dec  hex  filename
>>  > >   48703645920   1132951ba8f  nir_opt_algebraic.o
>>  > >   179516458408253514267  nir_opt_algebraic.o
>>  > > ---
>>  > >  src/compiler/nir/nir_search.h | 12 ++--
>>  > >  1 file changed, 6 insertions(+), 6 deletions(-)
>>  > >
>>  > > diff --git a/src/compiler/nir/nir_search.h
>> b/src/compiler/nir/nir_search.h
>>  > > index 888a2a3..b97522a 100644
>>  > > --- a/src/compiler/nir/nir_search.h
>>  > > +++ b/src/compiler/nir/nir_search.h
>>  > > @@ -39,23 +39,23 @@ typedef enum {
>>  > >  } nir_search_value_type;
>>  > >
>>  > >  typedef struct {
>>  > > -   nir_search_value_type type;
>>  > > +   uint8_t type; /* enum nir_search_value_type */
>>  >
>>  > Do we lose any checking by having this not be an enum?  Places where
the
>>  > compiler would warn about missing cases, etc.  Would telling GCC to
>>  > pack the enum be just as good?  I've gotten similar feedback on
similar
>>  > kinds of patches.
>>
>> The C99 spec states that bit-field elements must be an integer type or
>> _Bool.  Everything I find indicates that enums aren't allowed.  That
>> said, GCC does allow them and we did use an enum in a bit-field for
>> nir_variable.data.mode for a while.  IIRC, it was making MSVC grumpy
>> which is why we stopped.
>>
>> I'd personally rather keep it as an enum for the sake of type safety as
>> you say.  Since this is never included from C++ we may be able to get
>> away with it but I'm not actually sure that makes a difference.  I added
>> Jose to the Cc; maybe he can shed some light on it.
>
>
> I recall seeing warnings about bit structs with enums, but I don't recall
if it was MSVC or `GCC -Wall`, or what.
>
> A grep of Mesa source tree reveals a precedent of code that's being
compiled by MSVC:
>
> $ git grep '\.*:\s*[0-9]\+\s*;'
> src/gallium/auxiliary/tgsi/tgsi_info.h:   enum tgsi_output_mode
output_mode:3;
>
> It was added in 2012.  So whatever was the limitation, it might have
disappeared long time ago.

If you've got it in TGSI then I think it's fair to say it compiled fine on
everything we care about. :-). I'll keep the enums.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/18] genxml: move the sources (headers) list to Makefile.sources

2016-05-27 Thread Emil Velikov
From: Mauro Rossi 

[Emil Velikov: use the file in the autoconf build]
Signed-off-by: Emil Velikov 
---
 src/intel/genxml/Makefile.am  | 9 +++--
 src/intel/genxml/Makefile.sources | 6 ++
 2 files changed, 9 insertions(+), 6 deletions(-)
 create mode 100644 src/intel/genxml/Makefile.sources

diff --git a/src/intel/genxml/Makefile.am b/src/intel/genxml/Makefile.am
index 0b5b3a6..c393ebe 100644
--- a/src/intel/genxml/Makefile.am
+++ b/src/intel/genxml/Makefile.am
@@ -19,12 +19,9 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
 
-BUILT_SOURCES = \
-   gen6_pack.h \
-   gen7_pack.h \
-   gen75_pack.h\
-   gen8_pack.h \
-   gen9_pack.h
+include Makefile.sources
+
+BUILT_SOURCES = $(GENXML_GENERATED_FILES)
 
 PYTHON3_GEN = $(AM_V_GEN)$(PYTHON3) $(PYTHON_FLAGS)
 
diff --git a/src/intel/genxml/Makefile.sources 
b/src/intel/genxml/Makefile.sources
new file mode 100644
index 000..9298b4a
--- /dev/null
+++ b/src/intel/genxml/Makefile.sources
@@ -0,0 +1,6 @@
+GENXML_GENERATED_FILES = \
+   gen6_pack.h \
+   gen7_pack.h \
+   gen75_pack.h \
+   gen8_pack.h \
+   gen9_pack.h
-- 
2.8.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/18] isl: automake: remove unneeded SUBDIRS

2016-05-27 Thread Emil Velikov
From: Emil Velikov 

As we do not include any other subdirs but self, we don't need to set
it.

Signed-off-by: Emil Velikov 
---
 src/intel/isl/Makefile.am | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/intel/isl/Makefile.am b/src/intel/isl/Makefile.am
index d371a29..3542386 100644
--- a/src/intel/isl/Makefile.am
+++ b/src/intel/isl/Makefile.am
@@ -19,9 +19,6 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
 
-SUBDIRS = .
-
-
 ISL_GEN_LIBS =   \
libisl-gen7.la   \
libisl-gen75.la  \
-- 
2.8.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/18] GROSS HACK: anv: add dummy MESA_GIT_SHA1 define

2016-05-27 Thread Emil Velikov
From: Emil Velikov 

Otherwise we'll error out if built from a release tarball.

Cc: Jason Ekstrand 
Cc: Kristian Høgsberg Kristensen 
---
This patch isn't going to fly obviously but is just enough to get things
building ;-)

Any input on proper fix is highly appreciated.
---
 src/intel/vulkan/anv_device.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index eb40e2d..98eda2b 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -416,6 +416,9 @@ void
 anv_device_get_cache_uuid(void *uuid)
 {
memset(uuid, 0, VK_UUID_SIZE);
+#ifndef MESA_GIT_SHA1
+#define MESA_GIT_SHA1 "git-unknown"
+#endif
snprintf(uuid, VK_UUID_SIZE, "anv-%s", MESA_GIT_SHA1 + 4);
 }
 
-- 
2.8.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/18] anv: automake: bring back VULKAN_ENTRYPOINT_CPPFLAGS

2016-05-27 Thread Emil Velikov
From: Emil Velikov 

We should not have removed them in the first place. There's a subtle
difference between generating the complete sources and using them which
was not obvious as we nuked them.

Without this, the release tarball ends up without various hunks of the
generated sources, thus things fail at a later stage as we attempt to
build them.

Signed-off-by: Emil Velikov 
---
 src/intel/vulkan/Makefile.am | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am
index 0df2480..3411709 100644
--- a/src/intel/vulkan/Makefile.am
+++ b/src/intel/vulkan/Makefile.am
@@ -124,12 +124,16 @@ VULKAN_LIB_DEPS += \
 nodist_EXTRA_libvulkan_intel_la_SOURCES = dummy.cpp
 libvulkan_intel_la_SOURCES = $(VULKAN_GEM_FILES)
 
+VULKAN_ENTRYPOINT_CPPFLAGS = \
+   -DVK_USE_PLATFORM_XCB_KHR \
+   -DVK_USE_PLATFORM_WAYLAND_KHR
+
 anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_include_HEADERS)
-   $(AM_V_GEN)$(CPP) $(AM_CPPFLAGS) 
$(top_srcdir)/include/vulkan/vulkan_intel.h |\
+   $(AM_V_GEN)$(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) 
$(top_srcdir)/include/vulkan/vulkan_intel.h |\
$(PYTHON2) $(srcdir)/anv_entrypoints_gen.py header > $@
 
 anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS)
-   $(AM_V_GEN)$(CPP) $(AM_CPPFLAGS) 
$(top_srcdir)/include/vulkan/vulkan_intel.h |\
+   $(AM_V_GEN)$(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) 
$(top_srcdir)/include/vulkan/vulkan_intel.h |\
$(PYTHON2) $(srcdir)/anv_entrypoints_gen.py code > $@
 
 BUILT_SOURCES = $(VULKAN_GENERATED_FILES)
-- 
2.8.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 17/18] swr: automake: attempt to fix the out-of-tree build

2016-05-27 Thread Emil Velikov
From: Emil Velikov 

Make sure that the output folder is created, otherwise the python scripts
yell at us.

Cc: 0xe2.0x9a.0...@gmail.com
Cc: Tim Rowley 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96238
Signed-off-by: Emil Velikov 
---

This fixes the problem here, although the build barfs as below.

Tim, how are you building swr ? Is the following something on my end ?

/usr/lib/gcc/x86_64-pc-linux-gnu/6.1.1/include/popcntintrin.h: In function 
‘void* swr_create_vs_state(pipe_context*, const pipe_shader_state*)’:
/usr/lib/gcc/x86_64-pc-linux-gnu/6.1.1/include/popcntintrin.h:35:1: error: 
inlining failed in call to always_inline ‘int _mm_popcnt_u32(unsigned int)’: 
target specific option mismatch
 _mm_popcnt_u32 (unsigned int __X)
 ^~
../../../../../src/gallium/drivers/swr/swr_state.cpp:344:28: note: called from 
here
  _mm_popcnt_u32(swr_vs->soState.streamMasks[i]);
  ~~^~~~
---
 src/gallium/drivers/swr/Makefile.am | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/src/gallium/drivers/swr/Makefile.am 
b/src/gallium/drivers/swr/Makefile.am
index 291b5b2..90dd040 100644
--- a/src/gallium/drivers/swr/Makefile.am
+++ b/src/gallium/drivers/swr/Makefile.am
@@ -56,6 +56,7 @@ BUILT_SOURCES = \
rasterizer/jitter/builder_x86.h \
rasterizer/jitter/builder_x86.cpp
 
+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
 swr_context_llvm.h: rasterizer/jitter/scripts/gen_llvm_types.py swr_context.h
$(PYTHON2) $(PYTHON_FLAGS) \
$(srcdir)/rasterizer/jitter/scripts/gen_llvm_types.py \
@@ -63,17 +64,20 @@ swr_context_llvm.h: 
rasterizer/jitter/scripts/gen_llvm_types.py swr_context.h
--output swr_context_llvm.h
 
 rasterizer/scripts/gen_knobs.cpp rasterizer/scripts/gen_knobs.h: 
rasterizer/scripts/gen_knobs.py rasterizer/scripts/knob_defs.py 
rasterizer/scripts/templates/knobs.template
+   $(MKDIR_GEN)
$(PYTHON2) $(PYTHON_FLAGS) \
$(srcdir)/rasterizer/scripts/gen_knobs.py \
rasterizer/scripts
 
 rasterizer/jitter/state_llvm.h: rasterizer/jitter/scripts/gen_llvm_types.py 
rasterizer/core/state.h
+   $(MKDIR_GEN)
$(PYTHON2) $(PYTHON_FLAGS) \
$(srcdir)/rasterizer/jitter/scripts/gen_llvm_types.py \
--input $(srcdir)/rasterizer/core/state.h \
--output rasterizer/jitter/state_llvm.h
 
 rasterizer/jitter/builder_gen.h: 
rasterizer/jitter/scripts/gen_llvm_ir_macros.py
+   $(MKDIR_GEN)
$(PYTHON2) $(PYTHON_FLAGS) \
$(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
--input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
@@ -81,6 +85,7 @@ rasterizer/jitter/builder_gen.h: 
rasterizer/jitter/scripts/gen_llvm_ir_macros.py
--gen_h
 
 rasterizer/jitter/builder_gen.cpp: 
rasterizer/jitter/scripts/gen_llvm_ir_macros.py
+   $(MKDIR_GEN)
$(PYTHON2) $(PYTHON_FLAGS) \
$(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
--input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
@@ -88,12 +93,14 @@ rasterizer/jitter/builder_gen.cpp: 
rasterizer/jitter/scripts/gen_llvm_ir_macros.
--gen_cpp
 
 rasterizer/jitter/builder_x86.h: 
rasterizer/jitter/scripts/gen_llvm_ir_macros.py
+   $(MKDIR_GEN)
$(PYTHON2) $(PYTHON_FLAGS) \
$(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
--output rasterizer/jitter/builder_x86.h \
--gen_x86_h
 
 rasterizer/jitter/builder_x86.cpp: 
rasterizer/jitter/scripts/gen_llvm_ir_macros.py
+   $(MKDIR_GEN)
$(PYTHON2) $(PYTHON_FLAGS) \
$(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
--output rasterizer/jitter/builder_x86.cpp \
-- 
2.8.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/18] isl: add isl_priv.h to the sources list

2016-05-27 Thread Emil Velikov
From: Emil Velikov 

Otherwise it will be missing from the release tarball.

Signed-off-by: Emil Velikov 
---
 src/intel/isl/Makefile.sources | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/intel/isl/Makefile.sources b/src/intel/isl/Makefile.sources
index e06568b..fe6a00f 100644
--- a/src/intel/isl/Makefile.sources
+++ b/src/intel/isl/Makefile.sources
@@ -7,6 +7,7 @@ ISL_FILES = \
isl_gen4.h \
isl_gen6.c \
isl_gen6.h \
+   isl_priv.h \
isl_storage_image.c
 
 ISL_GEN7_FILES = \
-- 
2.8.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/18] anv: automake: don't forget to cleanup dev_icd.json

2016-05-27 Thread Emil Velikov
From: Emil Velikov 

Otherwise `make distcheck' will barf at us as the file is dangling.

Ideally this should be part of the clean-local hook, although we include
install-lib-links.mk which already has one.

Signed-off-by: Emil Velikov 
---
I'm leaning that people prefer the install... symlink script, so I've
not nuked it. Let me know if you have no use of it.
---
 src/intel/vulkan/Makefile.am | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am
index 3411709..d2ea288 100644
--- a/src/intel/vulkan/Makefile.am
+++ b/src/intel/vulkan/Makefile.am
@@ -137,7 +137,7 @@ anv_entrypoints.c : anv_entrypoints_gen.py 
$(vulkan_include_HEADERS)
$(PYTHON2) $(srcdir)/anv_entrypoints_gen.py code > $@
 
 BUILT_SOURCES = $(VULKAN_GENERATED_FILES)
-CLEANFILES = $(BUILT_SOURCES)
+CLEANFILES = $(BUILT_SOURCES) dev_icd.json
 EXTRA_DIST = \
$(top_srcdir)/include/vulkan/vk_icd.h \
anv_entrypoints_gen.py \
-- 
2.8.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 16/18] swr: remove LLVM dependency from source generation rules.

2016-05-27 Thread Emil Velikov
From: Emil Velikov 

The dependencies should not mention any files external to the project.
If we want to do sanity checks for the LLVM installed on the system we
should do that in configure, yet again where is the merit which header
gets checked and which doesn't ?

Cc: Tim Rowley 
Signed-off-by: Emil Velikov 
---
 src/gallium/drivers/swr/Makefile.am | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/swr/Makefile.am 
b/src/gallium/drivers/swr/Makefile.am
index b4317f6..291b5b2 100644
--- a/src/gallium/drivers/swr/Makefile.am
+++ b/src/gallium/drivers/swr/Makefile.am
@@ -73,14 +73,14 @@ rasterizer/jitter/state_llvm.h: 
rasterizer/jitter/scripts/gen_llvm_types.py rast
--input $(srcdir)/rasterizer/core/state.h \
--output rasterizer/jitter/state_llvm.h
 
-rasterizer/jitter/builder_gen.h: 
rasterizer/jitter/scripts/gen_llvm_ir_macros.py 
$(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
+rasterizer/jitter/builder_gen.h: 
rasterizer/jitter/scripts/gen_llvm_ir_macros.py
$(PYTHON2) $(PYTHON_FLAGS) \
$(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
--input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
--output rasterizer/jitter/builder_gen.h \
--gen_h
 
-rasterizer/jitter/builder_gen.cpp: 
rasterizer/jitter/scripts/gen_llvm_ir_macros.py 
$(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
+rasterizer/jitter/builder_gen.cpp: 
rasterizer/jitter/scripts/gen_llvm_ir_macros.py
$(PYTHON2) $(PYTHON_FLAGS) \
$(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
--input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
-- 
2.8.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/18] freedreno: make sure we pick up ir3_nir_trig.py in the release tarball

2016-05-27 Thread Emil Velikov
From: Emil Velikov 

Signed-off-by: Emil Velikov 
---
 src/gallium/drivers/freedreno/Makefile.am | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/freedreno/Makefile.am 
b/src/gallium/drivers/freedreno/Makefile.am
index 1af8dec..148dd0e 100644
--- a/src/gallium/drivers/freedreno/Makefile.am
+++ b/src/gallium/drivers/freedreno/Makefile.am
@@ -25,6 +25,7 @@ libfreedreno_la_SOURCES = \
 
 BUILT_SOURCES := $(ir3_GENERATED_FILES)
 CLEANFILES := $(BUILT_SOURCES)
+EXTRA_DIST = ir3/ir3_nir_trig.py
 
 noinst_PROGRAMS = ir3_compiler
 
-- 
2.8.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/18] isl: automake: flatten the tests rules

2016-05-27 Thread Emil Velikov
From: Emil Velikov 

Fold the unneeded extra variable tests_ldadd, the explicit sources
section (single file with the default extension) and flip the
check_PROGRAMS <> TESTS order (TESTS includes scripts, while
check_PROGRAMS is binaries only).

Signed-off-by: Emil Velikov 
---
 src/intel/isl/Makefile.am | 13 -
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/src/intel/isl/Makefile.am b/src/intel/isl/Makefile.am
index 51e2ae1..57313ba 100644
--- a/src/intel/isl/Makefile.am
+++ b/src/intel/isl/Makefile.am
@@ -100,20 +100,15 @@ isl_format_layout.c: isl_format_layout_gen.bash \
 #  Tests
 # 
 
-TESTS = tests/isl_surf_get_image_offset_test
+check_PROGRAMS = tests/isl_surf_get_image_offset_test
 
-check_PROGRAMS = $(TESTS)
+TESTS = $(check_PROGRAMS)
 
-# Link tests to lib965_compiler.la for brw_get_device_info().
-tests_ldadd =  \
-   libisl.la   \
+tests_isl_surf_get_image_offset_test_LDADD = \
+   libisl.la \
$(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la \
-lm
 
-tests_isl_surf_get_image_offset_test_SOURCES = \
-   tests/isl_surf_get_image_offset_test.c
-tests_isl_surf_get_image_offset_test_LDADD = $(tests_ldadd)
-
 # 
 
 EXTRA_DIST = \
-- 
2.8.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/18] isl: automake: list builddir before srcdir in the includes list

2016-05-27 Thread Emil Velikov
From: Emil Velikov 

As seen elsewhere - we want to include the freshly built sources as
opposed to the (likely) stale ones in the srcdir.

Signed-off-by: Emil Velikov 
---
 src/intel/isl/Makefile.am | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/intel/isl/Makefile.am b/src/intel/isl/Makefile.am
index 57313ba..46b2683 100644
--- a/src/intel/isl/Makefile.am
+++ b/src/intel/isl/Makefile.am
@@ -34,16 +34,15 @@ AM_CPPFLAGS = \
$(VALGRIND_CFLAGS) \
$(DEFINES) \
-I$(top_srcdir)/include \
+   -I$(top_builddir)/src \
-I$(top_srcdir)/src \
+   -I$(top_builddir)/src/intel \
-I$(top_srcdir)/src/intel \
-I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/mesa \
-   -I$(top_srcdir)/src/mesa/drivers/dri/common \
-I$(top_srcdir)/src/mesa/drivers/dri/i965 \
-I$(top_srcdir)/src/gallium/auxiliary \
-   -I$(top_srcdir)/src/gallium/include \
-   -I$(top_builddir)/src \
-   -I$(top_builddir)/src/intel
+   -I$(top_srcdir)/src/gallium/include
 
 libisl_la_CFLAGS = $(CFLAGS) -Wno-override-init
 
-- 
2.8.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/18] isl: automake: remove unneeded install-lib-links.mk include

2016-05-27 Thread Emil Velikov
One uses the makefile to create compatibility symlinks (to
$top_builddir/libs) for shared libraries/modules. As we don't create any
here, there's no need to include the file.

Signed-off-by: Emil Velikov 
---
 src/intel/isl/Makefile.am | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/intel/isl/Makefile.am b/src/intel/isl/Makefile.am
index 3542386..51e2ae1 100644
--- a/src/intel/isl/Makefile.am
+++ b/src/intel/isl/Makefile.am
@@ -116,8 +116,6 @@ tests_isl_surf_get_image_offset_test_LDADD = $(tests_ldadd)
 
 # 
 
-include $(top_srcdir)/install-lib-links.mk
-
 EXTRA_DIST = \
isl_format_layout_gen.bash \
isl_format_layout.csv \
-- 
2.8.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/18] isl: move the sources lists to Makefile.sources

2016-05-27 Thread Emil Velikov
From: Mauro Rossi 

[Emil Velikov: use the file in the autoconf build]
Signed-off-by: Emil Velikov 
---
 src/intel/isl/Makefile.am  | 40 
 src/intel/isl/Makefile.sources | 31 +++
 2 files changed, 39 insertions(+), 32 deletions(-)
 create mode 100644 src/intel/isl/Makefile.sources

diff --git a/src/intel/isl/Makefile.am b/src/intel/isl/Makefile.am
index 46b2683..4922b1f 100644
--- a/src/intel/isl/Makefile.am
+++ b/src/intel/isl/Makefile.am
@@ -19,6 +19,8 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
 
+include Makefile.sources
+
 ISL_GEN_LIBS =   \
libisl-gen7.la   \
libisl-gen75.la  \
@@ -48,47 +50,21 @@ libisl_la_CFLAGS = $(CFLAGS) -Wno-override-init
 
 libisl_la_LIBADD = $(ISL_GEN_LIBS)
 
-libisl_la_SOURCES = \
-   isl.c   \
-   isl.h   \
-   isl_format.c\
-   isl_format_layout.c \
-   isl_gen4.c  \
-   isl_gen4.h  \
-   isl_gen6.c  \
-   isl_gen6.h  \
-   isl_priv.h  \
-   isl_storage_image.c \
-   $(NULL)
+libisl_la_SOURCES = $(ISL_FILES)
 
-libisl_gen7_la_SOURCES =\
-   isl_gen7.c  \
-   isl_gen7.h  \
-isl_surface_state.c \
-   $(NULL)
+libisl_gen7_la_SOURCES = $(ISL_GEN7_FILES)
 libisl_gen7_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=70
 
-libisl_gen75_la_SOURCES =   \
-isl_surface_state.c \
-   $(NULL)
+libisl_gen75_la_SOURCES = $(ISL_GEN75_FILES)
 libisl_gen75_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=75
 
-libisl_gen8_la_SOURCES =\
-   isl_gen8.c  \
-   isl_gen8.h  \
-isl_surface_state.c \
-   $(NULL)
+libisl_gen8_la_SOURCES = $(ISL_GEN8_FILES)
 libisl_gen8_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=80
 
-libisl_gen9_la_SOURCES =\
-   isl_gen9.c  \
-   isl_gen9.h  \
-isl_surface_state.c \
-   $(NULL)
+libisl_gen9_la_SOURCES = $(ISL_GEN9_FILES)
 libisl_gen9_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=90
 
-BUILT_SOURCES = \
-   isl_format_layout.c
+BUILT_SOURCES = $(ISL_GENERATED_FILES)
 
 isl_format_layout.c: isl_format_layout_gen.bash \
  isl_format_layout.csv
diff --git a/src/intel/isl/Makefile.sources b/src/intel/isl/Makefile.sources
new file mode 100644
index 000..e06568b
--- /dev/null
+++ b/src/intel/isl/Makefile.sources
@@ -0,0 +1,31 @@
+ISL_FILES = \
+   isl.c \
+   isl.h \
+   isl_format.c \
+   isl_format_layout.c \
+   isl_gen4.c \
+   isl_gen4.h \
+   isl_gen6.c \
+   isl_gen6.h \
+   isl_storage_image.c
+
+ISL_GEN7_FILES = \
+   isl_gen7.c \
+   isl_gen7.h \
+   isl_surface_state.c
+
+ISL_GEN75_FILES = \
+   isl_surface_state.c
+
+ISL_GEN8_FILES = \
+   isl_gen8.c \
+   isl_gen8.h \
+   isl_surface_state.c
+
+ISL_GEN9_FILES = \
+   isl_gen9.c \
+   isl_gen9.h \
+   isl_surface_state.c
+
+ISL_GENERATED_FILES = \
+   isl_format_layout.c
-- 
2.8.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/18] mesa_glinterop: remove mesa_glinterop typedefs

2016-05-27 Thread Emil Velikov
From: Emil Velikov 

As is there are two places that do the typedefs - dri_interface.h and
this header. As we cannot include the former in here, just drop the
typedefs and use the struct directly (as needed).

This is required because typedef redefinition is a C11 feature which is
not supported on all the versions of GCC used to build mesa.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96236
Cc: Vinson Lee 
Cc: Marek Olšák 
Signed-off-by: Emil Velikov 
---
Vinson, can you please test this ?
---
 include/GL/mesa_glinterop.h | 36 ++--
 src/egl/main/eglapi.c   |  6 +++---
 src/glx/glxcmds.c   |  6 +++---
 3 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/include/GL/mesa_glinterop.h b/include/GL/mesa_glinterop.h
index b805d63..d44e385 100644
--- a/include/GL/mesa_glinterop.h
+++ b/include/GL/mesa_glinterop.h
@@ -92,7 +92,7 @@ enum {
 /**
  * Device information returned by Mesa.
  */
-typedef struct _mesa_glinterop_device_info {
+struct _mesa_glinterop_device_info {
/* The caller should set this to the version of the struct they support */
/* The callee will overwrite it if it supports a lower version.
 *
@@ -113,14 +113,14 @@ typedef struct _mesa_glinterop_device_info {
uint32_t device_id;
 
/* Structure version 1 ends here. */
-} mesa_glinterop_device_info;
+};
 
 #define MESA_GLINTEROP_EXPORT_IN_VERSION 1
 
 /**
  * Input parameters to Mesa interop export functions.
  */
-typedef struct _mesa_glinterop_export_in {
+struct _mesa_glinterop_export_in {
/* The caller should set this to the version of the struct they support */
/* The callee will overwrite it if it supports a lower version.
 *
@@ -178,14 +178,14 @@ typedef struct _mesa_glinterop_export_in {
 */
void *out_driver_data;
/* Structure version 1 ends here. */
-} mesa_glinterop_export_in;
+};
 
 #define MESA_GLINTEROP_EXPORT_OUT_VERSION 1
 
 /**
  * Outputs of Mesa interop export functions.
  */
-typedef struct _mesa_glinterop_export_out {
+struct _mesa_glinterop_export_out {
/* The caller should set this to the version of the struct they support */
/* The callee will overwrite it if it supports a lower version.
 *
@@ -233,7 +233,7 @@ typedef struct _mesa_glinterop_export_out {
/* The number of bytes written to out_driver_data. */
uint32_t out_driver_data_written;
/* Structure version 1 ends here. */
-} mesa_glinterop_export_out;
+};
 
 
 /**
@@ -247,7 +247,7 @@ typedef struct _mesa_glinterop_export_out {
  */
 int
 MesaGLInteropGLXQueryDeviceInfo(Display *dpy, GLXContext context,
-mesa_glinterop_device_info *out);
+struct _mesa_glinterop_device_info *out);
 
 
 /**
@@ -256,7 +256,7 @@ MesaGLInteropGLXQueryDeviceInfo(Display *dpy, GLXContext 
context,
  */
 int
 MesaGLInteropEGLQueryDeviceInfo(EGLDisplay dpy, EGLContext context,
-mesa_glinterop_device_info *out);
+struct _mesa_glinterop_device_info *out);
 
 
 /**
@@ -272,8 +272,8 @@ MesaGLInteropEGLQueryDeviceInfo(EGLDisplay dpy, EGLContext 
context,
  */
 int
 MesaGLInteropGLXExportObject(Display *dpy, GLXContext context,
- mesa_glinterop_export_in *in,
- mesa_glinterop_export_out *out);
+ struct _mesa_glinterop_export_in *in,
+ struct _mesa_glinterop_export_out *out);
 
 
 /**
@@ -282,20 +282,20 @@ MesaGLInteropGLXExportObject(Display *dpy, GLXContext 
context,
  */
 int
 MesaGLInteropEGLExportObject(EGLDisplay dpy, EGLContext context,
- mesa_glinterop_export_in *in,
- mesa_glinterop_export_out *out);
+ struct _mesa_glinterop_export_in *in,
+ struct _mesa_glinterop_export_out *out);
 
 
 typedef int (PFNMESAGLINTEROPGLXQUERYDEVICEINFOPROC)(Display *dpy, GLXContext 
context,
- 
mesa_glinterop_device_info *out);
+ struct 
_mesa_glinterop_device_info *out);
 typedef int (PFNMESAGLINTEROPEGLQUERYDEVICEINFOPROC)(EGLDisplay dpy, 
EGLContext context,
- 
mesa_glinterop_device_info *out);
+ struct 
_mesa_glinterop_device_info *out);
 typedef int (PFNMESAGLINTEROPGLXEXPORTOBJECTPROC)(Display *dpy, GLXContext 
context,
-  mesa_glinterop_export_in *in,
-  mesa_glinterop_export_out 
*out);
+  struct 
_mesa_glinterop_export_in *in,
+  struct 
_mesa_glinterop_export_out *out);
 typedef int (PFNMESAGLINTEROPEGLEXPORTOBJECTPROC)(EGLDisplay dpy, EGL

[Mesa-dev] [PATCH 15/18] swr: add all the generators to the release tarball.

2016-05-27 Thread Emil Velikov
From: Emil Velikov 

Namely the python scripts and the knobs.template.

Signed-off-by: Emil Velikov 
---
 src/gallium/drivers/swr/Makefile.am | 24 
 1 file changed, 24 insertions(+)

diff --git a/src/gallium/drivers/swr/Makefile.am 
b/src/gallium/drivers/swr/Makefile.am
index 72d0ac4..b4317f6 100644
--- a/src/gallium/drivers/swr/Makefile.am
+++ b/src/gallium/drivers/swr/Makefile.am
@@ -144,3 +144,27 @@ libswrAVX2_la_LDFLAGS = \
$(COMMON_LDFLAGS)
 
 include $(top_srcdir)/install-gallium-links.mk
+
+EXTRA_DIST = \
+   rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
+   rasterizer/jitter/scripts/gen_llvm_types.py \
+   rasterizer/scripts/gen_knobs.py \
+   rasterizer/scripts/knob_defs.py \
+   rasterizer/scripts/mako/ast.py \
+   rasterizer/scripts/mako/_ast_util.py \
+   rasterizer/scripts/mako/cache.py \
+   rasterizer/scripts/mako/cmd.py \
+   rasterizer/scripts/mako/codegen.py \
+   rasterizer/scripts/mako/compat.py \
+   rasterizer/scripts/mako/exceptions.py \
+   rasterizer/scripts/mako/filters.py \
+   rasterizer/scripts/mako/__init__.py \
+   rasterizer/scripts/mako/lexer.py \
+   rasterizer/scripts/mako/lookup.py \
+   rasterizer/scripts/mako/parsetree.py \
+   rasterizer/scripts/mako/pygen.py \
+   rasterizer/scripts/mako/pyparser.py \
+   rasterizer/scripts/mako/runtime.py \
+   rasterizer/scripts/mako/template.py \
+   rasterizer/scripts/mako/util.py \
+   rasterizer/scripts/templates/knobs.template
-- 
2.8.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/18] anv: automake: ship the json files in the release tarball

2016-05-27 Thread Emil Velikov
From: Emil Velikov 

Signed-off-by: Emil Velikov 
---
 src/intel/vulkan/Makefile.am | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am
index 662d720..0df2480 100644
--- a/src/intel/vulkan/Makefile.am
+++ b/src/intel/vulkan/Makefile.am
@@ -136,7 +136,9 @@ BUILT_SOURCES = $(VULKAN_GENERATED_FILES)
 CLEANFILES = $(BUILT_SOURCES)
 EXTRA_DIST = \
$(top_srcdir)/include/vulkan/vk_icd.h \
-   anv_entrypoints_gen.py
+   anv_entrypoints_gen.py \
+   dev_icd.json.in \
+   intel_icd.json
 
 libvulkan_intel_la_LIBADD = $(VULKAN_LIB_DEPS)
 
-- 
2.8.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 18/18] swr: automake: silence the python invocation

2016-05-27 Thread Emil Velikov
From: Emil Velikov 

Cc: Tim Rowley 
Signed-off-by: Emil Velikov 
---
 src/gallium/drivers/swr/Makefile.am | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/swr/Makefile.am 
b/src/gallium/drivers/swr/Makefile.am
index 90dd040..d211f2e 100644
--- a/src/gallium/drivers/swr/Makefile.am
+++ b/src/gallium/drivers/swr/Makefile.am
@@ -57,28 +57,29 @@ BUILT_SOURCES = \
rasterizer/jitter/builder_x86.cpp
 
 MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
 swr_context_llvm.h: rasterizer/jitter/scripts/gen_llvm_types.py swr_context.h
-   $(PYTHON2) $(PYTHON_FLAGS) \
+   $(PYTHON_GEN) \
$(srcdir)/rasterizer/jitter/scripts/gen_llvm_types.py \
--input $(srcdir)/swr_context.h \
--output swr_context_llvm.h
 
 rasterizer/scripts/gen_knobs.cpp rasterizer/scripts/gen_knobs.h: 
rasterizer/scripts/gen_knobs.py rasterizer/scripts/knob_defs.py 
rasterizer/scripts/templates/knobs.template
$(MKDIR_GEN)
-   $(PYTHON2) $(PYTHON_FLAGS) \
+   $(PYTHON_GEN) \
$(srcdir)/rasterizer/scripts/gen_knobs.py \
rasterizer/scripts
 
 rasterizer/jitter/state_llvm.h: rasterizer/jitter/scripts/gen_llvm_types.py 
rasterizer/core/state.h
$(MKDIR_GEN)
-   $(PYTHON2) $(PYTHON_FLAGS) \
+   $(PYTHON_GEN) \
$(srcdir)/rasterizer/jitter/scripts/gen_llvm_types.py \
--input $(srcdir)/rasterizer/core/state.h \
--output rasterizer/jitter/state_llvm.h
 
 rasterizer/jitter/builder_gen.h: 
rasterizer/jitter/scripts/gen_llvm_ir_macros.py
$(MKDIR_GEN)
-   $(PYTHON2) $(PYTHON_FLAGS) \
+   $(PYTHON_GEN) \
$(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
--input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
--output rasterizer/jitter/builder_gen.h \
@@ -86,7 +87,7 @@ rasterizer/jitter/builder_gen.h: 
rasterizer/jitter/scripts/gen_llvm_ir_macros.py
 
 rasterizer/jitter/builder_gen.cpp: 
rasterizer/jitter/scripts/gen_llvm_ir_macros.py
$(MKDIR_GEN)
-   $(PYTHON2) $(PYTHON_FLAGS) \
+   $(PYTHON_GEN) \
$(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
--input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
--output rasterizer/jitter/builder_gen.cpp \
@@ -94,14 +95,14 @@ rasterizer/jitter/builder_gen.cpp: 
rasterizer/jitter/scripts/gen_llvm_ir_macros.
 
 rasterizer/jitter/builder_x86.h: 
rasterizer/jitter/scripts/gen_llvm_ir_macros.py
$(MKDIR_GEN)
-   $(PYTHON2) $(PYTHON_FLAGS) \
+   $(PYTHON_GEN) \
$(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
--output rasterizer/jitter/builder_x86.h \
--gen_x86_h
 
 rasterizer/jitter/builder_x86.cpp: 
rasterizer/jitter/scripts/gen_llvm_ir_macros.py
$(MKDIR_GEN)
-   $(PYTHON2) $(PYTHON_FLAGS) \
+   $(PYTHON_GEN) \
$(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
--output rasterizer/jitter/builder_x86.cpp \
--gen_x86_cpp
-- 
2.8.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeon/uvd: fix the H264 level for Tonga

2016-05-27 Thread Christian König

Am 27.05.2016 um 15:16 schrieb Emil Velikov:

On 27 May 2016 at 11:28, Christian König  wrote:

Am 26.05.2016 um 11:27 schrieb Andy Furniss:

Alex Deucher wrote:

On Wed, May 25, 2016 at 10:57 AM, Christian König
 wrote:

From: Christian König 

We have supported 5.1 for a while now.


Resend as the last one didn't have the CCs.

I know (well think) vdpau doesn't really mention 5.2 anywhere, but for
ffmpeg I've been making this change for some time to say 5.2.

Tonga can easily do 5.2, players don't seem to look at this field, but
ffmpeg cli now does and will refuse to use uvd for 5.2 vids.


5.2 requires the hardware to handle more than twice as many macroblocks per
second than 5.1. So the decoder needs to handle 4k at 66fps.

I'm not sure about the absolute numbers, but I think that could be too much
even for a Tonga.


In the past ffmpeg cli also didn't look at this, but they merged
something in from libav which changed things.

I have a trac open, but the dev who replied said fix the driver - he
didn't reply further when I said I didn't think vdpau went as high as
5.2 ...


VDPAU actually doesn't have an enumeration for the level, so you can even
return something like 9.9 without a problem.


One of us is getting confused here..

Are you saying that VDPAU users have no way of querying the said
numbers ?


No, what I'm saying is that it is a number and not an enum.

This way you don't need to change the specification when you want to 
support a new level.


Christian.


  The way I see it it's the complete opposite.
It is the only API on which the user _can_ query such info. In
mesa/gallium we have PIPE_VIDEO_CAP_MAX_LEVEL explicitly for VDPAU ;-)

The odd thing is that VLC uses/used to? check that information before
feeding the video to the decoder, while other implementations (like
the original one in mplayer done by the Nvidia devs) do/did? not
bother.

Just clarifying some facts, there's nothing wrong with the patch obviously.

-Emil


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/18] softpipe: add sp_buffer.h to the sources list (release tarball)

2016-05-27 Thread Emil Velikov
From: Emil Velikov 

Signed-off-by: Emil Velikov 
---
 src/gallium/drivers/softpipe/Makefile.sources | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/softpipe/Makefile.sources 
b/src/gallium/drivers/softpipe/Makefile.sources
index d72266f..e405ef2 100644
--- a/src/gallium/drivers/softpipe/Makefile.sources
+++ b/src/gallium/drivers/softpipe/Makefile.sources
@@ -1,5 +1,6 @@
 C_SOURCES := \
sp_buffer.c \
+   sp_buffer.h \
sp_clear.c \
sp_clear.h \
sp_context.c \
-- 
2.8.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeon/uvd: fix the H264 level for Tonga

2016-05-27 Thread Emil Velikov
On 27 May 2016 at 15:40, Christian König  wrote:

> No, what I'm saying is that it is a number and not an enum.
>
> This way you don't need to change the specification when you want to support
> a new level.
>
That's the case indeed. Thanks for explaining.

Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] clover: Compute aligned size for scalar argument

2016-05-27 Thread Vedran Miletić
Scalar arguments are aligned to the next power of two per OpenCL 1.2
specification, Ch. 6.1.5: "A built-in data type that is not a power of
two bytes in size must be aligned to the next larger power of two." We
already compute size aligned to the next power of two in
get_kernel_args() and store it in scalar_argument object. This patch
adds same computation in kernel::scalar_argument::set().

Since the code might now silently fail where it previously failed with
CL_INVALID_ARG_SIZE, this patch also adds assert checking that Clang
compiled a struct argument into LLVM IR that we can handle.
---
 src/gallium/state_trackers/clover/core/kernel.cpp | 7 +--
 src/gallium/state_trackers/clover/llvm/invocation.cpp | 4 
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/clover/core/kernel.cpp 
b/src/gallium/state_trackers/clover/core/kernel.cpp
index 9231462..b2dc7f9 100644
--- a/src/gallium/state_trackers/clover/core/kernel.cpp
+++ b/src/gallium/state_trackers/clover/core/kernel.cpp
@@ -376,10 +376,13 @@ kernel::scalar_argument::set(size_t size, const void 
*value) {
if (!value)
   throw error(CL_INVALID_ARG_VALUE);
 
-   if (size != this->size)
+   // OpenCL 1.2 specification, Ch. 6.1.5: "A built-in data
+   // type that is not a power of two bytes in size must be
+   // aligned to the next larger power of two."
+   if (util_next_power_of_two(size) != this->size)
   throw error(CL_INVALID_ARG_SIZE);
 
-   v = { (uint8_t *)value, (uint8_t *)value + size };
+   v = { (uint8_t *)value, (uint8_t *)value + util_next_power_of_two(size) };
_set = true;
 }
 
diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp 
b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index e2cadda..b4b8e74 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -463,6 +463,10 @@ namespace {
  llvm::Type *arg_type = arg.getType();
  const unsigned arg_store_size = TD.getTypeStoreSize(arg_type);
 
+ assert(!(arg_type->isPointerTy() && arg.hasByValAttr() &&
+arg_type->getPointerElementType()->isStructTy()) &&
+"Unable to handle struct compiled as pointer with byval.");
+
  // OpenCL 1.2 specification, Ch. 6.1.5: "A built-in data
  // type that is not a power of two bytes in size must be
  // aligned to the next larger power of two".  We need this
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 14/18] mesa_glinterop: remove mesa_glinterop typedefs

2016-05-27 Thread Marek Olšák
You don't need to keep the underscore at the beginning of the type
names anymore.

Marek

On Fri, May 27, 2016 at 4:35 PM, Emil Velikov  wrote:
> From: Emil Velikov 
>
> As is there are two places that do the typedefs - dri_interface.h and
> this header. As we cannot include the former in here, just drop the
> typedefs and use the struct directly (as needed).
>
> This is required because typedef redefinition is a C11 feature which is
> not supported on all the versions of GCC used to build mesa.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96236
> Cc: Vinson Lee 
> Cc: Marek Olšák 
> Signed-off-by: Emil Velikov 
> ---
> Vinson, can you please test this ?
> ---
>  include/GL/mesa_glinterop.h | 36 ++--
>  src/egl/main/eglapi.c   |  6 +++---
>  src/glx/glxcmds.c   |  6 +++---
>  3 files changed, 24 insertions(+), 24 deletions(-)
>
> diff --git a/include/GL/mesa_glinterop.h b/include/GL/mesa_glinterop.h
> index b805d63..d44e385 100644
> --- a/include/GL/mesa_glinterop.h
> +++ b/include/GL/mesa_glinterop.h
> @@ -92,7 +92,7 @@ enum {
>  /**
>   * Device information returned by Mesa.
>   */
> -typedef struct _mesa_glinterop_device_info {
> +struct _mesa_glinterop_device_info {
> /* The caller should set this to the version of the struct they support */
> /* The callee will overwrite it if it supports a lower version.
>  *
> @@ -113,14 +113,14 @@ typedef struct _mesa_glinterop_device_info {
> uint32_t device_id;
>
> /* Structure version 1 ends here. */
> -} mesa_glinterop_device_info;
> +};
>
>  #define MESA_GLINTEROP_EXPORT_IN_VERSION 1
>
>  /**
>   * Input parameters to Mesa interop export functions.
>   */
> -typedef struct _mesa_glinterop_export_in {
> +struct _mesa_glinterop_export_in {
> /* The caller should set this to the version of the struct they support */
> /* The callee will overwrite it if it supports a lower version.
>  *
> @@ -178,14 +178,14 @@ typedef struct _mesa_glinterop_export_in {
>  */
> void *out_driver_data;
> /* Structure version 1 ends here. */
> -} mesa_glinterop_export_in;
> +};
>
>  #define MESA_GLINTEROP_EXPORT_OUT_VERSION 1
>
>  /**
>   * Outputs of Mesa interop export functions.
>   */
> -typedef struct _mesa_glinterop_export_out {
> +struct _mesa_glinterop_export_out {
> /* The caller should set this to the version of the struct they support */
> /* The callee will overwrite it if it supports a lower version.
>  *
> @@ -233,7 +233,7 @@ typedef struct _mesa_glinterop_export_out {
> /* The number of bytes written to out_driver_data. */
> uint32_t out_driver_data_written;
> /* Structure version 1 ends here. */
> -} mesa_glinterop_export_out;
> +};
>
>
>  /**
> @@ -247,7 +247,7 @@ typedef struct _mesa_glinterop_export_out {
>   */
>  int
>  MesaGLInteropGLXQueryDeviceInfo(Display *dpy, GLXContext context,
> -mesa_glinterop_device_info *out);
> +struct _mesa_glinterop_device_info *out);
>
>
>  /**
> @@ -256,7 +256,7 @@ MesaGLInteropGLXQueryDeviceInfo(Display *dpy, GLXContext 
> context,
>   */
>  int
>  MesaGLInteropEGLQueryDeviceInfo(EGLDisplay dpy, EGLContext context,
> -mesa_glinterop_device_info *out);
> +struct _mesa_glinterop_device_info *out);
>
>
>  /**
> @@ -272,8 +272,8 @@ MesaGLInteropEGLQueryDeviceInfo(EGLDisplay dpy, 
> EGLContext context,
>   */
>  int
>  MesaGLInteropGLXExportObject(Display *dpy, GLXContext context,
> - mesa_glinterop_export_in *in,
> - mesa_glinterop_export_out *out);
> + struct _mesa_glinterop_export_in *in,
> + struct _mesa_glinterop_export_out *out);
>
>
>  /**
> @@ -282,20 +282,20 @@ MesaGLInteropGLXExportObject(Display *dpy, GLXContext 
> context,
>   */
>  int
>  MesaGLInteropEGLExportObject(EGLDisplay dpy, EGLContext context,
> - mesa_glinterop_export_in *in,
> - mesa_glinterop_export_out *out);
> + struct _mesa_glinterop_export_in *in,
> + struct _mesa_glinterop_export_out *out);
>
>
>  typedef int (PFNMESAGLINTEROPGLXQUERYDEVICEINFOPROC)(Display *dpy, 
> GLXContext context,
> - 
> mesa_glinterop_device_info *out);
> + struct 
> _mesa_glinterop_device_info *out);
>  typedef int (PFNMESAGLINTEROPEGLQUERYDEVICEINFOPROC)(EGLDisplay dpy, 
> EGLContext context,
> - 
> mesa_glinterop_device_info *out);
> + struct 
> _mesa_glinterop_device_info *out);
>  typedef int (PFNMESAGLINTEROPGLXEXPORTOBJECTPROC)(Display *dpy, GLXContext 
> context,
> -

Re: [Mesa-dev] [PATCH] gallium/ddebug: Add passthrough for query_memory_info.

2016-05-27 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Fri, May 27, 2016 at 1:58 PM, Bas Nieuwenhuizen
 wrote:
> Signed-off-by: Bas Nieuwenhuizen 
> ---
>  src/gallium/drivers/ddebug/dd_screen.c | 9 +
>  1 file changed, 9 insertions(+)
>
> diff --git a/src/gallium/drivers/ddebug/dd_screen.c 
> b/src/gallium/drivers/ddebug/dd_screen.c
> index ebe090b..5a883bd 100644
> --- a/src/gallium/drivers/ddebug/dd_screen.c
> +++ b/src/gallium/drivers/ddebug/dd_screen.c
> @@ -101,6 +101,14 @@ dd_screen_get_timestamp(struct pipe_screen *_screen)
> return screen->get_timestamp(screen);
>  }
>
> +static void dd_screen_query_memory_info(struct pipe_screen *_screen,
> +struct pipe_memory_info *info)
> +{
> +   struct pipe_screen *screen = dd_screen(_screen)->screen;
> +
> +   return screen->query_memory_info(screen, info);
> +}
> +
>  static struct pipe_context *
>  dd_screen_context_create(struct pipe_screen *_screen, void *priv,
>   unsigned flags)
> @@ -332,6 +340,7 @@ ddebug_screen_create(struct pipe_screen *screen)
> dscreen->base.get_paramf = dd_screen_get_paramf;
> dscreen->base.get_compute_param = dd_screen_get_compute_param;
> dscreen->base.get_shader_param = dd_screen_get_shader_param;
> +   dscreen->base.query_memory_info = dd_screen_query_memory_info;
> /* get_video_param */
> /* get_compute_param */
> SCR_INIT(get_timestamp);
> --
> 2.8.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 14/18] mesa_glinterop: remove mesa_glinterop typedefs

2016-05-27 Thread Emil Velikov
On 27 May 2016 at 16:03, Marek Olšák  wrote:
> You don't need to keep the underscore at the beginning of the type
> names anymore.
>
If I drop that one things will clash with the typedef in
dri_interface.h. And obviously we don't want to include
mesa_glinterop.h from dri_interface.h as that will require us to
install the header (not to mention the implicit include in hundreds of
more compilation/build units).

Perhaps there's something subtle that I'm missing ?

Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] configure.ac: Add support for Android builds

2016-05-27 Thread Emil Velikov
Hello gents,

On 27 May 2016 at 12:33, Tomasz Figa  wrote:
> Hi,
>
> On Fri, May 27, 2016 at 7:36 PM, Nicolas Boichat  
> wrote:
>> Hi Emil,
>>
>> Took us some time to clean things up, but we got an ebuild and repo to
>> share with you.
>>
>> On Tue, May 24, 2016 at 10:52 PM, Emil Velikov  
>> wrote:
>> [snip]
>>>> We also set PKGCONFIG="false", because, well, we do not have .pc files
>>>> for Android libs. We _could_ create them manually, though,
>>> Arr... it seems like there's more 'hacks' than expected. I would
>>> kindly urge that if you're using the autoconf build to use .pc files,
>>> please ?
>>>
>>> There's no need to manually create any of them - just throw the
>>> template and wire it up in the build system.
>>
>> Not quite sure how that'd work out, I guess I'll see ,-)
>>
>
> I'd also vote for using .pc files for the non-Android dependencies,
> but I'm not sure how we could use them for Android ones. Do you mean
> hacking up something like android_egl.pc that would include all the
> necessary Android libraries for the EGL platform module?
>
Ideally there'll be a separate .pc for each required library/module.

> One more thing to note is that this series is an attempt to make it
> possible to build Mesa for Android externally, without the need to
> maintain a whole alternative set of makefiles or other redundant
> entities. So I think we shouldn't go to extremes and now the users of
> this create yet another type of such objects for Android side of
 > things (.pc files), if it could just stay there in the source tree,
> unlikely to require changes in any near future.
>
/me scratches head and attempts to parse two consecutive 4 line sentences.
Can you please rephrase the above ?

> One alternative for the two mentioned solutions would be just letting
> the user specify the list of Android libraries to include using
> environment variables, without involving pkgconfig into this at all.
> This way we would neither have to create .pc files for Android
> libraries nor hardcode specific library names into Makefile.am.
>
That's an option indeed. Sadly the easiest way to butcher
things... as in one will get things right eventually, but the time
spent debugging/asking around will be far greater than hacking 2-3 .pc
files.

>>>> but I'm not
>>>> 100% convinced it's any better than specifying them in the mesa ebuild
>>>> (knowing that mesa is the only package we build this way, the
>>>> dependencies are prebuilts that we pull from Android builders).

>>> Is adding such workarounds encouraged/widespread in the ebuild ?
>>> Last time I've looked at the Gentoo ones, there weren't many such
>>> cases.
>>
>> No, it's not usual, at all. The issue here is that we have a chroot
>> that is meant to be a "normal" Linux (that is, Chromium OS), for which
>> we build all the libraries. But, in the same chroot, we also switch to
>> a different toolchain and vastly different system (Android), using
>> prebuilt libraries, to build the second copy of mesa. I guess we are
>> bound to have a number of hacks...
>
> Well, yeah, this is a bit of a special case. I'm planning to improve
> the state of things a bit, though, because we're not yet using all the
> portage functionality that could be useful for this.
>
Indeed. Having a look at the (well placed) FIXME's in the ebuild, I
fully agree with all of them.

>>
>>>> So we replace them with LIBXYZ_[CFLAGS/LIBS], and configure is happy with 
>>>> that.
>>>>
>>>> One thing that I wonder about is how we could specify
>>>> libEGL_la_LIBADD += -lhardware -lcutils -lsync
>>>> without hardcoding it in the Makefile.am.
>>>>
>>>> Any idea how we could do that? Or do you think it's ok to hardcode the 
>>>> libs?

>>> The proposed solution will handle these. If you guys feel that it's
>>> too much/annoying to deal with, show me a repo and I'll send you the
>>> patches ;-) Please ?
>>
>> Alright, so the ebuild is here:
>> https://chromium-review.googlesource.com/#/c/347700/ (if you have a
>> Chromium OS chroot, it should just work).
>>
>> And the patches are here:
>> https://chromium.googlesource.com/chromiumos/third_party/mesa/+log/arc-11.3.0-pre1
>>
>> They are still based on a slightly older version of mesa. Tomasz is
>> working on rebasing to the latest mesa master (it looks like someone
>> implemented similar changes to ours to add support for PRIME FD).
>
> Yeah, we've been building things up on top of the DRI loader
> extension, but now there is equivalent functionality upstream with DRI
> image loader, however somehow it doesn't work for us yet, I'm
> debugging it right now.
>
Some things that come to mind:
 - do you have the render nodes created ?
 - does drmGetNodeTypeFromFd() return the correct value ?
 - does the assumption in get_native_buffer_fd() hold true on your
platform (gralloc implementation) ?


Regards,
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 14/18] mesa_glinterop: remove mesa_glinterop typedefs

2016-05-27 Thread Marek Olšák
On Fri, May 27, 2016 at 5:20 PM, Emil Velikov  wrote:
> On 27 May 2016 at 16:03, Marek Olšák  wrote:
>> You don't need to keep the underscore at the beginning of the type
>> names anymore.
>>
> If I drop that one things will clash with the typedef in
> dri_interface.h. And obviously we don't want to include
> mesa_glinterop.h from dri_interface.h as that will require us to
> install the header (not to mention the implicit include in hundreds of
> more compilation/build units).
>
> Perhaps there's something subtle that I'm missing ?

dri_interface.h can be changed from:

typedef struct _mesa_glinterop_device_info mesa_glinterop_device_info;
typedef struct _mesa_glinterop_export_in mesa_glinterop_export_in;
typedef struct _mesa_glinterop_export_out mesa_glinterop_export_out;

to:

struct mesa_glinterop_device_info;
struct mesa_glinterop_export_in;
struct mesa_glinterop_export_out;

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] platform_android: prevent deadlock in droid_swap_buffers

2016-05-27 Thread Emil Velikov
Hi Haixia Shi,

On 26 May 2016 at 00:02, Haixia Shi  wrote:

> @@ -434,8 +434,15 @@ droid_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, 
> _EGLSurface *draw)
>
> dri2_flush_drawable_for_swapbuffers(disp, draw);
>
> -   if (dri2_surf->buffer)
> +   if (dri2_surf->buffer) {
> +  /* To avoid blocking other EGL calls, release the display mutex before
> +   * we enter droid_window_enqueue_buffer() and re-acquire the mutex upon
> +   * return.
> +   */
> +  mtx_unlock(&disp->Mutex);
>droid_window_enqueue_buffer(dri2_surf);
> +  mtx_lock(&disp->Mutex);
> +   }
>
Shouldn't this be moved to droid_window_enqueue_buffer() ? This way
destroy_surface will also benefit from the non-blocking behaviour.

Related: if the Android API has a method to cancel/discard the actual
buffer we can use that instead for
destroy_surface/droid_window_cancel_buffer.

Regards,
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 14/18] mesa_glinterop: remove mesa_glinterop typedefs

2016-05-27 Thread Emil Velikov
On 27 May 2016 at 16:49, Marek Olšák  wrote:
> On Fri, May 27, 2016 at 5:20 PM, Emil Velikov  
> wrote:
>> On 27 May 2016 at 16:03, Marek Olšák  wrote:
>>> You don't need to keep the underscore at the beginning of the type
>>> names anymore.
>>>
>> If I drop that one things will clash with the typedef in
>> dri_interface.h. And obviously we don't want to include
>> mesa_glinterop.h from dri_interface.h as that will require us to
>> install the header (not to mention the implicit include in hundreds of
>> more compilation/build units).
>>
>> Perhaps there's something subtle that I'm missing ?
>
> dri_interface.h can be changed from:
>
> typedef struct _mesa_glinterop_device_info mesa_glinterop_device_info;
> typedef struct _mesa_glinterop_export_in mesa_glinterop_export_in;
> typedef struct _mesa_glinterop_export_out mesa_glinterop_export_out;
>
> to:
>
> struct mesa_glinterop_device_info;
> struct mesa_glinterop_export_in;
> struct mesa_glinterop_export_out;
>
Sure we can drop the typedef all together. Will send out v2 in a few hours.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 96242] Latest mesa from git doesn't build - Python3 not found

2016-05-27 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=96242

--- Comment #1 from joro-2013  ---
Just a follow-up: The reason seems to be commit "configure.ac: error out when
building from git without python3" that has to do with some Intel stuff. But
I'm on a PowerPC platform ! Up until a few weeks ago mesa built fine with
python 2.7.3 installed.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 13/18] GROSS HACK: anv: add dummy MESA_GIT_SHA1 define

2016-05-27 Thread Jason Ekstrand
On Fri, May 27, 2016 at 7:35 AM, Emil Velikov 
wrote:

> From: Emil Velikov 
>
> Otherwise we'll error out if built from a release tarball.
>
> Cc: Jason Ekstrand 
> Cc: Kristian Høgsberg Kristensen 
> ---
> This patch isn't going to fly obviously but is just enough to get things
> building ;-)
>

You're right.  It won't. :-)

I think what we need is to ship something that provides MESA_GIT_SHA1 in
the tarball.  The purpose of this is to ensure that any shader caches get
100% invalidated if the mesa version changes at all.  When Timothy lands
his shader cache stuff, we'll need this in the GL driver as well so we
might as well find a solution now.

All of the other (not this one) anv/isl patches in the series are

Reviewed-by: Jason Ekstrand 


> Any input on proper fix is highly appreciated.
> ---
>  src/intel/vulkan/anv_device.c | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index eb40e2d..98eda2b 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -416,6 +416,9 @@ void
>  anv_device_get_cache_uuid(void *uuid)
>  {
> memset(uuid, 0, VK_UUID_SIZE);
> +#ifndef MESA_GIT_SHA1
> +#define MESA_GIT_SHA1 "git-unknown"
> +#endif
> snprintf(uuid, VK_UUID_SIZE, "anv-%s", MESA_GIT_SHA1 + 4);
>  }
>
> --
> 2.8.2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] gallium/util: use enum pipe_prim_type instead of unsigned some more

2016-05-27 Thread sroland
From: Roland Scheidegger 

There were complaints from a mingw build:
u_draw.h:134:14: error: invalid conversion from ‘uint {aka unsigned int}’
to ‘pipe_prim_type’ [-fpermissive]
---
 src/gallium/auxiliary/util/u_draw.h | 21 -
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_draw.h 
b/src/gallium/auxiliary/util/u_draw.h
index 5c0880f..b16f106 100644
--- a/src/gallium/auxiliary/util/u_draw.h
+++ b/src/gallium/auxiliary/util/u_draw.h
@@ -49,7 +49,10 @@ util_draw_init_info(struct pipe_draw_info *info)
 
 
 static inline void
-util_draw_arrays(struct pipe_context *pipe, uint mode, uint start, uint count)
+util_draw_arrays(struct pipe_context *pipe,
+ enum pipe_prim_type mode,
+ uint start,
+ uint count)
 {
struct pipe_draw_info info;
 
@@ -65,7 +68,9 @@ util_draw_arrays(struct pipe_context *pipe, uint mode, uint 
start, uint count)
 
 static inline void
 util_draw_elements(struct pipe_context *pipe, int index_bias,
-   uint mode, uint start, uint count)
+   enum pipe_prim_type mode,
+   uint start,
+   uint count)
 {
struct pipe_draw_info info;
 
@@ -81,7 +86,9 @@ util_draw_elements(struct pipe_context *pipe, int index_bias,
 
 static inline void
 util_draw_arrays_instanced(struct pipe_context *pipe,
-   uint mode, uint start, uint count,
+   enum pipe_prim_type mode,
+   uint start,
+   uint count,
uint start_instance,
uint instance_count)
 {
@@ -102,7 +109,9 @@ util_draw_arrays_instanced(struct pipe_context *pipe,
 static inline void
 util_draw_elements_instanced(struct pipe_context *pipe,
  int index_bias,
- uint mode, uint start, uint count,
+ enum pipe_prim_type mode,
+ uint start,
+ uint count,
  uint start_instance,
  uint instance_count)
 {
@@ -125,7 +134,9 @@ util_draw_range_elements(struct pipe_context *pipe,
  int index_bias,
  uint min_index,
  uint max_index,
- uint mode, uint start, uint count)
+ enum pipe_prim_type mode,
+ uint start,
+ uint count)
 {
struct pipe_draw_info info;
 
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium/util: use enum pipe_prim_type instead of unsigned some more

2016-05-27 Thread Brian Paul


Reviewed-by: Brian Paul 

On 05/27/2016 10:53 AM, srol...@vmware.com wrote:

From: Roland Scheidegger 

There were complaints from a mingw build:
u_draw.h:134:14: error: invalid conversion from ‘uint {aka unsigned int}’
to ‘pipe_prim_type’ [-fpermissive]
---
  src/gallium/auxiliary/util/u_draw.h | 21 -
  1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_draw.h 
b/src/gallium/auxiliary/util/u_draw.h
index 5c0880f..b16f106 100644
--- a/src/gallium/auxiliary/util/u_draw.h
+++ b/src/gallium/auxiliary/util/u_draw.h
@@ -49,7 +49,10 @@ util_draw_init_info(struct pipe_draw_info *info)


  static inline void
-util_draw_arrays(struct pipe_context *pipe, uint mode, uint start, uint count)
+util_draw_arrays(struct pipe_context *pipe,
+ enum pipe_prim_type mode,
+ uint start,
+ uint count)
  {
 struct pipe_draw_info info;

@@ -65,7 +68,9 @@ util_draw_arrays(struct pipe_context *pipe, uint mode, uint 
start, uint count)

  static inline void
  util_draw_elements(struct pipe_context *pipe, int index_bias,
-   uint mode, uint start, uint count)
+   enum pipe_prim_type mode,
+   uint start,
+   uint count)
  {
 struct pipe_draw_info info;

@@ -81,7 +86,9 @@ util_draw_elements(struct pipe_context *pipe, int index_bias,

  static inline void
  util_draw_arrays_instanced(struct pipe_context *pipe,
-   uint mode, uint start, uint count,
+   enum pipe_prim_type mode,
+   uint start,
+   uint count,
 uint start_instance,
 uint instance_count)
  {
@@ -102,7 +109,9 @@ util_draw_arrays_instanced(struct pipe_context *pipe,
  static inline void
  util_draw_elements_instanced(struct pipe_context *pipe,
   int index_bias,
- uint mode, uint start, uint count,
+ enum pipe_prim_type mode,
+ uint start,
+ uint count,
   uint start_instance,
   uint instance_count)
  {
@@ -125,7 +134,9 @@ util_draw_range_elements(struct pipe_context *pipe,
   int index_bias,
   uint min_index,
   uint max_index,
- uint mode, uint start, uint count)
+ enum pipe_prim_type mode,
+ uint start,
+ uint count)
  {
 struct pipe_draw_info info;




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 13/18] GROSS HACK: anv: add dummy MESA_GIT_SHA1 define

2016-05-27 Thread Emil Velikov
On Friday, 27 May 2016, Jason Ekstrand  wrote:

> On Fri, May 27, 2016 at 7:35 AM, Emil Velikov  > wrote:
>
>> From: Emil Velikov > >
>>
>> Otherwise we'll error out if built from a release tarball.
>>
>> Cc: Jason Ekstrand > >
>> Cc: Kristian Høgsberg Kristensen > >
>> ---
>> This patch isn't going to fly obviously but is just enough to get things
>> building ;-)
>>
>
> You're right.  It won't. :-)
>
> I think what we need is to ship something that provides MESA_GIT_SHA1 in
> the tarball.  The purpose of this is to ensure that any shader caches get
> 100% invalidated if the mesa version changes at all.  When Timothy lands
> his shader cache stuff, we'll need this in the GL driver as well so we
> might as well find a solution now.
>
> Indeed. I was looking at how GL does it only to find it isn't wired up
yet. And yes shipping git_sha1.h sounds like a good solution. Question is
do we want to print the sha in other places (GL_VERSION iirc) for release
builds ? I'd go with yes on that one as well.

I'm on the move(train) atm so it might be a little late in the day for
anyone to review 'v2'.


> All of the other (not this one) anv/isl patches in the series are
>
> Reviewed-by: Jason Ekstrand  >
>

Thanks.
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH 4/4] glx: fix error code when there is no context bound

2016-05-27 Thread Emil Velikov
On Friday, 27 May 2016, Tapani Pälli  wrote:

>
>
> On 05/26/2016 05:16 PM, Emil Velikov wrote:
>
>> Hi all,
>>
>> On 29 February 2016 at 07:14, Tapani Pälli 
>> wrote:
>>
>>>
>>> On 02/22/2016 10:16 PM, Ian Romanick wrote:
>>>

 There are 17 total occurrences of

  grep -r '[(]!gc[)]' src/glx/

 and

  grep -r 'gc[[:space:]]*==[[:space:]]*NULL' src/glx/

 None of these check for dummyContext.  This is all very suspicious.
 Looking at the implementation(s) of __glXGetCurrentContext, I don't
 think it can ever return NULL.  Look in src/glx/glxcurrent.c.  It's
 possible that __glXGetCurrentContext used to be able to return NULL, but
 I find it unlikely.

 My guess is that all (or nearly all) of the !gc or gc == NULL checks are
 wrong.  A bunch of them probably "just work" because they end up sending
 protocol requests to the server, and the server sends back an error.

>>>
>>>
>>> I spent some time with this and it looks like some of these are correct
>>> as
>>> create_context (or indirect_create_context) can return NULL and also
>>> pointer
>>> given by client may be NULL (and can't be dummyContext). The places with
>>> explicit __glXGetCurrentContext call (9 of these) and a NULL check are
>>> incorrect. I can add these to the patch.
>>>
>>> At the very least, I think these gc == NULL checks should be replaced by
 asserts.  If the unit tests call these functions with
 __glXGetCurrentContext returning NULL, the unit tests should be fixed to
 return &dummyContext instead.

>>>
>>>
>>> Should it be then 'own dummyContext' implemented by fake_glx_screen.cpp
>>> something along lines in this patch and not trying to link with
>>> glxcurrent.c?
>>>
>>> I'd really like to see analysis of the other NULL checks and either have
 justifications for no change or have changes.  I'd also really like to
 see piglit tests that could hit some of these.

>>>
>>>
>>> It looks like glx-test is testing return value of __glXGetCurrentContext
>>> currently (which is why it breaks), wouldn't fixing glx-test be
>>> sufficient?
>>>
>>>
>>> Any news on the status of this patch ? The Suse guys did bring some
>> fixes recently (check __glXGetCurrentContext() vs dummyContext as
>> opposed to NULL), although I think we still want something like the
>> proposed here. Correct ?
>>
>>
> No progress, this patch has been living as is in internal project. The fix
> itself is quite simple, all places with __glXGetCurrentContext should check
> against dummyContext.
>
> Indeed. I believe I mentioned /suggested that ;-)


> This patch introduced its own 'dummyContext' in the unit test since it
> seemed very challenging to compile the test together with files in glx
> folder (results in linking with a *lot* of stuff). I can take a peek again
> what was the issue in replacing all of the checks and reply back to this.
>
> I think there were some concerns
 - not everything going is updated (Ian), looks fine now but will need to
double check
 - piglits (Ian)
 - some mesa glx tests are crashing/failing (yourself mentioned that iirc).
I take it that things are fine now ?

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 3/3] gallium: push offset down to driver

2016-05-27 Thread Emil Velikov
Hi Stan

On Thursday, 26 May 2016, Stanimir Varbanov 
wrote:

> Push offset down to drivers when importing dmabuf. This is needed
> to more fully support EGL_EXT_image_dma_buf_import when a non-zero
> offset is specified.
>
> Signed-off-by: Stanimir Varbanov  >
> ---
>
> Testing has been done for freedreno, and compile tested following
> gallium drivers:
> nouveau,svga,virgl,r600,r300,radeonsi,swrast,i915,ilo
>
> Series looks a lot better and is
Reviewed-by: Emil Velikov 

Rob feel free to push but please move the "Testing has been..." hunk before
the --- line. We _want_ that information in git log.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 13/18] GROSS HACK: anv: add dummy MESA_GIT_SHA1 define

2016-05-27 Thread Jason Ekstrand
On Fri, May 27, 2016 at 10:06 AM, Emil Velikov 
wrote:

> On Friday, 27 May 2016, Jason Ekstrand  wrote:
>
>> On Fri, May 27, 2016 at 7:35 AM, Emil Velikov 
>> wrote:
>>
>>> From: Emil Velikov 
>>>
>>> Otherwise we'll error out if built from a release tarball.
>>>
>>> Cc: Jason Ekstrand 
>>> Cc: Kristian Høgsberg Kristensen 
>>> ---
>>> This patch isn't going to fly obviously but is just enough to get things
>>> building ;-)
>>>
>>
>> You're right.  It won't. :-)
>>
>> I think what we need is to ship something that provides MESA_GIT_SHA1 in
>> the tarball.  The purpose of this is to ensure that any shader caches get
>> 100% invalidated if the mesa version changes at all.  When Timothy lands
>> his shader cache stuff, we'll need this in the GL driver as well so we
>> might as well find a solution now.
>>
>> Indeed. I was looking at how GL does it only to find it isn't wired up
> yet. And yes shipping git_sha1.h sounds like a good solution. Question is
> do we want to print the sha in other places (GL_VERSION iirc) for release
> builds ? I'd go with yes on that one as well.
>
> I'm on the move(train) atm so it might be a little late in the day for
> anyone to review 'v2'.
>

It's not late in my day.  It's not even noon yet. :-)


> All of the other (not this one) anv/isl patches in the series are
>>
>> Reviewed-by: Jason Ekstrand 
>>
>
> Thanks.
> Emil
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 96247] nir_lower_tex.c:202:11: error: field designator cannot initialize a non-struct, non-union type 'float [4]'

2016-05-27 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=96247

Bug ID: 96247
   Summary: nir_lower_tex.c:202:11: error: field designator cannot
initialize a non-struct, non-union type 'float [4]'
   Product: Mesa
   Version: git
  Hardware: Other
OS: All
Status: NEW
  Keywords: regression
  Severity: normal
  Priority: medium
 Component: Mesa core
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: v...@freedesktop.org
QA Contact: mesa-dev@lists.freedesktop.org

mesa: 0482efdc93f130a1defa021e5d198bc6ef59af5a (master 11.3.0-devel)

  CC   nir/nir_lower_tex.lo
nir/nir_lower_tex.c:202:11: error: field designator cannot initialize a
non-struct, non-union type 'float [4]'
  { { .f32 = { 1.0f,  0.0f, 1.59602678f, 0.0f } } },
  ^
nir/nir_lower_tex.c:203:11: error: field designator cannot initialize a
non-struct, non-union type 'float [4]'
  { { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } } },
  ^
nir/nir_lower_tex.c:204:11: error: field designator cannot initialize a
non-struct, non-union type 'float [4]'
  { { .f32 = { 1.0f,  2.01723214f,  0.0f,0.0f } } }
  ^

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] radeonsi: fix CP DMA hazard with index buffer fetches

2016-05-27 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_cp_dma.c | 22 --
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c 
b/src/gallium/drivers/radeonsi/si_cp_dma.c
index cbb84b0..882458c 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -73,12 +73,23 @@ static void si_emit_cp_dma_copy_buffer(struct si_context 
*sctx,
radeon_emit(cs, (dst_va >> 32) & 0xffff);   /* DST_ADDR_HI 
[15:0] */
radeon_emit(cs, size | wr_confirm | raw_wait);  /* COMMAND 
[29:22] | BYTE_COUNT [20:0] */
}
+
+   /* CP DMA is executed in ME, but index buffers are read by PFP.
+* This ensures that ME (CP DMA) is idle before PFP starts fetching
+* indices. If we wanted to execute CP DMA in PFP, this packet
+* should precede it.
+*/
+   if (sync_flag) {
+   radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
+   radeon_emit(cs, 0);
+   }
 }
 
 /* Emit a CP DMA packet to clear a buffer. The size must fit in bits [20:0]. */
 static void si_emit_cp_dma_clear_buffer(struct si_context *sctx,
uint64_t dst_va, unsigned size,
-   uint32_t clear_value, unsigned flags)
+   uint32_t clear_value, unsigned flags,
+   enum r600_coherency coher)
 {
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? S_411_CP_SYNC(1) : 0;
@@ -105,6 +116,12 @@ static void si_emit_cp_dma_clear_buffer(struct si_context 
*sctx,
radeon_emit(cs, (dst_va >> 32) & 0xffff);   /* DST_ADDR_HI 
[15:0] */
radeon_emit(cs, size | wr_confirm | raw_wait);  /* COMMAND 
[29:22] | BYTE_COUNT [20:0] */
}
+
+   /* See "copy_buffer" for explanation. */
+   if (coher == R600_COHERENCY_SHADER && sync_flag) {
+   radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
+   radeon_emit(cs, 0);
+   }
 }
 
 static unsigned get_flush_flags(struct si_context *sctx, enum r600_coherency 
coher)
@@ -207,7 +224,8 @@ static void si_clear_buffer(struct pipe_context *ctx, 
struct pipe_resource *dst,
si_cp_dma_prepare(sctx, dst, NULL, byte_count, size, 
&dma_flags);
 
/* Emit the clear packet. */
-   si_emit_cp_dma_clear_buffer(sctx, va, byte_count, value, 
dma_flags);
+   si_emit_cp_dma_clear_buffer(sctx, va, byte_count, value,
+   dma_flags, coher);
 
size -= byte_count;
va += byte_count;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] gallium/radeon: don't use the DMA ring for pipelined buffer uploads

2016-05-27 Thread Marek Olšák
From: Marek Olšák 

Submitting a DMA IB flushes the GFX IB and all GPU caches.
---
 src/gallium/drivers/radeon/r600_buffer_common.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c 
b/src/gallium/drivers/radeon/r600_buffer_common.c
index 145cc9f..a47aa78 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -368,9 +368,9 @@ static void *r600_buffer_transfer_map(struct pipe_context 
*ctx,
box->width + (box->x % 
R600_MAP_BUFFER_ALIGNMENT));
if (staging) {
/* Copy the VRAM buffer to the staging buffer. */
-   rctx->dma_copy(ctx, &staging->b.b, 0,
-  box->x % R600_MAP_BUFFER_ALIGNMENT,
-  0, 0, resource, level, box);
+   ctx->resource_copy_region(ctx, &staging->b.b, 0,
+ box->x % 
R600_MAP_BUFFER_ALIGNMENT,
+ 0, 0, resource, level, box);
 
data = r600_buffer_map_sync_with_rings(rctx, staging, 
PIPE_TRANSFER_READ);
if (!data) {
@@ -398,7 +398,6 @@ static void r600_buffer_do_flush_region(struct pipe_context 
*ctx,
struct pipe_transfer *transfer,
const struct pipe_box *box)
 {
-   struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
struct r600_resource *rbuffer = r600_resource(transfer->resource);
 
@@ -414,7 +413,7 @@ static void r600_buffer_do_flush_region(struct pipe_context 
*ctx,
u_box_1d(soffset, box->width, &dma_box);
 
/* Copy the staging buffer into the original one. */
-   rctx->dma_copy(ctx, dst, 0, box->x, 0, 0, src, 0, &dma_box);
+   ctx->resource_copy_region(ctx, dst, 0, box->x, 0, 0, src, 0, 
&dma_box);
}
 
util_range_add(&rbuffer->valid_buffer_range, box->x,
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] r600g: fix CP DMA hazard with index buffer fetches

2016-05-27 Thread Marek Olšák
From: Marek Olšák 

R600-R700 used a bad workaround. Now only R600 has to use it.
---
 src/gallium/drivers/r600/evergreen_hw_context.c | 13 +++--
 src/gallium/drivers/r600/evergreend.h   |  1 +
 src/gallium/drivers/r600/r600_blit.c|  6 --
 src/gallium/drivers/r600/r600_hw_context.c  | 25 -
 src/gallium/drivers/r600/r600d.h|  1 +
 5 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c 
b/src/gallium/drivers/r600/evergreen_hw_context.c
index f456696..14877ae 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -117,7 +117,7 @@ void evergreen_cp_dma_clear_buffer(struct r600_context 
*rctx,
unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
unsigned reloc;
 
-   r600_need_cs_space(rctx, 10 + (rctx->b.flags ? 
R600_MAX_FLUSH_CS_DWORDS : 0), FALSE);
+   r600_need_cs_space(rctx, 2 + 10 + (rctx->b.flags ? 
R600_MAX_FLUSH_CS_DWORDS : 0), FALSE);
 
/* Flush the caches for the first copy only. */
if (rctx->b.flags) {
@@ -148,9 +148,18 @@ void evergreen_cp_dma_clear_buffer(struct r600_context 
*rctx,
offset += byte_count;
}
 
+   /* CP DMA is executed in ME, but index buffers are read by PFP.
+* This ensures that ME (CP DMA) is idle before PFP starts fetching
+* indices. If we wanted to execute CP DMA in PFP, this packet
+* should precede it.
+*/
+   if (coher == R600_COHERENCY_SHADER) {
+   radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
+   radeon_emit(cs, 0);
+   }
+
/* Invalidate the read caches. */
rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE |
 R600_CONTEXT_INV_VERTEX_CACHE |
 R600_CONTEXT_INV_TEX_CACHE;
 }
-
diff --git a/src/gallium/drivers/r600/evergreend.h 
b/src/gallium/drivers/r600/evergreend.h
index c1c6169..457152e 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -88,6 +88,7 @@
 #define WAIT_REG_MEM_EQUAL  3
 #define PKT3_MEM_WRITE 0x3D
 #define PKT3_INDIRECT_BUFFER   0x32
+#define PKT3_PFP_SYNC_ME  0x42 /* r7xx+ */
 #define PKT3_SURFACE_SYNC  0x43
 #define PKT3_ME_INITIALIZE 0x44
 #define PKT3_COND_WRITE 0x45
diff --git a/src/gallium/drivers/r600/r600_blit.c 
b/src/gallium/drivers/r600/r600_blit.c
index 9230b40..9f309d8 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -519,12 +519,6 @@ static void r600_copy_buffer(struct pipe_context *ctx, 
struct pipe_resource *dst
} else {
util_resource_copy_region(ctx, dst, 0, dstx, 0, 0, src, 0, 
src_box);
}
-
-   /* The index buffer (VGT) doesn't seem to see the result of the copying.
-* Can we somehow flush the index buffer cache? Starting a new IB seems
-* to do the trick. */
-   if (rctx->b.chip_class <= R700)
-   rctx->b.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
 }
 
 /**
diff --git a/src/gallium/drivers/r600/r600_hw_context.c 
b/src/gallium/drivers/r600/r600_hw_context.c
index 1f7bed8..5d6200d 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -403,7 +403,7 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
unsigned src_reloc, dst_reloc;
 
-   r600_need_cs_space(rctx, 10 + (rctx->b.flags ? 
R600_MAX_FLUSH_CS_DWORDS : 0), FALSE);
+   r600_need_cs_space(rctx, 2 + 10 + (rctx->b.flags ? 
R600_MAX_FLUSH_CS_DWORDS : 0), FALSE);
 
/* Flush the caches for the first copy only. */
if (rctx->b.flags) {
@@ -438,10 +438,25 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
dst_offset += byte_count;
}
 
-   /* Invalidate the read caches. */
-   rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE |
-R600_CONTEXT_INV_VERTEX_CACHE |
-R600_CONTEXT_INV_TEX_CACHE;
+   /* CP DMA is executed in ME, but index buffers are read by PFP.
+* This ensures that ME (CP DMA) is idle before PFP starts fetching
+* indices. If we wanted to execute CP DMA in PFP, this packet
+* should precede it.
+*
+* R6xx is out of luck, as it doesn't have the packet.
+* Starting a new IB has the same effect.
+*/
+   if (rctx->b.chip_class >= R700) {
+   radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
+   radeon_emit(cs, 0);
+
+   /* Invalidate the read caches. */
+  

[Mesa-dev] [PATCH v2 01/13] glsl: Add glsl LowerCsDerivedVariables option

2016-05-27 Thread Jordan Justen
v2:
 * Move lower flag to context constants. (Ken)

Signed-off-by: Jordan Justen 
Reviewed-by: Kenneth Graunke  (v1)
---
 src/compiler/glsl/builtin_variables.cpp  | 29 ++---
 src/compiler/glsl/glsl_parser_extras.cpp |  2 +-
 src/compiler/glsl/ir.h   |  3 ++-
 src/mesa/drivers/dri/i965/brw_context.c  |  1 +
 src/mesa/main/mtypes.h   |  3 +++
 src/mesa/state_tracker/st_extensions.c   |  4 +++-
 6 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/src/compiler/glsl/builtin_variables.cpp 
b/src/compiler/glsl/builtin_variables.cpp
index d8b6f6e..22d16de 100644
--- a/src/compiler/glsl/builtin_variables.cpp
+++ b/src/compiler/glsl/builtin_variables.cpp
@@ -1201,8 +1201,15 @@ builtin_variable_generator::generate_cs_special_vars()
 "gl_LocalInvocationID");
add_system_value(SYSTEM_VALUE_WORK_GROUP_ID, uvec3_t, "gl_WorkGroupID");
add_system_value(SYSTEM_VALUE_NUM_WORK_GROUPS, uvec3_t, "gl_NumWorkGroups");
-   add_variable("gl_GlobalInvocationID", uvec3_t, ir_var_auto, 0);
-   add_variable("gl_LocalInvocationIndex", uint_t, ir_var_auto, 0);
+   if (state->ctx->Const.LowerCsDerivedVariables) {
+  add_variable("gl_GlobalInvocationID", uvec3_t, ir_var_auto, 0);
+  add_variable("gl_LocalInvocationIndex", uint_t, ir_var_auto, 0);
+   } else {
+  add_system_value(SYSTEM_VALUE_GLOBAL_INVOCATION_ID,
+   uvec3_t, "gl_GlobalInvocationID");
+  add_system_value(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX,
+   uint_t, "gl_LocalInvocationIndex");
+   }
 }
 
 
@@ -1431,16 +1438,16 @@ initialize_cs_derived_variables(gl_shader *shader,
  * These are initialized in the main function.
  */
 void
-_mesa_glsl_initialize_derived_variables(gl_shader *shader)
+_mesa_glsl_initialize_derived_variables(struct gl_context *ctx,
+gl_shader *shader)
 {
/* We only need to set CS variables currently. */
-   if (shader->Stage != MESA_SHADER_COMPUTE)
-  return;
+   if (shader->Stage == MESA_SHADER_COMPUTE &&
+   ctx->Const.LowerCsDerivedVariables) {
+  ir_function_signature *const main_sig =
+ _mesa_get_main_function_signature(shader);
 
-   ir_function_signature *const main_sig =
-  _mesa_get_main_function_signature(shader);
-   if (main_sig == NULL)
-  return;
-
-   initialize_cs_derived_variables(shader, main_sig);
+  if (main_sig != NULL)
+ initialize_cs_derived_variables(shader, main_sig);
+   }
 }
diff --git a/src/compiler/glsl/glsl_parser_extras.cpp 
b/src/compiler/glsl/glsl_parser_extras.cpp
index 843998d..3d08508 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -1907,7 +1907,7 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct 
gl_shader *shader,
   }
}
 
-   _mesa_glsl_initialize_derived_variables(shader);
+   _mesa_glsl_initialize_derived_variables(ctx, shader);
 
delete state->symbols;
ralloc_free(state);
diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h
index e8efd27..93716c4 100644
--- a/src/compiler/glsl/ir.h
+++ b/src/compiler/glsl/ir.h
@@ -2562,7 +2562,8 @@ _mesa_glsl_initialize_variables(exec_list *instructions,
struct _mesa_glsl_parse_state *state);
 
 extern void
-_mesa_glsl_initialize_derived_variables(gl_shader *shader);
+_mesa_glsl_initialize_derived_variables(struct gl_context *ctx,
+gl_shader *shader);
 
 extern void
 _mesa_glsl_initialize_functions(_mesa_glsl_parse_state *state);
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 552e5ec..1477c8b 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -565,6 +565,7 @@ brw_initialize_context_constants(struct brw_context *brw)
   ctx->Const.MaxClipPlanes = 8;
 
ctx->Const.LowerTessLevel = true;
+   ctx->Const.LowerCsDerivedVariables = true;
ctx->Const.PrimitiveRestartForPatches = true;
 
ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 2233526..d0f3760 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -3763,6 +3763,9 @@ struct gl_constants
GLuint MaxTessControlTotalOutputComponents;
bool LowerTessLevel; /**< Lower gl_TessLevel* from float[n] to vecn? */
bool PrimitiveRestartForPatches;
+   bool LowerCsDerivedVariables;/**< Lower gl_GlobalInvocationID and
+ *   gl_LocalInvocationIndex based on
+ *   other builtin variables. */
 };
 
 
diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index 68e6601..8f249bb 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -309,11 +309,13 @@ void st_init_limits(struc

[Mesa-dev] [PATCH v2 07/13] i965: Add CS push constant info to brw_cs_prog_data

2016-05-27 Thread Jordan Justen
We need information about push constants in a few places for the GL
driver, and another couple places for the vulkan driver.

When we add support for uploading both a common (cross-thread) set of
push constants, combined with the previous per-thread push constant
data, things are going to get even more complicated. To simplify
things, we add push constant info into the cs prog_data struct.

The cross-thread constant support is added as of Haswell. To support
it we need to make sure all push constants with uniform values are
added to earlier registers. The register that varies per thread and
holds the thread invocation's unique local ID needs to be added last.

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_compiler.h | 12 +++
 src/mesa/drivers/dri/i965/brw_fs.cpp | 57 
 2 files changed, 69 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h 
b/src/mesa/drivers/dri/i965/brw_compiler.h
index 778c036..e0abde0 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -424,6 +424,12 @@ struct brw_wm_prog_data {
int urb_setup[VARYING_SLOT_MAX];
 };
 
+struct brw_push_const_block {
+   unsigned dwords; /* Dword count, not reg aligned */
+   unsigned regs;
+   unsigned size;   /* Bytes, register aligned */
+};
+
 struct brw_cs_prog_data {
struct brw_stage_prog_data base;
 
@@ -437,6 +443,12 @@ struct brw_cs_prog_data {
int thread_local_id_index;
 
struct {
+  struct brw_push_const_block cross_thread;
+  struct brw_push_const_block per_thread;
+  struct brw_push_const_block total;
+   } push;
+
+   struct {
   /** @{
* surface indices the CS-specific surfaces
*/
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 6e4a5d6..673b59d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -6195,6 +6195,61 @@ fs_visitor::emit_cs_work_group_id_setup()
 }
 
 static void
+fill_push_const_block_info(struct brw_push_const_block *block, unsigned dwords)
+{
+   block->dwords = dwords;
+   block->regs = DIV_ROUND_UP(dwords, 8);
+   block->size = block->regs * 32;
+}
+
+static void
+cs_fill_push_const_info(const struct brw_device_info *devinfo,
+struct brw_cs_prog_data *cs_prog_data)
+{
+   const struct brw_stage_prog_data *prog_data =
+  (struct brw_stage_prog_data*) cs_prog_data;
+   bool fill_thread_id =
+  cs_prog_data->thread_local_id_index >= 0 &&
+  cs_prog_data->thread_local_id_index < (int)prog_data->nr_params;
+   bool cross_thread_supported = devinfo->gen > 7 || devinfo->is_haswell;
+
+   /* The thread ID should be stored in the last param dword */
+   assert(prog_data->nr_params > 0 || !fill_thread_id);
+   assert(!fill_thread_id ||
+  cs_prog_data->thread_local_id_index ==
+ (int)prog_data->nr_params - 1);
+
+   unsigned cross_thread_dwords, per_thread_dwords;
+   if (cross_thread_supported && fill_thread_id) {
+  /* Fill all but the last register with cross-thread payload */
+  cross_thread_dwords = 8 * (cs_prog_data->thread_local_id_index / 8);
+  per_thread_dwords = prog_data->nr_params - cross_thread_dwords;
+  assert(per_thread_dwords > 0 && per_thread_dwords <= 8);
+   } else if (cross_thread_supported && !fill_thread_id) {
+  /* Fill all data using cross-thread payload */
+  cross_thread_dwords = prog_data->nr_params;
+  per_thread_dwords = 0u;
+   } else {
+  cross_thread_dwords = 0u;
+  per_thread_dwords = prog_data->nr_params;
+   }
+
+   fill_push_const_block_info(&cs_prog_data->push.cross_thread, 
cross_thread_dwords);
+   fill_push_const_block_info(&cs_prog_data->push.per_thread, 
per_thread_dwords);
+
+   unsigned total_dwords =
+  (cs_prog_data->push.per_thread.size * cs_prog_data->threads +
+   cs_prog_data->push.cross_thread.size) / 4;
+   fill_push_const_block_info(&cs_prog_data->push.total, total_dwords);
+
+   assert(cs_prog_data->push.cross_thread.dwords % 8 == 0 ||
+  cs_prog_data->push.per_thread.size == 0);
+   assert(cs_prog_data->push.cross_thread.dwords +
+  cs_prog_data->push.per_thread.dwords ==
+  prog_data->nr_params);
+}
+
+static void
 cs_set_simd_size(struct brw_cs_prog_data *cs_prog_data, unsigned size)
 {
cs_prog_data->simd_size = size;
@@ -6252,6 +6307,7 @@ brw_compile_cs(const struct brw_compiler *compiler, void 
*log_data,
   } else {
  cfg = v8.cfg;
  cs_set_simd_size(prog_data, 8);
+ cs_fill_push_const_info(compiler->devinfo, prog_data);
  prog_data->base.dispatch_grf_start_reg = v8.payload.num_regs;
   }
}
@@ -6277,6 +6333,7 @@ brw_compile_cs(const struct brw_compiler *compiler, void 
*log_data,
   } else {
  cfg = v16.cfg;
  cs_set_simd_size(prog_data, 16);
+ cs_fill_push_const_info(compiler->devinfo, prog_data);

[Mesa-dev] [PATCH v2 02/13] nir: Make lowering gl_LocalInvocationIndex optional

2016-05-27 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/compiler/nir/nir.c |  4 
 src/compiler/nir/nir.h |  2 ++
 src/compiler/nir/nir_gather_info.c |  1 +
 src/compiler/nir/nir_intrinsics.h  |  1 +
 src/compiler/nir/nir_lower_system_values.c | 16 
 src/mesa/drivers/dri/i965/brw_compiler.c   |  3 ++-
 6 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
index 2741eb6..3c8b4e0 100644
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -1752,6 +1752,8 @@ nir_intrinsic_from_system_value(gl_system_value val)
   return nir_intrinsic_load_sample_mask_in;
case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
   return nir_intrinsic_load_local_invocation_id;
+   case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
+  return nir_intrinsic_load_local_invocation_index;
case SYSTEM_VALUE_WORK_GROUP_ID:
   return nir_intrinsic_load_work_group_id;
case SYSTEM_VALUE_NUM_WORK_GROUPS:
@@ -1801,6 +1803,8 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
   return SYSTEM_VALUE_SAMPLE_MASK_IN;
case nir_intrinsic_load_local_invocation_id:
   return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
+   case nir_intrinsic_load_local_invocation_index:
+  return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
case nir_intrinsic_load_num_work_groups:
   return SYSTEM_VALUE_NUM_WORK_GROUPS;
case nir_intrinsic_load_work_group_id:
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 2e1bdfb..20f6520 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1682,6 +1682,8 @@ typedef struct nir_shader_compiler_options {
 
/* Indicates that the driver only has zero-based vertex id */
bool vertex_id_zero_based;
+
+   bool lower_cs_local_index_from_id;
 } nir_shader_compiler_options;
 
 typedef struct nir_shader_info {
diff --git a/src/compiler/nir/nir_gather_info.c 
b/src/compiler/nir/nir_gather_info.c
index 7900fd1..15a9a4f 100644
--- a/src/compiler/nir/nir_gather_info.c
+++ b/src/compiler/nir/nir_gather_info.c
@@ -44,6 +44,7 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader 
*shader)
case nir_intrinsic_load_primitive_id:
case nir_intrinsic_load_invocation_id:
case nir_intrinsic_load_local_invocation_id:
+   case nir_intrinsic_load_local_invocation_index:
case nir_intrinsic_load_work_group_id:
case nir_intrinsic_load_num_work_groups:
   shader->info.system_values_read |=
diff --git a/src/compiler/nir/nir_intrinsics.h 
b/src/compiler/nir/nir_intrinsics.h
index bd00fbb..aeb6038 100644
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -299,6 +299,7 @@ SYSTEM_VALUE(tess_level_outer, 4, 0, xx, xx, xx)
 SYSTEM_VALUE(tess_level_inner, 2, 0, xx, xx, xx)
 SYSTEM_VALUE(patch_vertices_in, 1, 0, xx, xx, xx)
 SYSTEM_VALUE(local_invocation_id, 3, 0, xx, xx, xx)
+SYSTEM_VALUE(local_invocation_index, 1, 0, xx, xx, xx)
 SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx)
 SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx)
 SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx)
diff --git a/src/compiler/nir/nir_lower_system_values.c 
b/src/compiler/nir/nir_lower_system_values.c
index 8310e38..3ca8e08 100644
--- a/src/compiler/nir/nir_lower_system_values.c
+++ b/src/compiler/nir/nir_lower_system_values.c
@@ -48,7 +48,7 @@ convert_block(nir_block *block, nir_builder *b)
 
   b->cursor = nir_after_instr(&load_var->instr);
 
-  nir_ssa_def *sysval;
+  nir_ssa_def *sysval = NULL;
   switch (var->data.location) {
   case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: {
  /* From the GLSL man page for gl_GlobalInvocationID:
@@ -74,6 +74,12 @@ convert_block(nir_block *block, nir_builder *b)
   }
 
   case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX: {
+ /* If lower_cs_local_index_from_id is true, then we derive the local
+  * index from the local id.
+  */
+ if (!b->shader->options->lower_cs_local_index_from_id)
+break;
+
  /* From the GLSL man page for gl_LocalInvocationIndex:
   *
   *"The value of gl_LocalInvocationIndex is equal to
@@ -111,12 +117,14 @@ convert_block(nir_block *block, nir_builder *b)
 nir_load_system_value(b, nir_intrinsic_load_base_instance, 0));
  break;
 
-  default: {
+  default:
+ break;
+  }
+
+  if (sysval == NULL) {
  nir_intrinsic_op sysval_op =
 nir_intrinsic_from_system_value(var->data.location);
  sysval = nir_load_system_value(b, sysval_op, 0);
- break;
-  } /* default */
   }
 
   nir_ssa_def_rewrite_uses(&load_var->dest.ssa, nir_src_for_ssa(sysval));
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c 
b/src/mesa/drivers/dri/i965/brw_compiler.c
index a4855a0..bb06733 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.c
+++ b/src/mesa/drivers/dri/i965/brw_compiler.c
@@ -40,7 +40,8 @@
.lower_fdiv = true,   

[Mesa-dev] [PATCH v2 00/13] i965: Rework CS local IDs for gen7+

2016-05-27 Thread Jordan Justen
git://people.freedesktop.org/~jljusten/mesa hsw-cs-cross-thread-constants-v2

v2:
 * Add v1 feedback (as noted in patch commit messages)
 * Add vulkan support

Tested with curro's simd32 CS series. The IDs appear to be working
with simd32, and the UE4 elemental ran with INTEL_DEBUG=do32. (Tested
on Broadwell)

The v1 cover letter has some additional information:

https://lists.freedesktop.org/archives/mesa-dev/2016-May/117952.html

Jordan Justen (13):
  glsl: Add glsl LowerCsDerivedVariables option
  nir: Make lowering gl_LocalInvocationIndex optional
  i965: Add nir channel_num system value
  i965: Add nir based intrinsic lowering and thread ID uniform
  i965: Track and place CS thread ID uniform
  i965: Store number of threads in brw_cs_prog_data
  i965: Add CS push constant info to brw_cs_prog_data
  anv: Add push constant uniforms and lower uniforms
  squash-fwd i965: Support new thread ID push constant & cross-thread
constants
  squash i965: Use nir to lower cs-derived variables
  squash i965: Run the intrinsics lowering pass
  squash i965: Remove old CS local ID handling
  squash anv: Support new local ID generation & cross-thread constants

 src/compiler/glsl/builtin_variables.cpp|  29 ++--
 src/compiler/glsl/glsl_parser_extras.cpp   |   2 +-
 src/compiler/glsl/ir.h |   3 +-
 src/compiler/nir/nir.c |   4 +
 src/compiler/nir/nir.h |   2 +
 src/compiler/nir/nir_gather_info.c |   1 +
 src/compiler/nir/nir_intrinsics.h  |   2 +
 src/compiler/nir/nir_lower_system_values.c |  16 +-
 src/intel/vulkan/anv_cmd_buffer.c  |  52 ---
 src/intel/vulkan/anv_pipeline.c|  36 -
 src/intel/vulkan/anv_private.h |   1 -
 src/intel/vulkan/gen7_cmd_buffer.c |  10 +-
 src/intel/vulkan/gen8_cmd_buffer.c |  13 +-
 src/intel/vulkan/genX_cmd_buffer.c |   4 +-
 src/intel/vulkan/genX_pipeline.c   |  12 +-
 src/mesa/drivers/dri/i965/Makefile.sources |   1 +
 src/mesa/drivers/dri/i965/brw_compiler.c   |   3 +-
 src/mesa/drivers/dri/i965/brw_compiler.h   |  22 ++-
 src/mesa/drivers/dri/i965/brw_defines.h|   3 +
 src/mesa/drivers/dri/i965/brw_fs.cpp   | 207 ++---
 src/mesa/drivers/dri/i965/brw_fs.h |   1 -
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp   |  22 ++-
 src/mesa/drivers/dri/i965/brw_nir.h|   1 +
 src/mesa/drivers/dri/i965/brw_nir_intrinsics.c | 163 +++
 src/mesa/drivers/dri/i965/brw_program.c|   1 +
 src/mesa/drivers/dri/i965/gen7_cs_state.c  | 122 +++
 src/mesa/main/mtypes.h |   3 +
 src/mesa/state_tracker/st_extensions.c |   4 +-
 28 files changed, 490 insertions(+), 250 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/brw_nir_intrinsics.c

-- 
2.8.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 05/13] i965: Track and place CS thread ID uniform

2016-05-27 Thread Jordan Justen
This thread ID uniform will be used to compute the
gl_LocalInvocationIndex and gl_LocalInvocationID values.

It is important for this uniform to be added in the last push constant
register. fs_visitor::assign_constant_locations is updated to make
sure this happens.

The reason this is important is that the cross-thread push constant
registers are loaded first, and the per-thread push constant registers
are loaded after that. (Broadwell adds another push constant upload
mechanism which reverses this order, but we are ignoring this for
now.)

v2:
 * Add variable in intrinsics lowering pass
 * Make sure the ID is pushed last in assign_constant_locations, and
   that we save a spot for the ID in the push constants

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_compiler.h |  1 +
 src/mesa/drivers/dri/i965/brw_fs.cpp | 49 +---
 2 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h 
b/src/mesa/drivers/dri/i965/brw_compiler.h
index a8fb486..f8379bc 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -433,6 +433,7 @@ struct brw_cs_prog_data {
bool uses_barrier;
bool uses_num_work_groups;
unsigned local_invocation_id_regs;
+   int thread_local_id_index;
 
struct {
   /** @{
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index bb2caa5..82b6781 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2086,6 +2086,10 @@ fs_visitor::assign_constant_locations()
bool contiguous[uniforms];
memset(contiguous, 0, sizeof(contiguous));
 
+   int thread_local_id_index =
+  (stage == MESA_SHADER_COMPUTE) ?
+  ((brw_cs_prog_data*)stage_prog_data)->thread_local_id_index : -1;
+
/* First, we walk through the instructions and do two things:
 *
 *  1) Figure out which uniforms are live.
@@ -2130,6 +2134,9 @@ fs_visitor::assign_constant_locations()
   }
}
 
+   if (thread_local_id_index >= 0 && !is_live[thread_local_id_index])
+  thread_local_id_index = -1;
+
/* Only allow 16 registers (128 uniform components) as push constants.
 *
 * Just demote the end of the list.  We could probably do better
@@ -2158,6 +2165,9 @@ fs_visitor::assign_constant_locations()
 
int chunk_start = -1;
 
+   /* We may need to save a slot for the thread ID */
+   unsigned int saved_slots = thread_local_id_index >= 0 ? 1 : 0;
+
/* First push 64-bit uniforms to ensure they are properly aligned */
for (unsigned u = 0; u < uniforms; u++) {
   if (!is_live[u] || !is_live_64bit[u])
@@ -2166,8 +2176,8 @@ fs_visitor::assign_constant_locations()
   set_push_pull_constant_loc(u, &chunk_start, contiguous[u],
  push_constant_loc, pull_constant_loc,
  &num_push_constants, &num_pull_constants,
- max_push_components, max_chunk_size,
- stage_prog_data);
+ max_push_components - saved_slots,
+ max_chunk_size, stage_prog_data);
 
}
 
@@ -2176,13 +2186,29 @@ fs_visitor::assign_constant_locations()
   if (!is_live[u] || is_live_64bit[u])
  continue;
 
+  /* Skip thread_local_id_index to put it in the last push register. */
+  if (thread_local_id_index == (int)u)
+ continue;
+
+  set_push_pull_constant_loc(u, &chunk_start, contiguous[u],
+ push_constant_loc, pull_constant_loc,
+ &num_push_constants, &num_pull_constants,
+ max_push_components - saved_slots,
+ max_chunk_size, stage_prog_data);
+   }
+
+   if (thread_local_id_index >= 0) {
+  /* Add the CS thread ID uniform at the end */
+  unsigned u = thread_local_id_index;
   set_push_pull_constant_loc(u, &chunk_start, contiguous[u],
  push_constant_loc, pull_constant_loc,
  &num_push_constants, &num_pull_constants,
- max_push_components, max_chunk_size,
- stage_prog_data);
+ max_push_components,
+ max_chunk_size, stage_prog_data);
+  assert(push_constant_loc[u] >= 0);
}
 
+
/* As the uniforms are going to be reordered, take the data from a temporary
 * copy of the original param[].
 */
@@ -2201,6 +2227,7 @@ fs_visitor::assign_constant_locations()
 * push_constant_loc[i] <= i and we can do it in one smooth loop without
 * having to make a copy.
 */
+   int new_thread_local_id_index = -1;
for (unsigned int i = 0; i < uniforms; i++) {
   const gl_constant_value *value = param[i];
 
@@ -2208,9 +2235,15 @@ fs_visitor::assi

[Mesa-dev] [PATCH v2 10/13] squash i965: Use nir to lower cs-derived variables

2016-05-27 Thread Jordan Justen
We added this support into nir a while ago in
a9e6213edd757980475167331bda15c3970a538d for Mesa's Intel vulkan
driver as part of the SPIR-V support, so we can use it for the i965
driver as well.

Signed-off-by: Jordan Justen 
Reviewed-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_context.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 1477c8b..552e5ec 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -565,7 +565,6 @@ brw_initialize_context_constants(struct brw_context *brw)
   ctx->Const.MaxClipPlanes = 8;
 
ctx->Const.LowerTessLevel = true;
-   ctx->Const.LowerCsDerivedVariables = true;
ctx->Const.PrimitiveRestartForPatches = true;
 
ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
-- 
2.8.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 04/13] i965: Add nir based intrinsic lowering and thread ID uniform

2016-05-27 Thread Jordan Justen
We add a lowering pass for nir intrinsics. This pass can replace nir
intrinsics with driver specific nir lower code.

We lower the gl_LocalInvocationIndex intrinsic based on a uniform
which is loaded with a thread specific ID.

We also lower the gl_LocalInvocationID based on
gl_LocalInvocationIndex.

v2:
 * Create variable during lowering pass. (Ken)

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/Makefile.sources |   1 +
 src/mesa/drivers/dri/i965/brw_nir.h|   1 +
 src/mesa/drivers/dri/i965/brw_nir_intrinsics.c | 163 +
 3 files changed, 165 insertions(+)
 create mode 100644 src/mesa/drivers/dri/i965/brw_nir_intrinsics.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index d8711ed..f448551 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -46,6 +46,7 @@ i965_compiler_FILES = \
brw_nir.c \
brw_nir_analyze_boolean_resolves.c \
brw_nir_attribute_workarounds.c \
+   brw_nir_intrinsics.c \
brw_nir_opt_peephole_ffma.c \
brw_packed_float.c \
brw_predicated_break.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_nir.h 
b/src/mesa/drivers/dri/i965/brw_nir.h
index 409e49a..b944681 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.h
+++ b/src/mesa/drivers/dri/i965/brw_nir.h
@@ -91,6 +91,7 @@ void brw_nir_analyze_boolean_resolves(nir_shader *nir);
 nir_shader *brw_preprocess_nir(const struct brw_compiler *compiler,
nir_shader *nir);
 
+bool brw_nir_lower_intrinsics(nir_shader *nir);
 void brw_nir_lower_vs_inputs(nir_shader *nir,
  const struct brw_device_info *devinfo,
  bool is_scalar,
diff --git a/src/mesa/drivers/dri/i965/brw_nir_intrinsics.c 
b/src/mesa/drivers/dri/i965/brw_nir_intrinsics.c
new file mode 100644
index 000..e49b435
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_nir_intrinsics.c
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_nir.h"
+#include "compiler/nir/nir_builder.h"
+
+struct lower_intrinsics_state {
+   nir_shader *nir;
+   nir_function_impl *impl;
+   bool progress;
+   nir_builder builder;
+   nir_variable *cs_thread_id;
+};
+
+static nir_variable *
+get_cs_local_id_uniform(struct lower_intrinsics_state *state)
+{
+   if (state->cs_thread_id == NULL) {
+  int location = -1;
+  nir_foreach_variable(var, &state->nir->uniforms) {
+ location = MAX2(location, var->data.location);
+  }
+
+  state->cs_thread_id =
+ nir_variable_create(state->nir, nir_var_uniform, glsl_uint_type(),
+ "cs_thread_local_id");
+  state->cs_thread_id->data.location = location + 1;
+   }
+
+   return state->cs_thread_id;
+}
+
+static bool
+lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state,
+  nir_block *block)
+{
+   bool progress = false;
+   nir_builder *b = &state->builder;
+   nir_shader *nir = state->nir;
+
+   nir_foreach_instr_safe(instr, block) {
+  if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+  nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
+
+  b->cursor = nir_after_instr(&intrinsic->instr);
+
+  nir_ssa_def *sysval;
+  switch (intrinsic->intrinsic) {
+  case nir_intrinsic_load_local_invocation_index: {
+ /* We construct the local invocation index from:
+  *
+  *gl_LocalInvocationIndex =
+  *   cs_thread_local_id + channel_num;
+  */
+ nir_variable *id_var = get_cs_local_id_uniform(state);
+ nir_ssa_def *channel =
+nir_load_system_value(b, nir_intrinsic_load_channel_num, 0);
+ sysval = nir_iadd(b, channel

[Mesa-dev] [PATCH v2 06/13] i965: Store number of threads in brw_cs_prog_data

2016-05-27 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_compiler.h  |  1 +
 src/mesa/drivers/dri/i965/brw_fs.cpp  | 13 +++--
 src/mesa/drivers/dri/i965/gen7_cs_state.c | 32 ++-
 3 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h 
b/src/mesa/drivers/dri/i965/brw_compiler.h
index f8379bc..778c036 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -430,6 +430,7 @@ struct brw_cs_prog_data {
GLuint dispatch_grf_start_reg_16;
unsigned local_size[3];
unsigned simd_size;
+   unsigned threads;
bool uses_barrier;
bool uses_num_work_groups;
unsigned local_invocation_id_regs;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 82b6781..6e4a5d6 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -6194,6 +6194,15 @@ fs_visitor::emit_cs_work_group_id_setup()
return reg;
 }
 
+static void
+cs_set_simd_size(struct brw_cs_prog_data *cs_prog_data, unsigned size)
+{
+   cs_prog_data->simd_size = size;
+   unsigned group_size = cs_prog_data->local_size[0] *
+  cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
+   cs_prog_data->threads = (group_size + size - 1) / size;
+}
+
 const unsigned *
 brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
@@ -6242,7 +6251,7 @@ brw_compile_cs(const struct brw_compiler *compiler, void 
*log_data,
  fail_msg = v8.fail_msg;
   } else {
  cfg = v8.cfg;
- prog_data->simd_size = 8;
+ cs_set_simd_size(prog_data, 8);
  prog_data->base.dispatch_grf_start_reg = v8.payload.num_regs;
   }
}
@@ -6267,7 +6276,7 @@ brw_compile_cs(const struct brw_compiler *compiler, void 
*log_data,
  }
   } else {
  cfg = v16.cfg;
- prog_data->simd_size = 16;
+ cs_set_simd_size(prog_data, 16);
  prog_data->dispatch_grf_start_reg_16 = v16.payload.num_regs;
   }
}
diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c 
b/src/mesa/drivers/dri/i965/gen7_cs_state.c
index 7f484dd..619edfb 100644
--- a/src/mesa/drivers/dri/i965/gen7_cs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c
@@ -33,17 +33,6 @@
 #include "program/prog_statevars.h"
 #include "compiler/glsl/ir_uniform.h"
 
-static unsigned
-get_cs_thread_count(const struct brw_cs_prog_data *cs_prog_data)
-{
-   const unsigned simd_size = cs_prog_data->simd_size;
-   unsigned group_size = cs_prog_data->local_size[0] *
-  cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
-
-   return (group_size + simd_size - 1) / simd_size;
-}
-
-
 static void
 brw_upload_cs_state(struct brw_context *brw)
 {
@@ -79,7 +68,6 @@ brw_upload_cs_state(struct brw_context *brw)
   (prog_data->nr_params + local_id_dwords) * sizeof(gl_constant_value);
unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
unsigned push_constant_regs = reg_aligned_constant_size / 32;
-   unsigned threads = get_cs_thread_count(cs_prog_data);
 
uint32_t dwords = brw->gen < 8 ? 8 : 9;
BEGIN_BATCH(dwords);
@@ -129,7 +117,8 @@ brw_upload_cs_state(struct brw_context *brw)
 *
 * Note: The constant data is built in brw_upload_cs_push_constants below.
 */
-   const uint32_t vfe_curbe_allocation = push_constant_regs * threads;
+   const uint32_t vfe_curbe_allocation =
+  push_constant_regs * cs_prog_data->threads;
OUT_BATCH(SET_FIELD(vfe_urb_allocation, MEDIA_VFE_STATE_URB_ALLOC) |
  SET_FIELD(vfe_curbe_allocation, MEDIA_VFE_STATE_CURBE_ALLOC));
OUT_BATCH(0);
@@ -141,7 +130,7 @@ brw_upload_cs_state(struct brw_context *brw)
   BEGIN_BATCH(4);
   OUT_BATCH(MEDIA_CURBE_LOAD << 16 | (4 - 2));
   OUT_BATCH(0);
-  OUT_BATCH(ALIGN(reg_aligned_constant_size * threads, 64));
+  OUT_BATCH(ALIGN(reg_aligned_constant_size * cs_prog_data->threads, 64));
   OUT_BATCH(stage_state->push_const_offset);
   ADVANCE_BATCH();
}
@@ -163,9 +152,9 @@ brw_upload_cs_state(struct brw_context *brw)
desc[dw++] = SET_FIELD(push_constant_regs, MEDIA_CURBE_READ_LENGTH);
const uint32_t media_threads =
   brw->gen >= 8 ?
-  SET_FIELD(threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) :
-  SET_FIELD(threads, MEDIA_GPGPU_THREAD_COUNT);
-   assert(threads <= brw->max_cs_threads);
+  SET_FIELD(cs_prog_data->threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) :
+  SET_FIELD(cs_prog_data->threads, MEDIA_GPGPU_THREAD_COUNT);
+   assert(cs_prog_data->threads <= brw->max_cs_threads);
 
assert(prog_data->total_shared <= 64 * 1024);
uint32_t slm_size = 0;
@@ -247,21 +236,20 @@ brw_upload_cs_push_constants(struct brw_context *brw,
   const unsigned param_aligned_count =
  reg_aligned_constant_size / sizeof(*param);
 
-  unsigned threads = get_cs_thread_count(cs_prog_data);
-
   param = (

[Mesa-dev] [PATCH v2 09/13] squash-fwd i965: Support new thread ID push constant & cross-thread constants

2016-05-27 Thread Jordan Justen
Cross-thread constant support appears on Haswell. It allows us to
upload a single set of uniform data shared by all threads without
duplicating it per thread.

We also support per-thread data which allows us to store a per-thread
ID in one of the uniforms that can be used to calculate the
gl_LocalInvocationIndex and gl_LocalInvocationID variables.

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_defines.h   |   3 +
 src/mesa/drivers/dri/i965/gen7_cs_state.c | 102 +++---
 2 files changed, 55 insertions(+), 50 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 31b3336..54a7fd0 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -2947,6 +2947,9 @@ enum brw_wm_barycentric_interp_mode {
 # define MEDIA_GPGPU_THREAD_COUNT_MASK  INTEL_MASK(7, 0)
 # define GEN8_MEDIA_GPGPU_THREAD_COUNT_SHIFT0
 # define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(9, 0)
+/* GEN7 DW6, GEN8+ DW7 */
+# define CROSS_THREAD_READ_LENGTH_SHIFT 0
+# define CROSS_THREAD_READ_LENGTH_MASK  INTEL_MASK(7, 0)
 #define MEDIA_STATE_FLUSH   0x7004
 #define GPGPU_WALKER0x7105
 /* GEN7 DW0 */
diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c 
b/src/mesa/drivers/dri/i965/gen7_cs_state.c
index 619edfb..d26cf87 100644
--- a/src/mesa/drivers/dri/i965/gen7_cs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c
@@ -42,7 +42,6 @@ brw_upload_cs_state(struct brw_context *brw)
uint32_t offset;
uint32_t *desc = (uint32_t*) brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 8 * 4, 64, &offset);
-   struct gl_program *prog = (struct gl_program *) brw->compute_program;
struct brw_stage_state *stage_state = &brw->cs.base;
struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data;
struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
@@ -59,16 +58,6 @@ brw_upload_cs_state(struct brw_context *brw)
 
prog_data->binding_table.size_bytes,
 32, &stage_state->bind_bo_offset);
 
-   unsigned local_id_dwords = 0;
-
-   if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID)
-  local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
-
-   unsigned push_constant_data_size =
-  (prog_data->nr_params + local_id_dwords) * sizeof(gl_constant_value);
-   unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
-   unsigned push_constant_regs = reg_aligned_constant_size / 32;
-
uint32_t dwords = brw->gen < 8 ? 8 : 9;
BEGIN_BATCH(dwords);
OUT_BATCH(MEDIA_VFE_STATE << 16 | (dwords - 2));
@@ -118,7 +107,8 @@ brw_upload_cs_state(struct brw_context *brw)
 * Note: The constant data is built in brw_upload_cs_push_constants below.
 */
const uint32_t vfe_curbe_allocation =
-  push_constant_regs * cs_prog_data->threads;
+  ALIGN(cs_prog_data->push.per_thread.regs * cs_prog_data->threads +
+cs_prog_data->push.cross_thread.regs, 2);
OUT_BATCH(SET_FIELD(vfe_urb_allocation, MEDIA_VFE_STATE_URB_ALLOC) |
  SET_FIELD(vfe_curbe_allocation, MEDIA_VFE_STATE_CURBE_ALLOC));
OUT_BATCH(0);
@@ -126,11 +116,11 @@ brw_upload_cs_state(struct brw_context *brw)
OUT_BATCH(0);
ADVANCE_BATCH();
 
-   if (reg_aligned_constant_size > 0) {
+   if (cs_prog_data->push.total.size > 0) {
   BEGIN_BATCH(4);
   OUT_BATCH(MEDIA_CURBE_LOAD << 16 | (4 - 2));
   OUT_BATCH(0);
-  OUT_BATCH(ALIGN(reg_aligned_constant_size * cs_prog_data->threads, 64));
+  OUT_BATCH(ALIGN(cs_prog_data->push.total.size, 64));
   OUT_BATCH(stage_state->push_const_offset);
   ADVANCE_BATCH();
}
@@ -149,7 +139,8 @@ brw_upload_cs_state(struct brw_context *brw)
desc[dw++] = stage_state->sampler_offset |
   ((stage_state->sampler_count + 3) / 4);
desc[dw++] = stage_state->bind_bo_offset;
-   desc[dw++] = SET_FIELD(push_constant_regs, MEDIA_CURBE_READ_LENGTH);
+   desc[dw++] = SET_FIELD(cs_prog_data->push.per_thread.regs,
+  MEDIA_CURBE_READ_LENGTH);
const uint32_t media_threads =
   brw->gen >= 8 ?
   SET_FIELD(cs_prog_data->threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) :
@@ -171,6 +162,10 @@ brw_upload_cs_state(struct brw_context *brw)
   SET_FIELD(slm_size, MEDIA_SHARED_LOCAL_MEMORY_SIZE) |
   media_threads;
 
+   desc[dw++] =
+  SET_FIELD(cs_prog_data->push.cross_thread.regs,
+CROSS_THREAD_READ_LENGTH);
+
BEGIN_BATCH(4);
OUT_BATCH(MEDIA_INTERFACE_DESCRIPTOR_LOAD << 16 | (4 - 2));
OUT_BATCH(0);
@@ -213,10 +208,6 @@ brw_upload_cs_push_constants(struct brw_context *brw,
struct gl_context *ctx = &brw->ctx;
const struct brw_stage_prog_data *prog_data =
   (struct brw_stage_prog_data*) cs_prog_data;
-   unsigned local_id_dwords = 0;
-
-   

[Mesa-dev] [PATCH v2 08/13] anv: Add push constant uniforms and lower uniforms

2016-05-27 Thread Jordan Justen
This will be important when we start adding a uniform for the CS
thread local invocation index.

Signed-off-by: Jordan Justen 
---
 src/intel/vulkan/anv_pipeline.c | 32 +++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index d63e50e..8021348 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -78,6 +78,31 @@ void anv_DestroyShaderModule(
anv_free2(&device->alloc, pAllocator, module);
 }
 
+static void
+anv_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
+{
+   if (is_scalar) {
+  nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
+   type_size_scalar_bytes);
+  nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes);
+   } else {
+  nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
+   type_size_vec4_bytes);
+  nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes);
+   }
+}
+
+static void
+add_nir_push_constant_uniforms(nir_shader *shader)
+{
+   for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++) {
+  char *name = ralloc_asprintf(shader, "push%02d", i);
+  nir_variable *var =
+ nir_variable_create(shader, nir_var_uniform, glsl_uint_type(), name);
+  var->data.location = i;
+   }
+}
+
 #define SPIR_V_MAGIC_NUMBER 0x07230203
 
 /* Eventually, this will become part of anv_CreateShader.  Unfortunately,
@@ -168,7 +193,8 @@ anv_shader_compile_to_nir(struct anv_device *device,
 
   nir_lower_io_to_temporaries(entry_point->shader, entry_point, true, 
false);
 
-  nir_lower_system_values(nir);
+  add_nir_push_constant_uniforms(nir);
+
   nir_validate_shader(nir);
}
 
@@ -177,6 +203,10 @@ anv_shader_compile_to_nir(struct anv_device *device,
 
nir = brw_preprocess_nir(compiler, nir);
 
+   nir_lower_system_values(nir);
+   const bool is_scalar = compiler->scalar_stage[nir->stage];
+   anv_nir_lower_uniforms(nir, is_scalar);
+
nir_shader_gather_info(nir, entry_point->impl);
 
nir_variable_mode indirect_mask = 0;
-- 
2.8.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 03/13] i965: Add nir channel_num system value

2016-05-27 Thread Jordan Justen
v2:
 * simd16/32 fixes (curro)

Signed-off-by: Jordan Justen 
---
 src/compiler/nir/nir_intrinsics.h|  1 +
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 15 +++
 2 files changed, 16 insertions(+)

diff --git a/src/compiler/nir/nir_intrinsics.h 
b/src/compiler/nir/nir_intrinsics.h
index aeb6038..6f86c9f 100644
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -304,6 +304,7 @@ SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx)
 SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx)
 SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx)
 SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(channel_num, 1, 0, xx, xx, xx)
 
 /*
  * Load operations pull data from some piece of GPU memory.  All load
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 954bfab..c9e4b3c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -3881,6 +3881,21 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
nir_intrinsic_instr *instr
   break;
}
 
+   case nir_intrinsic_load_channel_num: {
+  fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UW);
+  dest = retype(dest, BRW_REGISTER_TYPE_UD);
+  const fs_builder allbld8 = bld.group(8, 0).exec_all();
+  allbld8.MOV(tmp, brw_imm_v(0x76543210));
+  if (dispatch_width > 8)
+ allbld8.ADD(byte_offset(tmp, 16), tmp, brw_imm_uw(8u));
+  if (dispatch_width > 16) {
+ const fs_builder allbld16 = bld.group(16, 0).exec_all();
+ allbld16.ADD(byte_offset(tmp, 32), tmp, brw_imm_uw(16u));
+  }
+  bld.MOV(dest, tmp);
+  break;
+   }
+
default:
   unreachable("unknown intrinsic");
}
-- 
2.8.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 11/13] squash i965: Run the intrinsics lowering pass

2016-05-27 Thread Jordan Justen
This pass replaces the local ID and local index intrinsics with
i965-specific NIR code.

It relies on the gl_i965_cs_thread_local_id uniform variable, which
(despite being a uniform) actually varies per thread, to provide a
thread-local ID.

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_program.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
b/src/mesa/drivers/dri/i965/brw_program.c
index 792f81b..a317bd2 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -103,6 +103,7 @@ brw_create_nir(struct brw_context *brw,
}
 
NIR_PASS(progress, nir, nir_lower_system_values);
+   NIR_PASS(progress, nir, brw_nir_lower_intrinsics);
NIR_PASS_V(nir, brw_nir_lower_uniforms, is_scalar);
 
if (shader_prog) {
-- 
2.8.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nvc0: make use of NVC0_CB_AUX_XXX_SIZE as much as possible

2016-05-27 Thread Ilia Mirkin
TBH I don't like this. The way it is now, there's an obvious
correlation between the numbers uploaded, and the for loops/etc which
actually stick the data into the pushbuf. After your change, it's not
at all clear, and should those numbers become disconnected it'll be
difficult to track down.

If you REALLY want to do this, please throw STATIC_ASSERTs all over
the place ensuring that the values are what they are now. But I'd just
as soon leave things as they are now.

  -ilia


On Fri, May 27, 2016 at 4:42 AM, Samuel Pitoiset
 wrote:
> This avoids using magic numbers for the driver constant buffer areas
> and might also prevent using wrong sizes and offsets.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/drivers/nouveau/nvc0/nvc0_compute.c|  2 +-
>  src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c|  4 ++--
>  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c |  2 +-
>  src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c |  6 +++---
>  src/gallium/drivers/nouveau/nvc0/nvc0_tex.c|  2 +-
>  src/gallium/drivers/nouveau/nvc0/nve4_compute.c| 14 +++---
>  6 files changed, 15 insertions(+), 15 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
> index 832c085..7574a95 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
> @@ -235,7 +235,7 @@ nvc0_compute_validate_buffers(struct nvc0_context *nvc0)
> PUSH_DATA (push, 2048);
> PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
> PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
> -   BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
> +   BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + (NVC0_CB_AUX_BUF_SIZE / 4));
> PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0));
>
> for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
> index 27cbbc4..bb7fa7f 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
> @@ -1830,7 +1830,7 @@ nvc0_hw_sm_upload_input(struct nvc0_context *nvc0, 
> struct nvc0_hw_query *hq)
>PUSH_DATAh(push, address + NVC0_CB_AUX_MP_INFO);
>PUSH_DATA (push, address + NVC0_CB_AUX_MP_INFO);
>BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
> -  PUSH_DATA (push, 3 * 4);
> +  PUSH_DATA (push, NVC0_CB_AUX_MP_SIZE);
>PUSH_DATA (push, 0x1);
>BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 3);
>PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
> @@ -1839,7 +1839,7 @@ nvc0_hw_sm_upload_input(struct nvc0_context *nvc0, 
> struct nvc0_hw_query *hq)
>PUSH_DATA (push, 2048);
>PUSH_DATAh(push, address);
>PUSH_DATA (push, address);
> -  BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 3);
> +  BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + (NVC0_CB_AUX_MP_SIZE / 4));
>PUSH_DATA (push, NVC0_CB_AUX_MP_INFO);
> }
> PUSH_DATA (push, (hq->bo->offset + hq->base_offset));
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> index 6541241..de28fb0 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> @@ -964,7 +964,7 @@ nvc0_screen_create(struct nouveau_device *dev)
>PUSH_DATA (push, (15 << 4) | 1);
>if (screen->eng3d->oclass >= NVE4_3D_CLASS) {
>   unsigned j;
> - BEGIN_1IC0(push, NVC0_3D(CB_POS), 9);
> + BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + (NVC0_CB_AUX_UNK_SIZE / 4));
>   PUSH_DATA (push, NVC0_CB_AUX_UNK_INFO);
>   for (j = 0; j < 8; ++j)
>  PUSH_DATA(push, j);
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
> index a77486d..09f0862 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
> @@ -336,9 +336,9 @@ nvc0_upload_uclip_planes(struct nvc0_context *nvc0, 
> unsigned s)
> PUSH_DATA (push, 2048);
> PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
> PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
> -   BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1);
> +   BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + (NVC0_CB_AUX_UCP_SIZE / 4));
> PUSH_DATA (push, NVC0_CB_AUX_UCP_INFO);
> -   PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
> +   PUSH_DATAp(push, &nvc0->clip.ucp[0][0], (NVC0_CB_AUX_UCP_SIZE / 4));
>  }
>
>  static inline void
> @@ -506,7 +506,7 @@ nvc0_validate_buffers(struct nvc0_context *nvc0)
>PUSH_DATA (push, 2048);
>PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
>P

[Mesa-dev] [PATCH v2 13/13] squash anv: Support new local ID generation & cross-thread constants

2016-05-27 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/intel/vulkan/anv_cmd_buffer.c  | 52 --
 src/intel/vulkan/anv_pipeline.c|  4 +++
 src/intel/vulkan/anv_private.h |  1 -
 src/intel/vulkan/gen7_cmd_buffer.c | 10 ++--
 src/intel/vulkan/gen8_cmd_buffer.c | 13 --
 src/intel/vulkan/genX_cmd_buffer.c |  4 +--
 src/intel/vulkan/genX_pipeline.c   | 12 ++---
 7 files changed, 42 insertions(+), 54 deletions(-)

diff --git a/src/intel/vulkan/anv_cmd_buffer.c 
b/src/intel/vulkan/anv_cmd_buffer.c
index bba24e8..464b56a 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -1059,24 +1059,14 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer 
*cmd_buffer)
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
 
-   const unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
-   const unsigned push_constant_data_size =
-  (local_id_dwords + prog_data->nr_params) * 4;
-   const unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 
32);
-   const unsigned param_aligned_count =
-  reg_aligned_constant_size / sizeof(uint32_t);
-
/* If we don't actually have any push constants, bail. */
-   if (reg_aligned_constant_size == 0)
+   if (cs_prog_data->push.total.size == 0)
   return (struct anv_state) { .offset = 0 };
 
-   const unsigned threads = pipeline->cs_thread_width_max;
-   const unsigned total_push_constants_size =
-  reg_aligned_constant_size * threads;
const unsigned push_constant_alignment =
   cmd_buffer->device->info.gen < 8 ? 32 : 64;
const unsigned aligned_total_push_constants_size =
-  ALIGN(total_push_constants_size, push_constant_alignment);
+  ALIGN(cs_prog_data->push.total.size, push_constant_alignment);
struct anv_state state =
   anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
  aligned_total_push_constants_size,
@@ -1085,21 +1075,33 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer 
*cmd_buffer)
/* Walk through the param array and fill the buffer with data */
uint32_t *u32_map = state.map;
 
-   brw_cs_fill_local_id_payload(cs_prog_data, u32_map, threads,
-reg_aligned_constant_size);
-
-   /* Setup uniform data for the first thread */
-   for (unsigned i = 0; i < prog_data->nr_params; i++) {
-  uint32_t offset = (uintptr_t)prog_data->param[i];
-  u32_map[local_id_dwords + i] = *(uint32_t *)((uint8_t *)data + offset);
+   if (cs_prog_data->push.cross_thread.size > 0) {
+  assert(cs_prog_data->thread_local_id_index < 0 ||
+ cs_prog_data->thread_local_id_index >=
+cs_prog_data->push.cross_thread.dwords);
+  for (unsigned i = 0;
+   i < cs_prog_data->push.cross_thread.dwords;
+   i++) {
+ uint32_t offset = (uintptr_t)prog_data->param[i];
+ u32_map[i] = *(uint32_t *)((uint8_t *)data + offset);
+  }
}
 
-   /* Copy uniform data from the first thread to every other thread */
-   const size_t uniform_data_size = prog_data->nr_params * sizeof(uint32_t);
-   for (unsigned t = 1; t < threads; t++) {
-  memcpy(&u32_map[t * param_aligned_count + local_id_dwords],
- &u32_map[local_id_dwords],
- uniform_data_size);
+   if (cs_prog_data->push.per_thread.size > 0) {
+  for (unsigned t = 0; t < cs_prog_data->threads; t++) {
+ uint32_t *t_u32_map =
+&u32_map[8 * t + cs_prog_data->push.cross_thread.dwords];
+ for (unsigned si = cs_prog_data->push.cross_thread.dwords, di = 0;
+  si < prog_data->nr_params;
+  si++, di++) {
+if (si != cs_prog_data->thread_local_id_index) {
+   uint32_t offset = (uintptr_t)prog_data->param[si];
+   t_u32_map[di] = *(uint32_t *)((uint8_t *)data + offset);
+} else {
+   t_u32_map[di] = t * cs_prog_data->simd_size;
+}
+ }
+  }
}
 
if (!cmd_buffer->device->info.has_llc)
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 8021348..0a61e3a 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -204,6 +204,7 @@ anv_shader_compile_to_nir(struct anv_device *device,
nir = brw_preprocess_nir(compiler, nir);
 
nir_lower_system_values(nir);
+   brw_nir_lower_intrinsics(nir);
const bool is_scalar = compiler->scalar_stage[nir->stage];
anv_nir_lower_uniforms(nir, is_scalar);
 
@@ -368,6 +369,9 @@ anv_pipeline_compile(struct anv_pipeline *pipeline,
   pipeline->needs_data_cache = true;
}
 
+   if (stage == MESA_SHADER_COMPUTE)
+  prog_data->nr_params++; /* The CS Thread ID uniform */
+
if (nir->info.num_ssbos > 0)
   pipeline->needs_data_cache = true;
 
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan

Re: [Mesa-dev] [PATCH 1/2] nvc0: drop unused surfaces formats conversion builtins

2016-05-27 Thread Ilia Mirkin
But it will be used in the future, when we need to support dynamic
formats, with OpenCL. I'd rather leave this in.

  -ilia

On Fri, May 27, 2016 at 4:14 AM, Samuel Pitoiset
 wrote:
> This codegen lib code is no longer used for Kepler since we convert
> the formats directly in the lowering pass.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/drivers/nouveau/codegen/lib/gk104.asm | 459 ---------------------
>  src/gallium/drivers/nouveau/nvc0/nvc0_tex.c   |  52 ---
>  2 files changed, 511 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/lib/gk104.asm 
> b/src/gallium/drivers/nouveau/codegen/lib/gk104.asm
> index cd65b54..ef51406 100644
> --- a/src/gallium/drivers/nouveau/codegen/lib/gk104.asm
> +++ b/src/gallium/drivers/nouveau/codegen/lib/gk104.asm
> @@ -87,465 +87,6 @@ gk104_div_s32:
> $p2 cvt s32 $r1 neg s32 $r1
> long ret
>
> -// SULDP [for each format]
> -// $r4d: address
> -// $r2: surface info (format)
> -// $p0: access predicate
> -// $p1, $p2: caching predicate (00: cv, 01: ca, 10: cg)
> -//
> -// RGBA32
> -$p1 suldgb b128 $r0q ca zero u8 g[$r4d] $r2 $p0
> -set $p1 0x1 $p1 xor not $p2
> -$p2 suldgb b128 $r0q cg zero u8 g[$r4d] $r2 $p0
> -$p1 suldgb b128 $r0q cv zero u8 g[$r4d] $r2 $p0
> -long ret
> -// RGBA16_UNORM
> -sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
> -$p1 suldgb b128 $r0q ca zero u8 g[$r4d] $r2 $p0
> -set $p1 0x1 $p1 xor not $p2
> -$p2 suldgb b128 $r0q cg zero u8 g[$r4d] $r2 $p0
> -$p1 suldgb b128 $r0q cv zero u8 g[$r4d] $r2 $p0
> -cvt rn f32 $r3 u16 1 $r1
> -cvt rn f32 $r2 u16 0 $r1
> -mul f32 $r3 $r3 0x37800074
> -sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
> -cvt rn f32 $r1 u16 1 $r0
> -mul f32 $r2 $r2 0x37800074
> -cvt rn f32 $r0 u16 0 $r0
> -mul f32 $r1 $r1 0x37800074
> -mul f32 $r0 $r0 0x37800074
> -long ret
> -// RGBA16_SNORM
> -$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0
> -sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
> -set $p1 0x1 $p1 xor not $p2
> -$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0
> -$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0
> -cvt rn f32 $r3 s16 1 $r1
> -cvt rn f32 $r2 s16 0 $r1
> -mul f32 $r3 $r3 0x38000187
> -cvt rn f32 $r1 s16 1 $r0
> -sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
> -mul f32 $r2 $r2 0x38000187
> -cvt rn f32 $r0 s16 0 $r0
> -mul f32 $r1 $r1 0x38000187
> -mul f32 $r0 $r0 0x38000187
> -long ret
> -// RGBA16_SINT
> -$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0
> -set $p1 0x1 $p1 xor not $p2
> -sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
> -$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0
> -$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0
> -cvt s32 $r3 s16 1 $r1
> -cvt s32 $r2 s16 0 $r1
> -cvt s32 $r1 s16 1 $r0
> -cvt s32 $r0 s16 0 $r0
> -long ret
> -// RGBA16_UINT
> -sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
> -$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0
> -set $p1 0x1 $p1 xor not $p2
> -$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0
> -$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0
> -cvt u32 $r3 u16 1 $r1
> -cvt u32 $r2 u16 0 $r1
> -cvt u32 $r1 u16 1 $r0
> -sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
> -cvt u32 $r0 u16 0 $r0
> -long ret
> -// RGBA16_FLOAT
> -$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0
> -set $p1 0x1 $p1 xor not $p2
> -$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0
> -$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0
> -cvt f32 $r3 f16 $r1 1
> -sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
> -cvt f32 $r2 f16 $r1 0
> -cvt f32 $r1 f16 $r0 1
> -cvt f32 $r0 f16 $r0 0
> -long ret
> -// RG32_FLOAT
> -$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0
> -set $p1 0x1 $p1 xor not $p2
> -$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0
> -sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
> -$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0
> -long mov b32 $r2 0x00000000
> -long mov b32 $r3 0x3f800000
> -long ret
> -// RG32_xINT
> -$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0
> -set $p1 0x1 $p1 xor not $p2
> -$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0
> -sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
> -$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0
> -long mov b32 $r2 0x00000000
> -long mov b32 $r3 0x00000001
> -long ret
> -// RGB10A2_UNORM
> -$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0
> -set $p1 0x1 $p1 xor not $p2
> -$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0
> -sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
> -$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0
> -ext u32 $r1 $r0 0x0a0a
> -long mov b32 $r3 0x3f800000
> -ext u32 $r2 $r0 0x0a14
> -long and b32 $r0 $r0 0x3ff
> -cvt rn f32 $r2 u16 0 $r2
> -cvt rn f32 $r1 u16 0 $r1
> -sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
> -mul f32 $r2 $r2 0x3a802007
> -cvt rn f32 $r0 u16 0 $r0
> -mul f32 $r1 $r1 0x3a802007
> -mul f32 $r0 $r0 0x3a802007
> -long ret
> -// RGB10A2_UINT
> -$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0
> -set $p1 0x1 $p1 xor not $p2
> -sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
> -$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0
> -$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0
> -ext u32 $r1 $r0 0x0a0a
> -long mov b32 $r3 0x00000001
> -ext u32 $r2 $r0 0x0a14
> -long and b32

Re: [Mesa-dev] [PATCH 2/2] nvc0: remove outdated surfaces validation code for GK104

2016-05-27 Thread Ilia Mirkin
Reviewed-by: Ilia Mirkin 

On Fri, May 27, 2016 at 4:14 AM, Samuel Pitoiset
 wrote:
> This code was used for validating surfaces with compute but now we use
> pipe_image_view instead. Anyway, surfaces support should be
> re-introduced properly once OpenCL happens.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 70 -------------------------
>  1 file changed, 70 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c 
> b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> index 7334c5f..2e3554d 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> @@ -223,76 +223,6 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
> }
>  }
>
> -/* Will be removed once images are completely done. */
> -#if 0
> -static void
> -nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
> -{
> -   struct nvc0_screen *screen = nvc0->screen;
> -   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
> -   struct nv50_surface *sf;
> -   struct nv04_resource *res;
> -   uint32_t mask;
> -   unsigned i;
> -   const unsigned t = 1;
> -   uint64_t address;
> -
> -   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);
> -
> -   mask = nvc0->surfaces_dirty[t];
> -   while (mask) {
> -  i = ffs(mask) - 1;
> -  mask &= ~(1 << i);
> -
> -  /*
> -   * NVE4's surface load/store instructions receive all the information
> -   * directly instead of via binding points, so we have to supply them.
> -   */
> -  BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
> -  PUSH_DATAh(push, address + NVC0_CB_AUX_BUF_INFO(i));
> -  PUSH_DATA (push, address + NVC0_CB_AUX_BUF_INFO(i));
> -  BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
> -  PUSH_DATA (push, 64);
> -  PUSH_DATA (push, 1);
> -  BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 17);
> -  PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
> -
> -  nve4_set_surface_info(push, nvc0->surfaces[t][i], screen);
> -
> -  sf = nv50_surface(nvc0->surfaces[t][i]);
> -  if (sf) {
> - res = nv04_resource(sf->base.texture);
> -
> - if (sf->base.writable)
> -BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
> - else
> -BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RD);
> -  }
> -   }
> -   if (nvc0->surfaces_dirty[t]) {
> -  BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
> -  PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
> -   }
> -
> -   /* re-reference non-dirty surfaces */
> -   mask = nvc0->surfaces_valid[t] & ~nvc0->surfaces_dirty[t];
> -   while (mask) {
> -  i = ffs(mask) - 1;
> -  mask &= ~(1 << i);
> -
> -  sf = nv50_surface(nvc0->surfaces[t][i]);
> -  res = nv04_resource(sf->base.texture);
> -
> -  if (sf->base.writable)
> - BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
> -  else
> - BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RD);
> -   }
> -
> -   nvc0->surfaces_dirty[t] = 0;
> -}
> -#endif
> -
>  /* Thankfully, textures with samplers follow the normal rules. */
>  static void
>  nve4_compute_validate_samplers(struct nvc0_context *nvc0)
> --
> 2.8.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >