[Mesa-dev] [PATCH v2] nir: Fix output swizzle in get_mul_for_src

2015-05-28 Thread Iago Toral Quiroga
When we compute the output swizzle we want to consider the number of
components in the add operation. So far we were using the writemask
of the multiplication for this instead, which is not correct.
---
 src/glsl/nir/nir_opt_peephole_ffma.c | 19 +--
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/src/glsl/nir/nir_opt_peephole_ffma.c 
b/src/glsl/nir/nir_opt_peephole_ffma.c
index b430eac..798506b 100644
--- a/src/glsl/nir/nir_opt_peephole_ffma.c
+++ b/src/glsl/nir/nir_opt_peephole_ffma.c
@@ -73,7 +73,8 @@ are_all_uses_fadd(nir_ssa_def *def)
 }
 
 static nir_alu_instr *
-get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs)
+get_mul_for_src(nir_alu_src *src, int num_components,
+uint8_t swizzle[4], bool *negate, bool *abs)
 {
assert(src->src.is_ssa && !src->abs && !src->negate);
 
@@ -85,16 +86,16 @@ get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool 
*negate, bool *abs)
switch (alu->op) {
case nir_op_imov:
case nir_op_fmov:
-  alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+  alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, 
abs);
   break;
 
case nir_op_fneg:
-  alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+  alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, 
abs);
   *negate = !*negate;
   break;
 
case nir_op_fabs:
-  alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+  alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, 
abs);
   *negate = false;
   *abs = true;
   break;
@@ -115,12 +116,8 @@ get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool 
*negate, bool *abs)
if (!alu)
   return NULL;
 
-   for (unsigned i = 0; i < 4; i++) {
-  if (!(alu->dest.write_mask & (1 << i)))
- break;
-
+   for (unsigned i = 0; i < num_components; i++)
   swizzle[i] = swizzle[src->swizzle[i]];
-   }
 
return alu;
 }
@@ -160,7 +157,9 @@ nir_opt_peephole_ffma_block(nir_block *block, void 
*void_state)
  negate = false;
  abs = false;
 
- mul = get_mul_for_src(&add->src[add_mul_src], swizzle, &negate, &abs);
+ mul = get_mul_for_src(&add->src[add_mul_src],
+   add->dest.dest.ssa.num_components,
+   swizzle, &negate, &abs);
 
  if (mul != NULL)
 break;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC][PATCH] gallivm: Avoid conflict with LLVM's definition of DEBUG

2015-05-28 Thread Michel Dänzer
From: Michel Dänzer 

Fixes build failure with --enable-debug:

  Compiling src/gallium/auxiliary/gallivm/lp_bld_misc.cpp ...
In file included from llvm/include/llvm/Object/RelocVisitor.h:23:0,
 from llvm/include/llvm/DebugInfo/DIContext.h:21,
 from llvm/include/llvm/ExecutionEngine/RuntimeDyld.h:21,
 from llvm/include/llvm/ExecutionEngine/ExecutionEngine.h:18,
 from src/gallium/auxiliary/gallivm/lp_bld_misc.cpp:56:
llvm/include/llvm/Support/Debug.h:92:0: warning: "DEBUG" redefined
 #define DEBUG(X) DEBUG_WITH_TYPE(DEBUG_TYPE, X)
 ^
:0:0: note: this is the location of the previous definition
:0:7: error: expected identifier before numeric constant
llvm/include/llvm/Support/COFF.h:541:5: note: in expansion of macro ‘DEBUG’
 DEBUG,
 ^
:0:7: error: expected ‘}’ before numeric constant
llvm/include/llvm/Support/COFF.h:541:5: note: in expansion of macro ‘DEBUG’
 DEBUG,
 ^
:0:7: error: expected unqualified-id before numeric constant
llvm/include/llvm/Support/COFF.h:541:5: note: in expansion of macro ‘DEBUG’
 DEBUG,
 ^
In file included from llvm/include/llvm/Object/COFF.h:19:0,
 from llvm/include/llvm/Object/RelocVisitor.h:20,
 from llvm/include/llvm/DebugInfo/DIContext.h:21,
 from llvm/include/llvm/ExecutionEngine/RuntimeDyld.h:21,
 from llvm/include/llvm/ExecutionEngine/ExecutionEngine.h:18,
 from src/gallium/auxiliary/gallivm/lp_bld_misc.cpp:56:
llvm/include/llvm/Support/COFF.h:684:1: error: expected declaration before ‘}’ 
token
 } // End namespace llvm.
 ^

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90621
Bugzilla: https://llvm.org/bugs/show_bug.cgi?id=23628
Signed-off-by: Michel Dänzer 
---

Mesa/Gallium community: Does this look like a reasonable way to address
this issue?

LLVM community: Is it necessary to expose the LLVM DEBUG macro in public
headers?

 src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp 
b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index ffed9e6..7763da7 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -50,6 +50,14 @@
 
 #include 
 
+/* Save our definition of DEBUG and undefine it to avoid conflict with LLVM's
+ * definition
+ */
+#ifdef DEBUG
+#define MESA_DEBUG DEBUG
+#undef DEBUG
+#endif
+
 #include 
 #include 
 #include 
@@ -70,6 +78,12 @@
 #include 
 #include 
 
+/* Undefine LLVM's definition of DEBUG and restore our definition (if any) */
+#undef DEBUG
+#ifdef MESA_DEBUG
+#define DEBUG MESA_DEBUG
+#endif
+
 #include "pipe/p_config.h"
 #include "util/u_debug.h"
 #include "util/u_cpu_detect.h"
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] clover: fix event handling of buffer operations

2015-05-28 Thread Grigori Goronzy
Wrap MapBuffer and MapImage as hard_event actions, like other
operations. This enables correct profiling. Also make sure to wait
for events to finish when blocking is requested by the caller.
---
 src/gallium/state_trackers/clover/api/transfer.cpp | 50 --
 1 file changed, 46 insertions(+), 4 deletions(-)

diff --git a/src/gallium/state_trackers/clover/api/transfer.cpp 
b/src/gallium/state_trackers/clover/api/transfer.cpp
index fdb9405..4986f53 100644
--- a/src/gallium/state_trackers/clover/api/transfer.cpp
+++ b/src/gallium/state_trackers/clover/api/transfer.cpp
@@ -270,6 +270,18 @@ namespace {
src_obj->resource(q), src_orig);
   };
}
+
+   ///
+   /// Resource mapping
+   ///
+   template
+   std::function
+   map_resource_op(command_queue &q, T obj, cl_map_flags flags, bool blocking,
+   const vector_t &origin, const vector_t ®ion, void **map) 
{
+  return [=, &q](event &) {
+ *map = obj->resource(q).add_map(q, flags, blocking, origin, region);
+  };
+   }
 }
 
 CLOVER_API cl_int
@@ -363,6 +375,10 @@ clEnqueueReadBufferRect(cl_command_queue d_q, cl_mem 
d_mem, cl_bool blocking,
region));
 
ret_object(rd_ev, hev);
+
+   if (blocking)
+  hev().wait();
+
return CL_SUCCESS;
 
 } catch (error &e) {
@@ -400,6 +416,10 @@ clEnqueueWriteBufferRect(cl_command_queue d_q, cl_mem 
d_mem, cl_bool blocking,
region));
 
ret_object(rd_ev, hev);
+
+   if (blocking)
+  hev().wait();
+
return CL_SUCCESS;
 
 } catch (error &e) {
@@ -505,6 +525,10 @@ clEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, 
cl_bool blocking,
region));
 
ret_object(rd_ev, hev);
+
+   if (blocking)
+  hev().wait();
+
return CL_SUCCESS;
 
 } catch (error &e) {
@@ -539,6 +563,10 @@ clEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, 
cl_bool blocking,
region));
 
ret_object(rd_ev, hev);
+
+   if (blocking)
+  hev().wait();
+
return CL_SUCCESS;
 
 } catch (error &e) {
@@ -665,10 +693,17 @@ clEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, 
cl_bool blocking,
validate_object(q, mem, obj_origin, obj_pitch, region);
validate_map_flags(mem, flags);
 
-   void *map = mem.resource(q).add_map(q, flags, blocking, obj_origin, region);
+   void *map = nullptr;
+   auto hev = create(
+  q, CL_COMMAND_MAP_BUFFER, deps,
+  map_resource_op(q, &mem, flags, blocking, obj_origin, region, &map));
 
-   ret_object(rd_ev, create(q, CL_COMMAND_MAP_BUFFER, deps));
+   ret_object(rd_ev, hev);
ret_error(r_errcode, CL_SUCCESS);
+
+   if (blocking)
+  hev().wait();
+
return map;
 
 } catch (error &e) {
@@ -693,10 +728,17 @@ clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, 
cl_bool blocking,
validate_object(q, img, origin, region);
validate_map_flags(img, flags);
 
-   void *map = img.resource(q).add_map(q, flags, blocking, origin, region);
+   void *map = nullptr;
+   auto hev = create(
+  q, CL_COMMAND_MAP_IMAGE, deps,
+  map_resource_op(q, &img, flags, blocking, origin, region, &map));
 
-   ret_object(rd_ev, create(q, CL_COMMAND_MAP_IMAGE, deps));
+   ret_object(rd_ev, hev);
ret_error(r_errcode, CL_SUCCESS);
+
+   if (blocking)
+  hev().wait();
+
return map;
 
 } catch (error &e) {
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] clover: check clEnqueueMap* for map errors

2015-05-28 Thread Grigori Goronzy
Mapping can fail, and this should be handled. Return the proper error
code and abort the associated event in this case.
---
 src/gallium/state_trackers/clover/api/transfer.cpp | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/clover/api/transfer.cpp 
b/src/gallium/state_trackers/clover/api/transfer.cpp
index 4986f53..275059c 100644
--- a/src/gallium/state_trackers/clover/api/transfer.cpp
+++ b/src/gallium/state_trackers/clover/api/transfer.cpp
@@ -699,11 +699,17 @@ clEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, 
cl_bool blocking,
   map_resource_op(q, &mem, flags, blocking, obj_origin, region, &map));
 
ret_object(rd_ev, hev);
-   ret_error(r_errcode, CL_SUCCESS);
 
if (blocking)
   hev().wait();
 
+   if (!map) {
+  hev().abort(CL_MAP_FAILURE);
+  ret_error(r_errcode, CL_MAP_FAILURE);
+  return NULL;
+   }
+
+   ret_error(r_errcode, CL_SUCCESS);
return map;
 
 } catch (error &e) {
@@ -734,11 +740,17 @@ clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, 
cl_bool blocking,
   map_resource_op(q, &img, flags, blocking, origin, region, &map));
 
ret_object(rd_ev, hev);
-   ret_error(r_errcode, CL_SUCCESS);
 
if (blocking)
   hev().wait();
 
+   if (!map) {
+  hev().abort(CL_MAP_FAILURE);
+  ret_error(r_errcode, CL_MAP_FAILURE);
+  return NULL;
+   }
+
+   ret_error(r_errcode, CL_SUCCESS);
return map;
 
 } catch (error &e) {
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/15] egl: add eglGetSyncAttrib

2015-05-28 Thread Marek Olšák
Hi Chad,

A new patch is attached.

Marek

On Wed, May 27, 2015 at 8:59 PM, Chad Versace  wrote:
> On Wed 13 May 2015, Marek Olšák wrote:
>> From: Marek Olšák 
>>
>> ---
>>  src/egl/main/eglapi.c  | 14 +-
>>  src/egl/main/eglapi.h  |  2 +-
>>  src/egl/main/eglsync.c |  2 +-
>>  src/egl/main/eglsync.h |  2 +-
>>  4 files changed, 16 insertions(+), 4 deletions(-)
>>
>
> I see two issues below.
>
>> diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c
>> index 544f7e4..6457798 100644
>> --- a/src/egl/main/eglapi.c
>> +++ b/src/egl/main/eglapi.c
>
>
>> @@ -1558,6 +1559,17 @@ eglGetSyncAttribKHR(EGLDisplay dpy, EGLSync sync, 
>> EGLint attribute, EGLint *valu
>>  }
>>
>>
>> +EGLBoolean EGLAPIENTRY
>> +eglGetSyncAttribKHR(EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLint 
>> *value)
>> +{
>> +   EGLAttrib attrib = *value;
>> +   EGLBoolean result = eglGetSyncAttrib(dpy, sync, attribute, &attrib);
>> +
>> +   *value = attrib;
>> +   return result;
>> +}
>
> This change violates the EGL_KHR_fence_sync spec, which says this
> regarding eglGetSyncAttribKHR:
>
> If any error occurs, <*value> is not modified.
>
> I think it's a good idea to add this quote to the function body so that
> future devs don't accidentally make the same mistake.
>
>
>> diff --git a/src/egl/main/eglapi.h b/src/egl/main/eglapi.h
>> index 44f589d..fdd3b05 100644
>> --- a/src/egl/main/eglapi.h
>> +++ b/src/egl/main/eglapi.h
>> @@ -91,7 +91,7 @@ typedef EGLBoolean (*DestroySyncKHR_t)(_EGLDriver *drv, 
>> _EGLDisplay *dpy, _EGLSy
>>  typedef EGLint (*ClientWaitSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, 
>> _EGLSync *sync, EGLint flags, EGLTime timeout);
>>  typedef EGLint (*WaitSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync 
>> *sync);
>>  typedef EGLBoolean (*SignalSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, 
>> _EGLSync *sync, EGLenum mode);
>> -typedef EGLBoolean (*GetSyncAttribKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, 
>> _EGLSync *sync, EGLint attribute, EGLint *value);
>> +typedef EGLBoolean (*GetSyncAttribKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, 
>> _EGLSync *sync, EGLint attribute, EGLAttrib *value);
>
> I think changing this change is potentially dangerous, because
> post-patch GetSyncAttribKHR_t has the signature of eglGetSyncAttrib and
> *not* eglGetSyncAttribKHR. The code will compile, but this discrepancy
> looks like a honeypot for accidental function misuse.
>
> And...
>
>>  #ifdef EGL_NOK_swap_region
>> diff --git a/src/egl/main/eglsync.c b/src/egl/main/eglsync.c
>> index 205cdc0..a09d991 100644
>> --- a/src/egl/main/eglsync.c
>> +++ b/src/egl/main/eglsync.c
>> @@ -142,7 +142,7 @@ _eglInitSync(_EGLSync *sync, _EGLDisplay *dpy, EGLenum 
>> type,
>>
>>  EGLBoolean
>>  _eglGetSyncAttribKHR(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
>> - EGLint attribute, EGLint *value)
>> + EGLint attribute, EGLAttrib *value)
>
> I have the same issue here. Let's avoid introducing confusing
> discrepancies between function names and their signatures. Just name
> this one and the typedef to '_eglGetSyncAttrib' and 'GetSyncAttrib_t'.
From f4fb01a5ebc5ffd11be78fcb9035767e8b0d593c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= 
Date: Tue, 12 May 2015 18:14:31 +0200
Subject: [PATCH] egl: add eglGetSyncAttrib (v2)

v2: - don't modify "value" in eglGetSyncAttribKHR after an error
- rename _egl_api::GetSyncAttribKHR -> GetSyncAttrib
- rename GetSyncAttribKHR_t -> GetSyncAttrib_t
- rename _eglGetSyncAttribKHR to _eglGetSyncAttrib
---
 src/egl/main/eglapi.c   | 25 ++---
 src/egl/main/eglapi.h   |  4 ++--
 src/egl/main/eglfallbacks.c |  2 +-
 src/egl/main/eglsync.c  |  4 ++--
 src/egl/main/eglsync.h  |  4 ++--
 5 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c
index 03a55f1..9696869 100644
--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -1427,8 +1427,8 @@ eglSignalSyncKHR(EGLDisplay dpy, EGLSync sync, EGLenum mode)
 }
 
 
-static EGLBoolean EGLAPIENTRY
-eglGetSyncAttribKHR(EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLint *value)
+EGLBoolean EGLAPIENTRY
+eglGetSyncAttrib(EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLAttrib *value)
 {
_EGLDisplay *disp = _eglLockDisplay(dpy);
_EGLSync *s = _eglLookupSync(sync, disp);
@@ -1438,12 +1438,30 @@ eglGetSyncAttribKHR(EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLint *valu
_EGL_CHECK_SYNC(disp, s, EGL_FALSE, drv);
assert(disp->Extensions.KHR_reusable_sync ||
   disp->Extensions.KHR_fence_sync);
-   ret = drv->API.GetSyncAttribKHR(drv, disp, s, attribute, value);
+   ret = drv->API.GetSyncAttrib(drv, disp, s, attribute, value);
 
RETURN_EGL_EVAL(disp, ret);
 }
 
 
+static EGLBoolean EGLAPIENTRY
+eglGetSyncAttribKHR(EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLint *value)
+{
+   EGLAttrib attrib = *value;
+   EGLBoolean result = eglGetSyncAttrib(dpy,

[Mesa-dev] [Bug 90621] Mesa fail to build from git

2015-05-28 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=90621

José Fonseca  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #1 from José Fonseca  ---
Should be fixed with

commit 09d6243aed016eed4518435c9885275dbb6d2aa9
Author: Jose Fonseca 
Date:   Thu May 28 10:11:36 2015 +0100

gallivm: Workaround LLVM PR23628.

Temporarily undefine DEBUG macro while including LLVM C++ headers,
leveraging the push/pop_macro pragmas, which are supported both by GCC
and MSVC.

https://bugs.freedesktop.org/show_bug.cgi?id=90621

Trivial.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC][PATCH] gallivm: Avoid conflict with LLVM's definition of DEBUG

2015-05-28 Thread Jose Fonseca

Hi Michel,

I'm sorry: I hadn't notice your review-request, and just pushed a 
similar fix.


I used push/pop_macro pragma instead. I think it's portable enough (at 
least gcc and msvc support it).  Your's is probable more portable 
though. I'm OK either way.



Like you said, this is a workaround. LLVM's reliance on DEBUG is a bad 
idea, as Mesa is not the only user of that macro -- several Microsoft 
SDKs use DEBUG for the same end --, so it's better not to rely on it.



Jose

On 28/05/15 09:09, Michel Dänzer wrote:

From: Michel Dänzer 

Fixes build failure with --enable-debug:

   Compiling src/gallium/auxiliary/gallivm/lp_bld_misc.cpp ...
In file included from llvm/include/llvm/Object/RelocVisitor.h:23:0,
  from llvm/include/llvm/DebugInfo/DIContext.h:21,
  from llvm/include/llvm/ExecutionEngine/RuntimeDyld.h:21,
  from llvm/include/llvm/ExecutionEngine/ExecutionEngine.h:18,
  from src/gallium/auxiliary/gallivm/lp_bld_misc.cpp:56:
llvm/include/llvm/Support/Debug.h:92:0: warning: "DEBUG" redefined
  #define DEBUG(X) DEBUG_WITH_TYPE(DEBUG_TYPE, X)
  ^
:0:0: note: this is the location of the previous definition
:0:7: error: expected identifier before numeric constant
llvm/include/llvm/Support/COFF.h:541:5: note: in expansion of macro ‘DEBUG’
  DEBUG,
  ^
:0:7: error: expected ‘}’ before numeric constant
llvm/include/llvm/Support/COFF.h:541:5: note: in expansion of macro ‘DEBUG’
  DEBUG,
  ^
:0:7: error: expected unqualified-id before numeric constant
llvm/include/llvm/Support/COFF.h:541:5: note: in expansion of macro ‘DEBUG’
  DEBUG,
  ^
In file included from llvm/include/llvm/Object/COFF.h:19:0,
  from llvm/include/llvm/Object/RelocVisitor.h:20,
  from llvm/include/llvm/DebugInfo/DIContext.h:21,
  from llvm/include/llvm/ExecutionEngine/RuntimeDyld.h:21,
  from llvm/include/llvm/ExecutionEngine/ExecutionEngine.h:18,
  from src/gallium/auxiliary/gallivm/lp_bld_misc.cpp:56:
llvm/include/llvm/Support/COFF.h:684:1: error: expected declaration before ‘}’ 
token
  } // End namespace llvm.
  ^

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90621
Bugzilla: https://llvm.org/bugs/show_bug.cgi?id=23628
Signed-off-by: Michel Dänzer 
---

Mesa/Gallium community: Does this look like a reasonable way to address
this issue?

LLVM community: Is it necessary to expose the LLVM DEBUG macro in public
headers?

  src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 14 ++
  1 file changed, 14 insertions(+)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp 
b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index ffed9e6..7763da7 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -50,6 +50,14 @@

  #include 

+/* Save our definition of DEBUG and undefine it to avoid conflict with LLVM's
+ * definition
+ */
+#ifdef DEBUG
+#define MESA_DEBUG DEBUG
+#undef DEBUG
+#endif
+
  #include 
  #include 
  #include 
@@ -70,6 +78,12 @@
  #include 
  #include 

+/* Undefine LLVM's definition of DEBUG and restore our definition (if any) */
+#undef DEBUG
+#ifdef MESA_DEBUG
+#define DEBUG MESA_DEBUG
+#endif
+
  #include "pipe/p_config.h"
  #include "util/u_debug.h"
  #include "util/u_cpu_detect.h"



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallivm: Do not use NoFramePointerElim with LLVM 3.7

2015-05-28 Thread Marek Olšák
The disassemble function does more harm than good and is often the
most often broken function after an LLVM update. Shouldn't we remove
it to make our lives easier?

Marek


On Wed, May 27, 2015 at 7:27 AM, Vinson Lee  wrote:
> TargetOptions::NoFramePointerElim was removed in llvm-3.7.0svn r238244
> "Remove NoFramePointerElim and NoFramePointerElimOverride from
> TargetOptions and remove ExecutionEngine's dependence on CodeGen. NFC."
>
> Signed-off-by: Vinson Lee 
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_debug.cpp | 2 ++
>  src/gallium/auxiliary/gallivm/lp_bld_misc.cpp  | 2 ++
>  2 files changed, 4 insertions(+)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp 
> b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
> index be3e834..76c302f 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
> @@ -277,8 +277,10 @@ disassemble(const void* func, llvm::raw_ostream & Out)
> options.StackAlignmentOverride = 4;
>  #endif
>  #if defined(DEBUG) || defined(PROFILE)
> +#if HAVE_LLVM < 0x0307
> options.NoFramePointerElim = true;
>  #endif
> +#endif
> OwningPtr TM(T->createTargetMachine(Triple, 
> sys::getHostCPUName(), "", options));
>
> /*
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp 
> b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
> index 5e8a634..ffed9e6 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
> @@ -439,8 +439,10 @@ 
> lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
>  #if HAVE_LLVM < 0x0304
> options.NoFramePointerElimNonLeaf = true;
>  #endif
> +#if HAVE_LLVM < 0x0307
> options.NoFramePointerElim = true;
>  #endif
> +#endif
>
> builder.setEngineKind(EngineKind::JIT)
>.setErrorStr(&Error)
> --
> 2.4.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/15] egl: add eglCreateImage

2015-05-28 Thread Marek Olšák
I don't understand. Using size_t should prevent the integer overflow.
Is there anything else wrong other than no fail path for malloc? I
also don't understand how calloc can help here.

Marek


On Wed, May 27, 2015 at 9:07 PM, Chad Versace  wrote:
> On Fri 15 May 2015, Emil Velikov wrote:
>> On 12/05/15 22:54, Marek Olšák wrote:
>> > From: Marek Olšák 
>> >
>> > ---
>> >  src/egl/main/eglapi.c | 38 ++
>> >  1 file changed, 38 insertions(+)
>> >
>> > diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c
>> > index 6457798..34a113b 100644
>> > --- a/src/egl/main/eglapi.c
>> > +++ b/src/egl/main/eglapi.c
>> > @@ -251,6 +251,30 @@ _eglUnlockDisplay(_EGLDisplay *dpy)
>> >  }
>> >
>> >
>> > +static EGLint *
>> > +_eglConvertAttribsToInt(const EGLAttrib *attr_list)
>> > +{
>> > +   EGLint *int_attribs = NULL;
>> > +
>> > +   /* Convert attributes from EGLAttrib[] to EGLint[] */
>> > +   if (attr_list) {
>> > +  int i, size = 0;
>> > +
>> > +  while (attr_list[size] != EGL_NONE)
>> > + size += 2;
>> > +
>> > +  if (size) {
>> > + size += 1; /* add space for EGL_NONE */
>> > + int_attribs = malloc(size * sizeof(int_attribs[0]));
>> > +
>> > + for (i = 0; i < size; i++)
>> > +int_attribs[i] = attr_list[i];
>
>> In the unlikely event that malloc fails, it'll be nice to not crash.
>
> NAK.
>
> There is a stack overflow vulnerability here, even when malloc succeeds.
> An attacker can pass a very large but valid `EGLint *attrib_list` into
> an EGL entry point, forcing the size calculation given to malloc to
> overflow to a small positive integer.  Then _eglConvertAttribsToInt will
> blithely copy a portion (perhaps most) of the attacker's attrib list onto
> the stack!
>
> To prevent the stack overflow, _eglConvertAttribsToInt should use
> calloc() and abort if allocation fails.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/7] tgsi: add support for geometry shader streams.

2015-05-28 Thread Marek Olšák
Hi,

The trailing comma is allowed in array and structure initializers and
is ignored according to the standard.

Marek

On Wed, May 27, 2015 at 8:19 PM, Michael Schellenberger Costa
 wrote:
> Hi,
>
> Am 27/05/2015 um 09:45 schrieb Dave Airlie:
>> This adds support to retrieve the primitive counts
>> for each stream, along with the offset for each
>> primitive into the output array.
>>
>> It also adds support for parsing the stream argument
>> to the emit and end instructions.
>>
>> Signed-off-by: Dave Airlie 
>> ---
>>  src/gallium/auxiliary/tgsi/tgsi_exec.c | 59 
>> ++
>>  src/gallium/auxiliary/tgsi/tgsi_exec.h | 14 ++--
>>  2 files changed, 58 insertions(+), 15 deletions(-)
>>
>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
>> b/src/gallium/auxiliary/tgsi/tgsi_exec.c
>> index a098a82..f080385 100644
>> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
>> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
>> @@ -706,7 +706,22 @@ enum tgsi_exec_datatype {
>>  #define TEMP_OUTPUT_C  TGSI_EXEC_TEMP_OUTPUT_C
>>  #define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
>>  #define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
>> -
>> +#define TEMP_PRIMITIVE_S1_I   TGSI_EXEC_TEMP_PRIMITIVE_S1_I
>> +#define TEMP_PRIMITIVE_S1_C   TGSI_EXEC_TEMP_PRIMITIVE_S1_C
>> +#define TEMP_PRIMITIVE_S2_I   TGSI_EXEC_TEMP_PRIMITIVE_S2_I
>> +#define TEMP_PRIMITIVE_S2_C   TGSI_EXEC_TEMP_PRIMITIVE_S2_C
>> +#define TEMP_PRIMITIVE_S3_I   TGSI_EXEC_TEMP_PRIMITIVE_S3_I
>> +#define TEMP_PRIMITIVE_S3_C   TGSI_EXEC_TEMP_PRIMITIVE_S3_C
>> +
>> +static const struct {
>> +   int idx;
>> +   int chan;
>> +} temp_prim_idxs[] = {
>> +   { TEMP_PRIMITIVE_I, TEMP_PRIMITIVE_C },
>> +   { TEMP_PRIMITIVE_S1_I, TEMP_PRIMITIVE_S1_C },
>> +   { TEMP_PRIMITIVE_S2_I, TEMP_PRIMITIVE_S2_C },
>> +   { TEMP_PRIMITIVE_S3_I, TEMP_PRIMITIVE_S3_C },
>> +};
>
> Is that last comma intentional?
>
> Regards
> Michael
>
>>
>>  /** The execution mask depends on the conditional mask and the loop mask */
>>  #define UPDATE_EXEC_MASK(MACH) \
>> @@ -1848,35 +1863,51 @@ exec_kill(struct tgsi_exec_machine *mach,
>>  }
>>
>>  static void
>> -emit_vertex(struct tgsi_exec_machine *mach)
>> +emit_vertex(struct tgsi_exec_machine *mach,
>> +const struct tgsi_full_instruction *inst)
>>  {
>> +   union tgsi_exec_channel r[1];
>> +   unsigned stream_id;
>> +   unsigned *prim_count;
>> /* FIXME: check for exec mask correctly
>> unsigned i;
>> for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
>>   if ((mach->ExecMask & (1 << i)))
>> */
>> +   IFETCH(&r[0], 0, TGSI_CHAN_X);
>> +   stream_id = r[0].u[0];
>> +   prim_count = 
>> &mach->Temps[temp_prim_idxs[stream_id].idx].xyzw[temp_prim_idxs[stream_id].chan].u[0];
>> if (mach->ExecMask) {
>> -  if 
>> (mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] 
>> >= mach->MaxOutputVertices)
>> +  if (mach->Primitives[stream_id][*prim_count] >= 
>> mach->MaxOutputVertices)
>>   return;
>>
>> +  mach->PrimitiveOffsets[stream_id][*prim_count] = 
>> mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0];
>>mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 
>> mach->NumOutputs;
>> -  
>> mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
>> +  mach->Primitives[stream_id][*prim_count]++;
>> }
>>  }
>>
>>  static void
>> -emit_primitive(struct tgsi_exec_machine *mach)
>> +emit_primitive(struct tgsi_exec_machine *mach,
>> +   const struct tgsi_full_instruction *inst)
>>  {
>> -   unsigned *prim_count = 
>> &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];
>> +   unsigned *prim_count;
>> +   union tgsi_exec_channel r[1];
>> +   unsigned stream_id = 0;
>> /* FIXME: check for exec mask correctly
>> unsigned i;
>> for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
>>   if ((mach->ExecMask & (1 << i)))
>> */
>> +   if (inst) {
>> +  IFETCH(&r[0], 0, TGSI_CHAN_X);
>> +  stream_id = r[0].u[0];
>> +   }
>> +   prim_count = 
>> &mach->Temps[temp_prim_idxs[stream_id].idx].xyzw[temp_prim_idxs[stream_id].chan].u[0];
>> if (mach->ExecMask) {
>>++(*prim_count);
>>debug_assert((*prim_count * mach->NumOutputs) < 
>> mach->MaxGeometryShaderOutputs);
>> -  mach->Primitives[*prim_count] = 0;
>> +  mach->Primitives[stream_id][*prim_count] = 0;
>> }
>>  }
>>
>> @@ -1885,9 +1916,9 @@ conditional_emit_primitive(struct tgsi_exec_machine 
>> *mach)
>>  {
>> if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
>>int emitted_verts =
>> - 
>> mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]];
>> + 
>> mach->Primitives[0][mach->Temps[temp_prim_idxs[0].idx].xyzw[temp_prim_idxs[0].chan].u[0]];
>>if (emitted_verts) {
>> - emit_primitive(mach);
>> + emit_primitive(mach, NULL);
>>}
>> }
>>  }
>> @@ -4596,11 +4627,11 @@ exec_instruction(
>>break;
>>
>> case TG

Re: [Mesa-dev] [PATCH] i965: Disable compaction for EOT send messages

2015-05-28 Thread Neil Roberts
Ben Widawsky  writes:

> AFAICT, there is no real way to make sure a send message with EOT is
> properly ignored from compact, nor can I see a way to actually encode
> EOT while compacting. Before the single send optimization we'd always
> bail because we hit the is_immediate && !is_compactable_immediate
> case. However, with single send, is_immediate is not true, and so we
> end up trying to compact the un-compactible.
>
> Without this, any compacting single send instruction will hang because
> the EOT isn't there. I am not sure how I didn't hit this when I
> originally enabled the optimization. I didn't check if some
> surrounding code changed.
>
> NOTE: This needs another piglit run or two before merge.
>
> I know Neil and Matt were both looking into this. I did a quick search
> and didn't see any patches out there to handle this. Please ignore if
> this has already been sent by someone. (Direct me to it and I will
> review it).
>
> Cc: Matt Turner 
> Cc: Neil Roberts 
> Cc: Mark Janes 
> Signed-off-by: Ben Widawsky 
> ---
>  src/mesa/drivers/dri/i965/brw_eu_compact.c | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c 
> b/src/mesa/drivers/dri/i965/brw_eu_compact.c
> index 69cb114..67f0b45 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
> +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
> @@ -849,6 +849,12 @@ set_3src_source_index(const struct brw_device_info 
> *devinfo,
>  static bool
>  has_unmapped_bits(const struct brw_device_info *devinfo, brw_inst *src)
>  {
> +   /* EOT can only be mapped on a send if the src1 is an immediate */

Can we really map EOT if the src1 is immediate?

> +   if ((brw_inst_opcode(devinfo, src) == BRW_OPCODE_SENDC ||
> +brw_inst_opcode(devinfo, src) == BRW_OPCODE_SEND) &&

Is there any reason to limit this to send and sendc? If there's no way
to map EOT why not just to if (brw_inst_eot(...)) return true?

For what it's worth, I ran my original patch¹ through shader-db and it
didn't make any difference, which is good.

Do we not also need to fix the problem with the destination register
being used as a temporary? This was mentioned by Matt on IRC. Maybe he
is looking into it?

Regards,
- Neil

1. https://github.com/bpeel/mesa/commit/6abda467fa3ad7060127

> +   brw_inst_eot(devinfo, src))
> +  return true;
> +
> /* Check for instruction bits that don't map to any of the fields of the
>  * compacted instruction.  The instruction cannot be compacted if any of
>  * them are set.  They overlap with:
> -- 
> 2.4.1
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 4/6] i965: Implement INTEL_performance_query extension

2015-05-28 Thread Petri Latvala

On 05/06/2015 03:53 AM, Robert Bragg wrote:

+static struct brw_perf_query_counter gen7_pipeline_statistics[] = {
+
+   STAT(IA_VERTICES_COUNT,   "N vertices submitted"),
+   STAT(IA_PRIMITIVES_COUNT, "N primitives submitted"),
+   STAT(VS_INVOCATION_COUNT, "N vertex shader invocations"),
+   STAT(HS_INVOCATION_COUNT, "N hull shader invocations"),
+   STAT(DS_INVOCATION_COUNT, "N domain shader invocations"),
+   STAT(GS_INVOCATION_COUNT, "N geometry shader invocations"),
+   STAT(GS_PRIMITIVES_COUNT, "N geometry shader primitives emitted"),
+   STAT(CL_INVOCATION_COUNT, "N primitives entering clipping"),
+   STAT(CL_PRIMITIVES_COUNT, "N primitives leaving clipping"),
+
+   /* Implement the "WaDividePSInvocationCountBy4:HSW,BDW" workaround:
+* "Invocation counter is 4 times actual.  WA: SW to divide HW reported
+*  PS Invocations value by 4."
+*
+* Prior to Haswell, invocation count was counted by the WM, and it
+* buggily counted invocations in units of subspans (2x2 unit). To get the
+* correct value, the CS multiplied this by 4. With HSW the logic moved,
+* and correctly emitted the number of pixel shader invocations, but,
+* whomever forgot to undo the multiply by 4.
+*/
+   SCALED_STAT(PS_INVOCATION_COUNT, 1, 4, "N fragment shader invocations"),
+
+   STAT(PS_DEPTH_COUNT,  "N z-pass fragments"),
+
+   NAMED_STAT(GEN7_SO_PRIM_STORAGE_NEEDED(0), "SO_NUM_PRIMS_WRITTEN (Stream 
0)",
+  "N stream-out (stream 0) primitives (total)"),
+   NAMED_STAT(GEN7_SO_PRIM_STORAGE_NEEDED(1), "SO_NUM_PRIMS_WRITTEN (Stream 
1)",
+  "N stream-out (stream 1) primitives (total)"),
+   NAMED_STAT(GEN7_SO_PRIM_STORAGE_NEEDED(2), "SO_NUM_PRIMS_WRITTEN (Stream 
2)",
+  "N stream-out (stream 2) primitives (total)"),
+   NAMED_STAT(GEN7_SO_PRIM_STORAGE_NEEDED(3), "SO_NUM_PRIMS_WRITTEN (Stream 
3)",
+  "N stream-out (stream 3) primitives (total)"),
+   NAMED_STAT(GEN7_SO_NUM_PRIMS_WRITTEN(0), "SO_NUM_PRIMS_WRITTEN (Stream 0)",
+  "N stream-out (stream 0) primitives (written)"),
+   NAMED_STAT(GEN7_SO_NUM_PRIMS_WRITTEN(1), "SO_NUM_PRIMS_WRITTEN (Stream 1)",
+  "N stream-out (stream 1) primitives (written)"),
+   NAMED_STAT(GEN7_SO_NUM_PRIMS_WRITTEN(2), "SO_NUM_PRIMS_WRITTEN (Stream 2)",
+  "N stream-out (stream 2) primitives (written)"),
+   NAMED_STAT(GEN7_SO_NUM_PRIMS_WRITTEN(3), "SO_NUM_PRIMS_WRITTEN (Stream 3)",
+  "N stream-out (stream 3) primitives (written)"),
+};
+


Copy-paste error? SO_PRIM_STORAGE_NEEDED gets reported as 
SO_NUM_PRIMS_WRITTEN.



--
Petri Latvala

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/15] egl: add eglCreateImage

2015-05-28 Thread Eirik Byrkjeflot Anonsen
Marek Olšák  writes:

> I don't understand. Using size_t should prevent the integer overflow.
> Is there anything else wrong other than no fail path for malloc? I
> also don't understand how calloc can help here.
>
> Marek

"size * sizeof(int_attribs[0])" may overflow and thus wrap to a small
number. Using calloc, you'd have "calloc(size, sizeof(int_attribs[0]))",
moving the overflow inside calloc(). So if calloc() does its job
properly, it will protect against it.

eirik


> On Wed, May 27, 2015 at 9:07 PM, Chad Versace  wrote:
>> On Fri 15 May 2015, Emil Velikov wrote:
>>> On 12/05/15 22:54, Marek Olšák wrote:
>>> > From: Marek Olšák 
>>> >
>>> > ---
>>> >  src/egl/main/eglapi.c | 38 ++
>>> >  1 file changed, 38 insertions(+)
>>> >
>>> > diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c
>>> > index 6457798..34a113b 100644
>>> > --- a/src/egl/main/eglapi.c
>>> > +++ b/src/egl/main/eglapi.c
>>> > @@ -251,6 +251,30 @@ _eglUnlockDisplay(_EGLDisplay *dpy)
>>> >  }
>>> >
>>> >
>>> > +static EGLint *
>>> > +_eglConvertAttribsToInt(const EGLAttrib *attr_list)
>>> > +{
>>> > +   EGLint *int_attribs = NULL;
>>> > +
>>> > +   /* Convert attributes from EGLAttrib[] to EGLint[] */
>>> > +   if (attr_list) {
>>> > +  int i, size = 0;
>>> > +
>>> > +  while (attr_list[size] != EGL_NONE)
>>> > + size += 2;
>>> > +
>>> > +  if (size) {
>>> > + size += 1; /* add space for EGL_NONE */
>>> > + int_attribs = malloc(size * sizeof(int_attribs[0]));
>>> > +
>>> > + for (i = 0; i < size; i++)
>>> > +int_attribs[i] = attr_list[i];
>>
>>> In the unlikely event that malloc fails, it'll be nice to not crash.
>>
>> NAK.
>>
>> There is a stack overflow vulnerability here, even when malloc succeeds.
>> An attacker can pass a very large but valid `EGLint *attrib_list` into
>> an EGL entry point, forcing the size calculation given to malloc to
>> overflow to a small positive integer.  Then _eglConvertAttribsToInt will
>> blithely copy a portion (perhaps most) of the attacker's attrib list onto
>> the stack!
>>
>> To prevent the stack overflow, _eglConvertAttribsToInt should use
>> calloc() and abort if allocation fails.
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] clover: implement CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE

2015-05-28 Thread Grigori Goronzy
Work-group size should always be aligned to subgroup size; this is a
basic requirement, otherwise some work-items will be no-operation.

It might make sense to refine the value according to a kernel's
resource usage, but that's a possible optimization for the future.
---
 src/gallium/state_trackers/clover/api/kernel.cpp  | 2 +-
 src/gallium/state_trackers/clover/core/device.cpp | 5 +
 src/gallium/state_trackers/clover/core/device.hpp | 1 +
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/clover/api/kernel.cpp 
b/src/gallium/state_trackers/clover/api/kernel.cpp
index 05cc392..857a152 100644
--- a/src/gallium/state_trackers/clover/api/kernel.cpp
+++ b/src/gallium/state_trackers/clover/api/kernel.cpp
@@ -169,7 +169,7 @@ clGetKernelWorkGroupInfo(cl_kernel d_kern, cl_device_id 
d_dev,
   break;
 
case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
-  buf.as_scalar() = 1;
+  buf.as_scalar() = dev.subgroup_size();
   break;
 
case CL_KERNEL_PRIVATE_MEM_SIZE:
diff --git a/src/gallium/state_trackers/clover/core/device.cpp 
b/src/gallium/state_trackers/clover/core/device.cpp
index 42b45b7..c42d1d2 100644
--- a/src/gallium/state_trackers/clover/core/device.cpp
+++ b/src/gallium/state_trackers/clover/core/device.cpp
@@ -185,6 +185,11 @@ device::max_block_size() const {
return { v.begin(), v.end() };
 }
 
+cl_uint
+device::subgroup_size() const {
+   return get_compute_param(pipe, PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0];
+}
+
 std::string
 device::device_name() const {
return pipe->get_name(pipe);
diff --git a/src/gallium/state_trackers/clover/core/device.hpp 
b/src/gallium/state_trackers/clover/core/device.hpp
index de5fc6b..2857847 100644
--- a/src/gallium/state_trackers/clover/core/device.hpp
+++ b/src/gallium/state_trackers/clover/core/device.hpp
@@ -67,6 +67,7 @@ namespace clover {
   bool has_doubles() const;
 
   std::vector max_block_size() const;
+  cl_uint subgroup_size() const;
   std::string device_name() const;
   std::string vendor_name() const;
   enum pipe_shader_ir ir_format() const;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] gallium: add PIPE_COMPUTE_CAP_SUBGROUP_SIZE

2015-05-28 Thread Grigori Goronzy
We need this to implement OpenCL's
CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE.
---
 src/gallium/docs/source/screen.rst |  2 ++
 src/gallium/drivers/ilo/ilo_screen.c   |  8 
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c |  4 
 src/gallium/drivers/radeon/r600_pipe_common.c  |  6 ++
 src/gallium/drivers/radeon/r600_pipe_common.h  | 20 
 src/gallium/include/pipe/p_defines.h   |  3 ++-
 6 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/src/gallium/docs/source/screen.rst 
b/src/gallium/docs/source/screen.rst
index 416ef2d..32c1e87 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -382,6 +382,8 @@ pipe_screen::get_compute_param.
   Value type: ``uint32_t``
 * ``PIPE_COMPUTE_CAP_IMAGES_SUPPORTED``: Whether images are supported
   non-zero means yes, zero means no. Value type: ``uint32_t``
+* ``PIPE_COMPUTE_CAP_SUBGROUP_SIZE``: The size of a basic execution unit in
+  threads. Also known as wavefront size, warp size or SIMD width.
 
 .. _pipe_bind:
 
diff --git a/src/gallium/drivers/ilo/ilo_screen.c 
b/src/gallium/drivers/ilo/ilo_screen.c
index b0fed73..f2a18b2 100644
--- a/src/gallium/drivers/ilo/ilo_screen.c
+++ b/src/gallium/drivers/ilo/ilo_screen.c
@@ -195,6 +195,7 @@ ilo_get_compute_param(struct pipe_screen *screen,
   uint32_t max_clock_frequency;
   uint32_t max_compute_units;
   uint32_t images_supported;
+  uint32_t subgroup_size;
} val;
const void *ptr;
int size;
@@ -286,6 +287,13 @@ ilo_get_compute_param(struct pipe_screen *screen,
   ptr = &val.images_supported;
   size = sizeof(val.images_supported);
   break;
+   case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+  /* best case is SIMD32 */
+  val.subgroup_size = 32;
+
+  ptr = &val.subgroup_size;
+  size = sizeof(val.subgroup_size);
+  break;
default:
   ptr = NULL;
   size = 0;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 1ca997a..f6bef83 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -340,6 +340,7 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
   enum pipe_compute_cap param, void *data)
 {
uint64_t *data64 = (uint64_t *)data;
+   uint32_t *data32 = (uint32_t *)data;
const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass;
 
switch (param) {
@@ -371,6 +372,9 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
   data64[0] = 4096;
   return 8;
+   case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+  data32[0] = 32;
+  return 4;
default:
   return 0;
}
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 42e681d..5494cb3 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -637,6 +637,12 @@ static int r600_get_compute_param(struct pipe_screen 
*screen,
return sizeof(uint32_t);
case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
break; /* unused */
+   case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+   if (ret) {
+   uint32_t *subgroup_size = ret;
+   *subgroup_size = r600_wavefront_size(rscreen->family);
+   }
+   return sizeof(uint32_t);
}
 
 fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 6ce81d3..51fd016 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -570,6 +570,26 @@ static inline unsigned r600_tex_aniso_filter(unsigned 
filter)
 /* else */return 4;
 }
 
+static inline unsigned r600_wavefront_size(enum radeon_family family)
+{
+   switch (family) {
+   case CHIP_RV610:
+   case CHIP_RS780:
+   case CHIP_RV620:
+   case CHIP_RS880:
+   return 16;
+   case CHIP_RV630:
+   case CHIP_RV635:
+   case CHIP_RV730:
+   case CHIP_RV710:
+   case CHIP_PALM:
+   case CHIP_CEDAR:
+   return 32;
+   default:
+   return 64;
+   }
+}
+
 #define COMPUTE_DBG(rscreen, fmt, args...) \
do { \
if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, 
fmt, ##args); \
diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 8fabf5e..b50ae2b 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -699,7 +699,8 @@ enum pipe_compute_cap
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY,
PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS,
-   PIPE_COMPUTE_CAP_

Re: [Mesa-dev] [PATCH 1/2] configure.ac: enable building GLES1 and GLES2 by default

2015-05-28 Thread Emil Velikov
On 27/05/15 16:59, Matt Turner wrote:
> On Wed, May 27, 2015 at 4:53 AM, Emil Velikov  
> wrote:
>> On 27 May 2015 at 11:23, Dave Airlie  wrote:
 Wow, I hadn't expected such a hateful comment on GLES1.

 Does anyone else want to convince me that GLES1 should burn in hell?
>>>
>>> So I dug around,
>>>
>>> commit 4c06853833996d990eb76b195ca5d6838c6f3d6b
>>> Author: Adam Jackson 
>>> Date:   Wed May 8 18:03:21 2013 -0400
>>>
>>> Switch to Mesa master (pre 9.2)
>>>
>>> - Fix llvmpipe on big-endian and enable llvmpipe everywhere
>>> - Build vdpau drivers for r600/radeonsi/nouveau
>>> - Enable hardware floating-point texture support
>>> - Drop GLESv1, nothing's using it, let's not start
>>>
>>> So at least in Fedora 2 years ago, we realised there was no GLES1
>>> users in the distro,
>>> and we didn't want to encourage any.
>>>
>>> I suppose some users might exist outside the classic Linux distro
>>> world. i.e. android, embedded land.
>>>
>> At least three other distros have GLES1 - Arch, Debian and OpenSUSE.
>> So imho one should just leave the decision to depreciate/kill it off
>> to the distros ?
> 
> No one is suggesting deleting GLES1.
> 
Ack - I'm aware of that. I'm not sure which part of my statement led
came out as the opposite. If you can point me out as such that'll be
appreciated.

> I think distributions are going to inevitably ship things they have no
> reason to, regardless of our defaults, often because they don't know.
> The best we can do is guide their hands. If GLES1 is unused, we should
> note that in configure.ac to better communicate it to the
> distributions.
> 
No objection on the hand-holding, but the idea that Mesa knows which
distro does not ship/have GLES1 compatible software sounds a bit
strange. So I was pointing out that some (be that by mistake or not)
still have/use it.

At the end of the day, do as you please. The argument of "distro X
defaults to Y, so mesa should do the same", just seems strange to me.

Similar to Marek, I do wonder about the hostility towards GLES1. Sure it
is a bit old, and the API is not the most flexible/useful but there are
dozens (hundreds?) of projects that fit the criteria :)

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallivm: Do not use NoFramePointerElim with LLVM 3.7

2015-05-28 Thread Roland Scheidegger
I use this sometimes for debugging things in llvmpipe, so I don't think
that's a good option. Without it getting the actual assembly of the
shaders would be quite annoying.
It is unfortunately true that (due to the heavy use of the unstable C++
API) it breaks very often, but as long as it isn't possible to do that
with llvm's C API it seems unavoidable.

Roland

Am 28.05.2015 um 12:01 schrieb Marek Olšák:
> The disassemble function does more harm than good and is often the
> most often broken function after an LLVM update. Shouldn't we remove
> it to make our lives easier?
> 
> Marek
> 
> 
> On Wed, May 27, 2015 at 7:27 AM, Vinson Lee  wrote:
>> TargetOptions::NoFramePointerElim was removed in llvm-3.7.0svn r238244
>> "Remove NoFramePointerElim and NoFramePointerElimOverride from
>> TargetOptions and remove ExecutionEngine's dependence on CodeGen. NFC."
>>
>> Signed-off-by: Vinson Lee 
>> ---
>>  src/gallium/auxiliary/gallivm/lp_bld_debug.cpp | 2 ++
>>  src/gallium/auxiliary/gallivm/lp_bld_misc.cpp  | 2 ++
>>  2 files changed, 4 insertions(+)
>>
>> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp 
>> b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
>> index be3e834..76c302f 100644
>> --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
>> +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
>> @@ -277,8 +277,10 @@ disassemble(const void* func, llvm::raw_ostream & Out)
>> options.StackAlignmentOverride = 4;
>>  #endif
>>  #if defined(DEBUG) || defined(PROFILE)
>> +#if HAVE_LLVM < 0x0307
>> options.NoFramePointerElim = true;
>>  #endif
>> +#endif
>> OwningPtr TM(T->createTargetMachine(Triple, 
>> sys::getHostCPUName(), "", options));
>>
>> /*
>> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp 
>> b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
>> index 5e8a634..ffed9e6 100644
>> --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
>> +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
>> @@ -439,8 +439,10 @@ 
>> lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
>>  #if HAVE_LLVM < 0x0304
>> options.NoFramePointerElimNonLeaf = true;
>>  #endif
>> +#if HAVE_LLVM < 0x0307
>> options.NoFramePointerElim = true;
>>  #endif
>> +#endif
>>
>> builder.setEngineKind(EngineKind::JIT)
>>.setErrorStr(&Error)
>> --
>> 2.4.1
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddev&d=BQIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=Vjtt0vs_iqoI31UfJxBl7yv9I2FeiaeAYgMTLKRBc_I&m=oJN3RQI6auvaC8t4GPlTvSe8h-WZYhBL7xSRhbwFgDI&s=Ht4F7y_dMUmK34b_5vMdCKunj7WvDKU8-8HWca89Ei0&e=
>>  
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddev&d=BQIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=Vjtt0vs_iqoI31UfJxBl7yv9I2FeiaeAYgMTLKRBc_I&m=oJN3RQI6auvaC8t4GPlTvSe8h-WZYhBL7xSRhbwFgDI&s=Ht4F7y_dMUmK34b_5vMdCKunj7WvDKU8-8HWca89Ei0&e=
>  
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/15] egl: add eglCreateImage

2015-05-28 Thread Marek Olšák
A new patch is attached. Please review.

Marek

On Wed, May 27, 2015 at 9:07 PM, Chad Versace  wrote:
> On Fri 15 May 2015, Emil Velikov wrote:
>> On 12/05/15 22:54, Marek Olšák wrote:
>> > From: Marek Olšák 
>> >
>> > ---
>> >  src/egl/main/eglapi.c | 38 ++
>> >  1 file changed, 38 insertions(+)
>> >
>> > diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c
>> > index 6457798..34a113b 100644
>> > --- a/src/egl/main/eglapi.c
>> > +++ b/src/egl/main/eglapi.c
>> > @@ -251,6 +251,30 @@ _eglUnlockDisplay(_EGLDisplay *dpy)
>> >  }
>> >
>> >
>> > +static EGLint *
>> > +_eglConvertAttribsToInt(const EGLAttrib *attr_list)
>> > +{
>> > +   EGLint *int_attribs = NULL;
>> > +
>> > +   /* Convert attributes from EGLAttrib[] to EGLint[] */
>> > +   if (attr_list) {
>> > +  int i, size = 0;
>> > +
>> > +  while (attr_list[size] != EGL_NONE)
>> > + size += 2;
>> > +
>> > +  if (size) {
>> > + size += 1; /* add space for EGL_NONE */
>> > + int_attribs = malloc(size * sizeof(int_attribs[0]));
>> > +
>> > + for (i = 0; i < size; i++)
>> > +int_attribs[i] = attr_list[i];
>
>> In the unlikely event that malloc fails, it'll be nice to not crash.
>
> NAK.
>
> There is a stack overflow vulnerability here, even when malloc succeeds.
> An attacker can pass a very large but valid `EGLint *attrib_list` into
> an EGL entry point, forcing the size calculation given to malloc to
> overflow to a small positive integer.  Then _eglConvertAttribsToInt will
> blithely copy a portion (perhaps most) of the attacker's attrib list onto
> the stack!
>
> To prevent the stack overflow, _eglConvertAttribsToInt should use
> calloc() and abort if allocation fails.
From acaa7cefac908d31f50c197f02413249aa3bf517 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= 
Date: Tue, 12 May 2015 20:42:05 +0200
Subject: [PATCH 1/2] egl: add eglCreateImage (v2)

v2: - use calloc
- return BAD_ALLOC if calloc fails
---
 src/egl/main/eglapi.c | 42 ++
 1 file changed, 42 insertions(+)

diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c
index 9696869..e4fd44e 100644
--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -251,6 +251,31 @@ _eglUnlockDisplay(_EGLDisplay *dpy)
 }
 
 
+static EGLint *
+_eglConvertAttribsToInt(const EGLAttrib *attr_list)
+{
+   EGLint *int_attribs = NULL;
+
+   /* Convert attributes from EGLAttrib[] to EGLint[] */
+   if (attr_list) {
+  int i, size = 0;
+
+  while (attr_list[size] != EGL_NONE)
+ size += 2;
+
+  size += 1; /* add space for EGL_NONE */
+
+  int_attribs = calloc(size, sizeof(int_attribs[0]));
+  if (!int_attribs)
+ return NULL;
+
+  for (i = 0; i < size; i++)
+ int_attribs[i] = attr_list[i];
+   }
+   return int_attribs;
+}
+
+
 /**
  * This is typically the first EGL function that an application calls.
  * It associates a private _EGLDisplay object to the native display.
@@ -1255,6 +1280,22 @@ eglCreateImageKHR(EGLDisplay dpy, EGLContext ctx, EGLenum target,
 }
 
 
+EGLImage EGLAPIENTRY
+eglCreateImage(EGLDisplay dpy, EGLContext ctx, EGLenum target,
+   EGLClientBuffer buffer, const EGLAttrib *attr_list)
+{
+   EGLImage image;
+   EGLint *int_attribs = _eglConvertAttribsToInt(attr_list);
+
+   if (attr_list && !int_attribs)
+  RETURN_EGL_ERROR(NULL, EGL_BAD_ALLOC, EGL_NO_IMAGE);
+
+   image = eglCreateImageKHR(dpy, ctx, target, buffer, int_attribs);
+   free(int_attribs);
+   return image;
+}
+
+
 EGLBoolean EGLAPIENTRY
 eglDestroyImage(EGLDisplay dpy, EGLImage image)
 {
@@ -1751,6 +1792,7 @@ eglGetProcAddress(const char *procname)
   { "eglClientWaitSync", (_EGLProc) eglClientWaitSync },
   { "eglGetSyncAttrib", (_EGLProc) eglGetSyncAttrib },
   { "eglWaitSync", (_EGLProc) eglWaitSync },
+  { "eglCreateImage", (_EGLProc) eglCreateImage },
   { "eglDestroyImage", (_EGLProc) eglDestroyImage },
 #ifdef EGL_MESA_drm_display
   { "eglGetDRMDisplayMESA", (_EGLProc) eglGetDRMDisplayMESA },
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallivm: Do not use NoFramePointerElim with LLVM 3.7

2015-05-28 Thread Jose Fonseca
For the record, we also feel the pain, and I did look into the LLVM C 
API for disassembly, but the problem is that we only get the function 
start address -- we don't know where it finishes --, and the LLVM C API 
doesn't expose enough capabilities to describe jump/return instructions, 
which we rely upon to guess where a function ends.


Another approach would be to get the functions start-stop addresses 
separately -- via the DelegatingJITMemoryManager somehow.




BTW, this LLVM breakage has nothing to do with the disassembly. It just 
happens to be  on the same file.  It would have been broken even without 
it.


In fact, doesn't this mean that we'll stumble into 
http://llvm.org/PR21435 again?
We must disable frame pointer elimination *somehow*, given upstream LLVM 
didn't bother fixing the bug.



Jose


On 28/05/15 12:56, Roland Scheidegger wrote:

I use this sometimes for debugging things in llvmpipe, so I don't think
that's a good option. Without it getting the actual assembly of the
shaders would be quite annoying.
It is unfortunately true that (due to the heavy use of the unstable C++
API) it breaks very often, but as long as it isn't possible to do that
with llvm's C API it seems unavoidable.

Roland

Am 28.05.2015 um 12:01 schrieb Marek Olšák:

The disassemble function does more harm than good and is often the
most often broken function after an LLVM update. Shouldn't we remove
it to make our lives easier?

Marek


On Wed, May 27, 2015 at 7:27 AM, Vinson Lee  wrote:

TargetOptions::NoFramePointerElim was removed in llvm-3.7.0svn r238244
"Remove NoFramePointerElim and NoFramePointerElimOverride from
TargetOptions and remove ExecutionEngine's dependence on CodeGen. NFC."

Signed-off-by: Vinson Lee 
---
  src/gallium/auxiliary/gallivm/lp_bld_debug.cpp | 2 ++
  src/gallium/auxiliary/gallivm/lp_bld_misc.cpp  | 2 ++
  2 files changed, 4 insertions(+)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp 
b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
index be3e834..76c302f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
@@ -277,8 +277,10 @@ disassemble(const void* func, llvm::raw_ostream & Out)
 options.StackAlignmentOverride = 4;
  #endif
  #if defined(DEBUG) || defined(PROFILE)
+#if HAVE_LLVM < 0x0307
 options.NoFramePointerElim = true;
  #endif
+#endif
 OwningPtr TM(T->createTargetMachine(Triple, sys::getHostCPUName(), 
"", options));

 /*
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp 
b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index 5e8a634..ffed9e6 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -439,8 +439,10 @@ 
lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
  #if HAVE_LLVM < 0x0304
 options.NoFramePointerElimNonLeaf = true;
  #endif
+#if HAVE_LLVM < 0x0307
 options.NoFramePointerElim = true;
  #endif
+#endif

 builder.setEngineKind(EngineKind::JIT)
.setErrorStr(&Error)
--
2.4.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddev&d=BQIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=Vjtt0vs_iqoI31UfJxBl7yv9I2FeiaeAYgMTLKRBc_I&m=oJN3RQI6auvaC8t4GPlTvSe8h-WZYhBL7xSRhbwFgDI&s=Ht4F7y_dMUmK34b_5vMdCKunj7WvDKU8-8HWca89Ei0&e=

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddev&d=BQIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=Vjtt0vs_iqoI31UfJxBl7yv9I2FeiaeAYgMTLKRBc_I&m=oJN3RQI6auvaC8t4GPlTvSe8h-WZYhBL7xSRhbwFgDI&s=Ht4F7y_dMUmK34b_5vMdCKunj7WvDKU8-8HWca89Ei0&e=



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddev&d=BQIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=zfmBZnnVGHeYde45pMKNnVyzeaZbdIqVLprmZCM2zzE&m=j--Khc7uwEjgb39x21uUdm65cn47ORER1E2LLAosy28&s=KOd3_0hp1Z6nppy1NmIibOIP05Q_sl7IY3G7yoFrt9M&e=



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 13/15] egl: add new platform functions

2015-05-28 Thread Marek Olšák
A new patch is attached. Please review.

Marek

On Wed, May 13, 2015 at 12:54 AM, Marek Olšák  wrote:
> From: Marek Olšák 
>
> These are just wrappers around the existing extension functions.
> ---
>  src/egl/main/eglapi.c | 45 +
>  1 file changed, 45 insertions(+)
>
> diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c
> index 34a113b..e3b8ec2 100644
> --- a/src/egl/main/eglapi.c
> +++ b/src/egl/main/eglapi.c
> @@ -325,6 +325,18 @@ eglGetPlatformDisplayEXT(EGLenum platform, void 
> *native_display,
> return _eglGetDisplayHandle(dpy);
>  }
>
> +EGLDisplay EGLAPIENTRY
> +eglGetPlatformDisplay(EGLenum platform, void *native_display,
> +  const EGLAttrib *attrib_list)
> +{
> +   EGLDisplay display;
> +   EGLint *int_attribs = _eglConvertAttribsToInt(attrib_list);
> +
> +   display = eglGetPlatformDisplayEXT(platform, native_display, int_attribs);
> +   free(int_attribs);
> +   return display;
> +}
> +
>  /**
>   * Copy the extension into the string and update the string pointer.
>   */
> @@ -752,6 +764,21 @@ eglCreatePlatformWindowSurfaceEXT(EGLDisplay dpy, 
> EGLConfig config,
>  }
>
>
> +EGLSurface EGLAPIENTRY
> +eglCreatePlatformWindowSurface(EGLDisplay dpy, EGLConfig config,
> +   void *native_window,
> +   const EGLAttrib *attrib_list)
> +{
> +   EGLSurface surface;
> +   EGLint *int_attribs = _eglConvertAttribsToInt(attrib_list);
> +
> +   surface = eglCreatePlatformWindowSurfaceEXT(dpy, config, native_window,
> +   int_attribs);
> +   free(int_attribs);
> +   return surface;
> +}
> +
> +
>  static EGLSurface
>  _eglCreatePixmapSurfaceCommon(_EGLDisplay *disp, EGLConfig config,
>void *native_pixmap, const EGLint *attrib_list)
> @@ -806,6 +833,21 @@ eglCreatePlatformPixmapSurfaceEXT(EGLDisplay dpy, 
> EGLConfig config,
>
>
>  EGLSurface EGLAPIENTRY
> +eglCreatePlatformPixmapSurface(EGLDisplay dpy, EGLConfig config,
> +   void *native_pixmap,
> +   const EGLAttrib *attrib_list)
> +{
> +   EGLSurface surface;
> +   EGLint *int_attribs = _eglConvertAttribsToInt(attrib_list);
> +
> +   surface = eglCreatePlatformPixmapSurfaceEXT(dpy, config, native_pixmap,
> +   int_attribs);
> +   free(int_attribs);
> +   return surface;
> +}
> +
> +
> +EGLSurface EGLAPIENTRY
>  eglCreatePbufferSurface(EGLDisplay dpy, EGLConfig config,
>  const EGLint *attrib_list)
>  {
> @@ -1188,6 +1230,9 @@ eglGetProcAddress(const char *procname)
>{ "eglGetSyncAttrib", (_EGLProc) eglGetSyncAttrib },
>{ "eglCreateImage", (_EGLProc) eglCreateImage },
>{ "eglDestroyImage", (_EGLProc) eglDestroyImage },
> +  { "eglGetPlatformDisplay", (_EGLProc) eglGetPlatformDisplay },
> +  { "eglCreatePlatformWindowSurface", (_EGLProc) 
> eglCreatePlatformWindowSurface },
> +  { "eglCreatePlatformPixmapSurface", (_EGLProc) 
> eglCreatePlatformPixmapSurface },
>{ "eglWaitSync", (_EGLProc) eglWaitSync },
>  #endif /* _EGL_GET_CORE_ADDRESSES */
>  #ifdef EGL_MESA_drm_display
> --
> 2.1.0
>
From e97b020d22500f6876563e62076a9ee80622f395 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= 
Date: Tue, 12 May 2015 21:06:41 +0200
Subject: [PATCH 2/2] egl: add new platform functions (v2)

These are just wrappers around the existing extension functions.

v2: return BAD_ALLOC if _eglConvertAttribsToInt fails
---
 src/egl/main/eglapi.c | 54 +++
 1 file changed, 54 insertions(+)

diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c
index e4fd44e..b9e37c4 100644
--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -326,6 +326,21 @@ eglGetPlatformDisplayEXT(EGLenum platform, void *native_display,
return _eglGetDisplayHandle(dpy);
 }
 
+EGLDisplay EGLAPIENTRY
+eglGetPlatformDisplay(EGLenum platform, void *native_display,
+  const EGLAttrib *attrib_list)
+{
+   EGLDisplay display;
+   EGLint *int_attribs = _eglConvertAttribsToInt(attrib_list);
+
+   if (attrib_list && !int_attribs)
+  RETURN_EGL_ERROR(NULL, EGL_BAD_ALLOC, NULL);
+
+   display = eglGetPlatformDisplayEXT(platform, native_display, int_attribs);
+   free(int_attribs);
+   return display;
+}
+
 /**
  * Copy the extension into the string and update the string pointer.
  */
@@ -752,6 +767,24 @@ eglCreatePlatformWindowSurfaceEXT(EGLDisplay dpy, EGLConfig config,
 }
 
 
+EGLSurface EGLAPIENTRY
+eglCreatePlatformWindowSurface(EGLDisplay dpy, EGLConfig config,
+   void *native_window,
+   const EGLAttrib *attrib_list)
+{
+   EGLSurface surface;
+   EGLint *int_attribs = _eglConvertAttribsToInt(attrib_list);
+
+   if (attrib_list && !int_attribs)
+  RETURN_EGL_ERROR(NULL, EGL_BAD_ALLOC, EGL_

Re: [Mesa-dev] [PATCH v2] nir: Fix output swizzle in get_mul_for_src

2015-05-28 Thread Jason Ekstrand
Reviewed-by: Jason Ekstrand 
On May 28, 2015 12:06 AM, "Iago Toral Quiroga"  wrote:

> When we compute the output swizzle we want to consider the number of
> components in the add operation. So far we were using the writemask
> of the multiplication for this instead, which is not correct.
> ---
>  src/glsl/nir/nir_opt_peephole_ffma.c | 19 +--
>  1 file changed, 9 insertions(+), 10 deletions(-)
>
> diff --git a/src/glsl/nir/nir_opt_peephole_ffma.c
> b/src/glsl/nir/nir_opt_peephole_ffma.c
> index b430eac..798506b 100644
> --- a/src/glsl/nir/nir_opt_peephole_ffma.c
> +++ b/src/glsl/nir/nir_opt_peephole_ffma.c
> @@ -73,7 +73,8 @@ are_all_uses_fadd(nir_ssa_def *def)
>  }
>
>  static nir_alu_instr *
> -get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool
> *abs)
> +get_mul_for_src(nir_alu_src *src, int num_components,
> +uint8_t swizzle[4], bool *negate, bool *abs)
>  {
> assert(src->src.is_ssa && !src->abs && !src->negate);
>
> @@ -85,16 +86,16 @@ get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4],
> bool *negate, bool *abs)
> switch (alu->op) {
> case nir_op_imov:
> case nir_op_fmov:
> -  alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
> +  alu = get_mul_for_src(&alu->src[0], num_components, swizzle,
> negate, abs);
>break;
>
> case nir_op_fneg:
> -  alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
> +  alu = get_mul_for_src(&alu->src[0], num_components, swizzle,
> negate, abs);
>*negate = !*negate;
>break;
>
> case nir_op_fabs:
> -  alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
> +  alu = get_mul_for_src(&alu->src[0], num_components, swizzle,
> negate, abs);
>*negate = false;
>*abs = true;
>break;
> @@ -115,12 +116,8 @@ get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4],
> bool *negate, bool *abs)
> if (!alu)
>return NULL;
>
> -   for (unsigned i = 0; i < 4; i++) {
> -  if (!(alu->dest.write_mask & (1 << i)))
> - break;
> -
> +   for (unsigned i = 0; i < num_components; i++)
>swizzle[i] = swizzle[src->swizzle[i]];
> -   }
>
> return alu;
>  }
> @@ -160,7 +157,9 @@ nir_opt_peephole_ffma_block(nir_block *block, void
> *void_state)
>   negate = false;
>   abs = false;
>
> - mul = get_mul_for_src(&add->src[add_mul_src], swizzle, &negate,
> &abs);
> + mul = get_mul_for_src(&add->src[add_mul_src],
> +   add->dest.dest.ssa.num_components,
> +   swizzle, &negate, &abs);
>
>   if (mul != NULL)
>  break;
> --
> 1.9.1
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Disable compaction for EOT send messages

2015-05-28 Thread Matt Turner
On Thu, May 28, 2015 at 3:31 AM, Neil Roberts  wrote:
> Ben Widawsky  writes:
>
>> AFAICT, there is no real way to make sure a send message with EOT is
>> properly ignored from compact, nor can I see a way to actually encode
>> EOT while compacting. Before the single send optimization we'd always
>> bail because we hit the is_immediate && !is_compactable_immediate
>> case. However, with single send, is_immediate is not true, and so we
>> end up trying to compact the un-compactible.
>>
>> Without this, any compacting single send instruction will hang because
>> the EOT isn't there. I am not sure how I didn't hit this when I
>> originally enabled the optimization. I didn't check if some
>> surrounding code changed.
>>
>> NOTE: This needs another piglit run or two before merge.
>>
>> I know Neil and Matt were both looking into this. I did a quick search
>> and didn't see any patches out there to handle this. Please ignore if
>> this has already been sent by someone. (Direct me to it and I will
>> review it).
>>
>> Cc: Matt Turner 
>> Cc: Neil Roberts 
>> Cc: Mark Janes 
>> Signed-off-by: Ben Widawsky 
>> ---
>>  src/mesa/drivers/dri/i965/brw_eu_compact.c | 6 ++
>>  1 file changed, 6 insertions(+)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c 
>> b/src/mesa/drivers/dri/i965/brw_eu_compact.c
>> index 69cb114..67f0b45 100644
>> --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
>> +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
>> @@ -849,6 +849,12 @@ set_3src_source_index(const struct brw_device_info 
>> *devinfo,
>>  static bool
>>  has_unmapped_bits(const struct brw_device_info *devinfo, brw_inst *src)
>>  {
>> +   /* EOT can only be mapped on a send if the src1 is an immediate */
>
> Can we really map EOT if the src1 is immediate?

That's a good question. I think in practice we can but only under some
very rare circumstances.

I'd like to study the compaction code today a little and try to
understand how EOT is falling through the cracks.

>> +   if ((brw_inst_opcode(devinfo, src) == BRW_OPCODE_SENDC ||
>> +brw_inst_opcode(devinfo, src) == BRW_OPCODE_SEND) &&
>
> Is there any reason to limit this to send and sendc? If there's no way
> to map EOT why not just to if (brw_inst_eot(...)) return true?
>
> For what it's worth, I ran my original patch¹ through shader-db and it
> didn't make any difference, which is good.
>
> Do we not also need to fix the problem with the destination register
> being used as a temporary? This was mentioned by Matt on IRC. Maybe he
> is looking into it?

Yes, we need to do that as well. I'm looking into it.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallivm: Do not use NoFramePointerElim with LLVM 3.7

2015-05-28 Thread Roland Scheidegger
Am 28.05.2015 um 14:28 schrieb Jose Fonseca:
> For the record, we also feel the pain, and I did look into the LLVM C
> API for disassembly, but the problem is that we only get the function
> start address -- we don't know where it finishes --, and the LLVM C API
> doesn't expose enough capabilities to describe jump/return instructions,
> which we rely upon to guess where a function ends.
> 
> Another approach would be to get the functions start-stop addresses
> separately -- via the DelegatingJITMemoryManager somehow.
> 
> 
> 
> BTW, this LLVM breakage has nothing to do with the disassembly. It just
> happens to be  on the same file.  It would have been broken even without
> it.
> 
> In fact, doesn't this mean that we'll stumble into
> http://llvm.org/PR21435 again?
> We must disable frame pointer elimination *somehow*, given upstream LLVM
> didn't bother fixing the bug.


Oh indeed. I'm not sure if this still happens it seems the decision to
eliminate frame pointer is now per function.
http://permalink.gmane.org/gmane.comp.compilers.llvm.cvs/248931

Not sure though if this means that bug got fixed as a side effect or
not, if not we sure need to do something about it.

Roland

> 
> 
> Jose
> 
> 
> On 28/05/15 12:56, Roland Scheidegger wrote:
>> I use this sometimes for debugging things in llvmpipe, so I don't think
>> that's a good option. Without it getting the actual assembly of the
>> shaders would be quite annoying.
>> It is unfortunately true that (due to the heavy use of the unstable C++
>> API) it breaks very often, but as long as it isn't possible to do that
>> with llvm's C API it seems unavoidable.
>>
>> Roland
>>
>> Am 28.05.2015 um 12:01 schrieb Marek Olšák:
>>> The disassemble function does more harm than good and is often the
>>> most often broken function after an LLVM update. Shouldn't we remove
>>> it to make our lives easier?
>>>
>>> Marek
>>>
>>>
>>> On Wed, May 27, 2015 at 7:27 AM, Vinson Lee 
>>> wrote:
 TargetOptions::NoFramePointerElim was removed in llvm-3.7.0svn r238244
 "Remove NoFramePointerElim and NoFramePointerElimOverride from
 TargetOptions and remove ExecutionEngine's dependence on CodeGen. NFC."

 Signed-off-by: Vinson Lee 
 ---
   src/gallium/auxiliary/gallivm/lp_bld_debug.cpp | 2 ++
   src/gallium/auxiliary/gallivm/lp_bld_misc.cpp  | 2 ++
   2 files changed, 4 insertions(+)

 diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
 b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
 index be3e834..76c302f 100644
 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
 +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
 @@ -277,8 +277,10 @@ disassemble(const void* func, llvm::raw_ostream
 & Out)
  options.StackAlignmentOverride = 4;
   #endif
   #if defined(DEBUG) || defined(PROFILE)
 +#if HAVE_LLVM < 0x0307
  options.NoFramePointerElim = true;
   #endif
 +#endif
  OwningPtr TM(T->createTargetMachine(Triple,
 sys::getHostCPUName(), "", options));

  /*
 diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
 b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
 index 5e8a634..ffed9e6 100644
 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
 +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
 @@ -439,8 +439,10 @@
 lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
   #if HAVE_LLVM < 0x0304
  options.NoFramePointerElimNonLeaf = true;
   #endif
 +#if HAVE_LLVM < 0x0307
  options.NoFramePointerElim = true;
   #endif
 +#endif

  builder.setEngineKind(EngineKind::JIT)
 .setErrorStr(&Error)
 -- 
 2.4.1

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddev&d=BQIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=Vjtt0vs_iqoI31UfJxBl7yv9I2FeiaeAYgMTLKRBc_I&m=oJN3RQI6auvaC8t4GPlTvSe8h-WZYhBL7xSRhbwFgDI&s=Ht4F7y_dMUmK34b_5vMdCKunj7WvDKU8-8HWca89Ei0&e=

>>> ___
>>> mesa-dev mailing list
>>> mesa-dev@lists.freedesktop.org
>>> https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddev&d=BQIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=Vjtt0vs_iqoI31UfJxBl7yv9I2FeiaeAYgMTLKRBc_I&m=oJN3RQI6auvaC8t4GPlTvSe8h-WZYhBL7xSRhbwFgDI&s=Ht4F7y_dMUmK34b_5vMdCKunj7WvDKU8-8HWca89Ei0&e=
>>>
>>>
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddev&d=BQIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=zfmBZnnVGHeYde45pMKNnVyzeaZbdIqVLprmZCM2zzE&m=j--Khc7uwEjgb39x21uUdm65cn47ORER1E2LLAosy28&s=KOd3_0hp1Z6nppy1NmIibOIP05Q_sl7I

[Mesa-dev] [PATCH] gallivm: Disable frame pointer omission on LLVM 3.7.

2015-05-28 Thread Jose Fonseca
---
 src/gallium/auxiliary/gallivm/lp_bld_init.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c 
b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 7b906c2..384ea86 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -533,6 +533,16 @@ gallivm_compile_module(struct gallivm_state *gallivm)
   if (0) {
  debug_printf("optimizing func %s...\n", LLVMGetValueName(func));
   }
+
+   /* Disable frame pointer omission on debug/profile builds */
+   /* XXX: And workaround http://llvm.org/PR21435 */
+#if HAVE_LLVM >= 0x0307 && \
+(defined(DEBUG) || defined(PROFILE) || \
+ defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64))
+  LLVMAddTargetDependentFunctionAttr(func, "no-frame-pointer-elim", 
"true");
+  LLVMAddTargetDependentFunctionAttr(func, 
"no-frame-pointer-elim-non-leaf", "true");
+#endif
+
   LLVMRunFunctionPassManager(gallivm->passmgr, func);
   func = LLVMGetNextFunction(func);
}
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallivm: Disable frame pointer omission on LLVM 3.7.

2015-05-28 Thread Roland Scheidegger
Am 28.05.2015 um 16:35 schrieb Jose Fonseca:
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_init.c | 10 ++
>  1 file changed, 10 insertions(+)
> 
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c 
> b/src/gallium/auxiliary/gallivm/lp_bld_init.c
> index 7b906c2..384ea86 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
> @@ -533,6 +533,16 @@ gallivm_compile_module(struct gallivm_state *gallivm)
>if (0) {
>   debug_printf("optimizing func %s...\n", LLVMGetValueName(func));
>}
> +
> +   /* Disable frame pointer omission on debug/profile builds */
> +   /* XXX: And workaround http://llvm.org/PR21435 */
> +#if HAVE_LLVM >= 0x0307 && \
> +(defined(DEBUG) || defined(PROFILE) || \
> + defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64))
> +  LLVMAddTargetDependentFunctionAttr(func, "no-frame-pointer-elim", 
> "true");
> +  LLVMAddTargetDependentFunctionAttr(func, 
> "no-frame-pointer-elim-non-leaf", "true");
> +#endif
> +
>LLVMRunFunctionPassManager(gallivm->passmgr, func);
>func = LLVMGetNextFunction(func);
> }
> 

Reviewed-by: Roland Scheidegger 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallivm: Disable frame pointer omission on LLVM 3.7.

2015-05-28 Thread Jose Fonseca

On 28/05/15 15:37, Roland Scheidegger wrote:

Am 28.05.2015 um 16:35 schrieb Jose Fonseca:

---
  src/gallium/auxiliary/gallivm/lp_bld_init.c | 10 ++
  1 file changed, 10 insertions(+)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c 
b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 7b906c2..384ea86 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -533,6 +533,16 @@ gallivm_compile_module(struct gallivm_state *gallivm)
if (0) {
   debug_printf("optimizing func %s...\n", LLVMGetValueName(func));
}
+
+   /* Disable frame pointer omission on debug/profile builds */
+   /* XXX: And workaround http://llvm.org/PR21435 */
+#if HAVE_LLVM >= 0x0307 && \
+(defined(DEBUG) || defined(PROFILE) || \
+ defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64))
+  LLVMAddTargetDependentFunctionAttr(func, "no-frame-pointer-elim", 
"true");
+  LLVMAddTargetDependentFunctionAttr(func, "no-frame-pointer-elim-non-leaf", 
"true");
+#endif
+
LLVMRunFunctionPassManager(gallivm->passmgr, func);
func = LLVMGetNextFunction(func);
 }



Reviewed-by: Roland Scheidegger 


Thanks.

BTW, while testing this change I noticed that disassembly is busted with 
LLVM 3.7 -- it keeps dumping the same instruction over and over again. 
Tried to fix but's not trivial.


Rather than trying to keep patching things up, I'm going to try using 
the C API.


Jose

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Disable compaction for EOT send messages

2015-05-28 Thread Ben Widawsky
On Thu, May 28, 2015 at 07:00:38AM -0700, Matt Turner wrote:
> On Thu, May 28, 2015 at 3:31 AM, Neil Roberts  wrote:
> > Ben Widawsky  writes:
> >
> >> AFAICT, there is no real way to make sure a send message with EOT is
> >> properly ignored from compact, nor can I see a way to actually encode
> >> EOT while compacting. Before the single send optimization we'd always
> >> bail because we hit the is_immediate && !is_compactable_immediate
> >> case. However, with single send, is_immediate is not true, and so we
> >> end up trying to compact the un-compactible.
> >>
> >> Without this, any compacting single send instruction will hang because
> >> the EOT isn't there. I am not sure how I didn't hit this when I
> >> originally enabled the optimization. I didn't check if some
> >> surrounding code changed.
> >>
> >> NOTE: This needs another piglit run or two before merge.
> >>
> >> I know Neil and Matt were both looking into this. I did a quick search
> >> and didn't see any patches out there to handle this. Please ignore if
> >> this has already been sent by someone. (Direct me to it and I will
> >> review it).
> >>
> >> Cc: Matt Turner 
> >> Cc: Neil Roberts 
> >> Cc: Mark Janes 
> >> Signed-off-by: Ben Widawsky 
> >> ---
> >>  src/mesa/drivers/dri/i965/brw_eu_compact.c | 6 ++
> >>  1 file changed, 6 insertions(+)
> >>
> >> diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c 
> >> b/src/mesa/drivers/dri/i965/brw_eu_compact.c
> >> index 69cb114..67f0b45 100644
> >> --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
> >> +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
> >> @@ -849,6 +849,12 @@ set_3src_source_index(const struct brw_device_info 
> >> *devinfo,
> >>  static bool
> >>  has_unmapped_bits(const struct brw_device_info *devinfo, brw_inst *src)
> >>  {
> >> +   /* EOT can only be mapped on a send if the src1 is an immediate */
> >
> > Can we really map EOT if the src1 is immediate?
> 
> That's a good question. I think in practice we can but only under some
> very rare circumstances.
> 

That's right, I meant to remove this before submitting the patch. For all
intents, we can't do it.

> I'd like to study the compaction code today a little and try to
> understand how EOT is falling through the cracks.
> 

Sure, see my first paragraph above, I explain why we don't hit it today, but I'd
very much appreciate you double checking.

> >> +   if ((brw_inst_opcode(devinfo, src) == BRW_OPCODE_SENDC ||
> >> +brw_inst_opcode(devinfo, src) == BRW_OPCODE_SEND) &&
> >
> > Is there any reason to limit this to send and sendc? If there's no way
> > to map EOT why not just to if (brw_inst_eot(...)) return true?
> >
> > For what it's worth, I ran my original patch¹ through shader-db and it
> > didn't make any difference, which is good.
> >
> > Do we not also need to fix the problem with the destination register
> > being used as a temporary? This was mentioned by Matt on IRC. Maybe he
> > is looking into it?
> 
> Yes, we need to do that as well. I'm looking into it.

Yep, this solves a different problem I believe.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Disable compaction for EOT send messages

2015-05-28 Thread Ben Widawsky
On Thu, May 28, 2015 at 11:31:40AM +0100, Neil Roberts wrote:
> Ben Widawsky  writes:
> 
> > AFAICT, there is no real way to make sure a send message with EOT is
> > properly ignored from compact, nor can I see a way to actually encode
> > EOT while compacting. Before the single send optimization we'd always
> > bail because we hit the is_immediate && !is_compactable_immediate
> > case. However, with single send, is_immediate is not true, and so we
> > end up trying to compact the un-compactible.
> >
> > Without this, any compacting single send instruction will hang because
> > the EOT isn't there. I am not sure how I didn't hit this when I
> > originally enabled the optimization. I didn't check if some
> > surrounding code changed.
> >
> > NOTE: This needs another piglit run or two before merge.
> >
> > I know Neil and Matt were both looking into this. I did a quick search
> > and didn't see any patches out there to handle this. Please ignore if
> > this has already been sent by someone. (Direct me to it and I will
> > review it).
> >
> > Cc: Matt Turner 
> > Cc: Neil Roberts 
> > Cc: Mark Janes 
> > Signed-off-by: Ben Widawsky 
> > ---
> >  src/mesa/drivers/dri/i965/brw_eu_compact.c | 6 ++
> >  1 file changed, 6 insertions(+)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c 
> > b/src/mesa/drivers/dri/i965/brw_eu_compact.c
> > index 69cb114..67f0b45 100644
> > --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
> > +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
> > @@ -849,6 +849,12 @@ set_3src_source_index(const struct brw_device_info 
> > *devinfo,
> >  static bool
> >  has_unmapped_bits(const struct brw_device_info *devinfo, brw_inst *src)
> >  {
> > +   /* EOT can only be mapped on a send if the src1 is an immediate */
> 
> Can we really map EOT if the src1 is immediate?
> 
> > +   if ((brw_inst_opcode(devinfo, src) == BRW_OPCODE_SENDC ||
> > +brw_inst_opcode(devinfo, src) == BRW_OPCODE_SEND) &&
> 
> Is there any reason to limit this to send and sendc? If there's no way
> to map EOT why not just to if (brw_inst_eot(...)) return true?

Well eot just means bit 127, and in certain circumstances outside of send, I
believe it's valid to have the bit set (the src1 imm being the one example I can
find, if you are smart about it). We can defer to Matt since he said he'd look a
bit today.

> 
> For what it's worth, I ran my original patch¹ through shader-db and it
> didn't make any difference, which is good.

I didn't see that patch, this was the patch that Mark originally ran through
piglit as well (http://otc-mesa-ci.jf.intel.com/job/Leeroy/99272/). I'm not
opposed to this simpler solution if Matt determines it's equally viable and or
superior in some way. To me it seemed a bit more readable and accurate to only
check EOT when it was actually an EOT (the sends).

> 
> Do we not also need to fix the problem with the destination register
> being used as a temporary? This was mentioned by Matt on IRC. Maybe he
> is looking into it?
> 

Yep. I have no clue on that one.

> Regards,
> - Neil
> 
> 1. https://github.com/bpeel/mesa/commit/6abda467fa3ad7060127
> 
> > +   brw_inst_eot(devinfo, src))
> > +  return true;
> > +
> > /* Check for instruction bits that don't map to any of the fields of the
> >  * compacted instruction.  The instruction cannot be compacted if any of
> >  * them are set.  They overlap with:

Apologies for not realizing you wrote the same patch...
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] gallivm: Use the LLVM's C disassembly interface.

2015-05-28 Thread Jose Fonseca
It doesn't do everything we want.  In particular it doesn't allow to
detect jumps or return opcodes.  Currently we detect the x86's RET
opcode.

Even though it's worse for LLVM 3.3, it's an improvement for LLVM 3.7,
which was totally busted.
---
 scons/llvm.py  |   4 +-
 src/gallium/auxiliary/gallivm/lp_bld_debug.cpp | 260 -
 2 files changed, 40 insertions(+), 224 deletions(-)

diff --git a/scons/llvm.py b/scons/llvm.py
index 17278df..c59b8cb 100644
--- a/scons/llvm.py
+++ b/scons/llvm.py
@@ -120,6 +120,7 @@ def generate(env):
 ])
 elif llvm_version >= distutils.version.LooseVersion('3.5'):
 env.Prepend(LIBS = [
+'LLVMMCDisassembler',
 'LLVMBitWriter', 'LLVMMCJIT', 'LLVMRuntimeDyld',
 'LLVMX86Disassembler', 'LLVMX86AsmParser', 'LLVMX86CodeGen',
 'LLVMSelectionDAG', 'LLVMAsmPrinter', 'LLVMX86Desc',
@@ -132,6 +133,7 @@ def generate(env):
 ])
 else:
 env.Prepend(LIBS = [
+'LLVMMCDisassembler',
 'LLVMBitWriter', 'LLVMX86Disassembler', 'LLVMX86AsmParser',
 'LLVMX86CodeGen', 'LLVMX86Desc', 'LLVMSelectionDAG',
 'LLVMAsmPrinter', 'LLVMMCParser', 'LLVMX86AsmPrinter',
@@ -189,7 +191,7 @@ def generate(env):
 if '-fno-rtti' in cxxflags:
 env.Append(CXXFLAGS = ['-fno-rtti'])
 
-components = ['engine', 'mcjit', 'bitwriter', 'x86asmprinter']
+components = ['engine', 'mcjit', 'bitwriter', 'x86asmprinter', 
'mcdisassembler']
 
 env.ParseConfig('llvm-config --libs ' + ' '.join(components))
 env.ParseConfig('llvm-config --ldflags')
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp 
b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
index 76c302f..64fb044 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
@@ -28,40 +28,12 @@
 #include 
 
 #include 
-#include 
-#include 
+#include 
 #include 
 #include 
-
-#if HAVE_LLVM >= 0x0306
-#include 
-#else
-#include 
-#endif
-
-#include 
-#include 
-
 #include 
-
 #include 
 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#if HAVE_LLVM >= 0x0305
-#define OwningPtr std::unique_ptr
-#else
-#include 
-#endif
-
-#if HAVE_LLVM >= 0x0305
-#include 
-#endif
-
 #include "util/u_math.h"
 #include "util/u_debug.h"
 
@@ -133,7 +105,7 @@ lp_get_module_id(LLVMModuleRef module)
 extern "C" void
 lp_debug_dump_value(LLVMValueRef value)
 {
-#if (defined(PIPE_OS_WINDOWS) && !defined(PIPE_CC_MSVC)) || 
defined(PIPE_OS_EMBDDED)
+#if (defined(PIPE_OS_WINDOWS) && !defined(PIPE_CC_MSVC)) || 
defined(PIPE_OS_EMBEDDED)
raw_debug_ostream os;
llvm::unwrap(value)->print(os);
os.flush();
@@ -143,44 +115,16 @@ lp_debug_dump_value(LLVMValueRef value)
 }
 
 
-#if HAVE_LLVM < 0x0306
-
-/*
- * MemoryObject wrapper around a buffer of memory, to be used by MC
- * disassembler.
- */
-class BufferMemoryObject:
-   public llvm::MemoryObject
+static const char *
+disassemblerSymbolLookupCB(void *DisInfo,
+   uint64_t ReferenceValue,
+   uint64_t *ReferenceType,
+   uint64_t ReferencePC,
+   const char **ReferenceName)
 {
-private:
-   const uint8_t *Bytes;
-   uint64_t Length;
-public:
-   BufferMemoryObject(const uint8_t *bytes, uint64_t length) :
-  Bytes(bytes), Length(length)
-   {
-   }
-
-   uint64_t getBase() const
-   {
-  return 0;
-   }
-
-   uint64_t getExtent() const
-   {
-  return Length;
-   }
-
-   int readByte(uint64_t addr, uint8_t *byte) const
-   {
-  if (addr > getExtent())
- return -1;
-  *byte = Bytes[addr];
-  return 0;
-   }
-};
-
-#endif /* HAVE_LLVM < 0x0306 */
+   // TODO: Maybe this can be used to guess jumps
+   return NULL;
+}
 
 
 /*
@@ -193,8 +137,6 @@ public:
 static size_t
 disassemble(const void* func, llvm::raw_ostream & Out)
 {
-   using namespace llvm;
-
const uint8_t *bytes = (const uint8_t *)func;
 
/*
@@ -202,101 +144,23 @@ disassemble(const void* func, llvm::raw_ostream & Out)
 */
const uint64_t extent = 96 * 1024;
 
-   uint64_t max_pc = 0;
-
/*
 * Initialize all used objects.
 */
 
-   std::string Triple = sys::getDefaultTargetTriple();
-
-   std::string Error;
-   const Target *T = TargetRegistry::lookupTarget(Triple, Error);
-
-#if HAVE_LLVM >= 0x0304
-   OwningPtr 
AsmInfo(T->createMCAsmInfo(*T->createMCRegInfo(Triple), Triple));
-#else
-   OwningPtr AsmInfo(T->createMCAsmInfo(Triple));
-#endif
-
-   if (!AsmInfo) {
-  Out << "error: no assembly info for target " << Triple << "\n";
-  Out.flush();
-  return 0;
-   }
-
-   unsigned int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
-
-   OwningPtr MRI(T->createMCRegInfo(Triple));
-   if (!MRI) {
-  Out << "error: no register info for target " <<

[Mesa-dev] [PATCH 5/6] i965/gen9: Set HALIGN_16 for all aux buffers

2015-05-28 Thread Ben Widawsky
Just like the previous patch, but for the GEN9 constraints.

Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index a1ac0cf..89030aa 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -655,6 +655,11 @@ intel_miptree_create(struct brw_context *brw,
 
assert((layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) == 0);
assert((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0);
+
+   if (brw->gen >= 9 && num_samples > 1)
+  layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
+
+
mt = intel_miptree_create_layout(brw, target, format,
 first_level, last_level, width0,
 height0, depth0, num_samples,
-- 
2.4.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/6] i965/gen8+: Add aux buffer alignment assertions

2015-05-28 Thread Ben Widawsky
This helped find the incorrect HALIGN values from the previous patches.

Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/gen8_surface_state.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c 
b/src/mesa/drivers/dri/i965/gen8_surface_state.c
index 672fc70..c8965db 100644
--- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
@@ -178,6 +178,8 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
if (mt->mcs_mt) {
   aux_mt = mt->mcs_mt;
   aux_mode = GEN8_SURFACE_AUX_MODE_MCS;
+  assert(brw->gen < 9 || mt->align_w == 16);
+  assert(brw->gen < 8 || mt->num_samples > 0 || mt->align_w == 16);
}
 
uint32_t *surf = allocate_surface_state(brw, surf_offset, surf_index);
@@ -391,6 +393,8 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
if (mt->mcs_mt) {
   aux_mt = mt->mcs_mt;
   aux_mode = GEN8_SURFACE_AUX_MODE_MCS;
+  assert(brw->gen < 9 || mt->align_w == 16);
+  assert(brw->gen < 8 || mt->num_samples > 0 || mt->align_w == 16);
}
 
uint32_t *surf = allocate_surface_state(brw, &offset, surf_index);
-- 
2.4.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/6] i965/gen8: Correct HALIGN for AUX surfaces

2015-05-28 Thread Ben Widawsky
This restriction was attempted in this commit:
commit 47053464630888f819ef8cc44278f1a1220159b9
Author: Anuj Phogat 
Date:   Fri Feb 13 11:21:21 2015 -0800

   i965/gen8: Use HALIGN_16 if MCS is enabled for non-MSRT

However, the commit itself doesn't achieve the desired goal as determined by the
asserts which the next patch adds. mcs_mt is never a value because we're in the
process of allocating the mcs_mt miptree when we get to this function. I didn't
check, but perhaps this would work with blorp, however, meta clears allocate the
miptree structure (which AFAICT needs the alignment also) way before it
allocates using meta clears where the renderbuffer is allocated way before the
aux buffer.

The restriction is referenced in a few places, but the most concise one [IMO]
from the spec is for Gen9. Gen8 8 loosens the restriction in that it only
requires this for non-msrt surface.

   When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN 16 must
   be used.

With the code before the miptree layout flag rework (patches preceding this),
accomplishing this workaround is very difficult.

Cc: Anuj Phogat 
Cc: Neil Roberts 
Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/brw_tex_layout.c| 16 ++--
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 15 ---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  4 +++-
 3 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c 
b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index 72b02a2..b51c7c7 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -41,8 +41,13 @@
 
 static unsigned int
 intel_horizontal_texture_alignment_unit(struct brw_context *brw,
-struct intel_mipmap_tree *mt)
+struct intel_mipmap_tree *mt,
+uint32_t layout_flags)
 {
+
+   if (layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16)
+  return 16;
+
/**
 * From the "Alignment Unit Size" section of various specs, namely:
 * - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4
@@ -91,9 +96,6 @@ intel_horizontal_texture_alignment_unit(struct brw_context 
*brw,
if (brw->gen >= 7 && mt->format == MESA_FORMAT_Z_UNORM16)
   return 8;
 
-   if (brw->gen == 8 && mt->mcs_mt && mt->num_samples <= 1)
-  return 16;
-
return 4;
 }
 
@@ -459,7 +461,8 @@ brw_miptree_layout_texture_3d(struct brw_context *brw,
 }
 
 void
-brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt)
+brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt,
+   uint32_t layout_flags)
 {
bool multisampled = mt->num_samples > 1;
bool gen6_hiz_or_stencil = false;
@@ -492,8 +495,9 @@ brw_miptree_layout(struct brw_context *brw, struct 
intel_mipmap_tree *mt)
  mt->align_w = 128 / mt->cpp;
  mt->align_h = 32;
   }
+  assert((layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0);
} else {
-  mt->align_w = intel_horizontal_texture_alignment_unit(brw, mt);
+  mt->align_w = intel_horizontal_texture_alignment_unit(brw, mt, 
layout_flags);
   mt->align_h =
  intel_vertical_texture_alignment_unit(brw, mt->format, multisampled);
}
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 75ee19a..a1ac0cf 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -475,7 +475,10 @@ intel_miptree_create_layout(struct brw_context *brw,
if (layout_flags & MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD)
   mt->array_layout = ALL_SLICES_AT_EACH_LOD;
 
-   brw_miptree_layout(brw, mt);
+   if (intel_miptree_is_fast_clear_capable(brw, mt))
+  layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
+
+   brw_miptree_layout(brw, mt, layout_flags);
 
if (mt->disable_aux_buffers)
   assert(mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS);
@@ -722,6 +725,7 @@ intel_miptree_create(struct brw_context *brw,
 
 
if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
+  assert(mt->num_samples > 1);
   if (!intel_miptree_alloc_mcs(brw, mt, num_samples)) {
  intel_miptree_release(&mt);
  return NULL;
@@ -734,8 +738,10 @@ intel_miptree_create(struct brw_context *brw,
 * clear actually occurs.
 */
if (intel_is_non_msrt_mcs_tile_supported(brw, mt->tiling) &&
-   intel_miptree_is_fast_clear_capable(brw, mt))
+   intel_miptree_is_fast_clear_capable(brw, mt)) {
   mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
+  assert(brw->gen < 8 || mt->align_w == 16);
+   }
 
return mt;
 }
@@ -1446,6 +1452,9 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
unsigned mcs_height =
   ALIGN(mt->logical_height0, height_divisor) / height_divisor;
assert(mt->logical_depth0 == 1);
+   uint32_t layout_flags 

[Mesa-dev] [PATCH 1/6] i965: Consolidate certain miptree params to flags

2015-05-28 Thread Ben Widawsky
I think pretty much everyone agrees that having more than a single bool as a
function argument is bordering on a bad idea. What sucks about the current
code is in several instances it's necessary to propagate these boolean
selections down to lower layers of the code. This requires plumbing (mechanical,
but still churn) pretty much all of the miptree functions each time.  By
introducing the flags paramater, it is possible to add miptree constraints very
easily.

The use of this, as is already the case, is sometimes we have some information
at the time we create the miptree that needs to be known all the way at the
lowest levels of the create/allocation, disable_aux_buffers is currently one
such example. There will be another example coming up in a few patches.

Cc: Chad Versace 
Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/intel_fbo.c  |  5 +-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c  | 86 +-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h  |  9 ++-
 src/mesa/drivers/dri/i965/intel_pixel_draw.c   |  2 +-
 src/mesa/drivers/dri/i965/intel_tex.c  |  8 +--
 src/mesa/drivers/dri/i965/intel_tex.h  |  2 +-
 src/mesa/drivers/dri/i965/intel_tex_image.c| 14 ++---
 src/mesa/drivers/dri/i965/intel_tex_validate.c |  3 +-
 8 files changed, 63 insertions(+), 66 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c 
b/src/mesa/drivers/dri/i965/intel_fbo.c
index aebed72..1b3a72f 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -390,7 +390,7 @@ intel_image_target_renderbuffer_storage(struct gl_context 
*ctx,
  image->height,
  1,
  image->pitch,
- true /*disable_aux_buffers*/);
+ MIPTREE_LAYOUT_DISABLE_AUX);
if (!irb->mt)
   return;
 
@@ -1027,10 +1027,9 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw,
  intel_image->base.Base.Level,
  intel_image->base.Base.Level,
  width, height, depth,
- true,
  irb->mt->num_samples,
  INTEL_MIPTREE_TILING_ANY,
- false);
+ MIPTREE_LAYOUT_ACCELERATED_UPLOAD);
 
if (intel_miptree_wants_hiz_buffer(brw, new_mt)) {
   intel_miptree_alloc_hiz(brw, new_mt);
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 24a5c3d..b243f8a 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -244,10 +244,8 @@ intel_miptree_create_layout(struct brw_context *brw,
 GLuint width0,
 GLuint height0,
 GLuint depth0,
-bool for_bo,
 GLuint num_samples,
-bool force_all_slices_at_each_lod,
-bool disable_aux_buffers)
+uint32_t layout_flags)
 {
struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
if (!mt)
@@ -286,7 +284,7 @@ intel_miptree_create_layout(struct brw_context *brw,
mt->logical_height0 = height0;
mt->logical_depth0 = depth0;
mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
-   mt->disable_aux_buffers = disable_aux_buffers;
+   mt->disable_aux_buffers = !!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX);
exec_list_make_empty(&mt->hiz_map);
 
/* The cpp is bytes per (1, blockheight)-sized block for compressed
@@ -422,12 +420,14 @@ intel_miptree_create_layout(struct brw_context *brw,
mt->physical_height0 = height0;
mt->physical_depth0 = depth0;
 
-   if (!for_bo &&
+   if (!(layout_flags & MIPTREE_LAYOUT_FOR_BO) &&
_mesa_get_format_base_format(format) == GL_DEPTH_STENCIL &&
(brw->must_use_separate_stencil ||
(brw->has_separate_stencil &&
  intel_miptree_wants_hiz_buffer(brw, mt {
-  const bool force_all_slices_at_each_lod = brw->gen == 6;
+  uint32_t stencil_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
+  if (brw->gen == 6)
+ stencil_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD;
   mt->stencil_mt = intel_miptree_create(brw,
 mt->target,
 MESA_FORMAT_S_UINT8,
@@ -436,10 +436,10 @@ intel_miptree_create_layout(struct brw_context *brw,
 mt->logical_width0,
 mt->logical_height0,
 mt->logical_depth0,
-true,
   

[Mesa-dev] [PATCH 0/6] Begin reworking mipmap tree layout + HALIGN fixes gen8+

2015-05-28 Thread Ben Widawsky
Initially I wanted to just fix our boats HALIGN code for mcs surfaces on gen8+
(the 5th patch in the series). However, I soon realized why the original
implementation was wrong... the mipmap tree layout/creation is very entangled.

This patch series' primary purpose is to fix the HALIGN constraints for gen8+,
but it begins to rework the mipmap tree code (patch 1, and 3). I have more
patches which further clean things up, but as they do not actually fix anything,
I'll work on them and send them later/separately.

We have data that this fixes some number of hangs (not all) on SKL
(https://bugs.freedesktop.org/show_bug.cgi?id=90561). I don't remember seeing
any fixes on BDW, but I do know reverting Anuj's original implementation does
nothing to piglit either.

Cc: Anuj Phogat 
Cc: Jordan Justen 

Ben Widawsky (6):
  i965: Consolidate certain miptree params to flags
  i965/gen9: Only allow Y-Tiled MCS buffers
  i965: Extract tiling from fast clear decision
  i965/gen8: Correct HALIGN for AUX surfaces
  i965/gen9: Set HALIGN_16 for all aux buffers
  i965/gen8+: Add aux buffer alignment assertions

 src/mesa/drivers/dri/i965/brw_tex_layout.c |  16 +--
 src/mesa/drivers/dri/i965/gen8_surface_state.c |   4 +
 src/mesa/drivers/dri/i965/intel_fbo.c  |   5 +-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c  | 137 +++--
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h  |  24 +++--
 src/mesa/drivers/dri/i965/intel_pixel_draw.c   |   2 +-
 src/mesa/drivers/dri/i965/intel_tex.c  |   8 +-
 src/mesa/drivers/dri/i965/intel_tex.h  |   2 +-
 src/mesa/drivers/dri/i965/intel_tex_image.c|  14 ++-
 src/mesa/drivers/dri/i965/intel_tex_validate.c |   3 +-
 10 files changed, 127 insertions(+), 88 deletions(-)

-- 
2.4.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/6] i965/gen9: Only allow Y-Tiled MCS buffers

2015-05-28 Thread Ben Widawsky
For GEN9, much of the logic to use X-Tiled buffers has been stripped out. It is
still supported in some places, but it's never desirable. Unfortunately we don't
yet have the ability to have Y-Tiled scanout (see:
http://patchwork.freedesktop.org/patch/46984/),

NOTE: This patch shouldn't actually do anything since SKL doesn't yet use fast
clears, and that's the only case we can get to this function (by way of
intel_update_winsys_renderbuffer_miptree)

Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index b243f8a..68d405c 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -193,6 +193,8 @@ intel_is_non_msrt_mcs_buffer_supported(struct brw_context 
*brw,
   return false;
}
 
+   if (brw->gen >= 9 && mt->tiling != I915_TILING_Y)
+  return false;
if (mt->tiling != I915_TILING_X &&
mt->tiling != I915_TILING_Y)
   return false;
-- 
2.4.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/6] i965: Extract tiling from fast clear decision

2015-05-28 Thread Ben Widawsky
There are several constraints when determining if one can fast clear a surface.
Some of these are alignment, pixel density, tiling formats, and others that vary
by generation. The helper function which exists today does a suitable job,
however it conflates "BO properties" with "Miptree properties" when using
tiling. I consider the former to be attributes of the physical surface, things
which are determined through BO allocation, and the latter being attributes
which are derived from the API, and having nothing to do with the underlying
surface.

Tiling properties are a distinct operation from the creation of a miptree, and
by removing this, we gain flexibility throughout the code to make determinations
about when we can or cannot fast clear strictly on the miptree.

To signify this change, I've also renamed the function to indicate it is a
distinction made on the miptree. I am torn as to whether or not it was a good
idea to remove "non_msrt" since it's a really nice thing for grep.

Cc: Chad Versace 
Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 37 +++
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 11 
 2 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 68d405c..75ee19a 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -158,15 +158,33 @@ intel_get_non_msrt_mcs_alignment(struct brw_context *brw,
}
 }
 
+/**
+ * Determine the BO backing the miptree has a suitable tile format. For Gen7,
+ * and 8 this means any tiled format.
+ *
+ * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
+ * beneath the "Fast Color Clear" bullet (p326):
+ *
+ * - Support is limited to tiled render targets.
+ */
+bool
+intel_is_non_msrt_mcs_tile_supported(struct brw_context *brw,
+ unsigned tiling)
+{
+   if (brw->gen >= 9)
+  return tiling == I915_TILING_Y;
+
+   return tiling != I915_TILING_NONE;
+}
 
 /**
  * For a single-sampled render target ("non-MSRT"), determine if an MCS buffer
- * can be used.
+ * can be used. This doesn't (and should not) inquire about the BO properties 
of
+ * the buffer.
  *
  * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
  * beneath the "Fast Color Clear" bullet (p326):
  *
- * - Support is limited to tiled render targets.
  * - Support is for non-mip-mapped and non-array surface types only.
  *
  * And then later, on p327:
@@ -175,8 +193,8 @@ intel_get_non_msrt_mcs_alignment(struct brw_context *brw,
  *   64bpp, and 128bpp.
  */
 bool
-intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw,
-   struct intel_mipmap_tree *mt)
+intel_miptree_is_fast_clear_capable(struct brw_context *brw,
+struct intel_mipmap_tree *mt)
 {
/* MCS support does not exist prior to Gen7 */
if (brw->gen < 7)
@@ -193,11 +211,6 @@ intel_is_non_msrt_mcs_buffer_supported(struct brw_context 
*brw,
   return false;
}
 
-   if (brw->gen >= 9 && mt->tiling != I915_TILING_Y)
-  return false;
-   if (mt->tiling != I915_TILING_X &&
-   mt->tiling != I915_TILING_Y)
-  return false;
if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16)
   return false;
if (mt->first_level != 0 || mt->last_level != 0)
@@ -720,7 +733,8 @@ intel_miptree_create(struct brw_context *brw,
 * Allocation of the MCS miptree will be deferred until the first fast
 * clear actually occurs.
 */
-   if (intel_is_non_msrt_mcs_buffer_supported(brw, mt))
+   if (intel_is_non_msrt_mcs_tile_supported(brw, mt->tiling) &&
+   intel_miptree_is_fast_clear_capable(brw, mt))
   mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
 
return mt;
@@ -819,7 +833,8 @@ intel_update_winsys_renderbuffer_miptree(struct brw_context 
*intel,
 * Allocation of the MCS miptree will be deferred until the first fast
 * clear actually occurs.
 */
-   if (intel_is_non_msrt_mcs_buffer_supported(intel, singlesample_mt))
+   if (intel_is_non_msrt_mcs_tile_supported(intel, singlesample_mt->tiling) &&
+   intel_miptree_is_fast_clear_capable(intel, singlesample_mt))
   singlesample_mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
 
if (num_samples == 0) {
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index 4722353..ccb6d72 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -514,15 +514,16 @@ enum intel_miptree_tiling_mode {
INTEL_MIPTREE_TILING_NONE,
 };
 
-bool
-intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw,
-   struct intel_mipmap_tree *mt);
-
 void
 intel_get_non_msrt_mcs_alignment(struct brw_c

Re: [Mesa-dev] [PATCH] gallivm: Use the LLVM's C disassembly interface.

2015-05-28 Thread Roland Scheidegger
Looks ok, it's massively simpler and shouldn't break as often.

Reviewed-by: Roland Scheidegger 

Am 28.05.2015 um 17:57 schrieb Jose Fonseca:
> It doesn't do everything we want.  In particular it doesn't allow to
> detect jumps or return opcodes.  Currently we detect the x86's RET
> opcode.
> 
> Even though it's worse for LLVM 3.3, it's an improvement for LLVM 3.7,
> which was totally busted.
> ---
>  scons/llvm.py  |   4 +-
>  src/gallium/auxiliary/gallivm/lp_bld_debug.cpp | 260 
> -
>  2 files changed, 40 insertions(+), 224 deletions(-)
> 
> diff --git a/scons/llvm.py b/scons/llvm.py
> index 17278df..c59b8cb 100644
> --- a/scons/llvm.py
> +++ b/scons/llvm.py
> @@ -120,6 +120,7 @@ def generate(env):
>  ])
>  elif llvm_version >= distutils.version.LooseVersion('3.5'):
>  env.Prepend(LIBS = [
> +'LLVMMCDisassembler',
>  'LLVMBitWriter', 'LLVMMCJIT', 'LLVMRuntimeDyld',
>  'LLVMX86Disassembler', 'LLVMX86AsmParser', 'LLVMX86CodeGen',
>  'LLVMSelectionDAG', 'LLVMAsmPrinter', 'LLVMX86Desc',
> @@ -132,6 +133,7 @@ def generate(env):
>  ])
>  else:
>  env.Prepend(LIBS = [
> +'LLVMMCDisassembler',
>  'LLVMBitWriter', 'LLVMX86Disassembler', 'LLVMX86AsmParser',
>  'LLVMX86CodeGen', 'LLVMX86Desc', 'LLVMSelectionDAG',
>  'LLVMAsmPrinter', 'LLVMMCParser', 'LLVMX86AsmPrinter',
> @@ -189,7 +191,7 @@ def generate(env):
>  if '-fno-rtti' in cxxflags:
>  env.Append(CXXFLAGS = ['-fno-rtti'])
>  
> -components = ['engine', 'mcjit', 'bitwriter', 'x86asmprinter']
> +components = ['engine', 'mcjit', 'bitwriter', 'x86asmprinter', 
> 'mcdisassembler']
>  
>  env.ParseConfig('llvm-config --libs ' + ' '.join(components))
>  env.ParseConfig('llvm-config --ldflags')
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp 
> b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
> index 76c302f..64fb044 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
> @@ -28,40 +28,12 @@
>  #include 
>  
>  #include 
> -#include 
> -#include 
> +#include 
>  #include 
>  #include 
> -
> -#if HAVE_LLVM >= 0x0306
> -#include 
> -#else
> -#include 
> -#endif
> -
> -#include 
> -#include 
> -
>  #include 
> -
>  #include 
>  
> -#include 
> -#include 
> -#include 
> -#include 
> -#include 
> -
> -#if HAVE_LLVM >= 0x0305
> -#define OwningPtr std::unique_ptr
> -#else
> -#include 
> -#endif
> -
> -#if HAVE_LLVM >= 0x0305
> -#include 
> -#endif
> -
>  #include "util/u_math.h"
>  #include "util/u_debug.h"
>  
> @@ -133,7 +105,7 @@ lp_get_module_id(LLVMModuleRef module)
>  extern "C" void
>  lp_debug_dump_value(LLVMValueRef value)
>  {
> -#if (defined(PIPE_OS_WINDOWS) && !defined(PIPE_CC_MSVC)) || 
> defined(PIPE_OS_EMBDDED)
> +#if (defined(PIPE_OS_WINDOWS) && !defined(PIPE_CC_MSVC)) || 
> defined(PIPE_OS_EMBEDDED)
> raw_debug_ostream os;
> llvm::unwrap(value)->print(os);
> os.flush();
> @@ -143,44 +115,16 @@ lp_debug_dump_value(LLVMValueRef value)
>  }
>  
>  
> -#if HAVE_LLVM < 0x0306
> -
> -/*
> - * MemoryObject wrapper around a buffer of memory, to be used by MC
> - * disassembler.
> - */
> -class BufferMemoryObject:
> -   public llvm::MemoryObject
> +static const char *
> +disassemblerSymbolLookupCB(void *DisInfo,
> +   uint64_t ReferenceValue,
> +   uint64_t *ReferenceType,
> +   uint64_t ReferencePC,
> +   const char **ReferenceName)
>  {
> -private:
> -   const uint8_t *Bytes;
> -   uint64_t Length;
> -public:
> -   BufferMemoryObject(const uint8_t *bytes, uint64_t length) :
> -  Bytes(bytes), Length(length)
> -   {
> -   }
> -
> -   uint64_t getBase() const
> -   {
> -  return 0;
> -   }
> -
> -   uint64_t getExtent() const
> -   {
> -  return Length;
> -   }
> -
> -   int readByte(uint64_t addr, uint8_t *byte) const
> -   {
> -  if (addr > getExtent())
> - return -1;
> -  *byte = Bytes[addr];
> -  return 0;
> -   }
> -};
> -
> -#endif /* HAVE_LLVM < 0x0306 */
> +   // TODO: Maybe this can be used to guess jumps
> +   return NULL;
> +}
>  
>  
>  /*
> @@ -193,8 +137,6 @@ public:
>  static size_t
>  disassemble(const void* func, llvm::raw_ostream & Out)
>  {
> -   using namespace llvm;
> -
> const uint8_t *bytes = (const uint8_t *)func;
>  
> /*
> @@ -202,101 +144,23 @@ disassemble(const void* func, llvm::raw_ostream & Out)
>  */
> const uint64_t extent = 96 * 1024;
>  
> -   uint64_t max_pc = 0;
> -
> /*
>  * Initialize all used objects.
>  */
>  
> -   std::string Triple = sys::getDefaultTargetTriple();
> -
> -   std::string Error;
> -   const Target *T = TargetRegistry::lookupTarget(Triple, Error);
> 

[Mesa-dev] [PATCH] i965/vec4: Fix the source register for indexed samplers

2015-05-28 Thread Neil Roberts
Previously when setting up the sample instruction for an indirect
sampler the vec4 backend was directly passing the pseudo opcode's
src0. However this isn't actually set to a valid register because
instead the MRF registers are used as the source so it would end up
passing null as src0.

This patch makes it call gen6_resolve_implied_move before setting up
the indirect message. This is similar to what is done for constant
sampler numbers in brw_SAMPLE.

The Piglit tests for sampler array indexing didn't pick this up
because they were using a texture with a solid colour so it didn't
matter what texture coordinates were actually used. I've posted a
patch for Piglit to make the tests more thorough here:

http://lists.freedesktop.org/archives/piglit/2015-May/016127.html

With that patch the tests for gs and vs are currently failing on
Ivybridge, but this patch fixes them. There are no other changes to a
Piglit run on Ivybridge.

On Skylake the gs tests were failing even without the Piglit patch
because Skylake needs the source registers to work correctly in order
to send a message header to select SIMD4x2 mode.
---
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index ef77b8d..20d096c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -422,6 +422,9 @@ vec4_generator::generate_tex(vec4_instruction *inst,
 
   brw_pop_insn_state(p);
 
+  if (inst->base_mrf != -1)
+ gen6_resolve_implied_move(p, &src, inst->base_mrf);
+
   /* dst = send(offset, a0.0 | ) */
   brw_inst *insn = brw_send_indirect_message(
  p, BRW_SFID_SAMPLER, dst, src, addr);
-- 
1.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/6] i965: Extract tiling from fast clear decision

2015-05-28 Thread Jordan Justen
On 2015-05-28 10:21:31, Ben Widawsky wrote:
> There are several constraints when determining if one can fast clear a 
> surface.
> Some of these are alignment, pixel density, tiling formats, and others that 
> vary
> by generation. The helper function which exists today does a suitable job,
> however it conflates "BO properties" with "Miptree properties" when using
> tiling. I consider the former to be attributes of the physical surface, things
> which are determined through BO allocation, and the latter being attributes
> which are derived from the API, and having nothing to do with the underlying
> surface.
> 
> Tiling properties are a distinct operation from the creation of a miptree, and
> by removing this, we gain flexibility throughout the code to make 
> determinations
> about when we can or cannot fast clear strictly on the miptree.
> 
> To signify this change, I've also renamed the function to indicate it is a
> distinction made on the miptree. I am torn as to whether or not it was a good
> idea to remove "non_msrt" since it's a really nice thing for grep.
> 
> Cc: Chad Versace 
> Signed-off-by: Ben Widawsky 
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 37 
> +++
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 11 
>  2 files changed, 32 insertions(+), 16 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 68d405c..75ee19a 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -158,15 +158,33 @@ intel_get_non_msrt_mcs_alignment(struct brw_context 
> *brw,
> }
>  }
>  
> +/**
> + * Determine the BO backing the miptree has a suitable tile format. For Gen7,

"Determine if" ...

I kind of thought patch 2 & 3 could be swapped, but it doesn't seem
too important.

1-3 Reviewed-by: Jordan Justen 

> + * and 8 this means any tiled format.
> + *
> + * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render 
> Target(s)",
> + * beneath the "Fast Color Clear" bullet (p326):
> + *
> + * - Support is limited to tiled render targets.
> + */
> +bool
> +intel_is_non_msrt_mcs_tile_supported(struct brw_context *brw,
> + unsigned tiling)
> +{
> +   if (brw->gen >= 9)
> +  return tiling == I915_TILING_Y;
> +
> +   return tiling != I915_TILING_NONE;
> +}
>  
>  /**
>   * For a single-sampled render target ("non-MSRT"), determine if an MCS 
> buffer
> - * can be used.
> + * can be used. This doesn't (and should not) inquire about the BO 
> properties of
> + * the buffer.
>   *
>   * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render 
> Target(s)",
>   * beneath the "Fast Color Clear" bullet (p326):
>   *
> - * - Support is limited to tiled render targets.
>   * - Support is for non-mip-mapped and non-array surface types only.
>   *
>   * And then later, on p327:
> @@ -175,8 +193,8 @@ intel_get_non_msrt_mcs_alignment(struct brw_context *brw,
>   *   64bpp, and 128bpp.
>   */
>  bool
> -intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw,
> -   struct intel_mipmap_tree *mt)
> +intel_miptree_is_fast_clear_capable(struct brw_context *brw,
> +struct intel_mipmap_tree *mt)
>  {
> /* MCS support does not exist prior to Gen7 */
> if (brw->gen < 7)
> @@ -193,11 +211,6 @@ intel_is_non_msrt_mcs_buffer_supported(struct 
> brw_context *brw,
>return false;
> }
>  
> -   if (brw->gen >= 9 && mt->tiling != I915_TILING_Y)
> -  return false;
> -   if (mt->tiling != I915_TILING_X &&
> -   mt->tiling != I915_TILING_Y)
> -  return false;
> if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16)
>return false;
> if (mt->first_level != 0 || mt->last_level != 0)
> @@ -720,7 +733,8 @@ intel_miptree_create(struct brw_context *brw,
>  * Allocation of the MCS miptree will be deferred until the first fast
>  * clear actually occurs.
>  */
> -   if (intel_is_non_msrt_mcs_buffer_supported(brw, mt))
> +   if (intel_is_non_msrt_mcs_tile_supported(brw, mt->tiling) &&
> +   intel_miptree_is_fast_clear_capable(brw, mt))
>mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
>  
> return mt;
> @@ -819,7 +833,8 @@ intel_update_winsys_renderbuffer_miptree(struct 
> brw_context *intel,
>  * Allocation of the MCS miptree will be deferred until the first fast
>  * clear actually occurs.
>  */
> -   if (intel_is_non_msrt_mcs_buffer_supported(intel, singlesample_mt))
> +   if (intel_is_non_msrt_mcs_tile_supported(intel, singlesample_mt->tiling) 
> &&
> +   intel_miptree_is_fast_clear_capable(intel, singlesample_mt))
>singlesample_mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
>  
> if (num_samples == 0) {
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 
> b/src/mesa/drivers/dri/i965/intel_m

Re: [Mesa-dev] [PATCH 4/6] i965/gen8: Correct HALIGN for AUX surfaces

2015-05-28 Thread Jordan Justen
On 2015-05-28 10:21:32, Ben Widawsky wrote:
> This restriction was attempted in this commit:
> commit 47053464630888f819ef8cc44278f1a1220159b9
> Author: Anuj Phogat 
> Date:   Fri Feb 13 11:21:21 2015 -0800
> 
>i965/gen8: Use HALIGN_16 if MCS is enabled for non-MSRT
> 
> However, the commit itself doesn't achieve the desired goal as determined by 
> the
> asserts which the next patch adds.

next-ish :)

> mcs_mt is never a value because we're in the
> process of allocating the mcs_mt miptree when we get to this function. I 
> didn't
> check, but perhaps this would work with blorp, however, meta clears allocate 
> the
> miptree structure (which AFAICT needs the alignment also) way before it
> allocates using meta clears where the renderbuffer is allocated way before the
> aux buffer.
> 
> The restriction is referenced in a few places, but the most concise one [IMO]
> from the spec is for Gen9. Gen8 8 loosens the restriction in that it only

bonus 8

4-6 Reviewed-by: Jordan Justen 

> requires this for non-msrt surface.
> 
>When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN 16 
> must
>be used.
> 
> With the code before the miptree layout flag rework (patches preceding this),
> accomplishing this workaround is very difficult.
> 
> Cc: Anuj Phogat 
> Cc: Neil Roberts 
> Signed-off-by: Ben Widawsky 
> ---
>  src/mesa/drivers/dri/i965/brw_tex_layout.c| 16 ++--
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 15 ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  4 +++-
>  3 files changed, 25 insertions(+), 10 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c 
> b/src/mesa/drivers/dri/i965/brw_tex_layout.c
> index 72b02a2..b51c7c7 100644
> --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
> +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
> @@ -41,8 +41,13 @@
>  
>  static unsigned int
>  intel_horizontal_texture_alignment_unit(struct brw_context *brw,
> -struct intel_mipmap_tree *mt)
> +struct intel_mipmap_tree *mt,
> +uint32_t layout_flags)
>  {
> +
> +   if (layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16)
> +  return 16;
> +
> /**
>  * From the "Alignment Unit Size" section of various specs, namely:
>  * - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4
> @@ -91,9 +96,6 @@ intel_horizontal_texture_alignment_unit(struct brw_context 
> *brw,
> if (brw->gen >= 7 && mt->format == MESA_FORMAT_Z_UNORM16)
>return 8;
>  
> -   if (brw->gen == 8 && mt->mcs_mt && mt->num_samples <= 1)
> -  return 16;
> -
> return 4;
>  }
>  
> @@ -459,7 +461,8 @@ brw_miptree_layout_texture_3d(struct brw_context *brw,
>  }
>  
>  void
> -brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt)
> +brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt,
> +   uint32_t layout_flags)
>  {
> bool multisampled = mt->num_samples > 1;
> bool gen6_hiz_or_stencil = false;
> @@ -492,8 +495,9 @@ brw_miptree_layout(struct brw_context *brw, struct 
> intel_mipmap_tree *mt)
>   mt->align_w = 128 / mt->cpp;
>   mt->align_h = 32;
>}
> +  assert((layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0);
> } else {
> -  mt->align_w = intel_horizontal_texture_alignment_unit(brw, mt);
> +  mt->align_w = intel_horizontal_texture_alignment_unit(brw, mt, 
> layout_flags);
>mt->align_h =
>   intel_vertical_texture_alignment_unit(brw, mt->format, 
> multisampled);
> }
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 75ee19a..a1ac0cf 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -475,7 +475,10 @@ intel_miptree_create_layout(struct brw_context *brw,
> if (layout_flags & MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD)
>mt->array_layout = ALL_SLICES_AT_EACH_LOD;
>  
> -   brw_miptree_layout(brw, mt);
> +   if (intel_miptree_is_fast_clear_capable(brw, mt))
> +  layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
> +
> +   brw_miptree_layout(brw, mt, layout_flags);
>  
> if (mt->disable_aux_buffers)
>assert(mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS);
> @@ -722,6 +725,7 @@ intel_miptree_create(struct brw_context *brw,
>  
>  
> if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
> +  assert(mt->num_samples > 1);
>if (!intel_miptree_alloc_mcs(brw, mt, num_samples)) {
>   intel_miptree_release(&mt);
>   return NULL;
> @@ -734,8 +738,10 @@ intel_miptree_create(struct brw_context *brw,
>  * clear actually occurs.
>  */
> if (intel_is_non_msrt_mcs_tile_supported(brw, mt->tiling) &&
> -   intel_miptree_is_fast_clear_capable(brw, mt))
> +   intel_miptree_is_fast_clear_capable(brw, mt)) {
>mt->fast_cl

[Mesa-dev] Ideas on loop unrolling, loop examples, and my GSoC-blog

2015-05-28 Thread Thomas Helland
Hi everyone,

For those interested I will be writing a blog at:
https://hellthom.wordpress.com/
where I will be documenting my progress in GSoC (at least bi-weekly).
It also has some info about me in the "about" section.
If there is anything else you want to know, or think I should add, just holla.

Now for the actual reason for the mail. I'm seeking opinions and thoughts on the
loop unrolling part of my GSoC project.

First, to get started, are some examples of loops from my shader-db:

for (i = -fsize.x; i <= fsize.x; i++)
{{
color += texture2D(tex, tc + i * texscale.xy, 0.0);
}} // EndBox1/2.


for (i.x = -fsize.x; i.x <= fsize.x; i.x++)
{
for (i.y = -fsize.y; i.y <= fsize.y; i.y++)
{
color += texture2D(tex, tc + i * texscale.xy, 0.0);
}} // EndBox1/2.


for (int rep1 = 0; rep1 < ps_i0.x; rep1++) {
t1_ps.zw = (t0_ps.yy * t1_ps.xy) + t0_ps.xz;
t3_ps = texture2D(g_samScreenImage, t1_ps.zw);
t2_ps.xyz = t2_ps.xyz + t3_ps.xyz;
t0_ps.y = t0_ps.y + ps_c0.x;
}


for (int i = 0; i < LIGHT_COUNT; i++){
vec3 lightVec = invRadius[i] * (lightPos[i] - gl_Vertex.xyz);

#ifdef SHADOWS
shadowVec[i] = -lightVec;
#endif
lVec[i].x = dot(lightVec, tangent);
lVec[i].y = dot(lightVec, binormal);
lVec[i].z = dot(lightVec, normal);
}


for(i = 0.0, f = 1.0; i < ScaleSteps.w; ++i, f *= 0.5)
   RT += OffsetVector * (step(dp_textureGrad(Texture_Normal, RT.xy,
dPdx, dPdy).a, RT.z) * f - 0.5 * f);


for (int l_3 = 0; l_3 < 28; l_3++) {
vec4 tmpvar_10;
tmpvar_10.xy = vec2(1.2, 1.2);
tmpvar_10.zw = DiscKernel[l_3].zz;
vec4 tmpvar_11;
tmpvar_11 = (tmpvar_1.xyxy + ((DiscKernel[l_3].xyxy *
poissonScale_5.xyxy) / tmpvar_10));
vec4 tmpvar_12;
tmpvar_12 = texture2D (_MainTex, tmpvar_11.xy);
vec4 tmpvar_13;
tmpvar_13 = texture2D (_MainTex, tmpvar_11.zw);
if (((tmpvar_12.w + tmpvar_13.w) > 0.0)) {
  vec2 tmpvar_14;
  tmpvar_14.y = 1.0;
  tmpvar_14.x = (DiscKernel[l_3].z / 1.2);
  vec2 tmpvar_15;
  tmpvar_15.x = tmpvar_12.w;
  tmpvar_15.y = tmpvar_13.w;
  vec2 tmpvar_16;
  vec2 tmpvar_17;
  tmpvar_17 = clamp (((
(tmpvar_15 - (centerTap_7.ww * tmpvar_14))
   - vec2(-0.265, -0.265)) / vec2(0.265, 0.265)), 0.0, 1.0);
  tmpvar_16 = (tmpvar_17 * (tmpvar_17 * (3.0 -
(2.0 * tmpvar_17)
  )));
  sum_6 = (sum_6 + ((tmpvar_12 * tmpvar_16.x) + (tmpvar_13 * tmpvar_16.y)));
  sampleCount_4 = (sampleCount_4 + dot (tmpvar_16, vec2(1.0, 1.0)));
};
  };

Most of the loops are simple; array defines or lookups with a for loop.
No magic at all. Usually only one or two assignments in the loop body.
Then there are the nested loops from dota 2,
and there are also some cases of loops that have large bodies.
I have however not managed to grep up any loops apart from for-loops.
Seems for-loops is the goto loops, at least for my collection of shaders.

Some of the ideas and plans I've made so far:

Split the work into two parts:
First I will write a loop analysis pass. I think it makes sense to separate
this out into its own file, as the information it can gather can be usefull.
Things that come to my mind are loop invariant code motion
and strength reduction passes. Then i will write the actual
unroll pass when the analysis pass is done.

Add some new data to the metadata system.
At the very least an is_invariant variable. This should track if the
ssa-value is loop invariant. To avoid the issue with nested loops a
ssa-variable marked as invariant is only invariant in the loop it is present.
They are invariant if the operands are all in the preceding block,
if the operands are invariant in the same loop as we are in,
or a combination of the two.

There should probably also be an is_induction_variable flag
in the metadata, as this will be shared between loop unrolling
and a possible strength reduction pass (and probably others too).
I need to think a bit more about this though.
There should probably be some kind of relation between basic
induction variables (int i) and derived variables.

On the subject of loop analysis, does anyone happen to know of
a good paper on induction variable detection in ssa?
Loop invariants are simple enough, but induction variables are
a bit harder. Luckily there seems to be only linear induction
variables as far as I can detect. The (int i = 0; i < max; i++)
pattern is recurring a lot, with the random case of variables
that all fit in the form i + i*a, where a is loop invariant.
So I don't think we need a to complicated algorithm.

For the case with nested loops I was thinking we might want to
go for unroll-and-jam. I think I might have managed to somehow
loose an important paper I had about nested loops, but there where
some other possibilities for handling nested loops that might be
worth taking a closer look at. I recall there being a better
performing solution than unroll-and-squash.
But this is a bit up and into the future still.

One more thing;
Is there a limit where the loop body gets so large that we
want to decide tha

Re: [Mesa-dev] [PATCH 1/2] configure.ac: enable building GLES1 and GLES2 by default

2015-05-28 Thread Matt Turner
On Thu, May 28, 2015 at 5:07 AM, Emil Velikov  wrote:
> On 27/05/15 16:59, Matt Turner wrote:
>> On Wed, May 27, 2015 at 4:53 AM, Emil Velikov  
>> wrote:
>>> On 27 May 2015 at 11:23, Dave Airlie  wrote:
> Wow, I hadn't expected such a hateful comment on GLES1.
>
> Does anyone else want to convince me that GLES1 should burn in hell?

 So I dug around,

 commit 4c06853833996d990eb76b195ca5d6838c6f3d6b
 Author: Adam Jackson 
 Date:   Wed May 8 18:03:21 2013 -0400

 Switch to Mesa master (pre 9.2)

 - Fix llvmpipe on big-endian and enable llvmpipe everywhere
 - Build vdpau drivers for r600/radeonsi/nouveau
 - Enable hardware floating-point texture support
 - Drop GLESv1, nothing's using it, let's not start

 So at least in Fedora 2 years ago, we realised there was no GLES1
 users in the distro,
 and we didn't want to encourage any.

 I suppose some users might exist outside the classic Linux distro
 world. i.e. android, embedded land.

>>> At least three other distros have GLES1 - Arch, Debian and OpenSUSE.
>>> So imho one should just leave the decision to depreciate/kill it off
>>> to the distros ?
>>
>> No one is suggesting deleting GLES1.
>>
> Ack - I'm aware of that. I'm not sure which part of my statement led
> came out as the opposite. If you can point me out as such that'll be
> appreciated.

The part I quoted specifically -- "So imho one should just leave the
decision to depreciate/kill it off to the distros ?"

Since the thread is about defaults... I have no idea what you mean.

>> I think distributions are going to inevitably ship things they have no
>> reason to, regardless of our defaults, often because they don't know.
>> The best we can do is guide their hands. If GLES1 is unused, we should
>> note that in configure.ac to better communicate it to the
>> distributions.
>>
> No objection on the hand-holding, but the idea that Mesa knows which
> distro does not ship/have GLES1 compatible software sounds a bit
> strange. So I was pointing out that some (be that by mistake or not)
> still have/use it.

Let me give an example from my experience as a Gentoo developer:

Cairo has a number of backends -- Image, Skia, Xlib, PDF, SVG, OpenGL,
GLESv2, DirectFB, OpenVG, DRM, Cogl, etc. Users don't have to
understand what any of these are in order to want them, and they'll
file bugs requesting we enable them [1]. Unless distribution
maintainers are aware that the features are not useful, they'll likely
just turn them on.

Cairo recently added some text to configure.ac to tell users that the
OpenGL and GLESv2 backends are experimental:

--- The OpenGL surface backend feature is still under active development and
--- is included in this release only as a preview. It does NOT fully work yet
--- and incompatible changes may yet be made to OpenGL surface backend
--- specific API.

> At the end of the day, do as you please. The argument of "distro X
> defaults to Y, so mesa should do the same", just seems strange to me.

I don't think anyone is making that argument either...

> Similar to Marek, I do wonder about the hostility towards GLES1. Sure it
> is a bit old, and the API is not the most flexible/useful but there are
> dozens (hundreds?) of projects that fit the criteria :)

You seem to be making the argument that we should build GLESv1 by
default because distributions ship it. I'm claiming that distributions
shipping it are doing so without understanding that there are zero
known uses of GLESv1 on desktop Linux.

That said, I don't really care whether GLES v1 or v2 are built by default.

[1] https://bugs.gentoo.org/show_bug.cgi?id=428770
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/6] i965: Consolidate certain miptree params to flags

2015-05-28 Thread Anuj Phogat
On Thu, May 28, 2015 at 10:21 AM, Ben Widawsky
 wrote:
> I think pretty much everyone agrees that having more than a single bool as a
> function argument is bordering on a bad idea. What sucks about the current
> code is in several instances it's necessary to propagate these boolean
> selections down to lower layers of the code. This requires plumbing 
> (mechanical,
> but still churn) pretty much all of the miptree functions each time.  By
> introducing the flags paramater, it is possible to add miptree constraints 
> very
> easily.
>
> The use of this, as is already the case, is sometimes we have some information
> at the time we create the miptree that needs to be known all the way at the
> lowest levels of the create/allocation, disable_aux_buffers is currently one
> such example. There will be another example coming up in a few patches.
>
> Cc: Chad Versace 
> Signed-off-by: Ben Widawsky 
> ---
>  src/mesa/drivers/dri/i965/intel_fbo.c  |  5 +-
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c  | 86 
> +-
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.h  |  9 ++-
>  src/mesa/drivers/dri/i965/intel_pixel_draw.c   |  2 +-
>  src/mesa/drivers/dri/i965/intel_tex.c  |  8 +--
>  src/mesa/drivers/dri/i965/intel_tex.h  |  2 +-
>  src/mesa/drivers/dri/i965/intel_tex_image.c| 14 ++---
>  src/mesa/drivers/dri/i965/intel_tex_validate.c |  3 +-
>  8 files changed, 63 insertions(+), 66 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c 
> b/src/mesa/drivers/dri/i965/intel_fbo.c
> index aebed72..1b3a72f 100644
> --- a/src/mesa/drivers/dri/i965/intel_fbo.c
> +++ b/src/mesa/drivers/dri/i965/intel_fbo.c
> @@ -390,7 +390,7 @@ intel_image_target_renderbuffer_storage(struct gl_context 
> *ctx,
>   image->height,
>   1,
>   image->pitch,
> - true /*disable_aux_buffers*/);
> + MIPTREE_LAYOUT_DISABLE_AUX);
> if (!irb->mt)
>return;
>
> @@ -1027,10 +1027,9 @@ intel_renderbuffer_move_to_temp(struct brw_context 
> *brw,
>   intel_image->base.Base.Level,
>   intel_image->base.Base.Level,
>   width, height, depth,
> - true,
>   irb->mt->num_samples,
>   INTEL_MIPTREE_TILING_ANY,
> - false);
> + MIPTREE_LAYOUT_ACCELERATED_UPLOAD);
>
> if (intel_miptree_wants_hiz_buffer(brw, new_mt)) {
>intel_miptree_alloc_hiz(brw, new_mt);
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 24a5c3d..b243f8a 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -244,10 +244,8 @@ intel_miptree_create_layout(struct brw_context *brw,
>  GLuint width0,
>  GLuint height0,
>  GLuint depth0,
> -bool for_bo,
>  GLuint num_samples,
> -bool force_all_slices_at_each_lod,
> -bool disable_aux_buffers)
> +uint32_t layout_flags)
>  {
> struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
> if (!mt)
> @@ -286,7 +284,7 @@ intel_miptree_create_layout(struct brw_context *brw,
> mt->logical_height0 = height0;
> mt->logical_depth0 = depth0;
> mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
> -   mt->disable_aux_buffers = disable_aux_buffers;
> +   mt->disable_aux_buffers = !!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX);
> exec_list_make_empty(&mt->hiz_map);
>
> /* The cpp is bytes per (1, blockheight)-sized block for compressed
> @@ -422,12 +420,14 @@ intel_miptree_create_layout(struct brw_context *brw,
> mt->physical_height0 = height0;
> mt->physical_depth0 = depth0;
>
> -   if (!for_bo &&
> +   if (!(layout_flags & MIPTREE_LAYOUT_FOR_BO) &&
> _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL &&
> (brw->must_use_separate_stencil ||
> (brw->has_separate_stencil &&
>   intel_miptree_wants_hiz_buffer(brw, mt {
> -  const bool force_all_slices_at_each_lod = brw->gen == 6;
> +  uint32_t stencil_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
> +  if (brw->gen == 6)
> + stencil_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD;
>mt->stencil_mt = intel_miptree_create(brw,
>  mt->target,
>  MESA_FORMAT_S_UINT8,
> @@ -436,10 +436,10 @@ intel_miptree_create_layout(struct brw_context *brw,
>   

Re: [Mesa-dev] [PATCH 2/5] mesa/es3.1: Enable ES 3.1 API and shading language version

2015-05-28 Thread Kenneth Graunke
On Wednesday, May 27, 2015 11:18:13 AM Ian Romanick wrote:
> On 05/27/2015 10:36 AM, Tapani wrote:
> > On 05/26/2015 10:37 PM, Ian Romanick wrote:
> >> From: Ian Romanick 
> >>
> >> This is a bit of a hack for now.  Several of the extensions required for
> >> OpenGL ES 3.1 have no support, at all, in Mesa.  However, with this
> >> patch and a patch to allow MESA_GL_VERSION_OVERRIDE to work with ES
> >> contexts, people can begin testing the ES "version" of the functionality
> >> that is supported.
> >>
> >> Signed-off-by: Ian Romanick 
> >> ---
> >>   src/mesa/main/getstring.c | 16 
> >>   src/mesa/main/version.c   | 18 +-
> >>   2 files changed, 29 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/src/mesa/main/getstring.c b/src/mesa/main/getstring.c
> >> index 1b2c7f0..72d99ca 100644
> >> --- a/src/mesa/main/getstring.c
> >> +++ b/src/mesa/main/getstring.c
> >> @@ -72,10 +72,18 @@ shading_language_version(struct gl_context *ctx)
> >> break;
> >>case API_OPENGLES2:
> >> -  return (ctx->Version < 30)
> >> - ? (const GLubyte *) "OpenGL ES GLSL ES 1.0.16"
> >> - : (const GLubyte *) "OpenGL ES GLSL ES 3.00";
> >> -
> >> +  switch (ctx->Version) {
> >> +  case 20:
> >> + return (const GLubyte *) "OpenGL ES GLSL ES 1.0.16";
> > 
> > There's revision 17 (1.0.17) out there, should we have it here or rather
> > just 1.0?
> 
> We could bump it or leave it.  I don't think it matters much either way.
>  Section 6.1.5 (String Queries) of the OpenGL ES 2.0.25 spec says:
> 
> "The SHADING_LANGUAGE_VERSION string is laid out as follows:
> 
> "OpenGL ES GLSL ES N.M vendor-specific information"
> 
> The version number is either of the form major_number.minor_number
> or major_number.minor_number.release_number, where the numbers all
> have one or more digits. The release number and vendor specific
> information are optional.  However, if present, then they pertain
> to the server and their format and contents are implementation-
> dependent."

I think I'd rather just advertise 1.0 - I don't think 1.0.X communicates
any real useful information to applications.  We don't do that for
regular GLSL.

*shrug*

--Ken


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] softpipe: fix offset wrapping calculations (v2)

2015-05-28 Thread Dave Airlie
Roland pointed out my previous attempt was lacking, so I enhanced the
texwrap piglit test, and tested them. This fixes the offset calculations
in a number of areas by adding the offset first, it also fixes the fastpaths,
which I forgot to address in the previous commit.

v2: try and avoid divides in most paths, the repeat mirror path
really was ugly no matter which way I went, so I left it having
the divide.
Also fix the gather lod calculation bug.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 146 +--
 1 file changed, 68 insertions(+), 78 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 4ac3498..1010b63 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -145,14 +145,14 @@ wrap_nearest_clamp(float s, unsigned size, int offset, 
int *icoord)
 {
/* s limited to [0,1] */
/* i limited to [0,size-1] */
+   s *= size;
+   s += offset;
if (s <= 0.0F)
   *icoord = 0;
-   else if (s >= 1.0F)
+   else if (s >= size)
   *icoord = size - 1;
else
-  *icoord = util_ifloor(s * size);
-   if (offset)
-  *icoord = CLAMP(*icoord + offset, 0, size - 1);
+  *icoord = util_ifloor(s);
 }
 
 
@@ -161,17 +161,18 @@ wrap_nearest_clamp_to_edge(float s, unsigned size, int 
offset, int *icoord)
 {
/* s limited to [min,max] */
/* i limited to [0, size-1] */
-   const float min = 1.0F / (2.0F * size);
-   const float max = 1.0F - min;
+   const float min = 0.5F;
+   const float max = (float)size - 0.5F;
+
+   s *= size;
+   s += offset;
 
if (s < min)
   *icoord = 0;
else if (s > max)
   *icoord = size - 1;
else
-  *icoord = util_ifloor(s * size);
-   if (offset)
-  *icoord = CLAMP(*icoord + offset, 0, size - 1);
+  *icoord = util_ifloor(s);
 }
 
 
@@ -180,26 +181,30 @@ wrap_nearest_clamp_to_border(float s, unsigned size, int 
offset, int *icoord)
 {
/* s limited to [min,max] */
/* i limited to [-1, size] */
-   const float min = -1.0F / (2.0F * size);
-   const float max = 1.0F - min;
+   const float min = -0.5F;
+   const float max = size + 0.5F;
+
+   s *= size;
+   s += offset;
if (s <= min)
   *icoord = -1;
else if (s >= max)
   *icoord = size;
else
-  *icoord = util_ifloor(s * size);
-   if (offset)
-  *icoord = CLAMP(*icoord + offset, 0, size - 1);
+  *icoord = util_ifloor(s);
 }
 
-
 static void
 wrap_nearest_mirror_repeat(float s, unsigned size, int offset, int *icoord)
 {
const float min = 1.0F / (2.0F * size);
const float max = 1.0F - min;
-   const int flr = util_ifloor(s);
-   float u = frac(s);
+   int flr;
+   float u;
+
+   s += (float)offset / size;
+   flr = util_ifloor(s);
+   u = frac(s);
if (flr & 1)
   u = 1.0F - u;
if (u < min)
@@ -208,8 +213,6 @@ wrap_nearest_mirror_repeat(float s, unsigned size, int 
offset, int *icoord)
   *icoord = size - 1;
else
   *icoord = util_ifloor(u * size);
-   if (offset)
-  *icoord = CLAMP(*icoord + offset, 0, size - 1);
 }
 
 
@@ -218,15 +221,13 @@ wrap_nearest_mirror_clamp(float s, unsigned size, int 
offset, int *icoord)
 {
/* s limited to [0,1] */
/* i limited to [0,size-1] */
-   const float u = fabsf(s);
+   const float u = fabsf(s * size + offset);
if (u <= 0.0F)
   *icoord = 0;
-   else if (u >= 1.0F)
+   else if (u >= size)
   *icoord = size - 1;
else
-  *icoord = util_ifloor(u * size);
-   if (offset)
-  *icoord = CLAMP(*icoord + offset, 0, size - 1);
+  *icoord = util_ifloor(u);
 }
 
 
@@ -235,36 +236,33 @@ wrap_nearest_mirror_clamp_to_edge(float s, unsigned size, 
int offset, int *icoor
 {
/* s limited to [min,max] */
/* i limited to [0, size-1] */
-   const float min = 1.0F / (2.0F * size);
-   const float max = 1.0F - min;
-   const float u = fabsf(s);
+   const float min = 0.5F;
+   const float max = (float)size - 0.5F;
+   const float u = fabsf(s * size + offset);
+
if (u < min)
   *icoord = 0;
else if (u > max)
   *icoord = size - 1;
else
-  *icoord = util_ifloor(u * size);
-   if (offset)
-  *icoord = CLAMP(*icoord + offset, 0, size - 1);
+  *icoord = util_ifloor(u);
 }
 
 
 static void
 wrap_nearest_mirror_clamp_to_border(float s, unsigned size, int offset, int 
*icoord)
 {
-   /* s limited to [min,max] */
-   /* i limited to [0, size-1] */
-   const float min = -1.0F / (2.0F * size);
-   const float max = 1.0F - min;
-   const float u = fabsf(s);
+   /* u limited to [-0.5, size-0.5] */
+   const float min = -0.5F;
+   const float max = (float)size + 0.5F;
+   const float u = fabsf(s * size + offset);
+
if (u < min)
   *icoord = -1;
else if (u > max)
   *icoord = size;
else
-  *icoord = util_ifloor(u * size);
-   if (offset)
-  *icoord = CLAMP(*icoord + offset, 0, size - 1);
+  *icoord = util_ifloor(u);
 }
 
 
@@ -29

[Mesa-dev] PIPE_SHADER_* vs TGSI_PROCESSOR_*

2015-05-28 Thread Marek Olšák
Hi,

Would people be okay with removing the TGSI_PROCESSOR_ enums and
replacing all usages with PIPE_SHADER_*?

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/5] mesa: use _mesa_has_geometry_shader in get_programiv

2015-05-28 Thread Marek Olšák
From: Marek Olšák 

---
 src/mesa/main/shaderapi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index a04b287..e06942b 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -532,7 +532,7 @@ get_programiv(struct gl_context *ctx, GLuint program, 
GLenum pname,
/* True if geometry shaders (of the form that was adopted into GLSL 1.50
 * and GL 3.2) are available in this context
 */
-   const bool has_core_gs = _mesa_is_desktop_gl(ctx) && ctx->Version >= 32;
+   const bool has_core_gs = _mesa_has_geometry_shaders(ctx);
 
/* Are uniform buffer objects available in this context?
 */
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/5] mesa: remove useless gl_compute_program_state::Current

2015-05-28 Thread Marek Olšák
From: Marek Olšák 

This is for user assembly shaders only (not GLSL). We won't support those.
---
 src/mesa/main/mtypes.h | 2 --
 src/mesa/program/program.c | 3 ---
 2 files changed, 5 deletions(-)

diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 96ef060..47be72d 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2314,8 +2314,6 @@ struct gl_fragment_program_state
  */
 struct gl_compute_program_state
 {
-   struct gl_compute_program *Current;  /**< user-bound compute program */
-
/** Currently enabled and valid program (including internal programs
 * and compiled shader programs).
 */
diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index f0a47ac..1167adf 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -97,8 +97,6 @@ _mesa_init_program(struct gl_context *ctx)
assert(ctx->FragmentProgram.Current);
ctx->FragmentProgram.Cache = _mesa_new_program_cache();
 
-   _mesa_reference_compprog(ctx, &ctx->ComputeProgram.Current, NULL);
-
/* XXX probably move this stuff */
ctx->ATIFragmentShader.Enabled = GL_FALSE;
ctx->ATIFragmentShader.Current = ctx->Shared->DefaultFragmentShader;
@@ -117,7 +115,6 @@ _mesa_free_program_data(struct gl_context *ctx)
_mesa_delete_program_cache(ctx, ctx->VertexProgram.Cache);
_mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, NULL);
_mesa_delete_shader_cache(ctx, ctx->FragmentProgram.Cache);
-   _mesa_reference_compprog(ctx, &ctx->ComputeProgram.Current, NULL);
 
/* XXX probably move this stuff */
if (ctx->ATIFragmentShader.Current) {
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/5] mesa: remove unused gl_config::colorIndexMode

2015-05-28 Thread Marek Olšák
From: Marek Olšák 

---
 src/mesa/main/mtypes.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 47be72d..1ecadfe 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -398,7 +398,6 @@ struct gl_config
 {
GLboolean rgbMode;
GLboolean floatMode;
-   GLboolean colorIndexMode;  /* XXX is this used anywhere? */
GLuint doubleBufferMode;
GLuint stereoMode;
 
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/5] mesa: remove unused geometry shader variables

2015-05-28 Thread Marek Olšák
From: Marek Olšák 

These states are for GS assembly shaders only. We don't support those.
---
 src/mesa/main/context.c| 1 -
 src/mesa/main/mtypes.h | 7 ---
 src/mesa/main/shared.c | 1 -
 src/mesa/program/program.c | 9 -
 4 files changed, 18 deletions(-)

diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
index e4faf3d..483e9b1 100644
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -1333,7 +1333,6 @@ _mesa_free_context_data( struct gl_context *ctx )
_mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, NULL);
_mesa_reference_vertprog(ctx, &ctx->VertexProgram._TnlProgram, NULL);
 
-   _mesa_reference_geomprog(ctx, &ctx->GeometryProgram.Current, NULL);
_mesa_reference_geomprog(ctx, &ctx->GeometryProgram._Current, NULL);
 
_mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, NULL);
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 8342517..96ef060 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2275,16 +2275,10 @@ struct gl_vertex_program_state
  */
 struct gl_geometry_program_state
 {
-   GLboolean Enabled;   /**< GL_ARB_GEOMETRY_SHADER4 */
-   GLboolean _Enabled;  /**< Enabled and valid program? */
-   struct gl_geometry_program *Current;  /**< user-bound geometry program */
-
/** Currently enabled and valid program (including internal programs
 * and compiled shader programs).
 */
struct gl_geometry_program *_Current;
-
-   GLfloat Parameters[MAX_PROGRAM_ENV_PARAMS][4]; /**< Env params */
 };
 
 /**
@@ -3004,7 +2998,6 @@ struct gl_shared_state
struct _mesa_HashTable *Programs; /**< All vertex/fragment programs */
struct gl_vertex_program *DefaultVertexProgram;
struct gl_fragment_program *DefaultFragmentProgram;
-   struct gl_geometry_program *DefaultGeometryProgram;
/*@}*/
 
/* GL_ATI_fragment_shader */
diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c
index 0b76cc0..d5ac9f1 100644
--- a/src/mesa/main/shared.c
+++ b/src/mesa/main/shared.c
@@ -313,7 +313,6 @@ free_shared_state(struct gl_context *ctx, struct 
gl_shared_state *shared)
_mesa_DeleteHashTable(shared->Programs);
 
_mesa_reference_vertprog(ctx, &shared->DefaultVertexProgram, NULL);
-   _mesa_reference_geomprog(ctx, &shared->DefaultGeometryProgram, NULL);
_mesa_reference_fragprog(ctx, &shared->DefaultFragmentProgram, NULL);
 
_mesa_HashDeleteAll(shared->ATIShaders, delete_fragshader_cb, ctx);
diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index fb61f4d..f0a47ac 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -97,11 +97,6 @@ _mesa_init_program(struct gl_context *ctx)
assert(ctx->FragmentProgram.Current);
ctx->FragmentProgram.Cache = _mesa_new_program_cache();
 
-   ctx->GeometryProgram.Enabled = GL_FALSE;
-   /* right now by default we don't have a geometry program */
-   _mesa_reference_geomprog(ctx, &ctx->GeometryProgram.Current,
-NULL);
-
_mesa_reference_compprog(ctx, &ctx->ComputeProgram.Current, NULL);
 
/* XXX probably move this stuff */
@@ -122,7 +117,6 @@ _mesa_free_program_data(struct gl_context *ctx)
_mesa_delete_program_cache(ctx, ctx->VertexProgram.Cache);
_mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, NULL);
_mesa_delete_shader_cache(ctx, ctx->FragmentProgram.Cache);
-   _mesa_reference_geomprog(ctx, &ctx->GeometryProgram.Current, NULL);
_mesa_reference_compprog(ctx, &ctx->ComputeProgram.Current, NULL);
 
/* XXX probably move this stuff */
@@ -153,9 +147,6 @@ _mesa_update_default_objects_program(struct gl_context *ctx)
 ctx->Shared->DefaultFragmentProgram);
assert(ctx->FragmentProgram.Current);
 
-   _mesa_reference_geomprog(ctx, &ctx->GeometryProgram.Current,
-  ctx->Shared->DefaultGeometryProgram);
-
/* XXX probably move this stuff */
if (ctx->ATIFragmentShader.Current) {
   ctx->ATIFragmentShader.Current->RefCount--;
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/5] mesa: use GL_GEOMETRY_PROGRAM_NV instead of MESA_GEOMETRY_PROGRAM

2015-05-28 Thread Marek Olšák
From: Marek Olšák 

There's no reason to use our own definition.
Tessellation will use the NV definitions too.
---
 src/mesa/drivers/dri/i965/brw_program.c |  2 +-
 src/mesa/main/glheader.h|  6 --
 src/mesa/main/state.c   |  2 +-
 src/mesa/program/prog_print.c   |  2 +-
 src/mesa/program/program.c  | 12 ++--
 src/mesa/state_tracker/st_atom_shader.c |  2 +-
 src/mesa/state_tracker/st_cb_program.c  |  8 
 src/mesa/state_tracker/st_program.c |  2 +-
 8 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
b/src/mesa/drivers/dri/i965/brw_program.c
index e5c0d3c..414eab9 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -88,7 +88,7 @@ static struct gl_program *brwNewProgram( struct gl_context 
*ctx,
 return NULL;
}
 
-   case MESA_GEOMETRY_PROGRAM: {
+   case GL_GEOMETRY_PROGRAM_NV: {
   struct brw_geometry_program *prog = CALLOC_STRUCT(brw_geometry_program);
   if (prog) {
  prog->id = get_new_program_id(brw->intelScreen);
diff --git a/src/mesa/main/glheader.h b/src/mesa/main/glheader.h
index 7f7f9a3..a2d98d4 100644
--- a/src/mesa/main/glheader.h
+++ b/src/mesa/main/glheader.h
@@ -135,12 +135,6 @@ typedef void *GLeglImageOES;
 #define GL_SHADER_PROGRAM_MESA 0x
 
 
-/**
- * Internal token for geometry programs.
- * Use the value for GL_GEOMETRY_PROGRAM_NV for now.
- */
-#define MESA_GEOMETRY_PROGRAM 0x8c26
-
 /* Several fields of struct gl_config can take these as values.  Since
  * GLX header files may not be available everywhere they need to be used,
  * redefine them here.
diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c
index 2657c53..5b97008 100644
--- a/src/mesa/main/state.c
+++ b/src/mesa/main/state.c
@@ -225,7 +225,7 @@ update_program(struct gl_context *ctx)
if (ctx->GeometryProgram._Current != prevGP) {
   new_state |= _NEW_PROGRAM;
   if (ctx->Driver.BindProgram) {
- ctx->Driver.BindProgram(ctx, MESA_GEOMETRY_PROGRAM,
+ ctx->Driver.BindProgram(ctx, GL_GEOMETRY_PROGRAM_NV,
 (struct gl_program *) 
ctx->GeometryProgram._Current);
   }
}
diff --git a/src/mesa/program/prog_print.c b/src/mesa/program/prog_print.c
index d588d07..3d5c6e9 100644
--- a/src/mesa/program/prog_print.c
+++ b/src/mesa/program/prog_print.c
@@ -864,7 +864,7 @@ _mesa_fprint_program_opt(FILE *f,
   else
  fprintf(f, "# Fragment Program/Shader %u\n", prog->Id);
   break;
-   case MESA_GEOMETRY_PROGRAM:
+   case GL_GEOMETRY_PROGRAM_NV:
   fprintf(f, "# Geometry Shader\n");
}
 
diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index 1167adf..c13e61b 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -328,7 +328,7 @@ _mesa_new_program(struct gl_context *ctx, GLenum target, 
GLuint id)
  CALLOC_STRUCT(gl_fragment_program),
  target, id );
   break;
-   case MESA_GEOMETRY_PROGRAM:
+   case GL_GEOMETRY_PROGRAM_NV:
   prog = _mesa_init_geometry_program(ctx,
  CALLOC_STRUCT(gl_geometry_program),
  target, id);
@@ -414,8 +414,8 @@ _mesa_reference_program_(struct gl_context *ctx,
   else if ((*ptr)->Target == GL_FRAGMENT_PROGRAM_ARB)
  assert(prog->Target == GL_FRAGMENT_PROGRAM_ARB ||
 prog->Target == GL_FRAGMENT_PROGRAM_NV);
-  else if ((*ptr)->Target == MESA_GEOMETRY_PROGRAM)
- assert(prog->Target == MESA_GEOMETRY_PROGRAM);
+  else if ((*ptr)->Target == GL_GEOMETRY_PROGRAM_NV)
+ assert(prog->Target == GL_GEOMETRY_PROGRAM_NV);
}
 #endif
 
@@ -427,7 +427,7 @@ _mesa_reference_program_(struct gl_context *ctx,
   printf("Program %p ID=%u Target=%s  Refcount-- to %d\n",
  *ptr, (*ptr)->Id,
  ((*ptr)->Target == GL_VERTEX_PROGRAM_ARB ? "VP" :
-  ((*ptr)->Target == MESA_GEOMETRY_PROGRAM ? "GP" : "FP")),
+  ((*ptr)->Target == GL_GEOMETRY_PROGRAM_NV ? "GP" : "FP")),
  (*ptr)->RefCount - 1);
 #endif
   assert((*ptr)->RefCount > 0);
@@ -452,7 +452,7 @@ _mesa_reference_program_(struct gl_context *ctx,
   printf("Program %p ID=%u Target=%s  Refcount++ to %d\n",
  prog, prog->Id,
  (prog->Target == GL_VERTEX_PROGRAM_ARB ? "VP" :
-  (prog->Target == MESA_GEOMETRY_PROGRAM ? "GP" : "FP")),
+  (prog->Target == GL_GEOMETRY_PROGRAM_NV ? "GP" : "FP")),
  prog->RefCount);
 #endif
   /*mtx_unlock(&prog->Mutex);*/
@@ -542,7 +542,7 @@ _mesa_clone_program(struct gl_context *ctx, const struct 
gl_program *prog)
  fpc->PixelCenterInteger = fp->PixelCenterInteger;
   }
   break;
-   case MESA_GEOMETRY_PROGRAM:
+   case GL_GEOMETRY_PROGRAM_NV:
   {
  

Re: [Mesa-dev] [PATCH] i965: Disable compaction for EOT send messages

2015-05-28 Thread Matt Turner
On Wed, May 27, 2015 at 10:16 PM, Ben Widawsky
 wrote:
> AFAICT, there is no real way to make sure a send message with EOT is properly
> ignored from compact, nor can I see a way to actually encode EOT while
> compacting. Before the single send optimization we'd always bail because we 
> hit
> the is_immediate && !is_compactable_immediate case. However, with single send,
> is_immediate is not true, and so we end up trying to compact the 
> un-compactible.
>
> Without this, any compacting single send instruction will hang because the EOT
> isn't there. I am not sure how I didn't hit this when I originally enabled the
> optimization.  I didn't check if some surrounding code changed.
>
> NOTE: This needs another piglit run or two before merge.
>
> I know Neil and Matt were both looking into this. I did a quick search and
> didn't see any patches out there to handle this. Please ignore if this has
> already been sent by someone. (Direct me to it and I will review it).
>
> Cc: Matt Turner 
> Cc: Neil Roberts 
> Cc: Mark Janes 
> Signed-off-by: Ben Widawsky 

I haven't figured out precisely how EOT's getting lost yet, but Mark
says that with this patch we can actually complete a piglit run, so
let's do it.

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Disable compaction for EOT send messages

2015-05-28 Thread Mark Janes
This patch allows us to enable our Braswell in our continuous
integration pool.  Without it, gpu hangs prevent piglit from
completing.

Getting Braswell enabled in the pool will help us prevent future
regressions on the platform.

Tested-by: Mark Janes 

Ben Widawsky  writes:

> AFAICT, there is no real way to make sure a send message with EOT is properly
> ignored from compact, nor can I see a way to actually encode EOT while
> compacting. Before the single send optimization we'd always bail because we 
> hit
> the is_immediate && !is_compactable_immediate case. However, with single send,
> is_immediate is not true, and so we end up trying to compact the 
> un-compactible.
>
> Without this, any compacting single send instruction will hang because the EOT
> isn't there. I am not sure how I didn't hit this when I originally enabled the
> optimization.  I didn't check if some surrounding code changed.
>
> NOTE: This needs another piglit run or two before merge.
>
> I know Neil and Matt were both looking into this. I did a quick search and
> didn't see any patches out there to handle this. Please ignore if this has
> already been sent by someone. (Direct me to it and I will review it).
>
> Cc: Matt Turner 
> Cc: Neil Roberts 
> Cc: Mark Janes 
> Signed-off-by: Ben Widawsky 
> ---
>  src/mesa/drivers/dri/i965/brw_eu_compact.c | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c 
> b/src/mesa/drivers/dri/i965/brw_eu_compact.c
> index 69cb114..67f0b45 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
> +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
> @@ -849,6 +849,12 @@ set_3src_source_index(const struct brw_device_info 
> *devinfo,
>  static bool
>  has_unmapped_bits(const struct brw_device_info *devinfo, brw_inst *src)
>  {
> +   /* EOT can only be mapped on a send if the src1 is an immediate */
> +   if ((brw_inst_opcode(devinfo, src) == BRW_OPCODE_SENDC ||
> +brw_inst_opcode(devinfo, src) == BRW_OPCODE_SEND) &&
> +   brw_inst_eot(devinfo, src))
> +  return true;
> +
> /* Check for instruction bits that don't map to any of the fields of the
>  * compacted instruction.  The instruction cannot be compacted if any of
>  * them are set.  They overlap with:
> -- 
> 2.4.1
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] PIPE_SHADER_* vs TGSI_PROCESSOR_*

2015-05-28 Thread Dave Airlie
On 29 May 2015 at 09:36, Marek Olšák  wrote:
> Hi,
>
> Would people be okay with removing the TGSI_PROCESSOR_ enums and
> replacing all usages with PIPE_SHADER_*?

As long as there are no subtle bugs due to VERTEX and FRAGMENT
switching positions between the two,

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/5] mesa: remove unused geometry shader variables

2015-05-28 Thread Dave Airlie
On 29 May 2015 at 09:57, Marek Olšák  wrote:
> From: Marek Olšák 

All seem fine,

For the series,

Reviewed-by: Dave Airlie 

>
> These states are for GS assembly shaders only. We don't support those.
> ---
>  src/mesa/main/context.c| 1 -
>  src/mesa/main/mtypes.h | 7 ---
>  src/mesa/main/shared.c | 1 -
>  src/mesa/program/program.c | 9 -
>  4 files changed, 18 deletions(-)
>
> diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
> index e4faf3d..483e9b1 100644
> --- a/src/mesa/main/context.c
> +++ b/src/mesa/main/context.c
> @@ -1333,7 +1333,6 @@ _mesa_free_context_data( struct gl_context *ctx )
> _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, NULL);
> _mesa_reference_vertprog(ctx, &ctx->VertexProgram._TnlProgram, NULL);
>
> -   _mesa_reference_geomprog(ctx, &ctx->GeometryProgram.Current, NULL);
> _mesa_reference_geomprog(ctx, &ctx->GeometryProgram._Current, NULL);
>
> _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, NULL);
> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
> index 8342517..96ef060 100644
> --- a/src/mesa/main/mtypes.h
> +++ b/src/mesa/main/mtypes.h
> @@ -2275,16 +2275,10 @@ struct gl_vertex_program_state
>   */
>  struct gl_geometry_program_state
>  {
> -   GLboolean Enabled;   /**< GL_ARB_GEOMETRY_SHADER4 */
> -   GLboolean _Enabled;  /**< Enabled and valid program? */
> -   struct gl_geometry_program *Current;  /**< user-bound geometry program */
> -
> /** Currently enabled and valid program (including internal programs
>  * and compiled shader programs).
>  */
> struct gl_geometry_program *_Current;
> -
> -   GLfloat Parameters[MAX_PROGRAM_ENV_PARAMS][4]; /**< Env params */
>  };
>
>  /**
> @@ -3004,7 +2998,6 @@ struct gl_shared_state
> struct _mesa_HashTable *Programs; /**< All vertex/fragment programs */
> struct gl_vertex_program *DefaultVertexProgram;
> struct gl_fragment_program *DefaultFragmentProgram;
> -   struct gl_geometry_program *DefaultGeometryProgram;
> /*@}*/
>
> /* GL_ATI_fragment_shader */
> diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c
> index 0b76cc0..d5ac9f1 100644
> --- a/src/mesa/main/shared.c
> +++ b/src/mesa/main/shared.c
> @@ -313,7 +313,6 @@ free_shared_state(struct gl_context *ctx, struct 
> gl_shared_state *shared)
> _mesa_DeleteHashTable(shared->Programs);
>
> _mesa_reference_vertprog(ctx, &shared->DefaultVertexProgram, NULL);
> -   _mesa_reference_geomprog(ctx, &shared->DefaultGeometryProgram, NULL);
> _mesa_reference_fragprog(ctx, &shared->DefaultFragmentProgram, NULL);
>
> _mesa_HashDeleteAll(shared->ATIShaders, delete_fragshader_cb, ctx);
> diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
> index fb61f4d..f0a47ac 100644
> --- a/src/mesa/program/program.c
> +++ b/src/mesa/program/program.c
> @@ -97,11 +97,6 @@ _mesa_init_program(struct gl_context *ctx)
> assert(ctx->FragmentProgram.Current);
> ctx->FragmentProgram.Cache = _mesa_new_program_cache();
>
> -   ctx->GeometryProgram.Enabled = GL_FALSE;
> -   /* right now by default we don't have a geometry program */
> -   _mesa_reference_geomprog(ctx, &ctx->GeometryProgram.Current,
> -NULL);
> -
> _mesa_reference_compprog(ctx, &ctx->ComputeProgram.Current, NULL);
>
> /* XXX probably move this stuff */
> @@ -122,7 +117,6 @@ _mesa_free_program_data(struct gl_context *ctx)
> _mesa_delete_program_cache(ctx, ctx->VertexProgram.Cache);
> _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, NULL);
> _mesa_delete_shader_cache(ctx, ctx->FragmentProgram.Cache);
> -   _mesa_reference_geomprog(ctx, &ctx->GeometryProgram.Current, NULL);
> _mesa_reference_compprog(ctx, &ctx->ComputeProgram.Current, NULL);
>
> /* XXX probably move this stuff */
> @@ -153,9 +147,6 @@ _mesa_update_default_objects_program(struct gl_context 
> *ctx)
>  ctx->Shared->DefaultFragmentProgram);
> assert(ctx->FragmentProgram.Current);
>
> -   _mesa_reference_geomprog(ctx, &ctx->GeometryProgram.Current,
> -  ctx->Shared->DefaultGeometryProgram);
> -
> /* XXX probably move this stuff */
> if (ctx->ATIFragmentShader.Current) {
>ctx->ATIFragmentShader.Current->RefCount--;
> --
> 2.1.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Disable compaction for EOT send messages

2015-05-28 Thread Ben Widawsky
On Thu, May 28, 2015 at 04:58:23PM -0700, Matt Turner wrote:
> On Wed, May 27, 2015 at 10:16 PM, Ben Widawsky
>  wrote:
> > AFAICT, there is no real way to make sure a send message with EOT is 
> > properly
> > ignored from compact, nor can I see a way to actually encode EOT while
> > compacting. Before the single send optimization we'd always bail because we 
> > hit
> > the is_immediate && !is_compactable_immediate case. However, with single 
> > send,
> > is_immediate is not true, and so we end up trying to compact the 
> > un-compactible.
> >
> > Without this, any compacting single send instruction will hang because the 
> > EOT
> > isn't there. I am not sure how I didn't hit this when I originally enabled 
> > the
> > optimization.  I didn't check if some surrounding code changed.
> >
> > NOTE: This needs another piglit run or two before merge.
> >
> > I know Neil and Matt were both looking into this. I did a quick search and
> > didn't see any patches out there to handle this. Please ignore if this has
> > already been sent by someone. (Direct me to it and I will review it).
> >
> > Cc: Matt Turner 
> > Cc: Neil Roberts 
> > Cc: Mark Janes 
> > Signed-off-by: Ben Widawsky 
> 
> I haven't figured out precisely how EOT's getting lost yet, but Mark
> says that with this patch we can actually complete a piglit run, so
> let's do it.
> 
> Reviewed-by: Matt Turner 

Just a reminder... I gave you the quick explanation of what's going on in the
office an hour ago. Perhaps you can take a look and see if you can find a better
fix before I push. I am in no huge hurry.

Also, I should probably cc stable this for 10.6

-- 
Ben Widawsky, Intel Open Source Technology Center
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC][PATCH] gallivm: Avoid conflict with LLVM's definition of DEBUG

2015-05-28 Thread Michel Dänzer
On 28.05.2015 18:39, Jose Fonseca wrote:
> Hi Michel,
> 
> I'm sorry: I hadn't notice your review-request, and just pushed a
> similar fix.

No worries, though maybe this is an indication that it wasn't all that
trivial after all. :)


> I used push/pop_macro pragma instead. I think it's portable enough (at
> least gcc and msvc support it).  Your's is probable more portable
> though. I'm OK either way.

I'm fine with your change, I didn't know about those pragmas before.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] softpipe: fix offset wrapping calculations (v2)

2015-05-28 Thread Roland Scheidegger
Am 29.05.2015 um 01:25 schrieb Dave Airlie:
> Roland pointed out my previous attempt was lacking, so I enhanced the
> texwrap piglit test, and tested them. This fixes the offset calculations
> in a number of areas by adding the offset first, it also fixes the fastpaths,
> which I forgot to address in the previous commit.
> 
> v2: try and avoid divides in most paths, the repeat mirror path
> really was ugly no matter which way I went, so I left it having
> the divide.

Yes, I don't think there's a good way to do it for repeat mirror.
llvmpipe also does a divide there (actually, only for nearest, the
linear path doesn't, looks wrong to me but there probably weren't any
tests for it). Who the hell uses repeat mirror with offsets anyway :-).

There's actually some more potential for some small simplifications in
that sample code (not really related to offsets), it's probably my fault
- I simplified these things in llvmpipe at some point but never did for
softpipe (which is mostly why the actual arithmetic is different for the
llvmpipe soa sampling path vs. softpipe). I might tackle it one day...

Thanks!
Reviewed-by: Roland Scheidegger 

> Also fix the gather lod calculation bug.
> 
> Signed-off-by: Dave Airlie 
> ---
>  src/gallium/drivers/softpipe/sp_tex_sample.c | 146 
> +--
>  1 file changed, 68 insertions(+), 78 deletions(-)
> 
> diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
> b/src/gallium/drivers/softpipe/sp_tex_sample.c
> index 4ac3498..1010b63 100644
> --- a/src/gallium/drivers/softpipe/sp_tex_sample.c
> +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
> @@ -145,14 +145,14 @@ wrap_nearest_clamp(float s, unsigned size, int offset, 
> int *icoord)
>  {
> /* s limited to [0,1] */
> /* i limited to [0,size-1] */
> +   s *= size;
> +   s += offset;
> if (s <= 0.0F)
>*icoord = 0;
> -   else if (s >= 1.0F)
> +   else if (s >= size)
>*icoord = size - 1;
> else
> -  *icoord = util_ifloor(s * size);
> -   if (offset)
> -  *icoord = CLAMP(*icoord + offset, 0, size - 1);
> +  *icoord = util_ifloor(s);
>  }
>  
>  
> @@ -161,17 +161,18 @@ wrap_nearest_clamp_to_edge(float s, unsigned size, int 
> offset, int *icoord)
>  {
> /* s limited to [min,max] */
> /* i limited to [0, size-1] */
> -   const float min = 1.0F / (2.0F * size);
> -   const float max = 1.0F - min;
> +   const float min = 0.5F;
> +   const float max = (float)size - 0.5F;
> +
> +   s *= size;
> +   s += offset;
>  
> if (s < min)
>*icoord = 0;
> else if (s > max)
>*icoord = size - 1;
> else
> -  *icoord = util_ifloor(s * size);
> -   if (offset)
> -  *icoord = CLAMP(*icoord + offset, 0, size - 1);
> +  *icoord = util_ifloor(s);
>  }
>  
>  
> @@ -180,26 +181,30 @@ wrap_nearest_clamp_to_border(float s, unsigned size, 
> int offset, int *icoord)
>  {
> /* s limited to [min,max] */
> /* i limited to [-1, size] */
> -   const float min = -1.0F / (2.0F * size);
> -   const float max = 1.0F - min;
> +   const float min = -0.5F;
> +   const float max = size + 0.5F;
> +
> +   s *= size;
> +   s += offset;
> if (s <= min)
>*icoord = -1;
> else if (s >= max)
>*icoord = size;
> else
> -  *icoord = util_ifloor(s * size);
> -   if (offset)
> -  *icoord = CLAMP(*icoord + offset, 0, size - 1);
> +  *icoord = util_ifloor(s);
>  }
>  
> -
>  static void
>  wrap_nearest_mirror_repeat(float s, unsigned size, int offset, int *icoord)
>  {
> const float min = 1.0F / (2.0F * size);
> const float max = 1.0F - min;
> -   const int flr = util_ifloor(s);
> -   float u = frac(s);
> +   int flr;
> +   float u;
> +
> +   s += (float)offset / size;
> +   flr = util_ifloor(s);
> +   u = frac(s);
> if (flr & 1)
>u = 1.0F - u;
> if (u < min)
> @@ -208,8 +213,6 @@ wrap_nearest_mirror_repeat(float s, unsigned size, int 
> offset, int *icoord)
>*icoord = size - 1;
> else
>*icoord = util_ifloor(u * size);
> -   if (offset)
> -  *icoord = CLAMP(*icoord + offset, 0, size - 1);
>  }
>  
>  
> @@ -218,15 +221,13 @@ wrap_nearest_mirror_clamp(float s, unsigned size, int 
> offset, int *icoord)
>  {
> /* s limited to [0,1] */
> /* i limited to [0,size-1] */
> -   const float u = fabsf(s);
> +   const float u = fabsf(s * size + offset);
> if (u <= 0.0F)
>*icoord = 0;
> -   else if (u >= 1.0F)
> +   else if (u >= size)
>*icoord = size - 1;
> else
> -  *icoord = util_ifloor(u * size);
> -   if (offset)
> -  *icoord = CLAMP(*icoord + offset, 0, size - 1);
> +  *icoord = util_ifloor(u);
>  }
>  
>  
> @@ -235,36 +236,33 @@ wrap_nearest_mirror_clamp_to_edge(float s, unsigned 
> size, int offset, int *icoor
>  {
> /* s limited to [min,max] */
> /* i limited to [0, size-1] */
> -   const float min = 1.0F / (2.0F * size);
> -   const float max = 1.0F - min;
> -   const float u = fabsf(s);
> +   const float min = 0.5F;
> +   con

[Mesa-dev] [PATCH] gallivm: make sampling more robust when the sampler setup is bogus

2015-05-28 Thread sroland
From: Roland Scheidegger 

Pure integer formats cannot be sampled with linear tex / mip filters. In GL
such a setup would make the texture incomplete.
We shouldn't rely on the state tracker though to filter that out, just return
all zeros instead of dying in the lerp.
---
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 36 +++
 1 file changed, 31 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 1a60ca9..b7dcb22 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -2748,11 +2748,37 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
else {
   LLVMValueRef lod_fpart = NULL, lod_positive = NULL;
   LLVMValueRef ilevel0 = NULL, ilevel1 = NULL;
-  boolean use_aos = util_format_fits_8unorm(bld.format_desc) &&
-op_is_tex &&
-/* not sure this is strictly needed or simply 
impossible */
-derived_sampler_state.compare_mode == 
PIPE_TEX_COMPARE_NONE &&
-lp_is_simple_wrap_mode(derived_sampler_state.wrap_s);
+  boolean use_aos;
+
+  if (util_format_is_pure_integer(static_texture_state->format) &&
+  !util_format_has_depth(bld.format_desc) &&
+  (static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR ||
+   static_sampler_state->min_img_filter == PIPE_TEX_FILTER_LINEAR ||
+   static_sampler_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
+ /*
+  * Bail if impossible filtering is specified (the awkard additional
+  * depth check is because it is legal in gallium to have things like 
S8Z24
+  * here which would say it's pure int despite such formats should 
sample
+  * the depth component).
+  * In GL such filters make the texture incomplete, this makes it 
robust
+  * against state trackers which set this up regardless (we'd crash in 
the
+  * lerp later (except for gather)).
+  * Must do this after fetch_texel code since with GL state tracker 
we'll
+  * get some junk sampler for buffer textures.
+  */
+ unsigned chan;
+ LLVMValueRef zero = lp_build_const_vec(gallivm, type, 0.0F);
+ for (chan = 0; chan < 4; chan++) {
+texel_out[chan] = zero;
+ }
+ return;
+  }
+
+  use_aos = util_format_fits_8unorm(bld.format_desc) &&
+op_is_tex &&
+/* not sure this is strictly needed or simply impossible */
+derived_sampler_state.compare_mode == PIPE_TEX_COMPARE_NONE &&
+lp_is_simple_wrap_mode(derived_sampler_state.wrap_s);
 
   use_aos &= bld.num_lods <= num_quads ||
  derived_sampler_state.min_img_filter ==
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/6] i965: Consolidate certain miptree params to flags

2015-05-28 Thread Pohjolainen, Topi
On Thu, May 28, 2015 at 10:21:29AM -0700, Ben Widawsky wrote:
> I think pretty much everyone agrees that having more than a single bool as a
> function argument is bordering on a bad idea. What sucks about the current
> code is in several instances it's necessary to propagate these boolean
> selections down to lower layers of the code. This requires plumbing 
> (mechanical,
> but still churn) pretty much all of the miptree functions each time.  By
> introducing the flags paramater, it is possible to add miptree constraints 
> very
> easily.

I like this a lot. I have a few simple comments below.

> 
> The use of this, as is already the case, is sometimes we have some information
> at the time we create the miptree that needs to be known all the way at the
> lowest levels of the create/allocation, disable_aux_buffers is currently one
> such example. There will be another example coming up in a few patches.
> 
> Cc: Chad Versace 
> Signed-off-by: Ben Widawsky 
> ---
>  src/mesa/drivers/dri/i965/intel_fbo.c  |  5 +-
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c  | 86 
> +-
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.h  |  9 ++-
>  src/mesa/drivers/dri/i965/intel_pixel_draw.c   |  2 +-
>  src/mesa/drivers/dri/i965/intel_tex.c  |  8 +--
>  src/mesa/drivers/dri/i965/intel_tex.h  |  2 +-
>  src/mesa/drivers/dri/i965/intel_tex_image.c| 14 ++---
>  src/mesa/drivers/dri/i965/intel_tex_validate.c |  3 +-
>  8 files changed, 63 insertions(+), 66 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c 
> b/src/mesa/drivers/dri/i965/intel_fbo.c
> index aebed72..1b3a72f 100644
> --- a/src/mesa/drivers/dri/i965/intel_fbo.c
> +++ b/src/mesa/drivers/dri/i965/intel_fbo.c
> @@ -390,7 +390,7 @@ intel_image_target_renderbuffer_storage(struct gl_context 
> *ctx,
>   image->height,
>   1,
>   image->pitch,
> - true /*disable_aux_buffers*/);
> + MIPTREE_LAYOUT_DISABLE_AUX);
> if (!irb->mt)
>return;
>  
> @@ -1027,10 +1027,9 @@ intel_renderbuffer_move_to_temp(struct brw_context 
> *brw,
>   intel_image->base.Base.Level,
>   intel_image->base.Base.Level,
>   width, height, depth,
> - true,
>   irb->mt->num_samples,
>   INTEL_MIPTREE_TILING_ANY,
> - false);
> + MIPTREE_LAYOUT_ACCELERATED_UPLOAD);
>  
> if (intel_miptree_wants_hiz_buffer(brw, new_mt)) {
>intel_miptree_alloc_hiz(brw, new_mt);
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 24a5c3d..b243f8a 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -244,10 +244,8 @@ intel_miptree_create_layout(struct brw_context *brw,
>  GLuint width0,
>  GLuint height0,
>  GLuint depth0,
> -bool for_bo,
>  GLuint num_samples,
> -bool force_all_slices_at_each_lod,
> -bool disable_aux_buffers)
> +uint32_t layout_flags)
>  {
> struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
> if (!mt)
> @@ -286,7 +284,7 @@ intel_miptree_create_layout(struct brw_context *brw,
> mt->logical_height0 = height0;
> mt->logical_depth0 = depth0;
> mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
> -   mt->disable_aux_buffers = disable_aux_buffers;
> +   mt->disable_aux_buffers = !!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX);

You didn't mean to have double negation (!!), did you?

> exec_list_make_empty(&mt->hiz_map);
>  
> /* The cpp is bytes per (1, blockheight)-sized block for compressed
> @@ -422,12 +420,14 @@ intel_miptree_create_layout(struct brw_context *brw,
> mt->physical_height0 = height0;
> mt->physical_depth0 = depth0;
>  
> -   if (!for_bo &&
> +   if (!(layout_flags & MIPTREE_LAYOUT_FOR_BO) &&
> _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL &&
> (brw->must_use_separate_stencil ||
>   (brw->has_separate_stencil &&
>   intel_miptree_wants_hiz_buffer(brw, mt {
> -  const bool force_all_slices_at_each_lod = brw->gen == 6;
> +  uint32_t stencil_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
> +  if (brw->gen == 6)
> + stencil_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD;

Perhaps a separating line here, your choice.

>mt->stencil_mt = intel_miptree_create(brw,
>  

Re: [Mesa-dev] [PATCH 1/6] i965: Consolidate certain miptree params to flags

2015-05-28 Thread Pohjolainen, Topi
On Fri, May 29, 2015 at 09:32:53AM +0300, Pohjolainen, Topi wrote:
> On Thu, May 28, 2015 at 10:21:29AM -0700, Ben Widawsky wrote:
> > I think pretty much everyone agrees that having more than a single bool as a
> > function argument is bordering on a bad idea. What sucks about the current
> > code is in several instances it's necessary to propagate these boolean
> > selections down to lower layers of the code. This requires plumbing 
> > (mechanical,
> > but still churn) pretty much all of the miptree functions each time.  By
> > introducing the flags paramater, it is possible to add miptree constraints 
> > very
> > easily.
> 
> I like this a lot. I have a few simple comments below.
> 
> > 
> > The use of this, as is already the case, is sometimes we have some 
> > information
> > at the time we create the miptree that needs to be known all the way at the
> > lowest levels of the create/allocation, disable_aux_buffers is currently one
> > such example. There will be another example coming up in a few patches.
> > 
> > Cc: Chad Versace 
> > Signed-off-by: Ben Widawsky 
> > ---
> >  src/mesa/drivers/dri/i965/intel_fbo.c  |  5 +-
> >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c  | 86 
> > +-
> >  src/mesa/drivers/dri/i965/intel_mipmap_tree.h  |  9 ++-
> >  src/mesa/drivers/dri/i965/intel_pixel_draw.c   |  2 +-
> >  src/mesa/drivers/dri/i965/intel_tex.c  |  8 +--
> >  src/mesa/drivers/dri/i965/intel_tex.h  |  2 +-
> >  src/mesa/drivers/dri/i965/intel_tex_image.c| 14 ++---
> >  src/mesa/drivers/dri/i965/intel_tex_validate.c |  3 +-
> >  8 files changed, 63 insertions(+), 66 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c 
> > b/src/mesa/drivers/dri/i965/intel_fbo.c
> > index aebed72..1b3a72f 100644
> > --- a/src/mesa/drivers/dri/i965/intel_fbo.c
> > +++ b/src/mesa/drivers/dri/i965/intel_fbo.c
> > @@ -390,7 +390,7 @@ intel_image_target_renderbuffer_storage(struct 
> > gl_context *ctx,
> >   image->height,
> >   1,
> >   image->pitch,
> > - true /*disable_aux_buffers*/);
> > + MIPTREE_LAYOUT_DISABLE_AUX);
> > if (!irb->mt)
> >return;
> >  
> > @@ -1027,10 +1027,9 @@ intel_renderbuffer_move_to_temp(struct brw_context 
> > *brw,
> >   intel_image->base.Base.Level,
> >   intel_image->base.Base.Level,
> >   width, height, depth,
> > - true,
> >   irb->mt->num_samples,
> >   INTEL_MIPTREE_TILING_ANY,
> > - false);
> > + MIPTREE_LAYOUT_ACCELERATED_UPLOAD);
> >  
> > if (intel_miptree_wants_hiz_buffer(brw, new_mt)) {
> >intel_miptree_alloc_hiz(brw, new_mt);
> > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > index 24a5c3d..b243f8a 100644
> > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > @@ -244,10 +244,8 @@ intel_miptree_create_layout(struct brw_context *brw,
> >  GLuint width0,
> >  GLuint height0,
> >  GLuint depth0,
> > -bool for_bo,
> >  GLuint num_samples,
> > -bool force_all_slices_at_each_lod,
> > -bool disable_aux_buffers)
> > +uint32_t layout_flags)
> >  {
> > struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
> > if (!mt)
> > @@ -286,7 +284,7 @@ intel_miptree_create_layout(struct brw_context *brw,
> > mt->logical_height0 = height0;
> > mt->logical_depth0 = depth0;
> > mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
> > -   mt->disable_aux_buffers = disable_aux_buffers;
> > +   mt->disable_aux_buffers = !!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX);
> 
> You didn't mean to have double negation (!!), did you?

I actually meant that isn't "layout_flags & MIPTREE_LAYOUT_DISABLE_AUX" enough
on its own?

> 
> > exec_list_make_empty(&mt->hiz_map);
> >  
> > /* The cpp is bytes per (1, blockheight)-sized block for compressed
> > @@ -422,12 +420,14 @@ intel_miptree_create_layout(struct brw_context *brw,
> > mt->physical_height0 = height0;
> > mt->physical_depth0 = depth0;
> >  
> > -   if (!for_bo &&
> > +   if (!(layout_flags & MIPTREE_LAYOUT_FOR_BO) &&
> > _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL &&
> > (brw->must_use_separate_stencil ||
> > (brw->has_separate_stencil &&
> >   intel_miptree_wants_hiz_buffer(brw, mt {
> > - 

Re: [Mesa-dev] [PATCH 4/6] i965/gen8: Correct HALIGN for AUX surfaces

2015-05-28 Thread Pohjolainen, Topi
On Thu, May 28, 2015 at 10:21:32AM -0700, Ben Widawsky wrote:
> This restriction was attempted in this commit:
> commit 47053464630888f819ef8cc44278f1a1220159b9
> Author: Anuj Phogat 
> Date:   Fri Feb 13 11:21:21 2015 -0800
> 
>i965/gen8: Use HALIGN_16 if MCS is enabled for non-MSRT
> 
> However, the commit itself doesn't achieve the desired goal as determined by 
> the
> asserts which the next patch adds. mcs_mt is never a value because we're in 
> the
> process of allocating the mcs_mt miptree when we get to this function. I 
> didn't
> check, but perhaps this would work with blorp, however, meta clears allocate 
> the
> miptree structure (which AFAICT needs the alignment also) way before it
> allocates using meta clears where the renderbuffer is allocated way before the
> aux buffer.
> 
> The restriction is referenced in a few places, but the most concise one [IMO]
> from the spec is for Gen9. Gen8 8 loosens the restriction in that it only
> requires this for non-msrt surface.
> 
>When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN 16 
> must
>be used.
> 
> With the code before the miptree layout flag rework (patches preceding this),
> accomplishing this workaround is very difficult.
> 
> Cc: Anuj Phogat 
> Cc: Neil Roberts 
> Signed-off-by: Ben Widawsky 
> ---
>  src/mesa/drivers/dri/i965/brw_tex_layout.c| 16 ++--
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 15 ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  4 +++-
>  3 files changed, 25 insertions(+), 10 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c 
> b/src/mesa/drivers/dri/i965/brw_tex_layout.c
> index 72b02a2..b51c7c7 100644
> --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
> +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
> @@ -41,8 +41,13 @@
>  
>  static unsigned int
>  intel_horizontal_texture_alignment_unit(struct brw_context *brw,
> -struct intel_mipmap_tree *mt)
> +struct intel_mipmap_tree *mt,
> +uint32_t layout_flags)
>  {
> +

Extra newline.

> +   if (layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16)
> +  return 16;
> +
> /**
>  * From the "Alignment Unit Size" section of various specs, namely:
>  * - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4
> @@ -91,9 +96,6 @@ intel_horizontal_texture_alignment_unit(struct brw_context 
> *brw,
> if (brw->gen >= 7 && mt->format == MESA_FORMAT_Z_UNORM16)
>return 8;
>  
> -   if (brw->gen == 8 && mt->mcs_mt && mt->num_samples <= 1)
> -  return 16;
> -
> return 4;
>  }
>  
> @@ -459,7 +461,8 @@ brw_miptree_layout_texture_3d(struct brw_context *brw,
>  }
>  
>  void
> -brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt)
> +brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt,
> +   uint32_t layout_flags)
>  {
> bool multisampled = mt->num_samples > 1;
> bool gen6_hiz_or_stencil = false;
> @@ -492,8 +495,9 @@ brw_miptree_layout(struct brw_context *brw, struct 
> intel_mipmap_tree *mt)
>   mt->align_w = 128 / mt->cpp;
>   mt->align_h = 32;
>}
> +  assert((layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0);
> } else {
> -  mt->align_w = intel_horizontal_texture_alignment_unit(brw, mt);
> +  mt->align_w = intel_horizontal_texture_alignment_unit(brw, mt, 
> layout_flags);
>mt->align_h =
>   intel_vertical_texture_alignment_unit(brw, mt->format, 
> multisampled);
> }
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 75ee19a..a1ac0cf 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -475,7 +475,10 @@ intel_miptree_create_layout(struct brw_context *brw,
> if (layout_flags & MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD)
>mt->array_layout = ALL_SLICES_AT_EACH_LOD;
>  
> -   brw_miptree_layout(brw, mt);
> +   if (intel_miptree_is_fast_clear_capable(brw, mt))
> +  layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
> +
> +   brw_miptree_layout(brw, mt, layout_flags);
>  
> if (mt->disable_aux_buffers)
>assert(mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS);
> @@ -722,6 +725,7 @@ intel_miptree_create(struct brw_context *brw,
>  
>  
> if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
> +  assert(mt->num_samples > 1);
>if (!intel_miptree_alloc_mcs(brw, mt, num_samples)) {
>   intel_miptree_release(&mt);
>   return NULL;
> @@ -734,8 +738,10 @@ intel_miptree_create(struct brw_context *brw,
>  * clear actually occurs.
>  */
> if (intel_is_non_msrt_mcs_tile_supported(brw, mt->tiling) &&
> -   intel_miptree_is_fast_clear_capable(brw, mt))
> +   intel_miptree_is_fast_clear_capable(brw, mt)) {
>mt->fast_clear_state = INTEL_FAS

Re: [Mesa-dev] [PATCH 5/6] i965/gen9: Set HALIGN_16 for all aux buffers

2015-05-28 Thread Pohjolainen, Topi
On Thu, May 28, 2015 at 10:21:33AM -0700, Ben Widawsky wrote:
> Just like the previous patch, but for the GEN9 constraints.
> 
> Signed-off-by: Ben Widawsky 
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index a1ac0cf..89030aa 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -655,6 +655,11 @@ intel_miptree_create(struct brw_context *brw,
>  
> assert((layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) == 0);
> assert((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0);
> +
> +   if (brw->gen >= 9 && num_samples > 1)
> +  layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
> +
> +

Extra newline.

> mt = intel_miptree_create_layout(brw, target, format,
>  first_level, last_level, width0,
>  height0, depth0, num_samples,
> -- 
> 2.4.2
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev