Re: [Mesa-dev] Statically linking libstdc++ and libgcc

2015-03-11 Thread Vivek Dasmohapatra

Here's a version of the mesa build patches rolled into one patch,
and driven by a configure argument --enable-static-libstdc++
which defaults to being off.From 2e967e89fefc2a107c29c6581c9885475a7b7a84 Mon Sep 17 00:00:00 2001
From: vivek 
Date: Thu, 12 Mar 2015 01:30:19 +
Subject: [PATCH 1/2] Allow static libstdc++/libgcc linking if selected

---
 configure.ac| 46 +
 src/gallium/Automake.inc|  1 +
 src/gallium/targets/dri/Makefile.am |  5 
 3 files changed, 52 insertions(+)

diff --git a/configure.ac b/configure.ac
index 90c7737..7aa695a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -705,6 +705,34 @@ AC_ARG_ENABLE([dri],
 [enable_dri="$enableval"],
 [enable_dri=yes])
 
+AC_ARG_ENABLE([static-libstdc++],
+[AS_HELP_STRING([--enable-static-libstdc++],
+[Statically link libstdc++/libgcc @<:@default=disabled@:>@])],
+[enable_static_libstdc__="$enableval"],
+[enable_static_libstdc__=no])
+
+dnl Strip out unnecessary dynamic linking in of libstdc++ and libgcc_s for
+dnl DRI modules: they cause problems when loaded by games linked against
+dnl a steam runtime with a different libgcc or libstdc++ version:
+if test x$enable_static_libstdc__ != xno;
+then
+if test x$enable_dri != xno;
+then
+AC_MSG_NOTICE([Cleanup libtool C++ postdeps: $postdeps_CXX (enable_dri=$enable_dri)])
+tmppdcxx=;
+for x in ${postdeps_CXX};
+do
+case $x in
+-lstdc++) true; ;;
+-lgcc_s) true; ;;
+*) tmppdcxx=${tmppdcxx}${tmppdcxx:+ }$x; ;;
+esac;
+done;
+postdeps_CXX="${tmppdcxx}";
+AC_MSG_NOTICE([Cleaned libtool C++ postdeps: $postdeps_CXX])
+fi;
+fi
+
 case "$host_os" in
 linux*)
 dri3_default=yes
@@ -2189,6 +2217,23 @@ if test "x$MESA_LLVM" != x0; then
 if test "x$llvm_have_one_so" = xyes; then
 dnl LLVM was built using auto*, so there is only one shared object.
 LLVM_LIBS="-l$LLVM_SO_NAME"
+if test x$enable_static_libstdc__ != xno;
+then
+if test x$enable_llvm_shared_libs != xno;
+then
+dnl If we want static libstdc++ linking and we asked for llvm shlib
+dnl then find a libLLVM-X.Y variant that doesn't need -lstdc++ -lgcc_s
+dnl Also: Make sure we don't ask for libstdc++ or libgcc_s explicitly:
+llvm_so_otherlibdeps=$($LLVM_CONFIG --ldflags)
+llvm_so_otherlibdeps=$(echo $llvm_so_otherlibdeps | sed -re 's@-lstdc\+\+|-lgcc_s@@g')
+AC_MSG_CHECKING([for a stdlib-independent lib$LLVM_SO_NAME])
+AC_CHECK_LIB([$LLVM_SO_NAME-nostdlib],
+ [LLVMInitializeCore],
+ [LLVM_LIBS="-l$LLVM_SO_NAME-nostdlib"],
+ [AC_MSG_ERROR([No lib$LLVM_SO_NAME with static libstdc++ linkage found])],
+ [$llvm_so_otherlibdeps])
+fi;
+fi;
 else
 dnl If LLVM was built with CMake, there will be one shared object per
 dnl component.
@@ -2450,6 +2495,7 @@ echo "prefix:  $prefix"
 echo "exec_prefix: $exec_prefix"
 echo "libdir:  $libdir"
 echo "includedir:  $includedir"
+echo "postdeps_CXX:$postdeps_CXX"
 
 dnl API info
 echo ""
diff --git a/src/gallium/Automake.inc b/src/gallium/Automake.inc
index 95aae50..6de79c1 100644
--- a/src/gallium/Automake.inc
+++ b/src/gallium/Automake.inc
@@ -46,6 +46,7 @@ GALLIUM_TARGET_CFLAGS = \
 
 GALLIUM_COMMON_LIB_DEPS = \
 	-lm \
+ 	-l:libgcc.a -l:libgcc_eh.a -l:libstdc++.a \
 	$(CLOCK_LIB) \
 	$(PTHREAD_LIBS) \
 	$(DLOPEN_LIBS)
diff --git a/src/gallium/targets/dri/Makefile.am b/src/gallium/targets/dri/Makefile.am
index aaeb950..5185273 100644
--- a/src/gallium/targets/dri/Makefile.am
+++ b/src/gallium/targets/dri/Makefile.am
@@ -27,6 +27,11 @@ gallium_dri_la_LDFLAGS = \
 	-shrext .so \
 	-module \
 	-avoid-version \
+	-static-libgcc \
+	-static-libstdc++ \
+	-l:libgcc.a \
+	-l:libstdc++.a \
+	-Wl,--exclude-libs -Wl,libgcc.a:libstdc++.a \
 	$(GC_SECTIONS)
 
 if HAVE_LD_VERSION_SCRIPT
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] glsl: Expose built-in packing functions under GLSL 4.2.

2015-03-11 Thread Carl Worth
> ARB_shading_language_packing is part of GLSL 4.2, not 4.0 as I
> mistakenly believed. The following functions are available only with
> ARB_shading_language_packing, GLSL 4.2 (not GLSL 4.0), or ES 3.0:

I'll trust you that you're correct on the specification version, so:

Reviewed-by: Carl Worth 

(for both patches).

-Carl


pgpATr8uN7x4W.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/9] i965: Implement NIR intrinsics for loading VS system values.

2015-03-11 Thread Jason Ekstrand
On Wed, Mar 11, 2015 at 5:13 PM, Kenneth Graunke 
wrote:

> On Wednesday, March 11, 2015 03:33:24 PM Jason Ekstrand wrote:
> > I'm not terribly happy with how this worked out, I'm not going to NAK it
> as
> > I think it's the best we can do at the moment.
> >
> > The reason why Connor (and others) have chosen to emit these things at
> the
> > top of the shader is because they frequently require some computation and
> > we don't want to duplicate that if we don't have to.  However, it also
> > leads to variables with very long live ranges which we don't want either.
> > Once we have GVN, we can value-number system value intrinsics.  This will
> > ensure that we only do the (potentially expensive) caluculation once and
> > GCM will ensure that they land as far down the program as we can put
> them.
> > However, until we have GVN, this seems like as good as we're going to get
> > at the moment.
> > --Jason
>
> I think this code is fine.
>
> The built-in variables handled here are gl_VertexID and gl_InstanceID.
> Here, gl_VertexID = gl_VertexIDMESA (0-based) + gl_BaseVertex.
>
> All three values show up as an incoming vec4 vertex attribute, supplied
> by the vertex fetcher like any other vertex data.  This means that they
> inherently occupy registers starting at the beginning of the program.
>
> The only computation we could delay is the ADD for gl_VertexID.
> However, in SIMD8 VS, it appears that gl_VertexIDMESA and gl_BaseVertex
> occupy separate registers.  So doing the ADD up front actually makes 2
> GRFs available, at the cost of 1 GRF.  It's actually better up front.
>
> For the FS system values - gl_SampleID and the like - you have a point.
> The inputs to the calculation tend to come from g0/g1, which usually
> live for the whole shader anyway.  So we can compute the value whenever
> we like - moving it closer to the use would reduce live ranges, and
> potentially put it inside control flow, so it might be skipped
> altogether on some invocations.
>
> That's just not the case for the VS system values.
>

Sure, and I'm not picky about where the math happens.  I just don't like
this two-step emit process where we put a bunch of stuff at the top of the
shader and then mov's down below.  I'd rather we just put the system value
intrinsic at the top where we want it and emit it when we hit it in the
emit pass.  I'm not super-picky about it but that just seems to make mores
sense to me.
--Jason
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] docs: List ARB_shading_language_packing/EXT_shader_integer_mix.

2015-03-11 Thread Matt Turner
---
 docs/GL3.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 43bbf85..289dd36 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -142,6 +142,7 @@ GL 4.2, GLSL 4.20:
   GL_ARB_shader_image_load_store   in progress (curro)
   GL_ARB_conservative_depthDONE (all drivers that 
support GLSL 1.30)
   GL_ARB_shading_language_420pack  DONE (all drivers that 
support GLSL 1.30)
+  GL_ARB_shading_language_packing  DONE (all drivers)
   GL_ARB_internalformat_query  DONE (i965, nv50, nvc0, 
r300, r600, radeonsi, llvmpipe, softpipe)
   GL_ARB_map_buffer_alignment  DONE (all drivers)
 
@@ -207,6 +208,7 @@ GL 4.5, GLSL 4.50:
   GL_KHR_context_flush_control DONE (all - but needs 
GLX/EXT extension to be useful)
   GL_KHR_robust_buffer_access_behavior not started
   GL_KHR_robustness90% done (the ARB 
variant)
+  GL_EXT_shader_integer_mixDONE (all drivers that 
support GLSL)
 
 These are the extensions cherry-picked to make GLES 3.1
 GLES3.1, GLSL ES 3.1
@@ -219,6 +221,7 @@ GLES3.1, GLSL ES 3.1
   GL_ARB_shader_atomic_countersDONE (i965)
   GL_ARB_shader_image_load_store   in progress (curro)
   GL_ARB_shader_storage_buffer_object  not started
+  GL_ARB_shading_language_packing  DONE (all drivers)
   GL_ARB_separate_shader_objects   DONE (all drivers)
   GL_ARB_stencil_texturing DONE (i965/gen8+, nv50, 
nvc0, r600, radeonsi, llvmpipe, softpipe)
   GL_ARB_vertex_attrib_binding DONE (all drivers)
-- 
2.0.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] glsl: Expose built-in packing functions under GLSL 4.2.

2015-03-11 Thread Matt Turner
ARB_shading_language_packing is part of GLSL 4.2, not 4.0 as I
mistakenly believed. The following functions are available only with
ARB_shading_language_packing, GLSL 4.2 (not GLSL 4.0), or ES 3.0:

   - packSnorm2x16
   - unpackSnorm2x16
   - packHalf2x16
   - unpackHalf2x16
---
 src/glsl/builtin_functions.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index 84bbdc2..c607572 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -201,7 +201,7 @@ static bool
 shader_packing_or_es3(const _mesa_glsl_parse_state *state)
 {
return state->ARB_shading_language_packing_enable ||
-  state->is_version(400, 300);
+  state->is_version(420, 300);
 }
 
 static bool
-- 
2.0.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] tnl: HAVE_LE32_VERTS is never defined, remove associated code

2015-03-11 Thread Matt Turner
On Wed, Mar 11, 2015 at 6:29 PM, Brian Paul  wrote:
> ---
>  src/mesa/tnl_dd/t_dd_triemit.h | 7 ---
>  1 file changed, 7 deletions(-)
>
> diff --git a/src/mesa/tnl_dd/t_dd_triemit.h b/src/mesa/tnl_dd/t_dd_triemit.h
> index 39c9d26..082e83f 100644
> --- a/src/mesa/tnl_dd/t_dd_triemit.h
> +++ b/src/mesa/tnl_dd/t_dd_triemit.h
> @@ -16,13 +16,6 @@ do {   
>   \
> "D" ((long)vb), \
> "S" ((long)v) );\
>  } while (0)
> -#elif defined(HAVE_LE32_VERTS)
> -#define COPY_DWORDS( j, vb, vertsize, v )  \
> -do {   \
> -   for ( j = 0 ; j < vertsize ; j++ )  \
> -  vb[j] = CPU_TO_LE32(((GLuint *)v)[j]);   \
> -   vb += vertsize; \
> -} while (0)
>  #else
>  #define COPY_DWORDS( j, vb, vertsize, v )  \
>  do {   \
> --

Dead since the removal of the r128 driver!

Both are

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] mesa: move LONGSTRING into generated enums.c

2015-03-11 Thread Brian Paul
enums.c is the only place this directive is needed.
---
 src/mapi/glapi/gen/gl_enums.py | 6 ++
 src/mesa/main/compiler.h   | 9 -
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/src/mapi/glapi/gen/gl_enums.py b/src/mapi/glapi/gen/gl_enums.py
index d61618f..f45782d 100644
--- a/src/mapi/glapi/gen/gl_enums.py
+++ b/src/mapi/glapi/gen/gl_enums.py
@@ -157,6 +157,12 @@ _mesa_lookup_prim_by_nr(GLuint nr)
 
 string_offsets = {}
 i = 0;
+print '#if defined(__GNUC__)'
+print '# define LONGSTRING __extension__'
+print '#else'
+print '# define LONGSTRING'
+print '#endif'
+print ''
 print 'LONGSTRING static const char enum_string_table[] = '
 for enum, name in enum_table:
 print '   "%s\\0"' % (name)
diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h
index b9f808e..5c60391 100644
--- a/src/mesa/main/compiler.h
+++ b/src/mesa/main/compiler.h
@@ -113,15 +113,6 @@ extern "C" {
 #define LE32_TO_CPU( x )   CPU_TO_LE32( x )
 
 
-/**
- * LONGSTRING macro
- * gcc -pedantic warns about long string literals, LONGSTRING silences that.
- */
-#if !defined(__GNUC__)
-# define LONGSTRING
-#else
-# define LONGSTRING __extension__
-#endif
 
 #define IEEE_ONE 0x3f80
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] tnl: HAVE_LE32_VERTS is never defined, remove associated code

2015-03-11 Thread Brian Paul
---
 src/mesa/tnl_dd/t_dd_triemit.h | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/src/mesa/tnl_dd/t_dd_triemit.h b/src/mesa/tnl_dd/t_dd_triemit.h
index 39c9d26..082e83f 100644
--- a/src/mesa/tnl_dd/t_dd_triemit.h
+++ b/src/mesa/tnl_dd/t_dd_triemit.h
@@ -16,13 +16,6 @@ do { 
\
"D" ((long)vb), \
"S" ((long)v) );\
 } while (0)
-#elif defined(HAVE_LE32_VERTS)
-#define COPY_DWORDS( j, vb, vertsize, v )  \
-do {   \
-   for ( j = 0 ; j < vertsize ; j++ )  \
-  vb[j] = CPU_TO_LE32(((GLuint *)v)[j]);   \
-   vb += vertsize; \
-} while (0)
 #else
 #define COPY_DWORDS( j, vb, vertsize, v )  \
 do {   \
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] GL/GLSL tests for GL 4.0 and newer

2015-03-11 Thread Marek Olšák
ARB_tessellation_shader

We have some tests, but it's not enough to say that it works.

Marek

On Wed, Mar 11, 2015 at 11:25 PM, Ilia Mirkin  wrote:
> On Wed, Mar 11, 2015 at 6:17 PM, Dave Airlie  wrote:
>> On 12 March 2015 at 08:07, Ian Romanick  wrote:
>>> On 03/09/2015 12:18 AM, Ishara Abeysekera wrote:
 /I am interested on write tests for OpenGL 4.0 /GLSL 4.00 .
 /
 /But can you be more specify what areas you are expecting to be tested,
>>>
>>> I haven't examined the API side very closely, but I know of a few things
>>> in the shading language that need tests.  For most of the things, there
>>> is absolutely no support in Mesa yet.
>>>
>>> We have no tests for GL_ARB_shader_subroutine.
>>>
>>> It would also be good to have tests to verify that features from other
>>> extensions (e.g., GL_ARB_gpu_shader5 and GL_ARB_gpu_shader_fp64) are
>>> enabled in #version 400 shaders.  The should be simple "touch" tests
>>> since we already have good tests for the extensions.  For example, you
>>> might add a glslparsertest test that tries every possible overload of
>>> uaddCarry.
>>>
>>> I don't think we have any tests for GL_ARB_vertex_attrib_64bit.  It's a
>>> GL 4.1 feature, but it will probably get done before the other missing
>>> 4.0 features.
>>
>> there are some initial ones I posted and in my piglit tree somewhere,
>> but there is definitely a need for more and validated against the
>> binary drivers.
>>
>> also thing like internal_format_query2 need a truck of tests,
>> and probably enhanced_layouts.
>
> Other fun extensions that will need a ton of tests:
>
> ARB_shader_storage_buffer_object
> ARB_compute_shader (which is somewhat in progress AFAIK, but very
> limited test coverage atm)
>
> And smaller extensions... would probably have to do a bunch of them up
> to make a single GSoC:
>
> ARB_get_texture_sub_image
> ARB_query_buffer_object
> ARB_texture_stencil8
> ARB_framebuffer_no_attachments
>
> You can look at
> http://cgit.freedesktop.org/mesa/mesa/tree/docs/GL3.txt to see which
> core extensions mesa supports. It's most common that there are tests
> for supported extensions and no tests for unsupported extensions (but
> check the piglit repo before assuming that... e.g. there are a lot of
> ARB_shader_image_load_store tests even though it's not presently
> supported by any driver).
>
>   -ilia
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/6] i965: Allow Y-tiled allocations for large surfaces

2015-03-11 Thread Ben Widawsky
On Tue, Mar 10, 2015 at 05:39:24PM +, Neil Roberts wrote:
> Ben Widawsky  writes:
> 
> > This patch will use a new calculation to determine if a surface can be 
> > blitted
> > from or to. Previously, the "total_height" member was used. Total_height in 
> > the
> > case of 2d, 3d, and cube map arrays is the height of each slice/layer/face.
> > Since the GL map APIS only ever deal with a slice at a time however, the
> > determining factor is really the height of one slice.
> 
> Do you mean to say that total_height is the combined height of all of
> the slices/layers/faces? If so the wording is confusing.
> 

Yep. Thanks, I agree, and fixed.

> > +static inline uint32_t
> > +intel_miptree_blit_height(struct intel_mipmap_tree *mt)
> > +{
> > +   switch (mt->target) {
> > +   case GL_TEXTURE_CUBE_MAP:
> > +   case GL_TEXTURE_1D_ARRAY:
> > +   case GL_TEXTURE_2D_ARRAY:
> > +   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
> > +   case GL_TEXTURE_CUBE_MAP_ARRAY:
> > +  assert(mt->logical_depth0);
> > +  return mt->qpitch;
> > +   case GL_TEXTURE_3D:
> > +  /* FIXME 3d textures don't have a qpitch. I think it's simply the 
> > tiled
> > +   * aligned mt->physical_height0. Since 3D textures aren't used 
> > often, just
> > +   * print the perf debug from the caller and bail
> > +   */
> > +  /* fallthrough */
> > +   default:
> > +  return mt->total_height;
> > +   }
> > +}
> 
> This function might stop working on Skylake if we land my patch to fix
> the qpitch calculation. In that case the qpitch isn't necessarily a
> count of the number of rows. In 1D textures it is the number of pixels
> and for compressed textures it is the number of blocks. Maybe we could
> also store the physical_qpitch that is calculated in
> brw_miptree_layout_texture_array?
> 

I'm pretty sure today we never use the blitter for compressed textures.
Therefore, I believe we can ignore that case. In the case where we use pixels, I
believe it will still work fine as long as long as each layer is tightly packed
(which I thought it was). If it's not, then I suppose we have a problem. I'm
also totally fine with making 1D fallthrough since I don't think it's a
particularly common case for it to surpass total_height anyway.

Any preference on which to land first (and therefore who has to fix it)? My
patches are older, but yours potentially fixes bugs and mine is just perf.

> Regards,
> - Neil



-- 
Ben Widawsky, Intel Open Source Technology Center
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/9] i965: Implement NIR intrinsics for loading VS system values.

2015-03-11 Thread Kenneth Graunke
On Wednesday, March 11, 2015 03:33:24 PM Jason Ekstrand wrote:
> I'm not terribly happy with how this worked out, I'm not going to NAK it as
> I think it's the best we can do at the moment.
> 
> The reason why Connor (and others) have chosen to emit these things at the
> top of the shader is because they frequently require some computation and
> we don't want to duplicate that if we don't have to.  However, it also
> leads to variables with very long live ranges which we don't want either.
> Once we have GVN, we can value-number system value intrinsics.  This will
> ensure that we only do the (potentially expensive) caluculation once and
> GCM will ensure that they land as far down the program as we can put them.
> However, until we have GVN, this seems like as good as we're going to get
> at the moment.
> --Jason

I think this code is fine.

The built-in variables handled here are gl_VertexID and gl_InstanceID.
Here, gl_VertexID = gl_VertexIDMESA (0-based) + gl_BaseVertex.

All three values show up as an incoming vec4 vertex attribute, supplied
by the vertex fetcher like any other vertex data.  This means that they
inherently occupy registers starting at the beginning of the program.

The only computation we could delay is the ADD for gl_VertexID.
However, in SIMD8 VS, it appears that gl_VertexIDMESA and gl_BaseVertex
occupy separate registers.  So doing the ADD up front actually makes 2
GRFs available, at the cost of 1 GRF.  It's actually better up front.

For the FS system values - gl_SampleID and the like - you have a point.
The inputs to the calculation tend to come from g0/g1, which usually
live for the whole shader anyway.  So we can compute the value whenever
we like - moving it closer to the use would reduce live ranges, and
potentially put it inside control flow, so it might be skipped
altogether on some invocations.

That's just not the case for the VS system values.


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/6] c11: add c11 compatibility wrapper around stdlib.h

2015-03-11 Thread Emil Velikov
On 9 March 2015 at 11:54, Jose Fonseca  wrote:
> On 07/03/15 19:38, Emil Velikov wrote:
>>
>> On 07/03/15 07:23, Jose Fonseca wrote:
>> ...
>>>
>>> we still
>>> didn't eliminate the use of non-portable _MTX_INITIALIZER_NP from Mesa
>>> tree gave me pause.
>>>
>> The only way I can think about resolving this, is to use call_once() to
>> initialize the mutex,
>
>
> Yes, I'm afraid so.
>
Which makes me wonder, why does no other place in mesa (be that glx,
gallium etc.) do so already ?

Also it seem that some places might have to stick to pthreads. Haven't
checked the details though.

* src/glx/apple
pthread_threadid_np
pthread_is_threaded_np


* src/gallium/auxiliary/os
pthread_sigmask
pthread_setname_np


* src/gallium/state_trackers/omx/
pthread_join ? missing a matching thread_create


Cheers,
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4 v2] mesa: Check for valid PBO access in gl(Compressed)Tex(Sub)Image calls

2015-03-11 Thread Laura Ekstrand
On Tue, Mar 10, 2015 at 11:36 AM, Eduardo Lima Mitev 
wrote:

> This patch adds two types of checks to the gl(Compressed)Tex(Sub)Imgage
> family
> of functions when a pixel buffer object is bound to GL_PIXEL_UNPACK_BUFFER:
>
> - That the buffer is not mapped.
> - The total data size is within the boundaries of the buffer size.
>
> It does so by calling auxiliary validations functions from PBO API:
> _mesa_validate_pbo_source() for non-compressed texture calls, and
> _mesa_validate_pbo_source_compressed() for compressed texture calls.
>
> The first check is defined in Section 6.3.2 'Effects of Mapping Buffers
> on Other GL Commands' of the GLES 3.1 spec, page 57:
>
> "Any GL command which attempts to read from, write to, or change the
>  state of a buffer object may generate an INVALID_OPERATION error if
> all
>  or part of the buffer object is mapped. However, only commands which
>  explicitly describe this error are required to do so. If an error is
> not
>  generated, using such commands to perform invalid reads, writes, or
>  state changes will have undefined results and may result in GL
>  interruption or termination."
>
> Similar wording exists in GL 4.5 spec, page 76.
>
> In the case of gl(Compressed)Tex(Sub)Image(2,3)D, the specification
> doesn't force
> implemtations to throw an error. However since Mesa don't currently
> implement
> checks to determine when it is safe to read/write from/to a mapped PBO, we
> should always return the error if all or parts of it are mapped.
>
> The 2nd check is defined in Section 8.5 'Texture Image Specification' of
> the
> OpenGL 4.5 spec, page 203:
>
> "An INVALID_OPERATION error is generated if a pixel unpack buffer
> object
>  is bound and storing texture data would access memory beyond the end
> of
>  the pixel unpack buffer."
>
> Fixes 4 dEQP tests:
> *
> dEQP-GLES3.functional.negative_api.texture.compressedteximage2d_invalid_buffer_target
> *
> dEQP-GLES3.functional.negative_api.texture.compressedtexsubimage2d_invalid_buffer_target
> *
> dEQP-GLES3.functional.negative_api.texture.compressedteximage3d_invalid_buffer_target
> *
> dEQP-GLES3.functional.negative_api.texture.compressedtexsubimage3d_invalid_buffer_target
> ---
>  src/mesa/main/teximage.c | 97
> +---
>  1 file changed, 67 insertions(+), 30 deletions(-)
>
> diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
> index 7b1a0e6..aae7c00 100644
> --- a/src/mesa/main/teximage.c
> +++ b/src/mesa/main/teximage.c
> @@ -53,6 +53,7 @@
>  #include "mtypes.h"
>  #include "glformats.h"
>  #include "texstore.h"
> +#include "pbo.h"
>
>
>  /**
> @@ -2113,7 +2114,8 @@ texture_error_check( struct gl_context *ctx,
>   GLint level, GLint internalFormat,
>   GLenum format, GLenum type,
>   GLint width, GLint height,
> - GLint depth, GLint border )
> + GLint depth, GLint border,
> + const GLvoid *pixels )
>  {
> GLenum err;
>
> @@ -2198,6 +2200,13 @@ texture_error_check( struct gl_context *ctx,
>return GL_TRUE;
> }
> +   /* validate the bound PBO, if any */
> +   if (!_mesa_validate_pbo_source(ctx, dimensions, &ctx->Unpack,
> +  width, height, depth, format, type,
> +  INT_MAX, pixels, "glTexImage")) {
> +  return GL_TRUE;
> +   }
> +
> /* make sure internal format and format basically agree */
> if (!texture_formats_agree(internalFormat, format)) {
>_mesa_error(ctx, GL_INVALID_OPERATION,
> @@ -2294,7 +2303,7 @@ compressed_texture_error_check(struct gl_context
> *ctx, GLint dimensions,
> GLenum target, GLint level,
> GLenum internalFormat, GLsizei width,
> GLsizei height, GLsizei depth, GLint
> border,
> -   GLsizei imageSize)
> +   GLsizei imageSize, const GLvoid *data)
>  {
> const GLint maxLevels = _mesa_max_texture_levels(ctx, target);
> GLint expectedSize;
> @@ -2322,6 +2331,13 @@ compressed_texture_error_check(struct gl_context
> *ctx, GLint dimensions,
>return GL_TRUE;
> }
>
> +   /* validate the bound PBO, if any */
> +   if (!_mesa_validate_pbo_source_compressed(ctx, dimensions,
> &ctx->Unpack,
> + imageSize, data,
> + "glCompressedTexImage")) {
> +  return GL_TRUE;
> +   }
> +
> switch (internalFormat) {
> case GL_PALETTE4_RGB8_OES:
> case GL_PALETTE4_RGBA8_OES:
> @@ -2454,7 +2470,8 @@ texsubimage_error_check(struct gl_context *ctx,
> GLuint dimensions,
>  GLenum target, GLint level,
>  GLint xoffset, GLint yoffset, GLint zoffset,
>  GLint width, GLint height,

Re: [Mesa-dev] [PATCH 3/5] i965/fs: Handle CMP.nz ... 0 and AND.nz ... 1 similarly in cmod propagation

2015-03-11 Thread Matt Turner
On Wed, Mar 11, 2015 at 1:44 PM, Ian Romanick  wrote:
> From: Ian Romanick 
>
> Espically on platforms that do not natively generate 0u and ~0u for
> Boolean results, we generate a lot of sequences where a CMP is
> followed by an AND with 1.  emit_bool_to_cond_code does this, for
> example.  On ILK, this results in a sequence like:
>
> add(8)  g3<1>F  g8<8,8,1>F  -g4<0,1,0>F
> cmp.l.f0(8) g3<1>D  g3<8,8,1>F  0F
> and.nz.f0(8)nullg3<8,8,1>D  1D
> (+f0) iff(8)Jump: 6
>
> The AND.nz is obviously redundant.  By propagating the cmod, we can
> instead generate
>
> add.l.f0(8) nullg8<8,8,1>F  -g4<0,1,0>F
> (+f0) iff(8)Jump: 6
>
> Existing code already handles the propagation from the CMP to the ADD.
>
> Shader-db results:
>
> GM45 (0x2A42):
> total instructions in shared programs: 3550829 -> 3550788 (-0.00%)
> instructions in affected programs: 10028 -> 9987 (-0.41%)
> helped:24
>
> Iron Lake (0x0046):
> total instructions in shared programs: 4993146 -> 4993105 (-0.00%)
> instructions in affected programs: 9675 -> 9634 (-0.42%)
> helped:24
>
> Ivy Bridge (0x0166):
> total instructions in shared programs: 6291870 -> 6291794 (-0.00%)
> instructions in affected programs: 17914 -> 17838 (-0.42%)
> helped:48
>
> Haswell (0x0426):
> total instructions in shared programs: 5779256 -> 5779180 (-0.00%)
> instructions in affected programs: 16694 -> 16618 (-0.46%)
> helped:48
>
> Broadwell (0x162E):
> total instructions in shared programs: 6823088 -> 6823014 (-0.00%)
> instructions in affected programs: 15824 -> 15750 (-0.47%)
> helped:46
>
> No chage on Sandy Bridge or on any platform when NIR is used.

typo: change

>
> Signed-off-by: Ian Romanick 
> Cc: Matt Turner 
> ---
>  .../drivers/dri/i965/brw_fs_cmod_propagation.cpp   | 32 
> +-
>  1 file changed, 31 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
> index d0ca2f9..6df3d45 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
> @@ -57,7 +57,8 @@ opt_cmod_propagation_local(bblock_t *block)
> foreach_inst_in_block_reverse_safe(fs_inst, inst, block) {
>ip--;
>
> -  if ((inst->opcode != BRW_OPCODE_CMP &&
> +  if ((inst->opcode != BRW_OPCODE_AND &&
> +   inst->opcode != BRW_OPCODE_CMP &&
> inst->opcode != BRW_OPCODE_MOV) ||
>inst->predicate != BRW_PREDICATE_NONE ||
>!inst->dst.is_null() ||
> @@ -65,6 +66,19 @@ opt_cmod_propagation_local(bblock_t *block)
>inst->src[0].abs)
>   continue;
>
> +  /* Only an AND.NZ can be propagated.  Many AND.Z instructions are
> +   * generated (for ir_unop_not in fs_visitor::emit_bool_to_cond_code).
> +   * Propagating those would require inverting the condition on the CMP.
> +   * This changes both the flag value and the register destination of the
> +   * CMP.  That result may be used elsewhere, so we can't change its 
> value
> +   * on a whim.
> +   */
> +  if (inst->opcode == BRW_OPCODE_AND &&
> +  !(inst->src[1].is_one() &&
> +inst->conditional_mod == BRW_CONDITIONAL_NZ &&
> +!inst->src[0].negate))

I had a hard time reading this condition, but I think it is right.

> + continue;
> +
>if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero())
>   continue;
>
> @@ -80,6 +94,22 @@ opt_cmod_propagation_local(bblock_t *block)
>  scan_inst->dst.reg_offset != inst->src[0].reg_offset)
> break;
>
> +/* This must be done before the dst.type check because the result
> + * type of the AND will always be D, but the result of the CMP
> + * could be anything.  The assumption is that the AND is just
> + * figuring out what the result of the previous comparison was
> + * instead of doing a new comparison with a different type.
> + */
> +if (inst->opcode == BRW_OPCODE_AND) {
> +   if (scan_inst->opcode == BRW_OPCODE_CMP &&
> +   scan_inst->writes_flag()) {

CMPs will always writes the flag, so no need to check that. I guess
you could assert it or something if you wanted.

This looks good... we just need some additional unit tests :)

  - Test that you can remove the AND.NZ after a CMP null
  - Test that AND.Z isn't removed

I don't think there's any other interesting case to test?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/4 v2] mesa: Separate PBO validation checks from buffer mapping, to allow reuse

2015-03-11 Thread Laura Ekstrand
On Tue, Mar 10, 2015 at 11:34 AM, Eduardo Lima Mitev 
wrote:

> Internal PBO functions such as _mesa_map_validate_pbo_source() and
> _mesa_validate_pbo_compressed_teximage() perform validation and buffer
> mapping
> within the same call.
>
> This patch takes out the validation into separate functions to allow reuse
> of functionality by other code (i.e, gl(Compressed)Tex(Sub)Image).
> ---
>  src/mesa/main/pbo.c | 119
> ++--
>  src/mesa/main/pbo.h |  14 +++
>  2 files changed, 101 insertions(+), 32 deletions(-)
>
> diff --git a/src/mesa/main/pbo.c b/src/mesa/main/pbo.c
> index 259f763..46121a2 100644
> --- a/src/mesa/main/pbo.c
> +++ b/src/mesa/main/pbo.c
> @@ -164,23 +164,18 @@ _mesa_map_pbo_source(struct gl_context *ctx,
> return buf;
>  }
>
> -
>  /**
> - * Combine PBO-read validation and mapping.
> + * Perform PBO validation for read operations with uncompressed textures.
>   * If any GL errors are detected, they'll be recorded and NULL returned.
>
Maybe say something like "If there are errors, return false, else return
true"?  This is important because in other parts of the driver (like
main/teximage.c), error checking functions return GL_TRUE if an error is
found and GL_FALSE if one isn't found.  (Confusing, I know :< )

>   * \sa _mesa_validate_pbo_access
> - * \sa _mesa_map_pbo_source
> - * A call to this function should have a matching call to
> - * _mesa_unmap_pbo_source().
>   */
> -const GLvoid *
> -_mesa_map_validate_pbo_source(struct gl_context *ctx,
> -  GLuint dimensions,
> -  const struct gl_pixelstore_attrib *unpack,
> -  GLsizei width, GLsizei height, GLsizei
> depth,
> -  GLenum format, GLenum type,
> -  GLsizei clientMemSize,
> -  const GLvoid *ptr, const char *where)
> +bool
> +_mesa_validate_pbo_source(struct gl_context *ctx, GLuint dimensions,
> +  const struct gl_pixelstore_attrib *unpack,
> +  GLsizei width, GLsizei height, GLsizei depth,
> +  GLenum format, GLenum type,
> +  GLsizei clientMemSize,
> +  const GLvoid *ptr, const char *where)
>  {
> assert(dimensions == 1 || dimensions == 2 || dimensions == 3);
>
> @@ -188,24 +183,85 @@ _mesa_map_validate_pbo_source(struct gl_context *ctx,
>format, type, clientMemSize, ptr)) {
>if (_mesa_is_bufferobj(unpack->BufferObj)) {
>   _mesa_error(ctx, GL_INVALID_OPERATION,
> - "%s(out of bounds PBO access)", where);
> -  } else {
> - _mesa_error(ctx, GL_INVALID_OPERATION,
>
You changed the order of the conditions from the original version.  In the
original, if (_mesa_is_bufferobj), then say "out of bounds PBO access."  If
(!_mesa_is_bufferobj), say "out of bounds access: bufSize is too small."
You have them switched around here.

>   "%s(out of bounds access: bufSize (%d) is too
> small)",
>   where, clientMemSize);
> +  } else {
> + _mesa_error(ctx, GL_INVALID_OPERATION,
> + "%s(out of bounds access)",
> + where);
>}
> -  return NULL;
> +  return false;
> }
>
> if (!_mesa_is_bufferobj(unpack->BufferObj)) {
>/* non-PBO access: no further validation to be done */
> -  return ptr;
> +  return true;
> }
>
> if (_mesa_check_disallowed_mapping(unpack->BufferObj)) {
>/* buffer is already mapped - that's an error */
> -  _mesa_error(ctx, GL_INVALID_OPERATION, "%s(PBO is mapped)", where);
> -  return NULL;
> +  _mesa_error(ctx, GL_INVALID_OPERATION, "%s(PBO is mapped)",
> +  where);
> +  return false;
> +   }
> +
> +   return true;
> +}
> +
> +/**
> + * Perform PBO validation for read operations with compressed textures.
> + * If any GL errors are detected, they'll be recorded and NULL returned.
>
Same here as above.  Say something about the return value (true or false).

> + */
> +bool
> +_mesa_validate_pbo_source_compressed(struct gl_context *ctx, GLuint
> dimensions,
> + const struct gl_pixelstore_attrib
> *unpack,
> + GLsizei imageSize, const GLvoid
> *pixels,
> + const char *where)
> +{
> +   if (!_mesa_is_bufferobj(unpack->BufferObj)) {
> +  /* not using a PBO */
> +  return true;
> +   }
> +
> +   if ((const GLubyte *) pixels + imageSize >
> +   ((const GLubyte *) 0) + unpack->BufferObj->Size) {
> +  /* out of bounds read! */
> +  _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid PBO access)",
> +  where);
> +  return false;
> +   }
> +
> +   if (_mesa_check_disallowed_mapping(unpack->BufferObj)) 

Re: [Mesa-dev] [RFC PATCH] mesa: Replace _mesa_round_to_even() with roundeven().

2015-03-11 Thread Ilia Mirkin
On Wed, Mar 11, 2015 at 7:06 PM, Matt Turner  wrote:
> On Wed, Mar 11, 2015 at 4:04 PM, Ilia Mirkin  wrote:
>> On Wed, Mar 11, 2015 at 6:51 PM, Matt Turner  wrote:
>>> On Wed, Mar 11, 2015 at 3:13 PM, Ilia Mirkin  wrote:
 On Wed, Mar 11, 2015 at 6:09 PM, Matt Turner  wrote:
> glibc uses roundsd/roundss in rint/f guarded by a runtime check. The
> SSE code here just allows those instructions to be inlined.

 OK, as long as glibc would end up using the same rounding mode settings,

 Reviewed-by: Ilia Mirkin 
>>>
>>> Okay, so slight difference. It doesn't use the same rounding mode
>>> settings, since rint() rounds in the current rounding direction it
>>> uses rounds* with imm8=0x4 (current rounding direction) whereas we're
>>> using rounds* with imm8=0x8 (round to nearest). We're assuming the
>>> current rounding mode is to nearest, since that's the default and if
>>> it weren't other things would go badly.
>>>
>>> I don't know if your R-b stands in that case?
>>
>> Is it possible to determine the current rounding format in a
>> moderately cheap manner? If so, it may be nice to stick that into an
>> assert.
>
> The difficulty is that MSVC (surprise! surprise!) doesn't support
> fegetround()/fesetround().
>
> Otherwise, yes.

Good enough for an assert... I don't particularly care about MSVC :)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/5] i965/fs: Apply gl_FrontFacing ? -1 : 1 optimization only for floats

2015-03-11 Thread Matt Turner
On Wed, Mar 11, 2015 at 1:44 PM, Ian Romanick  wrote:
> From: Ian Romanick 
>
> At the very least, unreal4/sun-temple/102.shader_test uses this pattern
> for a signed integer result.  However, that shader did not hit the
> optimization in the first place because it uses !gl_FronFacing.  I
> changed the shader to use remove the logical-not and reverse the other
> operands.  I verified that incorrect code is generated before this
> change and correct code is generated after.

At one point I did have a patch that handled !gl_FrontFacing ? ... but
I think it got lost.

4-5 are

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH] mesa: Replace _mesa_round_to_even() with roundeven().

2015-03-11 Thread Matt Turner
On Wed, Mar 11, 2015 at 4:04 PM, Ilia Mirkin  wrote:
> On Wed, Mar 11, 2015 at 6:51 PM, Matt Turner  wrote:
>> On Wed, Mar 11, 2015 at 3:13 PM, Ilia Mirkin  wrote:
>>> On Wed, Mar 11, 2015 at 6:09 PM, Matt Turner  wrote:
 glibc uses roundsd/roundss in rint/f guarded by a runtime check. The
 SSE code here just allows those instructions to be inlined.
>>>
>>> OK, as long as glibc would end up using the same rounding mode settings,
>>>
>>> Reviewed-by: Ilia Mirkin 
>>
>> Okay, so slight difference. It doesn't use the same rounding mode
>> settings, since rint() rounds in the current rounding direction it
>> uses rounds* with imm8=0x4 (current rounding direction) whereas we're
>> using rounds* with imm8=0x8 (round to nearest). We're assuming the
>> current rounding mode is to nearest, since that's the default and if
>> it weren't other things would go badly.
>>
>> I don't know if your R-b stands in that case?
>
> Is it possible to determine the current rounding format in a
> moderately cheap manner? If so, it may be nice to stick that into an
> assert.

The difficulty is that MSVC (surprise! surprise!) doesn't support
fegetround()/fesetround().

Otherwise, yes.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH] mesa: Replace _mesa_round_to_even() with roundeven().

2015-03-11 Thread Ilia Mirkin
On Wed, Mar 11, 2015 at 6:51 PM, Matt Turner  wrote:
> On Wed, Mar 11, 2015 at 3:13 PM, Ilia Mirkin  wrote:
>> On Wed, Mar 11, 2015 at 6:09 PM, Matt Turner  wrote:
>>> glibc uses roundsd/roundss in rint/f guarded by a runtime check. The
>>> SSE code here just allows those instructions to be inlined.
>>
>> OK, as long as glibc would end up using the same rounding mode settings,
>>
>> Reviewed-by: Ilia Mirkin 
>
> Okay, so slight difference. It doesn't use the same rounding mode
> settings, since rint() rounds in the current rounding direction it
> uses rounds* with imm8=0x4 (current rounding direction) whereas we're
> using rounds* with imm8=0x8 (round to nearest). We're assuming the
> current rounding mode is to nearest, since that's the default and if
> it weren't other things would go badly.
>
> I don't know if your R-b stands in that case?

Is it possible to determine the current rounding format in a
moderately cheap manner? If so, it may be nice to stick that into an
assert.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH] mesa: Replace _mesa_round_to_even() with roundeven().

2015-03-11 Thread Matt Turner
On Wed, Mar 11, 2015 at 3:13 PM, Ilia Mirkin  wrote:
> On Wed, Mar 11, 2015 at 6:09 PM, Matt Turner  wrote:
>> glibc uses roundsd/roundss in rint/f guarded by a runtime check. The
>> SSE code here just allows those instructions to be inlined.
>
> OK, as long as glibc would end up using the same rounding mode settings,
>
> Reviewed-by: Ilia Mirkin 

Okay, so slight difference. It doesn't use the same rounding mode
settings, since rint() rounds in the current rounding direction it
uses rounds* with imm8=0x4 (current rounding direction) whereas we're
using rounds* with imm8=0x8 (round to nearest). We're assuming the
current rounding mode is to nearest, since that's the default and if
it weren't other things would go badly.

I don't know if your R-b stands in that case?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/9] i965/fs: Handle VS inputs in the NIR backend.

2015-03-11 Thread Connor Abbott
On Wed, Mar 11, 2015 at 6:28 PM, Jason Ekstrand  wrote:
> I think I mentioned this at least once before, but I don't think that this
> is a good long-term solution.  The nir_lower_io function does two things
> primarily.  1) It assigns each variable a "device location" and 2) changes
> the variable loads to loads with an offset.  In the long-term, I think we
> want to split 1) out and let the backend control what "device location" gets
> assigned to each input/output.  Then, add a knob to 2) to select how we want
> things packed.  Doing this will let us avoid this whole re-mapping mess and,
> in the case of uniforms, better control things so that we can push some of
> them.  That said, this is *not* a NAK of the current patch.  If you want to
> hold off on that for a while, this looks ok.
> --Jason

Yes, some re-thinking of how inputs/outputs/uniforms are handled would
certainly be useful. For some history, when I wrote the old code (and
Jason's code is essentially doing the same thing but in a
different/better way) it was more of a "let's get this working for
now" sort of thing since I wasn't sure what a cleaner solution would
look like (I hadn't even written the backend code yet!). But I agree
that scalar VS support shouldn't have to block on that rewrite.

>
> On Mon, Mar 9, 2015 at 1:58 AM, Kenneth Graunke 
> wrote:
>>
>> Signed-off-by: Kenneth Graunke 
>> ---
>>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 23 ++-
>>  1 file changed, 22 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> index 3baafc4..1734d03 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> @@ -199,11 +199,32 @@ fs_visitor::nir_setup_inputs(nir_shader *shader)
>> struct hash_entry *entry;
>> hash_table_foreach(shader->inputs, entry) {
>>nir_variable *var = (nir_variable *) entry->data;
>> +  enum brw_reg_type type = brw_type_for_base_type(var->type);
>>fs_reg input = offset(nir_inputs, var->data.driver_location);
>>
>>fs_reg reg;
>>switch (stage) {
>> -  case MESA_SHADER_VERTEX:
>> +  case MESA_SHADER_VERTEX: {
>> + /* Our ATTR file is indexed by VERT_ATTRIB_*, which is the value
>> +  * stored in nir_variable::location.
>> +  *
>> +  * However, NIR's load_input intrinsics use a different index -
>> an
>> +  * offset into a single contiguous array containing all inputs.
>> +  * This index corresponds to the nir_variable::driver_location
>> field.
>> +  *
>> +  * So, we need to copy from fs_reg(ATTR, var->location) to
>> +  * offset(nir_inputs, var->data.driver_location).
>> +  */
>> + unsigned components = var->type->without_array()->components();
>> + unsigned array_length = var->type->is_array() ?
>> var->type->length : 1;
>> + for (unsigned i = 0; i < array_length; i++) {
>> +for (unsigned j = 0; j < components; j++) {
>> +   emit(MOV(retype(offset(input, components * i + j), type),
>> +offset(fs_reg(ATTR, var->data.location + i,
>> type), j)));
>> +}
>> + }
>> + break;
>> +  }
>>case MESA_SHADER_GEOMETRY:
>>case MESA_SHADER_COMPUTE:
>>   unreachable("fs_visitor not used for these stages yet.");
>> --
>> 2.2.1
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] main: Clean up a strange construction in use_shader_program().

2015-03-11 Thread Jordan Justen
From: Paul Berry 

Reviewed-by: Jordan Justen 
---
 Two patches found in Paul's cs branch...

 src/mesa/main/shaderapi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 5731d58..872b559 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -1035,7 +1035,7 @@ use_shader_program(struct gl_context *ctx, GLenum type,
gl_shader_stage stage = _mesa_shader_enum_to_shader_stage(type);
 
target = &shTarget->CurrentProgram[stage];
-   if ((shProg == NULL) || (shProg->_LinkedShaders[stage] == NULL))
+   if ((shProg != NULL) && (shProg->_LinkedShaders[stage] == NULL))
   shProg = NULL;
 
if (*target != shProg) {
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] main: Change the type argument of use_shader_program() to gl_shader_stage.

2015-03-11 Thread Jordan Justen
From: Paul Berry 

This allows it to be called from a loop.

Reviewed-by: Jordan Justen 
---
 src/mesa/main/shaderapi.c | 23 +++
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 872b559..9409536 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -1027,12 +1027,11 @@ _mesa_active_program(struct gl_context *ctx, struct 
gl_shader_program *shProg,
 
 
 static void
-use_shader_program(struct gl_context *ctx, GLenum type,
+use_shader_program(struct gl_context *ctx, gl_shader_stage stage,
struct gl_shader_program *shProg,
struct gl_pipeline_object *shTarget)
 {
struct gl_shader_program **target;
-   gl_shader_stage stage = _mesa_shader_enum_to_shader_stage(type);
 
target = &shTarget->CurrentProgram[stage];
if ((shProg != NULL) && (shProg->_LinkedShaders[stage] == NULL))
@@ -1048,17 +1047,17 @@ use_shader_program(struct gl_context *ctx, GLenum type,
* it from that binding point as well.  This ensures that the correct
* semantics of glDeleteProgram are maintained.
*/
-  switch (type) {
-  case GL_VERTEX_SHADER:
+  switch (stage) {
+  case MESA_SHADER_VERTEX:
 /* Empty for now. */
 break;
-  case GL_GEOMETRY_SHADER_ARB:
+  case MESA_SHADER_GEOMETRY:
 /* Empty for now. */
 break;
-  case GL_COMPUTE_SHADER:
+  case MESA_SHADER_COMPUTE:
  /* Empty for now. */
  break;
-  case GL_FRAGMENT_SHADER:
+  case MESA_SHADER_FRAGMENT:
  if (*target == ctx->_Shader->_CurrentFragmentProgram) {
_mesa_reference_shader_program(ctx,

&ctx->_Shader->_CurrentFragmentProgram,
@@ -1079,10 +1078,9 @@ use_shader_program(struct gl_context *ctx, GLenum type,
 void
 _mesa_use_program(struct gl_context *ctx, struct gl_shader_program *shProg)
 {
-   use_shader_program(ctx, GL_VERTEX_SHADER, shProg, &ctx->Shader);
-   use_shader_program(ctx, GL_GEOMETRY_SHADER_ARB, shProg, &ctx->Shader);
-   use_shader_program(ctx, GL_FRAGMENT_SHADER, shProg, &ctx->Shader);
-   use_shader_program(ctx, GL_COMPUTE_SHADER, shProg, &ctx->Shader);
+   int i;
+   for (i = 0; i < MESA_SHADER_STAGES; i++)
+  use_shader_program(ctx, i, shProg, &ctx->Shader);
_mesa_active_program(ctx, shProg, "glUseProgram");
 
if (ctx->Driver.UseProgram)
@@ -1889,7 +1887,8 @@ _mesa_use_shader_program(struct gl_context *ctx, GLenum 
type,
  struct gl_shader_program *shProg,
  struct gl_pipeline_object *shTarget)
 {
-   use_shader_program(ctx, type, shProg, shTarget);
+   gl_shader_stage stage = _mesa_shader_enum_to_shader_stage(type);
+   use_shader_program(ctx, stage, shProg, shTarget);
 
if (ctx->Driver.UseProgram)
   ctx->Driver.UseProgram(ctx, shProg);
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] loader: include for non-sysfs builds

2015-03-11 Thread Emil Velikov
On 11 March 2015 at 22:18, Ian Romanick  wrote:
> On 03/11/2015 02:44 PM, Emil Velikov wrote:
>> On 11 March 2015 at 20:50, Ian Romanick  wrote:
>>> On 03/11/2015 12:12 PM, Emil Velikov wrote:
 Required by fstat(), otherwise we'll error out due to implicit function
 declaration.
>>>
>>> Alternate suggestion... include unistd.h unconditionally.  I see that's
>>> already in the #ifdef HAVE_LIBUDEV path.  Does that work too?
>>>
>> Perhaps the commit message is a bit confusing, but your suggestion
>> does not parse here. How does the following sound ?
>>
>> When building with libudev (non-sysfs) we don't include sys/stat.h
>> thus we error out due to the implicit declaration of the function
>> fstat().
>
> What I meant was:
>
> diff --git a/src/loader/loader.c b/src/loader/loader.c
> index 9ff5115..4709b02 100644
> --- a/src/loader/loader.c
> +++ b/src/loader/loader.c
> @@ -67,11 +67,11 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #ifdef HAVE_LIBUDEV
>  #include 
>  #include 
>  #include 
> -#include 
>  #include 
>  #include 
>  #ifdef USE_DRICONF
> @@ -80,7 +80,6 @@
>  #endif
>  #endif
>  #ifdef HAVE_SYSFS
> -#include 
>  #include 
>  #endif
>  #include "loader.h"
>
> The fstat man page led me to believe that fstat also comes from
> unistd.h, but
$ man 3 fstat does not list unistd.h. Which made we think there is
some confusion.
Checking with the Linux manual (man 2 fstat) I do see your point.

> In any case, just ignore all my noise.
>
Wouldn't call it noise, by any means.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH] mesa: Replace _mesa_round_to_even() with roundeven().

2015-03-11 Thread Carl Worth
On Wed, Mar 11 2015, Matt Turner wrote:
> Eric's initial patch adding constant expression evaluation for
> ir_unop_round_even used nearbyint...

Hi Matt,

It's great to see this commit, (and with such a detailed message).
Rounding is one of those things that can be surprisingly difficult to
get correct, and there are a lot of moving pieces here, (software
specifications, hardware behavior, performance penalties of changing
modes, exceptions, etc.). So this is definitely an area worth spending
the effort to get things right.

I do have a feeling that there's something we should add to this commit.
My first difficulty in trying to review it was in determining what the
behavioral change in the patch actually is. Most of the commit message
is history (which is useful) but it was hard to find a succinct
description of "here's what's changed".

Reading between the lines of the history, I can guess the following
list of things are happening in this commit:

> Worse, IROUND() is implemented using the trunc(x + 0.5) trick which
> fails for x = nextafterf(0.5, 0.0).

1. Fix _mesa_round_to_even to return a correct value in this case.

Since you've done the careful work to identify this boundary
case, (which was subtle enough that it was missed by the
original implementation), I think this should be verified with a
unit test.

> Still worse, _mesa_round_to_even unexpectedly returns an int.

2. Fix _mesa_round_to_even to return a floating-point value

This sounds logically independent from the above
change. Usually, I'd say that justifies separate commits, but
maybe combining these two in one is fine. What would at least be
nice if the first paragraph of the commit message could list
exactly what's changed, before all the motivation.

> rint() and nearbyint() implement the round-half-to-even behavior we want
> when the rounding mode is set to the default round-to-nearest. The only
> difference between them is that nearbyint() raises the inexact
> exception.
>
> This patch implements roundeven{f,}, a function added by a yet
> unimplemented technical specification (ISO/IEC TS 18661-1:2014), with a
> small difference in behavior -- we don't bother raising the inexact
> exception, which I don't think we care about anyway.

3. Rename _mesa_round_to_even to roundeven

I'm less confident about this change. If we're using the name of
a standardized function, (that may appear in glibc at any point
in the future), shouldn't we at least have some configure
machinery in place so that we can actually use the system
version if available?

> If we do indeed want the don't-raise-the-inexact-exception behavior,
> maybe we should just keep the _mesa_round_to_even name?

If we care and don't want the exception, then yes, we'd better not use
the standard name. But even if we don't care and could live with the
exception, I don't think we should use a standardized name for a
function that we know differs in its behavior.

If we do decide to keep the rename, I propose at least splitting this
part into a separate commit.

> The SSE 4.1 ROUND instructions let us implement roundeven directly.
> Otherwise we assume that the rounding mode has not been modified (as we
> do in the rest of Mesa) and use rint().

4. Optimize this function (whatever its name) with SSE4.1 ROUND

This looks like a reasonable optimization, but should be a
separate commit, (and this commit would benefit from the unit
tests mentioned before).

So I think I'd like to see at least three or four commits here:

1. Change _mesa_round_to_even to return a float
2. Fix rounding bug in _mesa_round_to_even
3. Add unit test for recent rounding-bug fix
4. Optimize _mesa_round_to_even with SSE 4.1 ROUND if available

With that: Reviewed-by: Carl Worth 

-Carl

-- 
carl.d.wo...@intel.com


pgp9C0kDV4b3E.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/9] i965/fs: Store a pointer to brw_sampler_prog_key_data in the visitor.

2015-03-11 Thread Jason Ekstrand
I'll second Topi's comment.  Other than that, not much jumpped out at me.
I did make a comment on 7 which also applies (somewhat) to 8 and I made
another comment on 5.  However, none of this is blockers so this series is

Reviewed-by: Jason Ekstrand 

On Wed, Mar 11, 2015 at 11:59 AM, Kenneth Graunke 
wrote:

> On Wednesday, March 11, 2015 10:44:26 AM Pohjolainen, Topi wrote:
> > On Mon, Mar 09, 2015 at 01:58:51AM -0700, Kenneth Graunke wrote:
> > > The NIR backend hardcodes brw_wm_prog_key at the moment, which won't
> > > work when we support scalar VS.  We could use get_tex(), but it's a
> > > static method.  I was going to promote it to fs_visitor, but then
> > > realized that both parameters (stage and key) are already members.
> > >
> > > It then occured to me that we could just set up a pointer in the
> > > constructor, and skip having a function altogether.
> > >
> > > This patch also converts all existing users to use key_tex.
> > >
> > > Signed-off-by: Kenneth Graunke 
> >
> > With the few nits:
> >
> > Reviewed-by: Topi Pohjolainen 
> >
> > > ---
> > >  src/mesa/drivers/dri/i965/brw_fs.h   |  2 +
> > >  src/mesa/drivers/dri/i965/brw_fs_nir.cpp |  3 +-
> > >  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 55
> 
> > >  3 files changed, 27 insertions(+), 33 deletions(-)
> > >
> > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h
> b/src/mesa/drivers/dri/i965/brw_fs.h
> > > index ee6ba98..7f4916a 100644
> > > --- a/src/mesa/drivers/dri/i965/brw_fs.h
> > > +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> > > @@ -418,6 +418,8 @@ public:
> > > void visit_atomic_counter_intrinsic(ir_call *ir);
> > >
> > > const void *const key;
> > > +   struct brw_sampler_prog_key_data *key_tex;
> >
> > This could be constant pointer as well, we only use it for reading in the
> > visitor. (Also prevents the need to cast the constant 'key' pointer to
> > non-constant in init()).
>
> Good catch!  I've changed it to:
>
> const struct brw_sampler_prog_key_Data *key_tex;
>
> and changed the casts in init() to be "const struct brw_wm_prog_key" and
> so on.  I couldn't add the second 'const' (which prevents assignments to
> key_tex) since I can't really set this from a constructor initializer
> list - I need the switch statement in init().  But, this is probably the
> most important one.
>
> Also fixed the word wrapping.  Thanks!
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/9] i965/nir: Lower to registers a bit later.

2015-03-11 Thread Ian Romanick
On 03/11/2015 12:01 PM, Kenneth Graunke wrote:
> On Wednesday, March 11, 2015 10:29:02 AM Ian Romanick wrote:
>> On 03/09/2015 01:58 AM, Kenneth Graunke wrote:
>>> We can't safely call nir_optimize() with register present, since several
>>> passes called in the loop can't handle registers, and will fail asserts.
>>
>> Doesn't that mean this patch should go before patch 2?
> 
> Probably.  I only provoked the bug by using this code path for vertex
> shaders, which aren't enabled until the end of the series.
> 
> It may be possible to provoke it via fragment shaders (and piglit simply
> doesn't), but if so, it would have already been broken - we already have
> one nir_optimize() before lower_to_registers.

Okay... I'm not worried about it either then.

> So I don't feel too strongly about it.




signature.asc
Description: OpenPGP digital signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/9] i965: Implement NIR intrinsics for loading VS system values.

2015-03-11 Thread Jason Ekstrand
I'm not terribly happy with how this worked out, I'm not going to NAK it as
I think it's the best we can do at the moment.

The reason why Connor (and others) have chosen to emit these things at the
top of the shader is because they frequently require some computation and
we don't want to duplicate that if we don't have to.  However, it also
leads to variables with very long live ranges which we don't want either.
Once we have GVN, we can value-number system value intrinsics.  This will
ensure that we only do the (potentially expensive) caluculation once and
GCM will ensure that they land as far down the program as we can put them.
However, until we have GVN, this seems like as good as we're going to get
at the moment.
--Jason

On Mon, Mar 9, 2015 at 1:58 AM, Kenneth Graunke 
wrote:

> Signed-off-by: Kenneth Graunke 
> ---
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 51
> 
>  1 file changed, 51 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index c5ed55c..d700523 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -363,6 +363,30 @@ emit_system_values_block(nir_block *block, void
> *void_visitor)
>
>nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
>switch (intrin->intrinsic) {
> +  case nir_intrinsic_load_vertex_id:
> + unreachable("should be lowered by lower_vertex_id().");
> +
> +  case nir_intrinsic_load_vertex_id_zero_base:
> + assert(v->stage == MESA_SHADER_VERTEX);
> + reg = &v->nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
> + if (reg->file == BAD_FILE)
> +*reg =
> *v->emit_vs_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
> + break;
> +
> +  case nir_intrinsic_load_base_vertex:
> + assert(v->stage == MESA_SHADER_VERTEX);
> + reg = &v->nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
> + if (reg->file == BAD_FILE)
> +*reg = *v->emit_vs_system_value(SYSTEM_VALUE_BASE_VERTEX);
> + break;
> +
> +  case nir_intrinsic_load_instance_id:
> + assert(v->stage == MESA_SHADER_VERTEX);
> + reg = &v->nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
> + if (reg->file == BAD_FILE)
> +*reg = *v->emit_vs_system_value(SYSTEM_VALUE_INSTANCE_ID);
> + break;
> +
>case nir_intrinsic_load_sample_pos:
>   assert(v->stage == MESA_SHADER_FRAGMENT);
>   reg = &v->nir_system_values[SYSTEM_VALUE_SAMPLE_POS];
> @@ -1344,6 +1368,33 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr
> *instr)
> *emit_frontfacing_interpolation()));
>break;
>
> +   case nir_intrinsic_load_vertex_id:
> +  unreachable("should be lowered by lower_vertex_id()");
> +
> +   case nir_intrinsic_load_vertex_id_zero_base: {
> +  fs_reg vertex_id =
> nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
> +  assert(vertex_id.file != BAD_FILE);
> +  dest.type = vertex_id.type;
> +  emit(MOV(dest, vertex_id));
> +  break;
> +   }
> +
> +   case nir_intrinsic_load_base_vertex: {
> +  fs_reg base_vertex = nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
> +  assert(base_vertex.file != BAD_FILE);
> +  dest.type = base_vertex.type;
> +  emit(MOV(dest, base_vertex));
> +  break;
> +   }
> +
> +   case nir_intrinsic_load_instance_id: {
> +  fs_reg instance_id = nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
> +  assert(instance_id.file != BAD_FILE);
> +  dest.type = instance_id.type;
> +  emit(MOV(dest, instance_id));
> +  break;
> +   }
> +
> case nir_intrinsic_load_sample_mask_in: {
>fs_reg sample_mask_in =
> nir_system_values[SYSTEM_VALUE_SAMPLE_MASK_IN];
>assert(sample_mask_in.file != BAD_FILE);
> --
> 2.2.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/13] i965: Factor out logic to build a send message instruction with indirect descriptor.

2015-03-11 Thread Francisco Jerez
Matt Turner  writes:

> On Wed, Mar 11, 2015 at 2:29 PM, Francisco Jerez  
> wrote:
>> Matt Turner  writes:
>>> commit 4c4934636cb286e7d7836afc26e9d392e2f0f155
>>> Author: Paul Berry 
>>> Date:   Tue Sep 24 15:18:52 2013 -0700
>>>
>>> i965/blorp: retype destination register for texture SEND instruction to 
>>> UW.
>>>
>>> The resource streamer only exists on HSW+, so the UW dest is certainly
>>> needed for things after Gen5.
>>
>> Odd, I haven't seen a mention of that restriction in the hardware specs
>> (at least not on reasonably recent ones -- the Gen4 and 5 specs do
>> mention it and they actually hang if you send a message with compression
>> enabled and anything bigger than a W as destination type).  Is this a
>> purely empirical finding?  If so, doesn't it deserve a big fat warning
>> comment?  Is this only a problem for some interaction with the resource
>> streamer or has it ever been observed to fix something else?
>
> This page [0] says:
>
> """
> The subregister number, horizontal stride, destination mask and type
> fields of  are always valid and are used in part to generate the
> WrEn. This is true even if  is a null register (this is an
> exception for null as for most cases these fields are ignored by
> hardware). These parameters of  follow the same restriction as
> that of normal destination operand – destination region cannot cross
> the 256-bit register boundary.
> """
>
> Searching for the exact phrase quoted in Paul's commit finds another
> page that says it applies to "DevSNB+,Pre-DevBDW".
>

Hm, searching for the same phrase ("destination region cannot cross the
256-bit register boundary.") gives several matches, some of the pages
are indeed tagged "DevSNB+,Pre-DevBDW", but in all of them that phrase
appears inside a SNB-only block, I don't see any indication of that
restriction applying to Gen7 and up.

> I think this is one of those cases where they technically give you all
> the information, they just don't tell you anything about what you're
> supposed to do with it. Totally bullshit, but par for the course.
>
> I guess the good news in all of this is that we now know we don't need
> to bother with this for BDW+.
>
> [0] 3D-Media-GPGPU Engine > EU Overview > ISA Introduction >
> Instruction Set Reference > EUISA Instructions > Send Message [SNB+]


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/9] i965/fs: Handle VS inputs in the NIR backend.

2015-03-11 Thread Jason Ekstrand
I think I mentioned this at least once before, but I don't think that this
is a good long-term solution.  The nir_lower_io function does two things
primarily.  1) It assigns each variable a "device location" and 2) changes
the variable loads to loads with an offset.  In the long-term, I think we
want to split 1) out and let the backend control what "device location"
gets assigned to each input/output.  Then, add a knob to 2) to select how
we want things packed.  Doing this will let us avoid this whole re-mapping
mess and, in the case of uniforms, better control things so that we can
push some of them.  That said, this is *not* a NAK of the current patch.
If you want to hold off on that for a while, this looks ok.
--Jason

On Mon, Mar 9, 2015 at 1:58 AM, Kenneth Graunke 
wrote:

> Signed-off-by: Kenneth Graunke 
> ---
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 23 ++-
>  1 file changed, 22 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 3baafc4..1734d03 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -199,11 +199,32 @@ fs_visitor::nir_setup_inputs(nir_shader *shader)
> struct hash_entry *entry;
> hash_table_foreach(shader->inputs, entry) {
>nir_variable *var = (nir_variable *) entry->data;
> +  enum brw_reg_type type = brw_type_for_base_type(var->type);
>fs_reg input = offset(nir_inputs, var->data.driver_location);
>
>fs_reg reg;
>switch (stage) {
> -  case MESA_SHADER_VERTEX:
> +  case MESA_SHADER_VERTEX: {
> + /* Our ATTR file is indexed by VERT_ATTRIB_*, which is the value
> +  * stored in nir_variable::location.
> +  *
> +  * However, NIR's load_input intrinsics use a different index -
> an
> +  * offset into a single contiguous array containing all inputs.
> +  * This index corresponds to the nir_variable::driver_location
> field.
> +  *
> +  * So, we need to copy from fs_reg(ATTR, var->location) to
> +  * offset(nir_inputs, var->data.driver_location).
> +  */
> + unsigned components = var->type->without_array()->components();
> + unsigned array_length = var->type->is_array() ?
> var->type->length : 1;
> + for (unsigned i = 0; i < array_length; i++) {
> +for (unsigned j = 0; j < components; j++) {
> +   emit(MOV(retype(offset(input, components * i + j), type),
> +offset(fs_reg(ATTR, var->data.location + i,
> type), j)));
> +}
> + }
> + break;
> +  }
>case MESA_SHADER_GEOMETRY:
>case MESA_SHADER_COMPUTE:
>   unreachable("fs_visitor not used for these stages yet.");
> --
> 2.2.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] GL/GLSL tests for GL 4.0 and newer

2015-03-11 Thread Ilia Mirkin
On Wed, Mar 11, 2015 at 6:17 PM, Dave Airlie  wrote:
> On 12 March 2015 at 08:07, Ian Romanick  wrote:
>> On 03/09/2015 12:18 AM, Ishara Abeysekera wrote:
>>> /I am interested on write tests for OpenGL 4.0 /GLSL 4.00 .
>>> /
>>> /But can you be more specify what areas you are expecting to be tested,
>>
>> I haven't examined the API side very closely, but I know of a few things
>> in the shading language that need tests.  For most of the things, there
>> is absolutely no support in Mesa yet.
>>
>> We have no tests for GL_ARB_shader_subroutine.
>>
>> It would also be good to have tests to verify that features from other
>> extensions (e.g., GL_ARB_gpu_shader5 and GL_ARB_gpu_shader_fp64) are
>> enabled in #version 400 shaders.  The should be simple "touch" tests
>> since we already have good tests for the extensions.  For example, you
>> might add a glslparsertest test that tries every possible overload of
>> uaddCarry.
>>
>> I don't think we have any tests for GL_ARB_vertex_attrib_64bit.  It's a
>> GL 4.1 feature, but it will probably get done before the other missing
>> 4.0 features.
>
> there are some initial ones I posted and in my piglit tree somewhere,
> but there is definitely a need for more and validated against the
> binary drivers.
>
> also thing like internal_format_query2 need a truck of tests,
> and probably enhanced_layouts.

Other fun extensions that will need a ton of tests:

ARB_shader_storage_buffer_object
ARB_compute_shader (which is somewhat in progress AFAIK, but very
limited test coverage atm)

And smaller extensions... would probably have to do a bunch of them up
to make a single GSoC:

ARB_get_texture_sub_image
ARB_query_buffer_object
ARB_texture_stencil8
ARB_framebuffer_no_attachments

You can look at
http://cgit.freedesktop.org/mesa/mesa/tree/docs/GL3.txt to see which
core extensions mesa supports. It's most common that there are tests
for supported extensions and no tests for unsupported extensions (but
check the piglit repo before assuming that... e.g. there are a lot of
ARB_shader_image_load_store tests even though it's not presently
supported by any driver).

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] loader: include for non-sysfs builds

2015-03-11 Thread Ian Romanick
On 03/11/2015 02:44 PM, Emil Velikov wrote:
> On 11 March 2015 at 20:50, Ian Romanick  wrote:
>> On 03/11/2015 12:12 PM, Emil Velikov wrote:
>>> Required by fstat(), otherwise we'll error out due to implicit function
>>> declaration.
>>
>> Alternate suggestion... include unistd.h unconditionally.  I see that's
>> already in the #ifdef HAVE_LIBUDEV path.  Does that work too?
>>
> Perhaps the commit message is a bit confusing, but your suggestion
> does not parse here. How does the following sound ?
> 
> When building with libudev (non-sysfs) we don't include sys/stat.h
> thus we error out due to the implicit declaration of the function
> fstat().

What I meant was:

diff --git a/src/loader/loader.c b/src/loader/loader.c
index 9ff5115..4709b02 100644
--- a/src/loader/loader.c
+++ b/src/loader/loader.c
@@ -67,11 +67,11 @@
 #include 
 #include 
 #include 
+#include 
 #ifdef HAVE_LIBUDEV
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #ifdef USE_DRICONF
@@ -80,7 +80,6 @@
 #endif
 #endif
 #ifdef HAVE_SYSFS
-#include 
 #include 
 #endif
 #include "loader.h"

The fstat man page led me to believe that fstat also comes from
unistd.h, but
http://pubs.opengroup.org/onlinepubs/009695399/functions/fstat.html
leads me to believe that is incorrect.  That link also says that 'struct
stat' is defined in sys/stat.h, so the implicit function declaration
should have been the least of the problems.

In any case, just ignore all my noise.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] GL/GLSL tests for GL 4.0 and newer

2015-03-11 Thread Dave Airlie
On 12 March 2015 at 08:07, Ian Romanick  wrote:
> On 03/09/2015 12:18 AM, Ishara Abeysekera wrote:
>> /I am interested on write tests for OpenGL 4.0 /GLSL 4.00 .
>> /
>> /But can you be more specify what areas you are expecting to be tested,
>
> I haven't examined the API side very closely, but I know of a few things
> in the shading language that need tests.  For most of the things, there
> is absolutely no support in Mesa yet.
>
> We have no tests for GL_ARB_shader_subroutine.
>
> It would also be good to have tests to verify that features from other
> extensions (e.g., GL_ARB_gpu_shader5 and GL_ARB_gpu_shader_fp64) are
> enabled in #version 400 shaders.  The should be simple "touch" tests
> since we already have good tests for the extensions.  For example, you
> might add a glslparsertest test that tries every possible overload of
> uaddCarry.
>
> I don't think we have any tests for GL_ARB_vertex_attrib_64bit.  It's a
> GL 4.1 feature, but it will probably get done before the other missing
> 4.0 features.

there are some initial ones I posted and in my piglit tree somewhere,
but there is definitely a need for more and validated against the
binary drivers.

also thing like internal_format_query2 need a truck of tests,
and probably enhanced_layouts.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH] mesa: Replace _mesa_round_to_even() with roundeven().

2015-03-11 Thread Ilia Mirkin
On Wed, Mar 11, 2015 at 6:09 PM, Matt Turner  wrote:
> On Wed, Mar 11, 2015 at 3:04 PM, Ilia Mirkin  wrote:
>> While I'm not opposed to this style of implementation, do note that
>> 99.738% of users will end up with distro-compiled packages
>> targeting generic x86_64 and thus won't use the SSE variant. If it's
>> really better, might make sense to do it "for real" (i.e. runtime
>> selection), otherwise just let it go and use rint/rintf everywhere so
>> that developers (who build their own libraries, probably with the
>> "right" -march) will get the same behaviour as users.
>
> glibc uses roundsd/roundss in rint/f guarded by a runtime check. The
> SSE code here just allows those instructions to be inlined.

OK, as long as glibc would end up using the same rounding mode settings,

Reviewed-by: Ilia Mirkin 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH] mesa: Replace _mesa_round_to_even() with roundeven().

2015-03-11 Thread Matt Turner
On Wed, Mar 11, 2015 at 3:04 PM, Ilia Mirkin  wrote:
> While I'm not opposed to this style of implementation, do note that
> 99.738% of users will end up with distro-compiled packages
> targeting generic x86_64 and thus won't use the SSE variant. If it's
> really better, might make sense to do it "for real" (i.e. runtime
> selection), otherwise just let it go and use rint/rintf everywhere so
> that developers (who build their own libraries, probably with the
> "right" -march) will get the same behaviour as users.

glibc uses roundsd/roundss in rint/f guarded by a runtime check. The
SSE code here just allows those instructions to be inlined.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] nir: Optimize a + neg(a)

2015-03-11 Thread Ian Romanick
On 03/10/2015 05:05 PM, Jason Ekstrand wrote:
> 
> 
> On Tue, Mar 10, 2015 at 4:50 PM, Matt Turner  > wrote:
> 
> On Tue, Mar 10, 2015 at 4:20 PM, Ian Romanick  > wrote:
> > On 02/28/2015 12:19 PM, Matt Turner wrote:
> >> On Sat, Feb 28, 2015 at 11:47 AM, Thomas Helland
> >> mailto:thomashellan...@gmail.com>> wrote:
> >>> On Feb 28, 2015 8:39 PM, "Jason Ekstrand"  > wrote:
> 
>  Both patches are
> 
>  Reviewed-by: Jason Ekstrand  >
> >>>
> >>> Could you commit them?
> >>> I don't have commit access.
> >>
> >> I'd like to wait a few days to see if there are any comments about the
> >> floating-point optimizations. If not, I'll commit them. (The integer
> >> parts are surely fine)
> >
> > Hmm... does the GLSL spec say anything about integer overflow?  For
> > large values, (a*b)+(a*c) can produce dramatically different results
> > than a*(b+c).
> 
> It says (in 4.1.3 Integers)
> 
> """
> Addition, subtraction, and shift operations resulting in overflow or
> underflow will not cause any exception, nor will they saturate, rather
> they will “wrap” to yield the low-order 32 bits of the result.
> Division and multiplication operations resulting in overflow or
> underflow will not cause any exception but will result in an undefined
> value.
> """
> 
> More thinking required?
> 
> 
> Actually Assuming that everything's 32-bit, it's 100% ok.  A fun and
> somewhat surprising fact about modular arithmetic is that it is that
> multiplication is still distributive.  Yes, it can wrap around, but it
> will always wrap around "the same way".

Now that I think about it some more, that does seem obviously true.

> --Jason

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] GL/GLSL tests for GL 4.0 and newer

2015-03-11 Thread Ian Romanick
On 03/09/2015 12:18 AM, Ishara Abeysekera wrote:
> /I am interested on write tests for OpenGL 4.0 /GLSL 4.00 .
> /
> /But can you be more specify what areas you are expecting to be tested, 

I haven't examined the API side very closely, but I know of a few things
in the shading language that need tests.  For most of the things, there
is absolutely no support in Mesa yet.

We have no tests for GL_ARB_shader_subroutine.

It would also be good to have tests to verify that features from other
extensions (e.g., GL_ARB_gpu_shader5 and GL_ARB_gpu_shader_fp64) are
enabled in #version 400 shaders.  The should be simple "touch" tests
since we already have good tests for the extensions.  For example, you
might add a glslparsertest test that tries every possible overload of
uaddCarry.

I don't think we have any tests for GL_ARB_vertex_attrib_64bit.  It's a
GL 4.1 feature, but it will probably get done before the other missing
4.0 features.

> Thank you!/
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH] mesa: Replace _mesa_round_to_even() with roundeven().

2015-03-11 Thread Ilia Mirkin
On Wed, Mar 11, 2015 at 5:52 PM, Matt Turner  wrote:
> +static inline float
> +roundevenf(float x)
> +{
> +   float ret;
> +#ifdef __SSE4_1__
> +   __m128 m = _mm_load_ss(&x);
> +   m = _mm_round_ss(m, m, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
> +   _mm_store_ss(&ret, m);
> +#else
> +   /* Assume that the floating-point rounding mode has not been changed from
> +* the default (Round to nearest).
> +*/
> +   ret = rintf(x);
> +#endif
> +   return ret;
> +}
> +
> +static inline double
> +roundeven(double x)
> +{
> +   double ret;
> +#ifdef __SSE4_1__
> +   __m128d m = _mm_load_sd(&x);
> +   m = _mm_round_sd(m, m, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
> +   _mm_store_sd(&ret, m);
> +#else
> +   /* Assume that the floating-point rounding mode has not been changed from
> +* the default (Round to nearest).
> +*/
> +   ret = rint(x);
> +#endif
> +   return ret;
> +}

While I'm not opposed to this style of implementation, do note that
99.738% of users will end up with distro-compiled packages
targeting generic x86_64 and thus won't use the SSE variant. If it's
really better, might make sense to do it "for real" (i.e. runtime
selection), otherwise just let it go and use rint/rintf everywhere so
that developers (who build their own libraries, probably with the
"right" -march) will get the same behaviour as users.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/13] i965: Factor out logic to build a send message instruction with indirect descriptor.

2015-03-11 Thread Matt Turner
On Wed, Mar 11, 2015 at 2:29 PM, Francisco Jerez  wrote:
> Matt Turner  writes:
>> commit 4c4934636cb286e7d7836afc26e9d392e2f0f155
>> Author: Paul Berry 
>> Date:   Tue Sep 24 15:18:52 2013 -0700
>>
>> i965/blorp: retype destination register for texture SEND instruction to 
>> UW.
>>
>> The resource streamer only exists on HSW+, so the UW dest is certainly
>> needed for things after Gen5.
>
> Odd, I haven't seen a mention of that restriction in the hardware specs
> (at least not on reasonably recent ones -- the Gen4 and 5 specs do
> mention it and they actually hang if you send a message with compression
> enabled and anything bigger than a W as destination type).  Is this a
> purely empirical finding?  If so, doesn't it deserve a big fat warning
> comment?  Is this only a problem for some interaction with the resource
> streamer or has it ever been observed to fix something else?

This page [0] says:

"""
The subregister number, horizontal stride, destination mask and type
fields of  are always valid and are used in part to generate the
WrEn. This is true even if  is a null register (this is an
exception for null as for most cases these fields are ignored by
hardware). These parameters of  follow the same restriction as
that of normal destination operand – destination region cannot cross
the 256-bit register boundary.
"""

Searching for the exact phrase quoted in Paul's commit finds another
page that says it applies to "DevSNB+,Pre-DevBDW".

I think this is one of those cases where they technically give you all
the information, they just don't tell you anything about what you're
supposed to do with it. Totally bullshit, but par for the course.

I guess the good news in all of this is that we now know we don't need
to bother with this for BDW+.

[0] 3D-Media-GPGPU Engine > EU Overview > ISA Introduction >
Instruction Set Reference > EUISA Instructions > Send Message [SNB+]
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH] mesa: Replace _mesa_round_to_even() with roundeven().

2015-03-11 Thread Matt Turner
Eric's initial patch adding constant expression evaluation for
ir_unop_round_even used nearbyint. The open-coded _mesa_round_to_even
implementation came about without much explanation after a reviewer
asked whether nearbyint depended on the application not modifying the
rounding mode. Of course (as Eric commented) we rely on the application
not changing the rounding mode from its default (round-to-nearest) in
many other places, including the IROUND function used by
_mesa_round_to_even!

Worse, IROUND() is implemented using the trunc(x + 0.5) trick which
fails for x = nextafterf(0.5, 0.0).

Still worse, _mesa_round_to_even unexpectedly returns an int. I suspect
that could cause problems when rounding large integral values not
representable as an int in ir_constant_expression.cpp's ir_unop_round_even
evaluation. Its use of _mesa_round_to_even is clearly broken for doubles
(as noted during review).

The constant expression evaluation code for the packing built-in
functions also mistakenly assumed that _mesa_round_to_even returned a
float, as can be seen by the cast through a signed integer type to an
unsigned (since negative float -> unsigned conversions are undefined).

rint() and nearbyint() implement the round-half-to-even behavior we want
when the rounding mode is set to the default round-to-nearest. The only
difference between them is that nearbyint() raises the inexact
exception.

This patch implements roundeven{f,}, a function added by a yet
unimplemented technical specification (ISO/IEC TS 18661-1:2014), with a
small difference in behavior -- we don't bother raising the inexact
exception, which I don't think we care about anyway.

At least recent Intel CPUs can quickly change a subset of the bits in
the x87 floating-point control register, but the exception mask bits are
not included. rint() does not need to change these bits, but nearbyint()
does (twice: save old, set new, and restore) in order to raise the inexact
exception, which would incur some penalty.

The SSE 4.1 ROUND instructions let us implement roundeven directly.
Otherwise we assume that the rounding mode has not been modified (as we
do in the rest of Mesa) and use rint().
---
v1 of Eric's previously mentioned patch is
 http://lists.freedesktop.org/archives/mesa-dev/2011-October/012878.html

Comments on v1 start with
 http://lists.freedesktop.org/archives/mesa-dev/2011-October/012900.html

v2 is
 http://lists.freedesktop.org/archives/mesa-dev/2011-October/013400.html

As far as I can tell, v2 came about because Eric didn't know that the
default rounding mode was what we wanted for the libc functions to work,
and that we didn't need to change it.

If we do indeed want the don't-raise-the-inexact-exception behavior,
maybe we should just keep the _mesa_round_to_even name?

 src/glsl/ir_constant_expression.cpp  | 18 
 src/glsl/nir/nir_constant_expressions.py | 15 ---
 src/glsl/nir/nir_opcodes.py  |  2 +-
 src/mesa/main/imports.c  | 25 ++-
 src/mesa/main/imports.h  |  3 --
 src/util/Makefile.sources|  1 +
 src/util/rounding.h  | 73 
 7 files changed, 97 insertions(+), 40 deletions(-)
 create mode 100644 src/util/rounding.h

diff --git a/src/glsl/ir_constant_expression.cpp 
b/src/glsl/ir_constant_expression.cpp
index 388c4c2..b5938bf 100644
--- a/src/glsl/ir_constant_expression.cpp
+++ b/src/glsl/ir_constant_expression.cpp
@@ -35,6 +35,7 @@
 
 #include 
 #include "main/core.h" /* for MAX2, MIN2, CLAMP */
+#include "util/rounding.h" /* for roundeven */
 #include "ir.h"
 #include "glsl_types.h"
 #include "program/hash_table.h"
@@ -245,8 +246,8 @@ pack_snorm_1x8(float x)
  * We must first cast the float to an int, because casting a negative
  * float to a uint is undefined.
  */
-   return (uint8_t) (int8_t)
-  _mesa_round_to_even(CLAMP(x, -1.0f, +1.0f) * 127.0f);
+   return (uint8_t) (int)
+  roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f);
 }
 
 /**
@@ -267,8 +268,8 @@ pack_snorm_1x16(float x)
  * We must first cast the float to an int, because casting a negative
  * float to a uint is undefined.
  */
-   return (uint16_t) (int16_t)
-  _mesa_round_to_even(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
+   return (uint16_t) (int)
+  roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
 }
 
 /**
@@ -322,7 +323,7 @@ pack_unorm_1x8(float x)
  *
  *   packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
  */
-   return (uint8_t) _mesa_round_to_even(CLAMP(x, 0.0f, 1.0f) * 255.0f);
+   return (uint8_t) (int) roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f);
 }
 
 /**
@@ -340,7 +341,8 @@ pack_unorm_1x16(float x)
  *
  *   packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
  */
-   return (uint16_t) _mesa_round_to_even(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
+   return (uint16_t) (int)
+  roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
 }
 
 /**
@@ -733,9 +735,9 @@ ir_expre

Re: [Mesa-dev] [PATCH] loader: include for non-sysfs builds

2015-03-11 Thread Emil Velikov
On 11 March 2015 at 20:50, Ian Romanick  wrote:
> On 03/11/2015 12:12 PM, Emil Velikov wrote:
>> Required by fstat(), otherwise we'll error out due to implicit function
>> declaration.
>
> Alternate suggestion... include unistd.h unconditionally.  I see that's
> already in the #ifdef HAVE_LIBUDEV path.  Does that work too?
>
Perhaps the commit message is a bit confusing, but your suggestion
does not parse here. How does the following sound ?

When building with libudev (non-sysfs) we don't include sys/stat.h
thus we error out due to the implicit declaration of the function
fstat().

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/13] i965: Factor out logic to build a send message instruction with indirect descriptor.

2015-03-11 Thread Francisco Jerez
Matt Turner  writes:

> On Wed, Mar 11, 2015 at 1:07 PM, Kenneth Graunke  
> wrote:
>> On Wednesday, March 11, 2015 07:25:14 PM Francisco Jerez wrote:
>>> "Pohjolainen, Topi"  writes:
>>> > On Fri, Feb 27, 2015 at 05:34:44PM +0200, Francisco Jerez wrote:
>>> >> @@ -1218,17 +1198,6 @@ 
>>> >> fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst,
>>> >>false /* header */,
>>> >>simd_mode,
>>> >>0);
>>> >> -  brw_inst_set_exec_size(p->brw, insn_or, BRW_EXECUTE_1);
>>> >> -  brw_inst_set_src1_reg_type(p->brw, insn_or, BRW_REGISTER_TYPE_UD);
>>> >> -  brw_set_src0(p, insn_or, addr);
>>> >> -  brw_set_dest(p, insn_or, addr);
>>> >> -
>>> >> -
>>> >> -  /* dst = send(offset, a0.0) */
>>> >> -  brw_inst *insn_send = brw_next_insn(p, BRW_OPCODE_SEND);
>>> >> -  brw_set_dest(p, insn_send, retype(dst, BRW_REGISTER_TYPE_UW));
>>> >
>>> > I'm just reading this through again and noticed that the destination type
>>> > changes here to BRW_REGISTER_TYPE_UD (set in brw_send_indirect_message()).
>>>
>>> Ah, yes, that change is intentional.  The type being set to UW was a
>>> remnant from Gen4-5 times -- Those used to require the destination type
>>> of SEND to be W/UW when doing 16-wide (even if the message was actually
>>> writing dwords back...).  None of the codepaths modified in this patch
>>> (or in the rest of the series) should be executed on Gen4 or 5.
>>>
>>> Anyway good catch. :)
>>
>> I'm pretty sure this does still apply on Gen7 - notably, BLORP uses UW
>> destinations for SIMD16 sends, and I believe Paul Berry tracked actual
>> issues down to that.  BLORP only exists on Gen6+.
>>
>> I also seem to recall Chris Forbes hitting an issue relating to that
>> which he and Paul fixed during XDC 2013.
>>
>> CC'ing Chris and Matt in case they remember any details.
>
> I believe he was helping Abdiel track down a hang when using the
> resource streamer, and it turned out that using UW typed-destinations
> on sends in blorp fixed it:
>
> commit 4c4934636cb286e7d7836afc26e9d392e2f0f155
> Author: Paul Berry 
> Date:   Tue Sep 24 15:18:52 2013 -0700
>
> i965/blorp: retype destination register for texture SEND instruction to 
> UW.
>
> The resource streamer only exists on HSW+, so the UW dest is certainly
> needed for things after Gen5.

Odd, I haven't seen a mention of that restriction in the hardware specs
(at least not on reasonably recent ones -- the Gen4 and 5 specs do
mention it and they actually hang if you send a message with compression
enabled and anything bigger than a W as destination type).  Is this a
purely empirical finding?  If so, doesn't it deserve a big fat warning
comment?  Is this only a problem for some interaction with the resource
streamer or has it ever been observed to fix something else?


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH ] i965/aa: fixing anti-aliasing bug for thinnest width lines.

2015-03-11 Thread Ilia Mirkin
On Wed, Mar 11, 2015 at 5:57 PM,   wrote:
> From: Marius Predut 

Set your email from name correctly in git and then you won't have this
line in your git send-email results.

>
> On SNB and IVB hw, for 1 pixel line thickness or less,
> the general anti-aliasing algorithm give up - garbage line is generated.
> Setting a Line Width of 0.0 specifies the rasterization
> of the “thinnest” (one-pixel-wide), non-antialiased lines.
> Lines rendered with zero Line Width are rasterized using
> Grid Intersection Quantization rules as specified by
> bspec section 6.3.12.1 Zero-Width (Cosmetic) Line Rasterization.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82668

This seems like the wrong bug reference...

> Signed-off-by: Marius Predut 
> ---
>  src/mesa/drivers/dri/i965/gen6_sf_state.c |   12 +---
>  1 file changed, 9 insertions(+), 3 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c 
> b/src/mesa/drivers/dri/i965/gen6_sf_state.c
> index f9d8d27..1bed444 100644
> --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
> +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
> @@ -367,9 +367,15 @@ upload_sf_state(struct brw_context *brw)
>float line_width =
>   roundf(CLAMP(ctx->Line.Width, 0.0, ctx->Const.MaxLineWidth));
>uint32_t line_width_u3_7 = U_FIXED(line_width, 7);
> -  /* TODO: line width of 0 is not allowed when MSAA enabled */
> -  if (line_width_u3_7 == 0)
> - line_width_u3_7 = 1;
> +
> +  if (!(multisampled_fbo && ctx->Multisample.Enabled)) {
> +if (ctx->Line.SmoothFlag && ctx->Line.Width <=1)
> +  line_width_u3_7 = 0;
> +  } else {
> +if (line_width_u3_7 == 0)
> +line_width_u3_7 = 1;
> +  }
> +
>dw3 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT;
> }
> if (ctx->Line.SmoothFlag) {
> --
> 1.7.9.5
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH ] i965/aa: fixing anti-aliasing bug for thinnest width lines.

2015-03-11 Thread marius . predut
From: Marius Predut 

On SNB and IVB hw, for 1 pixel line thickness or less,
the general anti-aliasing algorithm give up - garbage line is generated.
Setting a Line Width of 0.0 specifies the rasterization
of the “thinnest” (one-pixel-wide), non-antialiased lines.
Lines rendered with zero Line Width are rasterized using
Grid Intersection Quantization rules as specified by
bspec section 6.3.12.1 Zero-Width (Cosmetic) Line Rasterization.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82668
Signed-off-by: Marius Predut 
---
 src/mesa/drivers/dri/i965/gen6_sf_state.c |   12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c 
b/src/mesa/drivers/dri/i965/gen6_sf_state.c
index f9d8d27..1bed444 100644
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -367,9 +367,15 @@ upload_sf_state(struct brw_context *brw)
   float line_width =
  roundf(CLAMP(ctx->Line.Width, 0.0, ctx->Const.MaxLineWidth));
   uint32_t line_width_u3_7 = U_FIXED(line_width, 7);
-  /* TODO: line width of 0 is not allowed when MSAA enabled */
-  if (line_width_u3_7 == 0)
- line_width_u3_7 = 1;
+
+  if (!(multisampled_fbo && ctx->Multisample.Enabled)) {
+if (ctx->Line.SmoothFlag && ctx->Line.Width <=1)
+  line_width_u3_7 = 0;
+  } else {
+if (line_width_u3_7 == 0)
+line_width_u3_7 = 1;
+  }
+
   dw3 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT;
}
if (ctx->Line.SmoothFlag) {
-- 
1.7.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4] mesa: improve ARB_copy_image internal format compat check

2015-03-11 Thread Jason Ekstrand
Sean,
Sorry it's taken so long for me to get to this, but I went to test/push
this today and it doesn't apply against current mesa master.  Can you
please either rebase on master and re-send or give me the SHA1 hash of the
commit this one is based on.  (Not the SHA1 of this commit but the previous
one).
--Jason

On Sat, Mar 7, 2015 at 8:34 PM, Sean Burke  wrote:

> The memory layout of compatible internal formats may differ in bytes per
> block, so TexFormat is not a reliable measure of compatibility. For
> example,
> GL_RGB8 and GL_RGB8UI are compatible formats, but GL_RGB8 may be laid out
> in
> memory as B8G8R8X8. If GL_RGB8UI has a 3 byte-per-block memory layout, the
> existing compatibility check will fail.
>
> Additionally, the current check allows any two compressed textures which
> share
> block size to be used, whereas the spec gives an explicit table of
> compatible
> formats.
>
> v2: Use a switch instead of array iteration for block class and show the
> correct GL error when internal formats are mismatched.
> v3: Include spec citations for new compatibility checks, rearrange check
> order to ensure that compressed, view-compatible formats return the
> correct result, and make style fixes. Original commit message amended
> for clarity.
> v4: Reformatted spec citations.
>
> Reviewed-by: Jason Ekstrand 
> ---
>  src/mesa/main/copyimage.c | 151
> +++---
>  1 file changed, 130 insertions(+), 21 deletions(-)
>
> diff --git a/src/mesa/main/copyimage.c b/src/mesa/main/copyimage.c
> index 455929d..fd22f28 100644
> --- a/src/mesa/main/copyimage.c
> +++ b/src/mesa/main/copyimage.c
> @@ -33,6 +33,12 @@
>  #include "texobj.h"
>  #include "fbobject.h"
>  #include "textureview.h"
> +#include "glformats.h"
> +
> +enum mesa_block_class {
> +   BLOCK_CLASS_128_BITS,
> +   BLOCK_CLASS_64_BITS
> +};
>
>  static bool
>  prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int
> level,
> @@ -253,6 +259,124 @@ check_region_bounds(struct gl_context *ctx,
> struct gl_texture_image *tex_image,
> return true;
>  }
>
> +static bool
> +compressed_format_compatible(struct gl_context *ctx,
> + GLenum compressedFormat, GLenum otherFormat)
> +{
> +   enum mesa_block_class compressedClass, otherClass;
> +
> +   /* Two view-incompatible compressed formats are never compatible. */
> +   if (_mesa_is_compressed_format(ctx, otherFormat)) {
> +  return false;
> +   }
> +
> +   /*
> +* From ARB_copy_image spec:
> +*Table 4.X.1 (Compatible internal formats for copying between
> +* compressed and uncompressed internal formats)
> +*
> -
> +*| Texel / | Uncompressed  |
>|
> +*| Block   | internal format   | Compressed internal format
> |
> +*| size|   |
>|
> +*
> -
> +*| 128-bit | RGBA32UI, | COMPRESSED_RGBA_S3TC_DXT3_EXT,
> |
> +*| | RGBA32I,  |
> COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT,|
> +*| | RGBA32F   | COMPRESSED_RGBA_S3TC_DXT5_EXT,
> |
> +*| |   |
> COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT,|
> +*| |   | COMPRESSED_RG_RGTC2,
> |
> +*| |   | COMPRESSED_SIGNED_RG_RGTC2,
>|
> +*| |   | COMPRESSED_RGBA_BPTC_UNORM,
>|
> +*| |   |
> COMPRESSED_SRGB_ALPHA_BPTC_UNORM,   |
> +*| |   |
> COMPRESSED_RGB_BPTC_SIGNED_FLOAT,   |
> +*| |   |
> COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT  |
> +*
> -
> +*| 64-bit  | RGBA16F, RG32F,   | COMPRESSED_RGB_S3TC_DXT1_EXT,
>|
> +*| | RGBA16UI, RG32UI, | COMPRESSED_SRGB_S3TC_DXT1_EXT,
> |
> +*| | RGBA16I, RG32I,   | COMPRESSED_RGBA_S3TC_DXT1_EXT,
> |
> +*| | RGBA16,   |
> COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT,|
> +*| | RGBA16_SNORM  | COMPRESSED_RED_RGTC1,
>|
> +*| |   | COMPRESSED_SIGNED_RED_RGTC1
>|
> +*
> -
> +*/
> +
> +   switch (compressedFormat) {
> +  case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
> +  case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
> +  case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
> +  case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
> +  case GL_COMPRESSED_RG_RGTC2:
> +  case GL_COMPRESSED_SIGNED_RG_RGTC2:
> +  case GL_COMPRESSED_RGBA_BPTC_UNORM:
> +  case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
> +  case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
> +  case GL_COMPRESSED_RGB

Re: [Mesa-dev] [PATCH] loader: include for non-sysfs builds

2015-03-11 Thread Ian Romanick
On 03/11/2015 12:12 PM, Emil Velikov wrote:
> Required by fstat(), otherwise we'll error out due to implicit function
> declaration.

Alternate suggestion... include unistd.h unconditionally.  I see that's
already in the #ifdef HAVE_LIBUDEV path.  Does that work too?

> Cc: "10.4 10.5" 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89530
> Signed-off-by: Emil Velikov 
> ---
>  src/loader/loader.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/loader/loader.c b/src/loader/loader.c
> index 94c993a..638ebf2 100644
> --- a/src/loader/loader.c
> +++ b/src/loader/loader.c
> @@ -64,6 +64,7 @@
>   *Rob Clark 
>   */
>  
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -80,7 +81,6 @@
>  #endif
>  #endif
>  #ifdef HAVE_SYSFS
> -#include 
>  #include 
>  #endif
>  #include "loader.h"
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/5] i965/fs: Handle CMP.nz ... 0 and AND.nz ... 1 similarly in cmod propagation

2015-03-11 Thread Ian Romanick
From: Ian Romanick 

Espically on platforms that do not natively generate 0u and ~0u for
Boolean results, we generate a lot of sequences where a CMP is
followed by an AND with 1.  emit_bool_to_cond_code does this, for
example.  On ILK, this results in a sequence like:

add(8)  g3<1>F  g8<8,8,1>F  -g4<0,1,0>F
cmp.l.f0(8) g3<1>D  g3<8,8,1>F  0F
and.nz.f0(8)nullg3<8,8,1>D  1D
(+f0) iff(8)Jump: 6

The AND.nz is obviously redundant.  By propagating the cmod, we can
instead generate

add.l.f0(8) nullg8<8,8,1>F  -g4<0,1,0>F
(+f0) iff(8)Jump: 6

Existing code already handles the propagation from the CMP to the ADD.

Shader-db results:

GM45 (0x2A42):
total instructions in shared programs: 3550829 -> 3550788 (-0.00%)
instructions in affected programs: 10028 -> 9987 (-0.41%)
helped:24

Iron Lake (0x0046):
total instructions in shared programs: 4993146 -> 4993105 (-0.00%)
instructions in affected programs: 9675 -> 9634 (-0.42%)
helped:24

Ivy Bridge (0x0166):
total instructions in shared programs: 6291870 -> 6291794 (-0.00%)
instructions in affected programs: 17914 -> 17838 (-0.42%)
helped:48

Haswell (0x0426):
total instructions in shared programs: 5779256 -> 5779180 (-0.00%)
instructions in affected programs: 16694 -> 16618 (-0.46%)
helped:48

Broadwell (0x162E):
total instructions in shared programs: 6823088 -> 6823014 (-0.00%)
instructions in affected programs: 15824 -> 15750 (-0.47%)
helped:46

No chage on Sandy Bridge or on any platform when NIR is used.

Signed-off-by: Ian Romanick 
Cc: Matt Turner 
---
 .../drivers/dri/i965/brw_fs_cmod_propagation.cpp   | 32 +-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
index d0ca2f9..6df3d45 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
@@ -57,7 +57,8 @@ opt_cmod_propagation_local(bblock_t *block)
foreach_inst_in_block_reverse_safe(fs_inst, inst, block) {
   ip--;
 
-  if ((inst->opcode != BRW_OPCODE_CMP &&
+  if ((inst->opcode != BRW_OPCODE_AND &&
+   inst->opcode != BRW_OPCODE_CMP &&
inst->opcode != BRW_OPCODE_MOV) ||
   inst->predicate != BRW_PREDICATE_NONE ||
   !inst->dst.is_null() ||
@@ -65,6 +66,19 @@ opt_cmod_propagation_local(bblock_t *block)
   inst->src[0].abs)
  continue;
 
+  /* Only an AND.NZ can be propagated.  Many AND.Z instructions are
+   * generated (for ir_unop_not in fs_visitor::emit_bool_to_cond_code).
+   * Propagating those would require inverting the condition on the CMP.
+   * This changes both the flag value and the register destination of the
+   * CMP.  That result may be used elsewhere, so we can't change its value
+   * on a whim.
+   */
+  if (inst->opcode == BRW_OPCODE_AND &&
+  !(inst->src[1].is_one() &&
+inst->conditional_mod == BRW_CONDITIONAL_NZ &&
+!inst->src[0].negate))
+ continue;
+
   if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero())
  continue;
 
@@ -80,6 +94,22 @@ opt_cmod_propagation_local(bblock_t *block)
 scan_inst->dst.reg_offset != inst->src[0].reg_offset)
break;
 
+/* This must be done before the dst.type check because the result
+ * type of the AND will always be D, but the result of the CMP
+ * could be anything.  The assumption is that the AND is just
+ * figuring out what the result of the previous comparison was
+ * instead of doing a new comparison with a different type.
+ */
+if (inst->opcode == BRW_OPCODE_AND) {
+   if (scan_inst->opcode == BRW_OPCODE_CMP &&
+   scan_inst->writes_flag()) {
+  inst->remove(block);
+  progress = true;
+   }
+
+   break;
+}
+
 /* Comparisons operate differently for ints and floats */
 if (scan_inst->dst.type != inst->dst.type)
break;
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/5] i965/fs: Emit better b2f of an expression on GEN4 and GEN5

2015-03-11 Thread Ian Romanick
From: Ian Romanick 

On platforms that do not natively generate 0u and ~0u for Boolean
results, b2f expressions that look like

   f = b2f(expr cmp 0)

will generate better code by pretending the expression is

f = ir_triop_sel(0.0, 1.0, expr cmp 0)

This is because the last instruction of "expr" can generate the
condition code for the "cmp 0".  This avoids having to do the "-(b & 1)"
trick to generate 0u or ~0u for the Boolean result.  This means code like

mov(16) g16<1>F 1F
mul.ge.f0(16)   nullg6<8,8,1>F  g14<8,8,1>F
(+f0) sel(16)   m6<1>F  g16<8,8,1>F 0F

will be generated instead of

mul(16) g2<1>F  g12<8,8,1>F g4<8,8,1>F
cmp.ge.f0(16)   g2<1>D  g4<8,8,1>F  0F
and(16) g4<1>D  g2<8,8,1>D  1D
and(16) m6<1>D  -g4<8,8,1>D 0x3f80UD

v2: When the comparison is either == 0.0 or != 0.0 use the knowledge
that the true (or false) case already results in zero would allow better
code generation by possibly avoiding a load-immediate instruction.

v3: Apply the optimization even when neither comparitor is zero.

Shader-db results:

GM45 (0x2A42):
total instructions in shared programs: 3551002 -> 3550829 (-0.00%)
instructions in affected programs: 33269 -> 33096 (-0.52%)
helped:121

Iron Lake (0x0046):
total instructions in shared programs: 4993327 -> 4993146 (-0.00%)
instructions in affected programs: 34199 -> 34018 (-0.53%)
helped:129

No change on other platforms.

Signed-off-by: Ian Romanick 
Cc: Tapani Palli 
---
 src/mesa/drivers/dri/i965/brw_fs.h   |   2 +
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 101 +--
 2 files changed, 99 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index d9d5858..075e90c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -307,6 +307,7 @@ public:
  const fs_reg &a);
void emit_minmax(enum brw_conditional_mod conditionalmod, const fs_reg &dst,
 const fs_reg &src0, const fs_reg &src1);
+   bool try_emit_b2f_of_comparison(ir_expression *ir);
bool try_emit_saturate(ir_expression *ir);
bool try_emit_line(ir_expression *ir);
bool try_emit_mad(ir_expression *ir);
@@ -317,6 +318,7 @@ public:
bool opt_saturate_propagation();
bool opt_cmod_propagation();
void emit_bool_to_cond_code(ir_rvalue *condition);
+   void emit_bool_to_cond_code_of_reg(ir_expression *expr, fs_reg op[3]);
void emit_if_gen6(ir_if *ir);
void emit_unspill(bblock_t *block, fs_inst *inst, fs_reg reg,
  uint32_t spill_offset, int count);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 3025a9d..3d79796 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -475,6 +475,87 @@ fs_visitor::try_emit_mad(ir_expression *ir)
return true;
 }
 
+bool
+fs_visitor::try_emit_b2f_of_comparison(ir_expression *ir)
+{
+   /* On platforms that do not natively generate 0u and ~0u for Boolean
+* results, b2f expressions that look like
+*
+* f = b2f(expr cmp 0)
+*
+* will generate better code by pretending the expression is
+*
+* f = ir_triop_csel(0.0, 1.0, expr cmp 0)
+*
+* This is because the last instruction of "expr" can generate the
+* condition code for the "cmp 0".  This avoids having to do the "-(b & 1)"
+* trick to generate 0u or ~0u for the Boolean result.  This means code like
+*
+* mov(16) g16<1>F 1F
+* mul.ge.f0(16)   nullg6<8,8,1>F  g14<8,8,1>F
+* (+f0) sel(16)   m6<1>F  g16<8,8,1>F 0F
+*
+* will be generated instead of
+*
+* mul(16) g2<1>F  g12<8,8,1>F g4<8,8,1>F
+* cmp.ge.f0(16)   g2<1>D  g4<8,8,1>F  0F
+* and(16) g4<1>D  g2<8,8,1>D  1D
+* and(16) m6<1>D  -g4<8,8,1>D 0x3f80UD
+*
+* When the comparison is either == 0.0 or != 0.0 using the knowledge that
+* the true (or false) case already results in zero would allow better code
+* generation by possibly avoiding a load-immediate instruction.
+*/
+   ir_expression *cmp = ir->operands[0]->as_expression();
+   if (cmp == NULL)
+  return false;
+
+   if (cmp->operation == ir_binop_equal || cmp->operation == ir_binop_nequal) {
+  for (unsigned i = 0; i < 2; i++) {
+ ir_constant *c = cmp->operands[i]->as_constant();
+ if (c == NULL || !c->is_zero())
+continue;
+
+ ir_expression *expr = cmp->operands[i ^ 1]->as_expression();
+ if (expr != NULL) {
+fs_reg op[2];
+
+for (unsigned j = 

[Mesa-dev] [PATCH 4/5] i965/fs: Change try_opt_frontfacing_ternary to eliminate asserts

2015-03-11 Thread Ian Romanick
From: Ian Romanick 

If we check for the case that is actually necessary, the asserts
become superfluous.

Signed-off-by: Ian Romanick 
Cc: Matt Turner 
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 3d79796..917d3da 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -2845,11 +2845,8 @@ fs_visitor::try_opt_frontfacing_ternary(ir_if *ir)
if (!then_rhs || !else_rhs)
   return false;
 
-   if ((then_rhs->is_one() || then_rhs->is_negative_one()) &&
-   (else_rhs->is_one() || else_rhs->is_negative_one())) {
-  assert(then_rhs->is_one() == else_rhs->is_negative_one());
-  assert(else_rhs->is_one() == then_rhs->is_negative_one());
-
+   if ((then_rhs->is_one() && else_rhs->is_negative_one()) ||
+   (else_rhs->is_one() && then_rhs->is_negative_one())) {
   then_assign->lhs->accept(this);
   fs_reg dst = this->result;
   dst.type = BRW_REGISTER_TYPE_D;
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/5] i965/fs: Apply gl_FrontFacing ? -1 : 1 optimization only for floats

2015-03-11 Thread Ian Romanick
From: Ian Romanick 

At the very least, unreal4/sun-temple/102.shader_test uses this pattern
for a signed integer result.  However, that shader did not hit the
optimization in the first place because it uses !gl_FronFacing.  I
changed the shader to use remove the logical-not and reverse the other
operands.  I verified that incorrect code is generated before this
change and correct code is generated after.

I'll send a piglit test out shortly...

No shader-db changes.

Signed-off-by: Ian Romanick 
Cc: Matt Turner 
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 917d3da..3740722 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -2845,6 +2845,9 @@ fs_visitor::try_opt_frontfacing_ternary(ir_if *ir)
if (!then_rhs || !else_rhs)
   return false;
 
+   if (then_rhs->type->base_type != GLSL_TYPE_FLOAT)
+  return false;
+
if ((then_rhs->is_one() && else_rhs->is_negative_one()) ||
(else_rhs->is_one() && then_rhs->is_negative_one())) {
   then_assign->lhs->accept(this);
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/5] i965/vs: Add missing resolve_bool_comparison calls on GEN4 and GEN5

2015-03-11 Thread Ian Romanick
From: Ian Romanick 

The ir_unop_any problem was discovered by some later optimization passes
that generate ir_triop_csel.  I was also able to reproduce it by
modifying the gl-2.0-vertexattribpointer vertex shader to generate its
result using

   color = mix(vec4(0, 1, 0, 0),
   vec4(1, 0, 0, 0),
   bvec4(any(greaterThan(diff, vec4(tolerance);

instead of an if-statement.  This also required using #version 130 and
MESA_GLSL_VERSION_OVERRIDE=130.

I have not nominated this for stable releases because I don't think
there's any way to trigger the problem without GLSL 1.30 or
optimizations that don't exist in stable.

Signed-off-by: Ian Romanick 
Cc: Abdiel Janulgue 
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 13 +
 1 file changed, 13 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 5bf9e1b..195c6f5 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1555,6 +1555,11 @@ vec4_visitor::visit(ir_expression *ir)
}
 
case ir_binop_all_equal:
+  if (brw->gen <= 5) {
+ resolve_bool_comparison(ir->operands[0], &op[0]);
+ resolve_bool_comparison(ir->operands[1], &op[1]);
+  }
+
   /* "==" operator producing a scalar boolean. */
   if (ir->operands[0]->type->is_vector() ||
  ir->operands[1]->type->is_vector()) {
@@ -1567,6 +1572,11 @@ vec4_visitor::visit(ir_expression *ir)
   }
   break;
case ir_binop_any_nequal:
+  if (brw->gen <= 5) {
+ resolve_bool_comparison(ir->operands[0], &op[0]);
+ resolve_bool_comparison(ir->operands[1], &op[1]);
+  }
+
   /* "!=" operator producing a scalar boolean. */
   if (ir->operands[0]->type->is_vector() ||
  ir->operands[1]->type->is_vector()) {
@@ -1581,6 +1591,9 @@ vec4_visitor::visit(ir_expression *ir)
   break;
 
case ir_unop_any:
+  if (brw->gen <= 5) {
+ resolve_bool_comparison(ir->operands[0], &op[0]);
+  }
   emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
   emit(MOV(result_dst, src_reg(0)));
 
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/6] mesa: replace _ASMAPI with __cdecl

2015-03-11 Thread Jose Fonseca

On 11/03/15 16:52, Brian Paul wrote:

On 03/11/2015 08:21 AM, Jose Fonseca wrote:

On 11/03/15 14:07, Brian Paul wrote:

On 03/11/2015 01:29 AM, Jose Fonseca wrote:

I don't know the story about this _ASMAPI macro, but __cdecl is also
the
default calling convention for WIN32:

   https://msdn.microsoft.com/en-us/library/zkwh89ks.aspx


Yeah, I had read that too actually but I figured it was safer to keep
things as-is in case there was more to it than met the eye.



so it's redundant to add it.  (The other common calling convention --
__stdcall/APIENTRY/etc -- is the one that must always be explcitely
added when needed).

So we should just drop _ASMAPI/__cdecl completely.


Note that we have something similar in gallium (grep PIPE_CDECL).  Maybe
that could be removed too.


I suspect that's because we copied  src/mesa/x86/rtasm ->
src/gallium/auxiliary/rtasm .

(BTW, it would be a nice beginner project to replace translate_sse to
use LLVM and get rid of src/gallium/auxiliary/rtasm )


Anyway, I'll redo this patch series (rm all __cdecl) and do some testing
on Windows.


Heh, since we removed the old swrast-based Windows gdi driver, I believe
that none of functions that were tagged with __cdecl (mesa/math/ and
swrast/) are used on Windows now anyway.


I think that

  scons platform=windows src/mesa/drivers/osmesa/

should use it.  Though I wonder if we should remove that directory out 
of SCons, now that there's also src/gallium/targets/osmesa/


Jose


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 89530] FTBFS in loader: missing fstat

2015-03-11 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=89530

--- Comment #3 from Vadim Rutkovsky  ---
(In reply to Emil Velikov from comment #1)
> Sigh... you again with the fstat() implicit declaration :P
> But seriously that code has been there for 3+ months and no-one has reported
> a thing, despite the missing include. There must be something really special
> in your setup.

Its Continuous being extremely pedantic during build - this will most likely
not affect distributions, but might uncover some bugs.

> Anywho this patch should address the error. Can you give it a try ?
> http://patchwork.freedesktop.org/patch/44455/

Rebuild mesa from master with this patch, so +1 from me

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/13] i965: Factor out logic to build a send message instruction with indirect descriptor.

2015-03-11 Thread Kenneth Graunke
On Wednesday, March 11, 2015 07:25:14 PM Francisco Jerez wrote:
> "Pohjolainen, Topi"  writes:
> > On Fri, Feb 27, 2015 at 05:34:44PM +0200, Francisco Jerez wrote:
> >> @@ -1218,17 +1198,6 @@ 
> >> fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst,
> >>false /* header */,
> >>simd_mode,
> >>0);
> >> -  brw_inst_set_exec_size(p->brw, insn_or, BRW_EXECUTE_1);
> >> -  brw_inst_set_src1_reg_type(p->brw, insn_or, BRW_REGISTER_TYPE_UD);
> >> -  brw_set_src0(p, insn_or, addr);
> >> -  brw_set_dest(p, insn_or, addr);
> >> -
> >> -
> >> -  /* dst = send(offset, a0.0) */
> >> -  brw_inst *insn_send = brw_next_insn(p, BRW_OPCODE_SEND);
> >> -  brw_set_dest(p, insn_send, retype(dst, BRW_REGISTER_TYPE_UW));
> >
> > I'm just reading this through again and noticed that the destination type
> > changes here to BRW_REGISTER_TYPE_UD (set in brw_send_indirect_message()).
> 
> Ah, yes, that change is intentional.  The type being set to UW was a
> remnant from Gen4-5 times -- Those used to require the destination type
> of SEND to be W/UW when doing 16-wide (even if the message was actually
> writing dwords back...).  None of the codepaths modified in this patch
> (or in the rest of the series) should be executed on Gen4 or 5.
> 
> Anyway good catch. :)

I'm pretty sure this does still apply on Gen7 - notably, BLORP uses UW
destinations for SIMD16 sends, and I believe Paul Berry tracked actual
issues down to that.  BLORP only exists on Gen6+.

I also seem to recall Chris Forbes hitting an issue relating to that
which he and Paul fixed during XDC 2013.

CC'ing Chris and Matt in case they remember any details.


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 89530] FTBFS in loader: missing fstat

2015-03-11 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=89530

--- Comment #2 from Emil Velikov  ---
And just in case there was some misunderstanding - your system is reporting the
correct thing. I'm wondering why we haven't seen this before.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 89530] FTBFS in loader: missing fstat

2015-03-11 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=89530

--- Comment #1 from Emil Velikov  ---
Sigh... you again with the fstat() implicit declaration :P
But seriously that code has been there for 3+ months and no-one has reported a
thing, despite the missing include. There must be something really special in
your setup.

Anywho this patch should address the error. Can you give it a try ?
http://patchwork.freedesktop.org/patch/44455/

Thanks
Emil

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] loader: include for non-sysfs builds

2015-03-11 Thread Emil Velikov
Required by fstat(), otherwise we'll error out due to implicit function
declaration.

Cc: "10.4 10.5" 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89530
Signed-off-by: Emil Velikov 
---
 src/loader/loader.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/loader/loader.c b/src/loader/loader.c
index 94c993a..638ebf2 100644
--- a/src/loader/loader.c
+++ b/src/loader/loader.c
@@ -64,6 +64,7 @@
  *Rob Clark 
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -80,7 +81,6 @@
 #endif
 #endif
 #ifdef HAVE_SYSFS
-#include 
 #include 
 #endif
 #include "loader.h"
-- 
2.1.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/9] i965/nir: Lower to registers a bit later.

2015-03-11 Thread Kenneth Graunke
On Wednesday, March 11, 2015 10:29:02 AM Ian Romanick wrote:
> On 03/09/2015 01:58 AM, Kenneth Graunke wrote:
> > We can't safely call nir_optimize() with register present, since several
> > passes called in the loop can't handle registers, and will fail asserts.
> 
> Doesn't that mean this patch should go before patch 2?

Probably.  I only provoked the bug by using this code path for vertex
shaders, which aren't enabled until the end of the series.

It may be possible to provoke it via fragment shaders (and piglit simply
doesn't), but if so, it would have already been broken - we already have
one nir_optimize() before lower_to_registers.

So I don't feel too strongly about it.


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 0/4] Support multiple state pipelines for i965

2015-03-11 Thread Kristian Høgsberg
On Wed, Mar 11, 2015 at 11:53 AM, Jordan Justen
 wrote:
> git://people.freedesktop.org/~jljusten/mesa i965-pipelines-v2
>
> The big changes from v1:
>  * Rename brw->atoms[] to render_atoms
>  * Add brw->compute_atoms[]
>  * Replace brw_pipeline_first_atom with brw_get_pipeline_atoms
>
> With this version, I was not able to measure a performance change with
> SynMask Batch7 on ivb or bsw.

Looks good to me,
Reviewed-by: Kristian Høgsberg 

> Jordan Justen (4):
>   i965/state: Rename brw_upload_state to brw_upload_render_state
>   i965/state: Support multiple pipelines in brw->num_atoms
>   i965/state: Create separate dirty state bits for each pipeline
>   i965/state: Add compute pipeline with empty atom lists
>
>  src/mesa/drivers/dri/i965/brw_context.h  |  13 ++-
>  src/mesa/drivers/dri/i965/brw_draw.c |   2 +-
>  src/mesa/drivers/dri/i965/brw_state.h|   3 +-
>  src/mesa/drivers/dri/i965/brw_state_upload.c | 154 
> ---
>  4 files changed, 127 insertions(+), 45 deletions(-)
>
> --
> 2.1.4
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/4] i965/state: Support multiple pipelines in brw->num_atoms

2015-03-11 Thread Jordan Justen
brw->num_atoms is converted to an array, but currently just an array
of length 1.

Adds brw_copy_pipeline_atoms which copies the atoms for a pipeline,
and sets brw->num_atoms[p] for pipeline p.

v2:
 * Rename brw->atoms[] to render_atoms
 * Rename brw_add_pipeline_atoms to brw_copy_pipeline_atoms
 * Rename brw_pipeline_first_atom to brw_get_pipeline_atoms

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_context.h  | 10 ++-
 src/mesa/drivers/dri/i965/brw_state_upload.c | 94 +---
 2 files changed, 65 insertions(+), 39 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 682fbe9..91b4054 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -149,6 +149,12 @@ struct brw_vue_prog_key;
 struct brw_wm_prog_key;
 struct brw_wm_prog_data;
 
+enum brw_pipeline {
+   BRW_RENDER_PIPELINE,
+
+   BRW_NUM_PIPELINES
+};
+
 enum brw_cache_id {
BRW_CACHE_FS_PROG,
BRW_CACHE_BLORP_BLIT_PROG,
@@ -1386,8 +1392,8 @@ struct brw_context
   int entries_per_oa_snapshot;
} perfmon;
 
-   int num_atoms;
-   const struct brw_tracked_state atoms[57];
+   int num_atoms[BRW_NUM_PIPELINES];
+   const struct brw_tracked_state render_atoms[57];
 
/* If (INTEL_DEBUG & DEBUG_BATCH) */
struct {
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c 
b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 3b64a05..4f21002 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -174,7 +174,7 @@ static const struct brw_tracked_state *gen6_atoms[] =
&brw_vertices,
 };
 
-static const struct brw_tracked_state *gen7_atoms[] =
+static const struct brw_tracked_state *gen7_render_atoms[] =
 {
/* Command packets: */
 
@@ -246,7 +246,7 @@ static const struct brw_tracked_state *gen7_atoms[] =
&haswell_cut_index,
 };
 
-static const struct brw_tracked_state *gen8_atoms[] =
+static const struct brw_tracked_state *gen8_render_atoms[] =
 {
/* Command packets: */
&gen8_state_base_address,
@@ -342,48 +342,68 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
}
 }
 
+static inline const struct brw_tracked_state *
+brw_get_pipeline_atoms(struct brw_context *brw,
+   enum brw_pipeline pipeline)
+{
+   switch (pipeline) {
+   case BRW_RENDER_PIPELINE:
+  return &brw->render_atoms[0];
+   default:
+  STATIC_ASSERT(BRW_NUM_PIPELINES == 1);
+  unreachable("Unsupported pipeline");
+  return NULL;
+   }
+}
+
+static void
+brw_copy_pipeline_atoms(struct brw_context *brw,
+enum brw_pipeline pipeline,
+const struct brw_tracked_state **atoms,
+int num_atoms)
+{
+   /* This is to work around brw_context::atoms being declared const.  We want
+* it to be const, but it needs to be initialized somehow!
+*/
+   struct brw_tracked_state *context_atoms =
+  (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);
+
+   for (int i = 0; i < num_atoms; i++) {
+  context_atoms[i] = *atoms[i];
+  assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
+  assert(context_atoms[i].emit);
+   }
+
+   brw->num_atoms[pipeline] = num_atoms;
+}
+
 void brw_init_state( struct brw_context *brw )
 {
struct gl_context *ctx = &brw->ctx;
-   const struct brw_tracked_state **atoms;
-   int num_atoms;
 
-   STATIC_ASSERT(ARRAY_SIZE(gen4_atoms) <= ARRAY_SIZE(brw->atoms));
-   STATIC_ASSERT(ARRAY_SIZE(gen6_atoms) <= ARRAY_SIZE(brw->atoms));
-   STATIC_ASSERT(ARRAY_SIZE(gen7_atoms) <= ARRAY_SIZE(brw->atoms));
-   STATIC_ASSERT(ARRAY_SIZE(gen8_atoms) <= ARRAY_SIZE(brw->atoms));
+   STATIC_ASSERT(ARRAY_SIZE(gen4_atoms) <= ARRAY_SIZE(brw->render_atoms));
+   STATIC_ASSERT(ARRAY_SIZE(gen6_atoms) <= ARRAY_SIZE(brw->render_atoms));
+   STATIC_ASSERT(ARRAY_SIZE(gen7_render_atoms) <=
+ ARRAY_SIZE(brw->render_atoms));
+   STATIC_ASSERT(ARRAY_SIZE(gen8_render_atoms) <=
+ ARRAY_SIZE(brw->render_atoms));
 
brw_init_caches(brw);
 
if (brw->gen >= 8) {
-  atoms = gen8_atoms;
-  num_atoms = ARRAY_SIZE(gen8_atoms);
+  brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
+  gen8_render_atoms,
+  ARRAY_SIZE(gen8_render_atoms));
} else if (brw->gen == 7) {
-  atoms = gen7_atoms;
-  num_atoms = ARRAY_SIZE(gen7_atoms);
+  brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
+  gen7_render_atoms,
+  ARRAY_SIZE(gen7_render_atoms));
} else if (brw->gen == 6) {
-  atoms = gen6_atoms;
-  num_atoms = ARRAY_SIZE(gen6_atoms);
+  brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
+  gen6_atoms, ARRAY_SIZE(gen6_atoms));
} else {
-  atoms = gen4_atoms;
-  num_atoms = ARRAY_SIZE(gen4_atoms);
-   }
-

[Mesa-dev] [PATCH v2 0/4] Support multiple state pipelines for i965

2015-03-11 Thread Jordan Justen
git://people.freedesktop.org/~jljusten/mesa i965-pipelines-v2

The big changes from v1:
 * Rename brw->atoms[] to render_atoms
 * Add brw->compute_atoms[]
 * Replace brw_pipeline_first_atom with brw_get_pipeline_atoms

With this version, I was not able to measure a performance change with
SynMask Batch7 on ivb or bsw.

Jordan Justen (4):
  i965/state: Rename brw_upload_state to brw_upload_render_state
  i965/state: Support multiple pipelines in brw->num_atoms
  i965/state: Create separate dirty state bits for each pipeline
  i965/state: Add compute pipeline with empty atom lists

 src/mesa/drivers/dri/i965/brw_context.h  |  13 ++-
 src/mesa/drivers/dri/i965/brw_draw.c |   2 +-
 src/mesa/drivers/dri/i965/brw_state.h|   3 +-
 src/mesa/drivers/dri/i965/brw_state_upload.c | 154 ---
 4 files changed, 127 insertions(+), 45 deletions(-)

-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 1/4] i965/state: Rename brw_upload_state to brw_upload_render_state

2015-03-11 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_draw.c | 2 +-
 src/mesa/drivers/dri/i965/brw_state.h| 2 +-
 src/mesa/drivers/dri/i965/brw_state_upload.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index 2d3de45..c74c6af 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -508,7 +508,7 @@ retry:
*/
   if (brw->state.dirty.brw) {
 brw->no_batch_wrap = true;
-brw_upload_state(brw);
+brw_upload_render_state(brw);
   }
 
   brw_emit_prim(brw, &prims[i], brw->primitive);
diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index 71210b9..ae5ef1f 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -168,7 +168,7 @@ brw_depthbuffer_format(struct brw_context *brw);
 /***
  * brw_state.c
  */
-void brw_upload_state(struct brw_context *brw);
+void brw_upload_render_state(struct brw_context *brw);
 void brw_clear_dirty_bits(struct brw_context *brw);
 void brw_init_state(struct brw_context *brw);
 void brw_destroy_state(struct brw_context *brw);
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c 
b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 1b84859..3b64a05 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -569,7 +569,7 @@ brw_upload_programs(struct brw_context *brw)
 /***
  * Emit all state:
  */
-void brw_upload_state(struct brw_context *brw)
+void brw_upload_render_state(struct brw_context *brw)
 {
struct gl_context *ctx = &brw->ctx;
struct brw_state_flags *state = &brw->state.dirty;
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 4/4] i965/state: Add compute pipeline with empty atom lists

2015-03-11 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_context.h  |  2 ++
 src/mesa/drivers/dri/i965/brw_state.h|  1 +
 src/mesa/drivers/dri/i965/brw_state_upload.c | 28 +++-
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index e693f50..f15cd7c 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -151,6 +151,7 @@ struct brw_wm_prog_data;
 
 enum brw_pipeline {
BRW_RENDER_PIPELINE,
+   BRW_COMPUTE_PIPELINE,
 
BRW_NUM_PIPELINES
 };
@@ -1395,6 +1396,7 @@ struct brw_context
 
int num_atoms[BRW_NUM_PIPELINES];
const struct brw_tracked_state render_atoms[57];
+   const struct brw_tracked_state compute_atoms[0];
 
/* If (INTEL_DEBUG & DEBUG_BATCH) */
struct {
diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index ae5ef1f..5e4599d 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -169,6 +169,7 @@ brw_depthbuffer_format(struct brw_context *brw);
  * brw_state.c
  */
 void brw_upload_render_state(struct brw_context *brw);
+void brw_upload_compute_state(struct brw_context *brw);
 void brw_clear_dirty_bits(struct brw_context *brw);
 void brw_init_state(struct brw_context *brw);
 void brw_destroy_state(struct brw_context *brw);
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c 
b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 55a9050..0b38c09 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -246,6 +246,10 @@ static const struct brw_tracked_state *gen7_render_atoms[] 
=
&haswell_cut_index,
 };
 
+static const struct brw_tracked_state *gen7_compute_atoms[] =
+{
+};
+
 static const struct brw_tracked_state *gen8_render_atoms[] =
 {
/* Command packets: */
@@ -322,6 +326,10 @@ static const struct brw_tracked_state *gen8_render_atoms[] 
=
&gen8_pma_fix,
 };
 
+static const struct brw_tracked_state *gen8_compute_atoms[] =
+{
+};
+
 static void
 brw_upload_initial_gpu_state(struct brw_context *brw)
 {
@@ -349,8 +357,10 @@ brw_get_pipeline_atoms(struct brw_context *brw,
switch (pipeline) {
case BRW_RENDER_PIPELINE:
   return &brw->render_atoms[0];
+   case BRW_COMPUTE_PIPELINE:
+  return &brw->compute_atoms[0];
default:
-  STATIC_ASSERT(BRW_NUM_PIPELINES == 1);
+  STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
   unreachable("Unsupported pipeline");
   return NULL;
}
@@ -387,6 +397,10 @@ void brw_init_state( struct brw_context *brw )
  ARRAY_SIZE(brw->render_atoms));
STATIC_ASSERT(ARRAY_SIZE(gen8_render_atoms) <=
  ARRAY_SIZE(brw->render_atoms));
+   STATIC_ASSERT(ARRAY_SIZE(gen7_compute_atoms) <=
+ ARRAY_SIZE(brw->compute_atoms));
+   STATIC_ASSERT(ARRAY_SIZE(gen8_compute_atoms) <=
+ ARRAY_SIZE(brw->compute_atoms));
 
brw_init_caches(brw);
 
@@ -394,10 +408,16 @@ void brw_init_state( struct brw_context *brw )
   brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
   gen8_render_atoms,
   ARRAY_SIZE(gen8_render_atoms));
+  brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
+  gen8_compute_atoms,
+  ARRAY_SIZE(gen8_compute_atoms));
} else if (brw->gen == 7) {
   brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
   gen7_render_atoms,
   ARRAY_SIZE(gen7_render_atoms));
+  brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
+  gen7_compute_atoms,
+  ARRAY_SIZE(gen7_compute_atoms));
} else if (brw->gen == 6) {
   brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
   gen6_atoms, ARRAY_SIZE(gen6_atoms));
@@ -734,3 +754,9 @@ brw_clear_dirty_bits(struct brw_context *brw)
struct brw_state_flags *state = &brw->state.dirty;
memset(state, 0, sizeof(*state));
 }
+
+void
+brw_upload_compute_state(struct brw_context *brw)
+{
+   brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
+}
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 3/4] i965/state: Create separate dirty state bits for each pipeline

2015-03-11 Thread Jordan Justen
When uploading state for a pipeline, we will save changed state for
the other pipelines.

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_context.h  |  1 +
 src/mesa/drivers/dri/i965/brw_state_upload.c | 42 ++--
 2 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 91b4054..e693f50 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1101,6 +1101,7 @@ struct brw_context
GLuint NewGLState;
struct {
   struct brw_state_flags dirty;
+  struct brw_state_flags pipelines[BRW_NUM_PIPELINES];
} state;
 
struct brw_cache cache;
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c 
b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 4f21002..55a9050 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -586,15 +586,16 @@ brw_upload_programs(struct brw_context *brw)
brw_upload_wm_prog(brw);
 }
 
-/***
- * Emit all state:
- */
-void brw_upload_render_state(struct brw_context *brw)
+static inline void
+brw_upload_pipeline_state(struct brw_context *brw,
+  enum brw_pipeline pipeline)
 {
struct gl_context *ctx = &brw->ctx;
struct brw_state_flags *state = &brw->state.dirty;
int i;
static int dirty_count = 0;
+   struct brw_state_flags *pipeline_state =
+  &brw->state.pipelines[pipeline];
 
state->mesa |= brw->NewGLState;
brw->NewGLState = 0;
@@ -633,6 +634,12 @@ void brw_upload_render_state(struct brw_context *brw)
   brw->state.dirty.brw |= BRW_NEW_NUM_SAMPLES;
}
 
+   if ((pipeline_state->mesa | pipeline_state->brw) != 0) {
+  state->mesa |= pipeline_state->mesa;
+  state->brw |= pipeline_state->brw;
+  memset(pipeline_state, 0, sizeof(struct brw_state_flags));
+   }
+
if ((state->mesa | state->brw) == 0)
   return;
 
@@ -642,6 +649,10 @@ void brw_upload_render_state(struct brw_context *brw)
 
brw_upload_programs(brw);
 
+   const struct brw_tracked_state *atoms =
+  brw_get_pipeline_atoms(brw, pipeline);
+   const int num_atoms = brw->num_atoms[pipeline];
+
if (unlikely(INTEL_DEBUG)) {
   /* Debug version which enforces various sanity checks on the
* state flags which are generated and checked to help ensure
@@ -651,8 +662,8 @@ void brw_upload_render_state(struct brw_context *brw)
   memset(&examined, 0, sizeof(examined));
   prev = *state;
 
-  for (i = 0; i < brw->num_atoms[BRW_RENDER_PIPELINE]; i++) {
-const struct brw_tracked_state *atom = &brw->render_atoms[i];
+  for (i = 0; i < num_atoms; i++) {
+const struct brw_tracked_state *atom = &atoms[i];
 struct brw_state_flags generated;
 
 if (check_state(state, &atom->dirty)) {
@@ -671,8 +682,8 @@ void brw_upload_render_state(struct brw_context *brw)
   }
}
else {
-  for (i = 0; i < brw->num_atoms[BRW_RENDER_PIPELINE]; i++) {
-const struct brw_tracked_state *atom = &brw->render_atoms[i];
+  for (i = 0; i < num_atoms; i++) {
+const struct brw_tracked_state *atom = &atoms[i];
 
 if (check_state(state, &atom->dirty)) {
atom->emit(brw);
@@ -691,8 +702,23 @@ void brw_upload_render_state(struct brw_context *brw)
 fprintf(stderr, "\n");
   }
}
+
+   /* Save all dirty state into the other pipelines */
+   for (int i = 0; i < BRW_NUM_PIPELINES; i++) {
+  if (i != pipeline) {
+ brw->state.pipelines[i].mesa |= state->mesa;
+ brw->state.pipelines[i].brw |= state->brw;
+  }
+   }
 }
 
+/***
+ * Emit all state:
+ */
+void brw_upload_render_state(struct brw_context *brw)
+{
+   brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
+}
 
 /**
  * Clear dirty bits to account for the fact that the state emitted by
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] Revert "common: Fix PBOs for 1D_ARRAY."

2015-03-11 Thread Emil Velikov
On 9 March 2015 at 11:36, Neil Roberts  wrote:
> Hi Emil,
>
> The resolve looks good, however I think it would also make sense to
> cherry pick a44606 to the stable branch. It doesn't do any harm either
> way but it should be slightly faster and cleaner with that patch as
> well.
>
Thanks Neil. The commit message of a44606 does mention "A layered PBO
image is now interpreted a..." and considering I'm not an expert in
the area, it wasn't clear if the "now" point is prior to the 10.5
branch point or not.

Based on your feedback I've picked both commits and will push then out
after a piglit run :-)

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH 4/4] i965/fs: Don't issue FB writes for bound but unwritten color targets.

2015-03-11 Thread Emil Velikov
On 7 March 2015 at 21:35, Jason Ekstrand  wrote:
> On Sat, Mar 7, 2015 at 9:58 AM, Emil Velikov 
> wrote:
>>
>> On 27 February 2015 at 08:06, Kenneth Graunke 
>> wrote:
>> > We used to loop over all color attachments, and emit FB writes for each
>> > one, even if the shader didn't write to a corresponding output variable.
>> > Those color attachments would be filled with garbage (undefined values).
>> >
>> > Football Manager binds a framebuffer with 4 color attachments, but draws
>> > to it using a shader that only writes to gl_FragData[0..2].  This meant
>> > that color attachment 3 would be filled with garbage, resulting in
>> > rendering artifacts.  Now we skip writing to it, fixing rendering.
>> >
>> > Writes to gl_FragColor initialize outputs[0..nr_color_regions-1] to
>> > GRFs, while writes to gl_FragData[i] initialize outputs[i].
>> >
>> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=86747
>> > Signed-off-by: Kenneth Graunke 
>> > Cc: mesa-sta...@lists.freedesktop.org
>> Hi Ken,
>>
>> This commit does not seems to have not landed in master. Did it fall
>> through the cracks ?
>
>
> I think it did.  This series is definitely somethiing we would like to be in
> 10.5
I fear it wasn't :'( Seems like Ken pushed it a couple days after your reply.

commit e95969cd9548033250ba12f2adf11740319b41e7
Author: Kenneth Graunke 
AuthorDate: Thu Feb 26 17:45:49 2015 -0800
Commit: Kenneth Graunke 
CommitDate: Mon Mar 9 16:07:04 2015 -0700

i965/fs: Don't issue FB writes for bound but unwritten color targets.


Thanks gents!
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 1/2] util: Fix foreach_list_typed_safe when exec_node is not at offset 0.

2015-03-11 Thread Connor Abbott
This version LGTM too.

On Wed, Mar 11, 2015 at 1:54 PM, Jason Ekstrand  wrote:
> __next and __prev are pointers to the structure containing the exec_node
> link, not the embedded exec_node.  NULL checks would fail unless the
> embedded exec_node happened to be at offset 0 in the parent struct.
>
> v1 Reviewed-by: Kenneth Graunke 
> v1 Reviewed-by: Connor Abbott 
>
> v2: Jason Ekstrand :
>Use "(__node)->__field.next != NULL" to check for the end of the list
>instead of the "&__next->__field != NULL".  The former is far more
>obviously correct as it matches what the non-safe versions do.  The
>original code tried to avoid any use of __next as the client code may
>delete it during its execution.  However, since the looping condition is
>checked after the iteration clause but before the client code is
>executed, we know that __node is valid during the looping condition.
>
> Signed-off-by: Jason Ekstrand 
> ---
>  src/glsl/list.h | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/src/glsl/list.h b/src/glsl/list.h
> index ddb98f7..25fc85c 100644
> --- a/src/glsl/list.h
> +++ b/src/glsl/list.h
> @@ -684,7 +684,7 @@ inline void exec_node::insert_before(exec_list *before)
> exec_node_data(__type, (__list)->head, __field),\
> * __next =  \
> exec_node_data(__type, (__node)->__field.next, __field);\
> -__next != NULL;\
> +   (__node)->__field.next != NULL;\
>  __node = __next, __next =  \
> exec_node_data(__type, (__next)->__field.next, __field))
>
> @@ -693,7 +693,7 @@ inline void exec_node::insert_before(exec_list *before)
> exec_node_data(__type, (__list)->tail_pred, __field),   \
> * __prev =  \
> exec_node_data(__type, (__node)->__field.prev, __field);\
> -__prev != NULL;\
> +(__node)->__field.prev != NULL;\
>  __node = __prev, __prev =  \
> exec_node_data(__type, (__prev)->__field.prev, __field))
>
> --
> 2.3.2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] mesa: use strdup() instead of _mesa_strdup()

2015-03-11 Thread Emil Velikov
On 11/03/15 01:42, Brian Paul wrote:
> We were already using strdup() in various places in Mesa.  Get rid
> of the _mesa_strdup() wrapper.  All the callers pass a non-NULL
> argument so the NULL check isn't needed either.
Last time I've tried using strdup with MSVC there were a few warnings.
Iirc those were due to the function being part of the POSIX standard
rather than C89/99/etc.

I had to shut the compiler by adding the following define
_CRT_NONSTDC_NO_WARNINGS.

-Emil

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 1/2] util: Fix foreach_list_typed_safe when exec_node is not at offset 0.

2015-03-11 Thread Matt Turner
On Wed, Mar 11, 2015 at 10:54 AM, Jason Ekstrand  wrote:
> __next and __prev are pointers to the structure containing the exec_node
> link, not the embedded exec_node.  NULL checks would fail unless the
> embedded exec_node happened to be at offset 0 in the parent struct.
>
> v1 Reviewed-by: Kenneth Graunke 
> v1 Reviewed-by: Connor Abbott 
>
> v2: Jason Ekstrand :
>Use "(__node)->__field.next != NULL" to check for the end of the list
>instead of the "&__next->__field != NULL".  The former is far more
>obviously correct as it matches what the non-safe versions do.  The
>original code tried to avoid any use of __next as the client code may
>delete it during its execution.  However, since the looping condition is
>checked after the iteration clause but before the client code is
>executed, we know that __node is valid during the looping condition.
>
> Signed-off-by: Jason Ekstrand 
> ---
>  src/glsl/list.h | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/src/glsl/list.h b/src/glsl/list.h
> index ddb98f7..25fc85c 100644
> --- a/src/glsl/list.h
> +++ b/src/glsl/list.h
> @@ -684,7 +684,7 @@ inline void exec_node::insert_before(exec_list *before)
> exec_node_data(__type, (__list)->head, __field),\
> * __next =  \
> exec_node_data(__type, (__node)->__field.next, __field);\
> -__next != NULL;\
> +   (__node)->__field.next != NULL;\

Tab here. With that fixed:

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 1/2] util: Fix foreach_list_typed_safe when exec_node is not at offset 0.

2015-03-11 Thread Jason Ekstrand
__next and __prev are pointers to the structure containing the exec_node
link, not the embedded exec_node.  NULL checks would fail unless the
embedded exec_node happened to be at offset 0 in the parent struct.

v1 Reviewed-by: Kenneth Graunke 
v1 Reviewed-by: Connor Abbott 

v2: Jason Ekstrand :
   Use "(__node)->__field.next != NULL" to check for the end of the list
   instead of the "&__next->__field != NULL".  The former is far more
   obviously correct as it matches what the non-safe versions do.  The
   original code tried to avoid any use of __next as the client code may
   delete it during its execution.  However, since the looping condition is
   checked after the iteration clause but before the client code is
   executed, we know that __node is valid during the looping condition.

Signed-off-by: Jason Ekstrand 
---
 src/glsl/list.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/glsl/list.h b/src/glsl/list.h
index ddb98f7..25fc85c 100644
--- a/src/glsl/list.h
+++ b/src/glsl/list.h
@@ -684,7 +684,7 @@ inline void exec_node::insert_before(exec_list *before)
exec_node_data(__type, (__list)->head, __field),\
* __next =  \
exec_node_data(__type, (__node)->__field.next, __field);\
-__next != NULL;\
+   (__node)->__field.next != NULL;\
 __node = __next, __next =  \
exec_node_data(__type, (__next)->__field.next, __field))
 
@@ -693,7 +693,7 @@ inline void exec_node::insert_before(exec_list *before)
exec_node_data(__type, (__list)->tail_pred, __field),   \
* __prev =  \
exec_node_data(__type, (__node)->__field.prev, __field);\
-__prev != NULL;\
+(__node)->__field.prev != NULL;\
 __node = __prev, __prev =  \
exec_node_data(__type, (__prev)->__field.prev, __field))
 
-- 
2.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] replace __FUNCTION__ with __func__ task

2015-03-11 Thread Predut, Marius
I take a look

Thanks,
marius

> -Original Message-
> From: mesa-dev [mailto:mesa-dev-boun...@lists.freedesktop.org] On Behalf Of
> Jose Fonseca
> Sent: Wednesday, March 04, 2015 12:06 AM
> To: Jan Vesely; Brian Paul
> Cc: mesa-dev@lists.freedesktop.org
> Subject: Re: [Mesa-dev] replace __FUNCTION__ with __func__ task
> 
> On 03/03/15 20:56, Jan Vesely wrote:
> > On Tue, 2015-03-03 at 10:07 -0700, Brian Paul wrote:
> >> On 03/03/2015 09:56 AM, Jose Fonseca wrote:
> >>> On 03/03/15 15:57, Brian Paul wrote:
>  We're using both of these in Mesa/gallium.  It would be nice to
>  consistently just use C99's __func__ everywhere.  This would be any
>  easy task for someone looking for something simple to do.
> 
>  We could then get rid of this (broken) chunk seen in both
>  compiler.h and
>  p_compiler.h:
> 
>  #ifndef __FUNCTION__
>  #  define __FUNCTION__ __func__
>  #endif
> >>>
> >>> Sounds good to me.  Note that MSVC doesn't support __func__, only
> >>> __FUNCTION__, so we need to ensure that c99_compat.h gets included
> >>> everywhere.
> >>
> >> Right.  That should already be the case since it's included by
> >> src/mesa/main/compiler.h which gets included almost everywhere already.
> >
> > would it make sense to add -imacros/-include cmdline options for these
> > headers?
> > I have no idea whether these are supported by other compilers (at
> > least -include is supported by clang)
> >
> > jan
> 
> Yes, that's a thought.
> 
> MSVC has the /FI.h option.  We could  indeed pass it universally to
> ensure __func__/inline and friends are always there.  But if c99_compat.h is
> already included everywhere, that's fine too.
> 
> 
> BTW, looking at MSVC 14..2015 notes [1], it will:
> 
> - support __func__.
> - support snprintf (instead of _snprintf)
> 
> Unfortunately, not much more C99/C11 support.
> 
> Jose
> 
> [1] http://support.microsoft.com/kb/2967191
> 
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC] i965: Factor out descriptor building for indirect send messages

2015-03-11 Thread Pohjolainen, Topi
On Tue, Mar 10, 2015 at 11:07:26PM +0200, Francisco Jerez wrote:
> "Pohjolainen, Topi"  writes:
> 
> > On Mon, Mar 09, 2015 at 12:43:08PM +0200, Francisco Jerez wrote:
> >> "Pohjolainen, Topi"  writes:
> >> 
> >> > On Sat, Mar 07, 2015 at 04:15:08PM +0200, Francisco Jerez wrote:
> >> >> Topi Pohjolainen  writes:
> >> >> 
> >> >> > The original patch from Curro was based on something that is not
> >> >> > present in the master yet. This patch tries to mimick the logic on
> >> >> > top master.
> >> >> > I wanted to see if could separate the building of the descriptor
> >> >> > instruction from building of the send instruction. This logic now
> >> >> > allows the caller to construct any kind of sequence of instructions
> >> >> > filling in the descriptor before giving it to the send instruction
> >> >> > builder.
> >> >> >
> >> >> > This is only compile tested. Curro, how would you feel about this
> >> >> > sort of approach? I apologise for muddying the waters again but I
> >> >> > wasn't entirely comfortable with the logic and wanted to try to
> >> >> > something else.
> >> >> >
> >> >> > I believe patch number five should go nicely on top of this as
> >> >> > the descriptor instruction could be followed by (or preceeeded by)
> >> >> > any additional instructions modifying the descriptor register
> >> >> > before the actual send instruction.
> >> >> >
> >> >> 
> >> >> Topi, the goal I had in mind with PATCH 01 was to refactor a commonly
> >> >> recurring pattern.  In terms of the functions defined in this patch my
> >> >> example from yesterday [1] would now look like:
> >> >> 
> >> >> |   if (index.file == BRW_IMMEDIATE_VALUE) {
> >> >> |
> >> >> |  uint32_t surf_index = index.dw1.ud;
> >> >> |
> >> >> |  brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
> >> >> |  brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UW));
> >> >> |  brw_set_src0(p, send, offset);
> >> >> |  brw_set_sampler_message(p, send,
> >> >> |  surf_index,
> >> >> |  0, /* LD message ignores sampler unit */
> >> >> |  GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
> >> >> |  rlen,
> >> >> |  mlen,
> >> >> |  false, /* no header */
> >> >> |  simd_mode,
> >> >> |  0);
> >> >> |
> >> >> |  brw_mark_surface_used(prog_data, surf_index);
> >> >> |
> >> >> |   } else {
> >> >> |
> >> >> |  struct brw_reg addr = vec1(retype(brw_address_reg(0), 
> >> >> BRW_REGISTER_TYPE_UD));
> >> >> |
> >> >> |  brw_push_insn_state(p);
> >> >> |  brw_set_default_mask_control(p, BRW_MASK_DISABLE);
> >> >> |  brw_set_default_access_mode(p, BRW_ALIGN_1);
> >> >> |
> >> >> |  /* a0.0 = surf_index & 0xff */
> >> >> |  brw_inst *insn_and = brw_next_insn(p, BRW_OPCODE_AND);
> >> >> |  brw_inst_set_exec_size(p->brw, insn_and, BRW_EXECUTE_1);
> >> >> |  brw_set_dest(p, insn_and, addr);
> >> >> |  brw_set_src0(p, insn_and, vec1(retype(index, 
> >> >> BRW_REGISTER_TYPE_UD)));
> >> >> |  brw_set_src1(p, insn_and, brw_imm_ud(0x0ff));
> >> >> |
> >> >> |
> >> >> |  /* a0.0 |=  */
> >> >> |  brw_inst *descr_inst = brw_build_indirect_message_descr(p, addr, 
> >> >> addr);
> >> >> |  brw_set_sampler_message(p, descr_inst,
> >> >> |  0 /* surface */,
> >> >> |  0 /* sampler */,
> >> >> |  GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
> >> >> |  rlen /* rlen */,
> >> >> |  mlen /* mlen */,
> >> >> |  false /* header */,
> >> >> |  simd_mode,
> >> >> |  0);
> >> >> |
> >> >> |  /* dst = send(offset, a0.0) */
> >> >> |  brw_send_indirect_message(p, BRW_SFID_SAMPLER, dst, offset, 
> >> >> addr);
> >> >> |
> >> >> |  brw_pop_insn_state(p);
> >> >> |
> >> >> |  /* visitor knows more than we do about the surface limit 
> >> >> required,
> >> >> |   * so has already done marking.
> >> >> |   */
> >> >> |   }
> >> >
> >> > Which I think could also be written as follows. Or am I missing something
> >> > again?
> >> >
> >> > static brw_inst *
> >> > brw_build_surface_index_descr(struct brw_compile *p,
> >> >   struct brw_reg dst, index)
> >> > {
> >> >brw_set_default_mask_control(p, BRW_MASK_DISABLE);
> >> >brw_set_default_access_mode(p, BRW_ALIGN_1);
> >> >
> >> >/* a0.0 = surf_index & 0xff */
> >> >brw_inst *insn_and = brw_next_insn(p, BRW_OPCODE_AND);
> >> >brw_inst_set_exec_size(p->brw, insn_and, BRW_EXECUTE_1);
> >> >brw_set_dest(p, insn_and, addr);
> >> >brw_set_src0(p, insn_and, vec1(retype(index, BRW_REGISTER_TYPE_UD)));
> >> >brw_set_src1(p, insn_and, brw_imm_ud(0x0ff));
> >> >
> >> >/* a0.0 |=  */
> >>

Re: [Mesa-dev] [PATCH 01/13] i965: Factor out logic to build a send message instruction with indirect descriptor.

2015-03-11 Thread Pohjolainen, Topi
On Wed, Mar 11, 2015 at 07:25:14PM +0200, Francisco Jerez wrote:
> "Pohjolainen, Topi"  writes:
> 
> > On Fri, Feb 27, 2015 at 05:34:44PM +0200, Francisco Jerez wrote:
> >> ---
> >>  src/mesa/drivers/dri/i965/brw_eu.h   | 19 ++--
> >>  src/mesa/drivers/dri/i965/brw_eu_emit.c  | 58 
> >> ++--
> >>  src/mesa/drivers/dri/i965/brw_fs_generator.cpp   | 55 
> >> +-
> >>  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 37 ---
> >>  4 files changed, 77 insertions(+), 92 deletions(-)
> >> 
> >> diff --git a/src/mesa/drivers/dri/i965/brw_eu.h 
> >> b/src/mesa/drivers/dri/i965/brw_eu.h
> >> index 1b954c8..9b1e0e2 100644
> >> --- a/src/mesa/drivers/dri/i965/brw_eu.h
> >> +++ b/src/mesa/drivers/dri/i965/brw_eu.h
> >> @@ -205,11 +205,6 @@ void brw_set_sampler_message(struct brw_compile *p,
> >>   unsigned simd_mode,
> >>   unsigned return_format);
> >>  
> >> -void brw_set_indirect_send_descriptor(struct brw_compile *p,
> >> -  brw_inst *insn,
> >> -  unsigned sfid,
> >> -  struct brw_reg descriptor);
> >> -
> >>  void brw_set_dp_read_message(struct brw_compile *p,
> >> brw_inst *insn,
> >> unsigned binding_table_index,
> >> @@ -243,6 +238,20 @@ void brw_urb_WRITE(struct brw_compile *p,
> >>   unsigned offset,
> >>   unsigned swizzle);
> >>  
> >> +/**
> >> + * Send message to shared unit \p sfid with a possibly indirect 
> >> descriptor \p
> >> + * desc.  If the descriptor is not an immediate it will be transparently
> >> + * loaded to an address register using an OR instruction that will be 
> >> returned
> >> + * to the caller so additional descriptor bits can be specified with the 
> >> usual
> >> + * brw_set_*_message() helper functions.
> >> + */
> >> +struct brw_inst *
> >> +brw_send_indirect_message(struct brw_compile *p,
> >> +  unsigned sfid,
> >> +  struct brw_reg dst,
> >> +  struct brw_reg payload,
> >> +  struct brw_reg desc);
> >> +
> >>  void brw_ff_sync(struct brw_compile *p,
> >>   struct brw_reg dest,
> >>   unsigned msg_reg_nr,
> >> diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
> >> b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> >> index e69840a..cd2ce92 100644
> >> --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
> >> +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> >> @@ -751,21 +751,6 @@ brw_set_sampler_message(struct brw_compile *p,
> >> }
> >>  }
> >>  
> >> -void brw_set_indirect_send_descriptor(struct brw_compile *p,
> >> -  brw_inst *insn,
> >> -  unsigned sfid,
> >> -  struct brw_reg descriptor)
> >> -{
> >> -   /* Only a0.0 may be used as SEND's descriptor operand. */
> >> -   assert(descriptor.file == BRW_ARCHITECTURE_REGISTER_FILE);
> >> -   assert(descriptor.type == BRW_REGISTER_TYPE_UD);
> >> -   assert(descriptor.nr == BRW_ARF_ADDRESS);
> >> -   assert(descriptor.subnr == 0);
> >> -
> >> -   brw_set_message_descriptor(p, insn, sfid, 0, 0, false, false);
> >> -   brw_set_src1(p, insn, descriptor);
> >> -}
> >> -
> >>  static void
> >>  gen7_set_dp_scratch_message(struct brw_compile *p,
> >>  brw_inst *inst,
> >> @@ -2490,6 +2475,49 @@ void brw_urb_WRITE(struct brw_compile *p,
> >>   swizzle);
> >>  }
> >>  
> >> +struct brw_inst *
> >> +brw_send_indirect_message(struct brw_compile *p,
> >> +  unsigned sfid,
> >> +  struct brw_reg dst,
> >> +  struct brw_reg payload,
> >> +  struct brw_reg desc)
> >> +{
> >> +   const struct brw_context *brw = p->brw;
> >> +   struct brw_inst *send, *setup;
> >> +
> >> +   assert(desc.type == BRW_REGISTER_TYPE_UD);
> >> +
> >> +   if (desc.file == BRW_IMMEDIATE_VALUE) {
> >> +  setup = send = next_insn(p, BRW_OPCODE_SEND);
> >> +  brw_set_src1(p, send, desc);
> >> +
> >> +   } else {
> >> +  struct brw_reg addr = retype(brw_address_reg(0), 
> >> BRW_REGISTER_TYPE_UD);
> >> +
> >> +  brw_push_insn_state(p);
> >> +  brw_set_default_access_mode(p, BRW_ALIGN_1);
> >> +  brw_set_default_mask_control(p, BRW_MASK_DISABLE);
> >> +  brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
> >> +
> >> +  /* Load the indirect descriptor to an address register using OR so 
> >> the
> >> +   * caller can specify additional descriptor bits with the usual
> >> +   * brw_set_*_message() helper functions.
> >> +   */
> >> +  setup = brw_OR(p, addr, desc, brw_imm_ud(0));
> >> +
> >> +  brw_pop_insn_state(p);
> >> +
> >> +  send = next_insn(p

Re: [Mesa-dev] [PATCH 4/9] nir: Add intrinsics for SYSTEM_VALUE_BASE_VERTEX and VERTEX_ID_ZERO_BASE

2015-03-11 Thread Ian Romanick
This patch is

Reviewed-by: Ian Romanick 

On 03/09/2015 01:58 AM, Kenneth Graunke wrote:
> Ian and I added these around the time Connor was developing NIR.  Now
> that both exist, we should make them work together!
> 
> Signed-off-by: Kenneth Graunke 
> ---
>  src/glsl/nir/nir_intrinsics.h  | 2 ++
>  src/glsl/nir/nir_lower_system_values.c | 6 ++
>  2 files changed, 8 insertions(+)
> 
> diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
> index 3bf102f..8e28765 100644
> --- a/src/glsl/nir/nir_intrinsics.h
> +++ b/src/glsl/nir/nir_intrinsics.h
> @@ -95,6 +95,8 @@ ATOMIC(read, NIR_INTRINSIC_CAN_ELIMINATE)
>  
>  SYSTEM_VALUE(front_face, 1)
>  SYSTEM_VALUE(vertex_id, 1)
> +SYSTEM_VALUE(vertex_id_zero_base, 1)
> +SYSTEM_VALUE(base_vertex, 1)
>  SYSTEM_VALUE(instance_id, 1)
>  SYSTEM_VALUE(sample_id, 1)
>  SYSTEM_VALUE(sample_pos, 2)
> diff --git a/src/glsl/nir/nir_lower_system_values.c 
> b/src/glsl/nir/nir_lower_system_values.c
> index 328d4f1..a6eec65 100644
> --- a/src/glsl/nir/nir_lower_system_values.c
> +++ b/src/glsl/nir/nir_lower_system_values.c
> @@ -49,6 +49,12 @@ convert_instr(nir_intrinsic_instr *instr)
> case SYSTEM_VALUE_VERTEX_ID:
>op = nir_intrinsic_load_vertex_id;
>break;
> +   case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
> +  op = nir_intrinsic_load_vertex_id_zero_base;
> +  break;
> +   case SYSTEM_VALUE_BASE_VERTEX:
> +  op = nir_intrinsic_load_base_vertex;
> +  break;
> case SYSTEM_VALUE_INSTANCE_ID:
>op = nir_intrinsic_load_instance_id;
>break;
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/9] i965/nir: Lower to registers a bit later.

2015-03-11 Thread Ian Romanick
On 03/09/2015 01:58 AM, Kenneth Graunke wrote:
> We can't safely call nir_optimize() with register present, since several
> passes called in the loop can't handle registers, and will fail asserts.

Doesn't that mean this patch should go before patch 2?

> Notably, nir_lower_vec_alus() and nir_opt_algebraic() really don't want
> registers.
> 
> Signed-off-by: Kenneth Graunke 
> ---
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index fbdfc22..c5ed55c 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -108,9 +108,6 @@ fs_visitor::emit_nir_code()
> nir_lower_io(nir);
> nir_validate_shader(nir);
>  
> -   nir_lower_locals_to_regs(nir);
> -   nir_validate_shader(nir);
> -
> nir_remove_dead_variables(nir);
> nir_validate_shader(nir);
>  
> @@ -125,6 +122,9 @@ fs_visitor::emit_nir_code()
>  
> nir_optimize(nir);
>  
> +   nir_lower_locals_to_regs(nir);
> +   nir_validate_shader(nir);
> +
> nir_lower_to_source_mods(nir);
> nir_validate_shader(nir);
> nir_copy_prop(nir);

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Fix out-of-bounds accesses into pull_constant_loc array

2015-03-11 Thread Matt Turner
On Wed, Mar 11, 2015 at 10:10 AM, Ian Romanick  wrote:
> Given the age of this bug, should this be a candidate for 10.4 and 10.5
> stable branches?

I think so. I just sent a cherry-pick request for 10.4 and 10.5.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] swrast: remove _BLENDAPI

2015-03-11 Thread Ian Romanick
Series is

Reviewed-by: Ian Romanick 

One humorous observation below...

On 03/11/2015 09:49 AM, Brian Paul wrote:
> _BLENDAPI boils down to __cdecl on Windows, but __cdecl is the default
> calling convention so this serves no purpose.
> ---
>  src/mesa/swrast/s_blend.c   | 21 +
>  src/mesa/swrast/s_context.h |  8 
>  2 files changed, 13 insertions(+), 16 deletions(-)
> 
> diff --git a/src/mesa/swrast/s_blend.c b/src/mesa/swrast/s_blend.c
> index 7cb1194..8479b0b 100644
> --- a/src/mesa/swrast/s_blend.c
> +++ b/src/mesa/swrast/s_blend.c
> @@ -48,9 +48,6 @@
>  #if defined(USE_MMX_ASM)
>  #include "x86/mmx.h"
>  #include "x86/common_x86_asm.h"
> -#define _BLENDAPI _ASMAPI
> -#else
> -#define _BLENDAPI
>  #endif
>  
>  
> @@ -69,7 +66,7 @@
>   * No-op means the framebuffer values remain unchanged.
>   * Any chanType ok.
>   */
> -static void _BLENDAPI
> +static void
>  blend_noop(struct gl_context *ctx, GLuint n, const GLubyte mask[],
> GLvoid *src, const GLvoid *dst, GLenum chanType)
>  {
> @@ -97,7 +94,7 @@ blend_noop(struct gl_context *ctx, GLuint n, const GLubyte 
> mask[],
>   * Special case for glBlendFunc(GL_ONE, GL_ZERO)
>   * Any chanType ok.
>   */
> -static void _BLENDAPI
> +static void
>  blend_replace(struct gl_context *ctx, GLuint n, const GLubyte mask[],
>GLvoid *src, const GLvoid *dst, GLenum chanType)
>  {
> @@ -117,7 +114,7 @@ blend_replace(struct gl_context *ctx, GLuint n, const 
> GLubyte mask[],
>   * Common transparency blending mode:
>   * glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA).
>   */
> -static void _BLENDAPI
> +static void
>  blend_transparency_ubyte(struct gl_context *ctx, GLuint n, const GLubyte 
> mask[],
>   GLvoid *src, const GLvoid *dst, GLenum chanType)
>  {
> @@ -162,7 +159,7 @@ blend_transparency_ubyte(struct gl_context *ctx, GLuint 
> n, const GLubyte mask[],
>  }
>  
>  
> -static void _BLENDAPI
> +static void
>  blend_transparency_ushort(struct gl_context *ctx, GLuint n, const GLubyte 
> mask[],
>GLvoid *src, const GLvoid *dst, GLenum chanType)
>  {
> @@ -200,7 +197,7 @@ blend_transparency_ushort(struct gl_context *ctx, GLuint 
> n, const GLubyte mask[]
>  }
>  
>  
> -static void _BLENDAPI
> +static void
>  blend_transparency_float(struct gl_context *ctx, GLuint n, const GLubyte 
> mask[],
>   GLvoid *src, const GLvoid *dst, GLenum chanType)
>  {
> @@ -242,7 +239,7 @@ blend_transparency_float(struct gl_context *ctx, GLuint 
> n, const GLubyte mask[],
>   * Add src and dest: glBlendFunc(GL_ONE, GL_ONE).
>   * Any chanType ok.
>   */
> -static void _BLENDAPI
> +static void
>  blend_add(struct gl_context *ctx, GLuint n, const GLubyte mask[],
>GLvoid *src, const GLvoid *dst, GLenum chanType)
>  {
> @@ -308,7 +305,7 @@ blend_add(struct gl_context *ctx, GLuint n, const GLubyte 
> mask[],
>   * Blend min function.
>   * Any chanType ok.
>   */
> -static void _BLENDAPI
> +static void
>  blend_min(struct gl_context *ctx, GLuint n, const GLubyte mask[],
>GLvoid *src, const GLvoid *dst, GLenum chanType)
>  {
> @@ -361,7 +358,7 @@ blend_min(struct gl_context *ctx, GLuint n, const GLubyte 
> mask[],
>   * Blend max function.
>   * Any chanType ok.
>   */
> -static void _BLENDAPI
> +static void
>  blend_max(struct gl_context *ctx, GLuint n, const GLubyte mask[],
>GLvoid *src, const GLvoid *dst, GLenum chanType)
>  {
> @@ -415,7 +412,7 @@ blend_max(struct gl_context *ctx, GLuint n, const GLubyte 
> mask[],
>   * Modulate:  result = src * dest
>   * Any chanType ok.
>   */
> -static void _BLENDAPI
> +static void
>  blend_modulate(struct gl_context *ctx, GLuint n, const GLubyte mask[],
> GLvoid *src, const GLvoid *dst, GLenum chanType)
>  {
> diff --git a/src/mesa/swrast/s_context.h b/src/mesa/swrast/s_context.h
> index d6fbc5d5..7cf0e30 100644
> --- a/src/mesa/swrast/s_context.h
> +++ b/src/mesa/swrast/s_context.h
> @@ -58,10 +58,10 @@ typedef void (*texture_sample_func)(struct gl_context 
> *ctx,
>  GLuint n, const GLfloat texcoords[][4],
>  const GLfloat lambda[], GLfloat 
> rgba[][4]);
>  
> -typedef void (_ASMAPIP blend_func)( struct gl_context *ctx, GLuint n,
> -const GLubyte mask[],
> -GLvoid *src, const GLvoid *dst,
> -GLenum chanType);

So... this code was already broken (or at least inconsistent) sometimes
since this would be _ASMAPIP even if _BLENDAPI was not _ASMAPI. :)

> +typedef void (*blend_func)(struct gl_context *ctx, GLuint n,
> +   const GLubyte mask[],
> +   GLvoid *src, const GLvoid *dst,
> +   GLenum chanType);
>  
>  typedef void (*swrast_point_func)( struct gl_context *ctx, const SWvertex *);
>  

___

Re: [Mesa-dev] [PATCH 01/13] i965: Factor out logic to build a send message instruction with indirect descriptor.

2015-03-11 Thread Pohjolainen, Topi
On Fri, Feb 27, 2015 at 05:34:44PM +0200, Francisco Jerez wrote:
> ---
>  src/mesa/drivers/dri/i965/brw_eu.h   | 19 ++--
>  src/mesa/drivers/dri/i965/brw_eu_emit.c  | 58 
> ++--
>  src/mesa/drivers/dri/i965/brw_fs_generator.cpp   | 55 +-
>  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 37 ---
>  4 files changed, 77 insertions(+), 92 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_eu.h 
> b/src/mesa/drivers/dri/i965/brw_eu.h
> index 1b954c8..9b1e0e2 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu.h
> +++ b/src/mesa/drivers/dri/i965/brw_eu.h
> @@ -205,11 +205,6 @@ void brw_set_sampler_message(struct brw_compile *p,
>   unsigned simd_mode,
>   unsigned return_format);
>  
> -void brw_set_indirect_send_descriptor(struct brw_compile *p,
> -  brw_inst *insn,
> -  unsigned sfid,
> -  struct brw_reg descriptor);
> -
>  void brw_set_dp_read_message(struct brw_compile *p,
>brw_inst *insn,
>unsigned binding_table_index,
> @@ -243,6 +238,20 @@ void brw_urb_WRITE(struct brw_compile *p,
>  unsigned offset,
>  unsigned swizzle);
>  
> +/**
> + * Send message to shared unit \p sfid with a possibly indirect descriptor \p
> + * desc.  If the descriptor is not an immediate it will be transparently
> + * loaded to an address register using an OR instruction that will be 
> returned
> + * to the caller so additional descriptor bits can be specified with the 
> usual
> + * brw_set_*_message() helper functions.
> + */
> +struct brw_inst *
> +brw_send_indirect_message(struct brw_compile *p,
> +  unsigned sfid,
> +  struct brw_reg dst,
> +  struct brw_reg payload,
> +  struct brw_reg desc);
> +
>  void brw_ff_sync(struct brw_compile *p,
>  struct brw_reg dest,
>  unsigned msg_reg_nr,
> diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
> b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> index e69840a..cd2ce92 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
> +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> @@ -751,21 +751,6 @@ brw_set_sampler_message(struct brw_compile *p,
> }
>  }
>  
> -void brw_set_indirect_send_descriptor(struct brw_compile *p,
> -  brw_inst *insn,
> -  unsigned sfid,
> -  struct brw_reg descriptor)
> -{
> -   /* Only a0.0 may be used as SEND's descriptor operand. */
> -   assert(descriptor.file == BRW_ARCHITECTURE_REGISTER_FILE);
> -   assert(descriptor.type == BRW_REGISTER_TYPE_UD);
> -   assert(descriptor.nr == BRW_ARF_ADDRESS);
> -   assert(descriptor.subnr == 0);
> -
> -   brw_set_message_descriptor(p, insn, sfid, 0, 0, false, false);
> -   brw_set_src1(p, insn, descriptor);
> -}
> -
>  static void
>  gen7_set_dp_scratch_message(struct brw_compile *p,
>  brw_inst *inst,
> @@ -2490,6 +2475,49 @@ void brw_urb_WRITE(struct brw_compile *p,
>  swizzle);
>  }
>  
> +struct brw_inst *
> +brw_send_indirect_message(struct brw_compile *p,
> +  unsigned sfid,
> +  struct brw_reg dst,
> +  struct brw_reg payload,
> +  struct brw_reg desc)
> +{
> +   const struct brw_context *brw = p->brw;
> +   struct brw_inst *send, *setup;
> +
> +   assert(desc.type == BRW_REGISTER_TYPE_UD);
> +
> +   if (desc.file == BRW_IMMEDIATE_VALUE) {
> +  setup = send = next_insn(p, BRW_OPCODE_SEND);
> +  brw_set_src1(p, send, desc);
> +
> +   } else {
> +  struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
> +
> +  brw_push_insn_state(p);
> +  brw_set_default_access_mode(p, BRW_ALIGN_1);
> +  brw_set_default_mask_control(p, BRW_MASK_DISABLE);
> +  brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
> +
> +  /* Load the indirect descriptor to an address register using OR so the
> +   * caller can specify additional descriptor bits with the usual
> +   * brw_set_*_message() helper functions.
> +   */
> +  setup = brw_OR(p, addr, desc, brw_imm_ud(0));
> +
> +  brw_pop_insn_state(p);
> +
> +  send = next_insn(p, BRW_OPCODE_SEND);
> +  brw_set_src1(p, send, addr);
> +   }
> +
> +   brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UD));
> +   brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
> +   brw_inst_set_sfid(brw, send, sfid);
> +
> +   return setup;
> +}
> +
>  static int
>  brw_find_next_block_end(struct brw_compile *p, int start_offset)
>  {
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
> b/src/mesa/dr

[Mesa-dev] [PATCH 2/3] mesa: remove _XFORMAPI

2015-03-11 Thread Brian Paul
---
 src/mesa/math/m_clip_tmp.h  |  8 +++
 src/mesa/math/m_norm_tmp.h  | 18 +++---
 src/mesa/math/m_xform.h | 36 +++-
 src/mesa/math/m_xform_tmp.h | 58 ++---
 4 files changed, 56 insertions(+), 64 deletions(-)

diff --git a/src/mesa/math/m_clip_tmp.h b/src/mesa/math/m_clip_tmp.h
index 45dec47..e289be7 100644
--- a/src/mesa/math/m_clip_tmp.h
+++ b/src/mesa/math/m_clip_tmp.h
@@ -40,7 +40,7 @@
  * \param andMask bitwise-AND of clipMask values
  * \return proj_vec pointer
  */
-static GLvector4f * _XFORMAPI TAG(cliptest_points4)( GLvector4f *clip_vec,
+static GLvector4f * TAG(cliptest_points4)( GLvector4f *clip_vec,
  GLvector4f *proj_vec,
  GLubyte clipMask[],
  GLubyte *orMask,
@@ -120,7 +120,7 @@ static GLvector4f * _XFORMAPI TAG(cliptest_points4)( 
GLvector4f *clip_vec,
  * \param andMask bitwise-AND of clipMask values
  * \return clip_vec pointer
  */
-static GLvector4f * _XFORMAPI TAG(cliptest_np_points4)( GLvector4f *clip_vec,
+static GLvector4f * TAG(cliptest_np_points4)( GLvector4f *clip_vec,
GLvector4f *proj_vec,
GLubyte clipMask[],
GLubyte *orMask,
@@ -177,7 +177,7 @@ static GLvector4f * _XFORMAPI TAG(cliptest_np_points4)( 
GLvector4f *clip_vec,
 }
 
 
-static GLvector4f * _XFORMAPI TAG(cliptest_points3)( GLvector4f *clip_vec,
+static GLvector4f * TAG(cliptest_points3)( GLvector4f *clip_vec,
  GLvector4f *proj_vec,
  GLubyte clipMask[],
  GLubyte *orMask,
@@ -213,7 +213,7 @@ static GLvector4f * _XFORMAPI TAG(cliptest_points3)( 
GLvector4f *clip_vec,
 }
 
 
-static GLvector4f * _XFORMAPI TAG(cliptest_points2)( GLvector4f *clip_vec,
+static GLvector4f * TAG(cliptest_points2)( GLvector4f *clip_vec,
  GLvector4f *proj_vec,
  GLubyte clipMask[],
  GLubyte *orMask,
diff --git a/src/mesa/math/m_norm_tmp.h b/src/mesa/math/m_norm_tmp.h
index c8fab0e..d3ec1c2 100644
--- a/src/mesa/math/m_norm_tmp.h
+++ b/src/mesa/math/m_norm_tmp.h
@@ -39,7 +39,7 @@
  *   optimization)
  * dest - the destination vector of normals
  */
-static void _XFORMAPI
+static void
 TAG(transform_normalize_normals)( const GLmatrix *mat,
   GLfloat scale,
   const GLvector4f *in,
@@ -106,7 +106,7 @@ TAG(transform_normalize_normals)( const GLmatrix *mat,
 }
 
 
-static void _XFORMAPI
+static void
 TAG(transform_normalize_normals_no_rot)( const GLmatrix *mat,
  GLfloat scale,
  const GLvector4f *in,
@@ -171,7 +171,7 @@ TAG(transform_normalize_normals_no_rot)( const GLmatrix 
*mat,
 }
 
 
-static void _XFORMAPI
+static void
 TAG(transform_rescale_normals_no_rot)( const GLmatrix *mat,
GLfloat scale,
const GLvector4f *in,
@@ -200,7 +200,7 @@ TAG(transform_rescale_normals_no_rot)( const GLmatrix *mat,
 }
 
 
-static void _XFORMAPI
+static void
 TAG(transform_rescale_normals)( const GLmatrix *mat,
 GLfloat scale,
 const GLvector4f *in,
@@ -232,7 +232,7 @@ TAG(transform_rescale_normals)( const GLmatrix *mat,
 }
 
 
-static void _XFORMAPI
+static void
 TAG(transform_normals_no_rot)( const GLmatrix *mat,
   GLfloat scale,
   const GLvector4f *in,
@@ -262,7 +262,7 @@ TAG(transform_normals_no_rot)( const GLmatrix *mat,
 }
 
 
-static void _XFORMAPI
+static void
 TAG(transform_normals)( const GLmatrix *mat,
 GLfloat scale,
 const GLvector4f *in,
@@ -292,7 +292,7 @@ TAG(transform_normals)( const GLmatrix *mat,
 }
 
 
-static void _XFORMAPI
+static void
 TAG(normalize_normals)( const GLmatrix *mat,
 GLfloat scale,
 const GLvector4f *in,
@@ -338,7 +338,7 @@ TAG(normalize_normals)( const GLmatrix *mat,
 }
 
 
-static void _XFORMAPI
+static void
 TAG(rescale_normals)( const GLmatrix *mat,
   GLfloat scale,
   const GLvector4f *in,
@@ -361,7 +361,7 @@ TAG(rescale_normals)( const GLmatrix *mat,
 }
 
 
-static void _XFORMAPI
+static void
 TAG(init_c_norm_transform)( void )
 {
_mesa_normal_tab[NORM_TRANSFORM_NO_ROT] =
diff --git a/src/mesa/math/m_xform.h b/src/mesa/math/m_xfor

[Mesa-dev] [PATCH 1/3] swrast: remove _BLENDAPI

2015-03-11 Thread Brian Paul
_BLENDAPI boils down to __cdecl on Windows, but __cdecl is the default
calling convention so this serves no purpose.
---
 src/mesa/swrast/s_blend.c   | 21 +
 src/mesa/swrast/s_context.h |  8 
 2 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/src/mesa/swrast/s_blend.c b/src/mesa/swrast/s_blend.c
index 7cb1194..8479b0b 100644
--- a/src/mesa/swrast/s_blend.c
+++ b/src/mesa/swrast/s_blend.c
@@ -48,9 +48,6 @@
 #if defined(USE_MMX_ASM)
 #include "x86/mmx.h"
 #include "x86/common_x86_asm.h"
-#define _BLENDAPI _ASMAPI
-#else
-#define _BLENDAPI
 #endif
 
 
@@ -69,7 +66,7 @@
  * No-op means the framebuffer values remain unchanged.
  * Any chanType ok.
  */
-static void _BLENDAPI
+static void
 blend_noop(struct gl_context *ctx, GLuint n, const GLubyte mask[],
GLvoid *src, const GLvoid *dst, GLenum chanType)
 {
@@ -97,7 +94,7 @@ blend_noop(struct gl_context *ctx, GLuint n, const GLubyte 
mask[],
  * Special case for glBlendFunc(GL_ONE, GL_ZERO)
  * Any chanType ok.
  */
-static void _BLENDAPI
+static void
 blend_replace(struct gl_context *ctx, GLuint n, const GLubyte mask[],
   GLvoid *src, const GLvoid *dst, GLenum chanType)
 {
@@ -117,7 +114,7 @@ blend_replace(struct gl_context *ctx, GLuint n, const 
GLubyte mask[],
  * Common transparency blending mode:
  * glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA).
  */
-static void _BLENDAPI
+static void
 blend_transparency_ubyte(struct gl_context *ctx, GLuint n, const GLubyte 
mask[],
  GLvoid *src, const GLvoid *dst, GLenum chanType)
 {
@@ -162,7 +159,7 @@ blend_transparency_ubyte(struct gl_context *ctx, GLuint n, 
const GLubyte mask[],
 }
 
 
-static void _BLENDAPI
+static void
 blend_transparency_ushort(struct gl_context *ctx, GLuint n, const GLubyte 
mask[],
   GLvoid *src, const GLvoid *dst, GLenum chanType)
 {
@@ -200,7 +197,7 @@ blend_transparency_ushort(struct gl_context *ctx, GLuint n, 
const GLubyte mask[]
 }
 
 
-static void _BLENDAPI
+static void
 blend_transparency_float(struct gl_context *ctx, GLuint n, const GLubyte 
mask[],
  GLvoid *src, const GLvoid *dst, GLenum chanType)
 {
@@ -242,7 +239,7 @@ blend_transparency_float(struct gl_context *ctx, GLuint n, 
const GLubyte mask[],
  * Add src and dest: glBlendFunc(GL_ONE, GL_ONE).
  * Any chanType ok.
  */
-static void _BLENDAPI
+static void
 blend_add(struct gl_context *ctx, GLuint n, const GLubyte mask[],
   GLvoid *src, const GLvoid *dst, GLenum chanType)
 {
@@ -308,7 +305,7 @@ blend_add(struct gl_context *ctx, GLuint n, const GLubyte 
mask[],
  * Blend min function.
  * Any chanType ok.
  */
-static void _BLENDAPI
+static void
 blend_min(struct gl_context *ctx, GLuint n, const GLubyte mask[],
   GLvoid *src, const GLvoid *dst, GLenum chanType)
 {
@@ -361,7 +358,7 @@ blend_min(struct gl_context *ctx, GLuint n, const GLubyte 
mask[],
  * Blend max function.
  * Any chanType ok.
  */
-static void _BLENDAPI
+static void
 blend_max(struct gl_context *ctx, GLuint n, const GLubyte mask[],
   GLvoid *src, const GLvoid *dst, GLenum chanType)
 {
@@ -415,7 +412,7 @@ blend_max(struct gl_context *ctx, GLuint n, const GLubyte 
mask[],
  * Modulate:  result = src * dest
  * Any chanType ok.
  */
-static void _BLENDAPI
+static void
 blend_modulate(struct gl_context *ctx, GLuint n, const GLubyte mask[],
GLvoid *src, const GLvoid *dst, GLenum chanType)
 {
diff --git a/src/mesa/swrast/s_context.h b/src/mesa/swrast/s_context.h
index d6fbc5d5..7cf0e30 100644
--- a/src/mesa/swrast/s_context.h
+++ b/src/mesa/swrast/s_context.h
@@ -58,10 +58,10 @@ typedef void (*texture_sample_func)(struct gl_context *ctx,
 GLuint n, const GLfloat texcoords[][4],
 const GLfloat lambda[], GLfloat rgba[][4]);
 
-typedef void (_ASMAPIP blend_func)( struct gl_context *ctx, GLuint n,
-const GLubyte mask[],
-GLvoid *src, const GLvoid *dst,
-GLenum chanType);
+typedef void (*blend_func)(struct gl_context *ctx, GLuint n,
+   const GLubyte mask[],
+   GLvoid *src, const GLvoid *dst,
+   GLenum chanType);
 
 typedef void (*swrast_point_func)( struct gl_context *ctx, const SWvertex *);
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] mesa: remove _ASMAPI, ASMAPIP

2015-03-11 Thread Brian Paul
---
 src/mesa/main/compiler.h| 19 ---
 src/mesa/swrast/s_context.c |  2 +-
 src/mesa/x86/3dnow.c|  6 +++---
 src/mesa/x86/common_x86.c   | 12 ++--
 src/mesa/x86/mmx.h  | 10 +-
 src/mesa/x86/sse.c  | 20 ++--
 src/mesa/x86/x86_xform.c|  8 
 src/mesa/x86/x86_xform.h| 30 +++---
 8 files changed, 44 insertions(+), 63 deletions(-)

diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h
index 6fded88..b9f808e 100644
--- a/src/mesa/main/compiler.h
+++ b/src/mesa/main/compiler.h
@@ -113,25 +113,6 @@ extern "C" {
 #define LE32_TO_CPU( x )   CPU_TO_LE32( x )
 
 
-
-/**
- * Create a macro so that asm functions can be linked into compilers other
- * than GNU C
- */
-#ifndef _ASMAPI
-#if defined(_WIN32)
-#define _ASMAPI __cdecl
-#else
-#define _ASMAPI
-#endif
-#ifdef PTR_DECL_IN_FRONT
-#define_ASMAPIP * _ASMAPI
-#else
-#define_ASMAPIP _ASMAPI *
-#endif
-#endif
-
-
 /**
  * LONGSTRING macro
  * gcc -pedantic warns about long string literals, LONGSTRING silences that.
diff --git a/src/mesa/swrast/s_context.c b/src/mesa/swrast/s_context.c
index 51cc227..ecde292 100644
--- a/src/mesa/swrast/s_context.c
+++ b/src/mesa/swrast/s_context.c
@@ -409,7 +409,7 @@ _swrast_validate_point( struct gl_context *ctx, const 
SWvertex *v0 )
  * Called via swrast->BlendFunc.  Examine GL state to choose a blending
  * function, then call it.
  */
-static void _ASMAPI
+static void
 _swrast_validate_blend_func(struct gl_context *ctx, GLuint n, const GLubyte 
mask[],
 GLvoid *src, const GLvoid *dst,
 GLenum chanType )
diff --git a/src/mesa/x86/3dnow.c b/src/mesa/x86/3dnow.c
index c46cfbc..df7c64f 100644
--- a/src/mesa/x86/3dnow.c
+++ b/src/mesa/x86/3dnow.c
@@ -47,20 +47,20 @@ DECLARE_XFORM_GROUP( 3dnow, 3 )
 DECLARE_XFORM_GROUP( 3dnow, 4 )
 
 
-extern void _ASMAPI
+extern void
 _mesa_v16_3dnow_general_xform( GLfloat *first_vert,
   const GLfloat *m,
   const GLfloat *src,
   GLuint src_stride,
   GLuint count );
 
-extern void _ASMAPI
+extern void
 _mesa_3dnow_project_vertices( GLfloat *first,
  GLfloat *last,
  const GLfloat *m,
  GLuint stride );
 
-extern void _ASMAPI
+extern void
 _mesa_3dnow_project_clipped_vertices( GLfloat *first,
  GLfloat *last,
  const GLfloat *m,
diff --git a/src/mesa/x86/common_x86.c b/src/mesa/x86/common_x86.c
index 14b497d..86fbca9 100644
--- a/src/mesa/x86/common_x86.c
+++ b/src/mesa/x86/common_x86.c
@@ -68,12 +68,12 @@ static int detection_debug = GL_FALSE;
 
 /* No reason for this to be public.
  */
-extern GLuint  _ASMAPI _mesa_x86_has_cpuid(void);
-extern void_ASMAPI _mesa_x86_cpuid(GLuint op, GLuint *reg_eax, GLuint 
*reg_ebx, GLuint *reg_ecx, GLuint *reg_edx);
-extern GLuint  _ASMAPI _mesa_x86_cpuid_eax(GLuint op);
-extern GLuint  _ASMAPI _mesa_x86_cpuid_ebx(GLuint op);
-extern GLuint  _ASMAPI _mesa_x86_cpuid_ecx(GLuint op);
-extern GLuint  _ASMAPI _mesa_x86_cpuid_edx(GLuint op);
+extern GLuint _mesa_x86_has_cpuid(void);
+extern void _mesa_x86_cpuid(GLuint op, GLuint *reg_eax, GLuint *reg_ebx, 
GLuint *reg_ecx, GLuint *reg_edx);
+extern GLuint _mesa_x86_cpuid_eax(GLuint op);
+extern GLuint _mesa_x86_cpuid_ebx(GLuint op);
+extern GLuint _mesa_x86_cpuid_ecx(GLuint op);
+extern GLuint _mesa_x86_cpuid_edx(GLuint op);
 
 
 #if defined(USE_SSE_ASM)
diff --git a/src/mesa/x86/mmx.h b/src/mesa/x86/mmx.h
index 8101cf8..0c35490 100644
--- a/src/mesa/x86/mmx.h
+++ b/src/mesa/x86/mmx.h
@@ -31,27 +31,27 @@
 
 struct gl_context;
 
-extern void _ASMAPI
+extern void
 _mesa_mmx_blend_transparency( struct gl_context *ctx, GLuint n, const GLubyte 
mask[],
   GLvoid *rgba, const GLvoid *dest,
   GLenum chanType );
 
-extern void _ASMAPI
+extern void
 _mesa_mmx_blend_add( struct gl_context *ctx, GLuint n, const GLubyte mask[],
  GLvoid *rgba, const GLvoid *dest,
  GLenum chanType );
 
-extern void _ASMAPI
+extern void
 _mesa_mmx_blend_min( struct gl_context *ctx, GLuint n, const GLubyte mask[],
  GLvoid *rgba, const GLvoid *dest,
  GLenum chanType );
 
-extern void _ASMAPI
+extern void
 _mesa_mmx_blend_max( struct gl_context *ctx, GLuint n, const GLubyte mask[],
  GLvoid *rgba, const GLvoid *dest,
  GLenum chanType );
 
-extern void _ASMAPI
+extern void
 _mesa_mmx_blend_modulate( struct gl_context *ctx, GLuint n, const GLubyte 
mask[],
   GLvoid *rgba, const GLvoid *dest,
   GLenum chanType );
diff --git a/src/mesa/x86/sse.c b/src/mesa/x86/sse.c

Re: [Mesa-dev] [PATCH 5/6] mesa: replace _ASMAPI with __cdecl

2015-03-11 Thread Brian Paul

On 03/11/2015 08:21 AM, Jose Fonseca wrote:

On 11/03/15 14:07, Brian Paul wrote:

On 03/11/2015 01:29 AM, Jose Fonseca wrote:

I don't know the story about this _ASMAPI macro, but __cdecl is also the
default calling convention for WIN32:

   https://msdn.microsoft.com/en-us/library/zkwh89ks.aspx


Yeah, I had read that too actually but I figured it was safer to keep
things as-is in case there was more to it than met the eye.



so it's redundant to add it.  (The other common calling convention --
__stdcall/APIENTRY/etc -- is the one that must always be explcitely
added when needed).

So we should just drop _ASMAPI/__cdecl completely.


Note that we have something similar in gallium (grep PIPE_CDECL).  Maybe
that could be removed too.


I suspect that's because we copied  src/mesa/x86/rtasm ->
src/gallium/auxiliary/rtasm .

(BTW, it would be a nice beginner project to replace translate_sse to
use LLVM and get rid of src/gallium/auxiliary/rtasm )


Anyway, I'll redo this patch series (rm all __cdecl) and do some testing
on Windows.


Heh, since we removed the old swrast-based Windows gdi driver, I believe 
that none of functions that were tagged with __cdecl (mesa/math/ and 
swrast/) are used on Windows now anyway.


New patch series coming...

-Brian

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Statically linking libstdc++ and libgcc

2015-03-11 Thread Ian Romanick
On 03/11/2015 09:32 AM, Jose Fonseca wrote:
> I can almost bet that NVIDIA uses C++ somewhere, but no dependencies on
> libstdc++.  Not sure if they chose to statically link libstdc++ to
> support multiple distros, or to avoid issues like applications shipping
> their own libstdc++...

My recollection from talking to be people is that they use a subset of
C++ that prevents them from needing to link with libstc++.  Use of LLVM
prevents much of Mesa from using that option.

> Jose
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Statically linking libstdc++ and libgcc

2015-03-11 Thread Ian Romanick
On 03/11/2015 09:31 AM, Tobias Klausmann wrote:
> The problem in not forcing this to link statically is, that if a
> distribution decides to not use this static option, the problem persists
> on that distribution. On top every lib pulled in by steam from the
> system would need to be link statically, like mesa. Instead of fixing
> every lib steam pulls in (how many are there?), fix the steam runtime to

Yeah, static linking is a terrible, partial solution.

> a) not ship libstdc++.so and libgcc_s.so and declare older version of
> these libs as not supported (thats what people do when they face the
> incompatibility problem with steams versions of these libs: just delete
> them from the runtime and everything is fine)

Let's be 100% clear.  This is NOT an option for Steam.  They ship
thousands of closed-source applications.  These applications were built
and tested against specific versions of specific libraries.  Removing
support for old run-times is equivalent to removing support for those
applications.  They can't tell their customers, "You upgraded your
distro, so that game you paid money for no longer works.  Tough break, kid."

Forcing an application to use a newer run-time may work over the short
term, but do you think that will still work in 5 years?  It's possible,
but it seems unlikely.  And "seems to work" is not the same as "works
all the way through the whole game every time."

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Statically linking libstdc++ and libgcc

2015-03-11 Thread Tobias Klausmann



On 11.03.2015 17:01, Francisco Jerez wrote:

Vivek Dasmohapatra  writes:


Hi -

Hi,


As you probably already know, there can only be one version of
libstdc++.so in your runtime link chain

That's a common misconception, in principle several versions of
libstdc++.so with different DT_SONAME (i.e. with mutually incompatible
ABIs) can be loaded at the same time, the GNU C++ runtime uses library
and symbol versioning to achieve this.  Of course two different versions
with the same DT_SONAME cannot coexist, but that's not supposed to pose
a problem because applications linked to the earlier version are
expected to be forwards-compatible with more recent minor versions of
the standard library.  See [1].


This is usually not a problem, but when things are linked against the
Steam runtime (for example), they can end up with two - one from the
steam runtime, and one pulled in via the mesa dri libs from the
OS/distribution.


Last time I looked into this, it seemed to be a logical consequence of
the inconsistency of the Steam runtime overriding system libraries: If
they go as far as to override the operating system's copy of the C++
standard library and runtime with an outdated version (even though the
OS libraries with the same DT_SONAME are expected to be
backwards-compatible with Steam's), they should be shipping their own
builds of libGL and DRI drivers as well.


In order to address this, Valve asked Collabora to look at enabling
mesa linking with libstdcc+.a/libgcc.a/libgcc_eh.a instead of
listdcc++so and libgcc_s.so.

I think I've figured out a minimally intrusive way to do this, working
around the fact that libtool really, really wants to link those in
if C++ is involved.

I've broken this up into a couple of pieces:

The first patch attached gets libtool to not link in the .so
files in question: It's pretty small - it doesn't introduce a
configure flag to control this behaviour, but I would be happy
to adapt it to do so if that's considered a better approach.


Honestly, I think that statically linking the C++ runtime is a hack, and
it should be an opt-in based on some configure option if we want to
support it at all.



The problem in not forcing this to link statically is, that if a 
distribution decides to not use this static option, the problem persists 
on that distribution. On top every lib pulled in by steam from the 
system would need to be link statically, like mesa. Instead of fixing 
every lib steam pulls in (how many are there?), fix the steam runtime to
a) not ship libstdc++.so and libgcc_s.so and declare older version of 
these libs as not supported (thats what people do when they face the 
incompatibility problem with steams versions of these libs: just delete 
them from the runtime and everything is fine)

b) ship own mesa and libdrm drivers


The second and third extend this a little further: Patch #3
is actually to llvm, and builds a parallel libLLVM-X.Y-nostdlib.so
which is likewise linked with libstdc++.a et al instead of .so,
and patch #2 makes the mesa build system use said library if it
is available.

So: Would mesa be interested in patches #1 and/or #2?
If not, is there something I could do to make the patches
more palatable, or some other approach that you'd prefer?

( I'm aware of the configure flags to statically link against
libLLVM, but when I tried it with llvm-3.5 and a git checkout
of mesa 10.6-dev I got link errors, hence the LLVM patchset. )

[1] https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html


[...]


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir/worklist: Don't change the start index when computing the tail index

2015-03-11 Thread Mark Janes
Reviewed-by: Mark Janes 

Jason Ekstrand  writes:

> ---
>  src/glsl/nir/nir_worklist.c | 10 +-
>  src/glsl/nir/nir_worklist.h |  4 ++--
>  2 files changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/src/glsl/nir/nir_worklist.c b/src/glsl/nir/nir_worklist.c
> index a8baae9..3087a1d 100644
> --- a/src/glsl/nir/nir_worklist.c
> +++ b/src/glsl/nir/nir_worklist.c
> @@ -82,7 +82,7 @@ nir_block_worklist_push_head(nir_block_worklist *w, 
> nir_block *block)
>  }
>  
>  nir_block *
> -nir_block_worklist_peek_head(nir_block_worklist *w)
> +nir_block_worklist_peek_head(const nir_block_worklist *w)
>  {
> assert(w->count > 0);
>  
> @@ -114,18 +114,18 @@ nir_block_worklist_push_tail(nir_block_worklist *w, 
> nir_block *block)
>  
> w->count++;
>  
> -   unsigned tail = w->start = (w->start + w->count - 1) % w->size;
> +   unsigned tail = (w->start + w->count - 1) % w->size;
>  
> w->blocks[tail] = block;
> BITSET_SET(w->blocks_present, block->index);
>  }
>  
>  nir_block *
> -nir_block_worklist_peek_tail(nir_block_worklist *w)
> +nir_block_worklist_peek_tail(const nir_block_worklist *w)
>  {
> assert(w->count > 0);
>  
> -   unsigned tail = w->start = (w->start + w->count - 1) % w->size;
> +   unsigned tail = (w->start + w->count - 1) % w->size;
>  
> return w->blocks[tail];
>  }
> @@ -135,7 +135,7 @@ nir_block_worklist_pop_tail(nir_block_worklist *w)
>  {
> assert(w->count > 0);
>  
> -   unsigned tail = w->start = (w->start + w->count - 1) % w->size;
> +   unsigned tail = (w->start + w->count - 1) % w->size;
>  
> w->count--;
>  
> diff --git a/src/glsl/nir/nir_worklist.h b/src/glsl/nir/nir_worklist.h
> index d5a8568..829bff2 100644
> --- a/src/glsl/nir/nir_worklist.h
> +++ b/src/glsl/nir/nir_worklist.h
> @@ -74,13 +74,13 @@ nir_block_worklist_is_empty(const nir_block_worklist *w)
>  
>  void nir_block_worklist_push_head(nir_block_worklist *w, nir_block *block);
>  
> -nir_block *nir_block_worklist_peek_head(nir_block_worklist *w);
> +nir_block *nir_block_worklist_peek_head(const nir_block_worklist *w);
>  
>  nir_block *nir_block_worklist_pop_head(nir_block_worklist *w);
>  
>  void nir_block_worklist_push_tail(nir_block_worklist *w, nir_block *block);
>  
> -nir_block *nir_block_worklist_peek_tail(nir_block_worklist *w);
> +nir_block *nir_block_worklist_peek_tail(const nir_block_worklist *w);
>  
>  nir_block *nir_block_worklist_pop_tail(nir_block_worklist *w);
>  
> -- 
> 2.3.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Defer the throttle until we submit new commands

2015-03-11 Thread Ian Romanick
I've sent my substantive feedback in other messages in the thread.
Below is just some nitpicky formatting kinds of stuff.

On 03/11/2015 05:36 AM, Chris Wilson wrote:
> Currently, we throttle before the user begins preparing commands for the
> next frame when we acquire the draw/read buffers. However, construction
> of the command buffer can itself take significant time relative to the
> frame time. If we move the throttle from the buffer acquire to the
> command submit phase we can allow the user to improve concurrency
> between the CPU and GPU (i.e. reduce the amount of time we waste inside
> the throttle).
> 
> Signed-off-by: Chris Wilson 
> Cc: Daniel Vetter 
> Cc: Kenneth Graunke 
> Cc: Ben Widawsky 
> Cc: Kristian Høgsberg 
> Cc: Chad Versace 
> ---
>  src/mesa/drivers/dri/i965/brw_context.c   | 34 --
>  src/mesa/drivers/dri/i965/intel_batchbuffer.c | 41 
> +++
>  2 files changed, 41 insertions(+), 34 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
> b/src/mesa/drivers/dri/i965/brw_context.c
> index 8257fb6..88685cd 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.c
> +++ b/src/mesa/drivers/dri/i965/brw_context.c
> @@ -1231,40 +1231,6 @@ intel_prepare_render(struct brw_context *brw)
>  */
> if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
>brw->front_buffer_dirty = true;
> -
> -   /* Wait for the swapbuffers before the one we just emitted, so we
> -* don't get too many swaps outstanding for apps that are GPU-heavy
> -* but not CPU-heavy.
> -*
> -* We're using intelDRI2Flush (called from the loader before
> -* swapbuffer) and glFlush (for front buffer rendering) as the
> -* indicator that a frame is done and then throttle when we get
> -* here as we prepare to render the next frame.  At this point for
> -* round trips for swap/copy and getting new buffers are done and
> -* we'll spend less time waiting on the GPU.
> -*
> -* Unfortunately, we don't have a handle to the batch containing
> -* the swap, and getting our hands on that doesn't seem worth it,
> -* so we just us the first batch we emitted after the last swap.
> -*/
> -   if (brw->need_swap_throttle && brw->throttle_batch[0]) {
> -  if (brw->throttle_batch[1]) {
> - if (!brw->disable_throttling)
> -drm_intel_bo_wait_rendering(brw->throttle_batch[1]);
> - drm_intel_bo_unreference(brw->throttle_batch[1]);
> -  }
> -  brw->throttle_batch[1] = brw->throttle_batch[0];
> -  brw->throttle_batch[0] = NULL;
> -  brw->need_swap_throttle = false;
> -  /* Throttling here is more precise than the throttle ioctl, so skip it 
> */
> -  brw->need_flush_throttle = false;
> -   }
> -
> -   if (brw->need_flush_throttle) {
> -  __DRIscreen *psp = brw->intelScreen->driScrnPriv;
> -  drmCommandNone(psp->fd, DRM_I915_GEM_THROTTLE);
> -  brw->need_flush_throttle = false;
> -   }
>  }
>  
>  /**
> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
> b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> index 87862cd..8d7741b 100644
> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> @@ -33,6 +33,9 @@
>  #include "intel_fbo.h"
>  #include "brw_context.h"
>  
> +#include 
> +#include 
> +
>  static void
>  intel_batchbuffer_reset(struct brw_context *brw);
>  
> @@ -226,6 +229,43 @@ brw_finish_batch(struct brw_context *brw)
> brw->cache.bo_used_by_gpu = true;
>  }
>  
> +static void throttle(struct brw_context *brw)

static void
throttle(struct brw_context *brw)

(For other readers: This is so you can use 'git grep ^throttle' to find
a function definition.)

> +{
> +   /* Wait for the swapbuffers before the one we just emitted, so we
> +* don't get too many swaps outstanding for apps that are GPU-heavy
> +* but not CPU-heavy.
> +*
> +* We're using intelDRI2Flush (called from the loader before
> +* swapbuffer) and glFlush (for front buffer rendering) as the
> +* indicator that a frame is done and then throttle when we get
> +* here as we prepare to render the next frame.  At this point for
> +* round trips for swap/copy and getting new buffers are done and
> +* we'll spend less time waiting on the GPU.
> +*
> +* Unfortunately, we don't have a handle to the batch containing
> +* the swap, and getting our hands on that doesn't seem worth it,
> +* so we just us the first batch we emitted after the last swap.
^^ use

> +*/
> +   if (brw->need_swap_throttle && brw->throttle_batch[0]) {
> +  if (brw->throttle_batch[1]) {
> + if (!brw->disable_throttling)
> +drm_intel_bo_wait_rendering(brw->throttle_batch[1]);
> + drm_intel_bo_unreference(brw->throttle_batch[1]);
> +  }
> +  brw->throttle_batch[1] = brw->throttle_batch[0];
> +  brw->throttle_batch[0] = NULL;
> +  brw->need_swap_t

Re: [Mesa-dev] [PATCH 1/2] i965: Throttle rendering to an fbo

2015-03-11 Thread Ian Romanick
I've sent my substantive feedback in other messages in the thread.
Below is just some nitpicky formatting kinds of stuff.

On 03/06/2015 01:58 PM, Chris Wilson wrote:
> When rendering to an fbo, even though it may be acting as a winsys
> frontbuffer or just generally, we never throttle. However, when rendering
> to an fbo, there is no natural frame boundary. Conventionally we use
> SwapBuffers and glFinish, but potential callers avoid often glFinish for
> being too heavy handed (waiting on all outstanding rendering to complete).
> The kernel provides a soft-throttling option for this case that waits for
> rendering older than 20ms to be complete (that's a little too lax to be
> used for swapbuffers, but is here a useful safety net). The remaining
> choice is then either never to throttle, throttle after every draw call,
> or at after intermediate user defined point such as glFlush and thus all the
> implied flushes. This patch opts for the latter as that is the current
> method used for flushing to front buffers.
> 
> v2: Defer the throttling from inside the flush to the next
> intel_prepare_render() and switch non-fbo frontbuffer throttling over to
> use the same lax method. The issuing being that
> glFlush()/intel_prepare_read() is just as likely to be called inside a
> tight loop and not at "frame" boundaries.
> 
> v3: Rename from need_front_throttle to need_flush_throttle to avoid any
> ambiguity between front buffer rendering and fbo rendering. (Chad)
> 
> Signed-off-by: Chris Wilson 
> Cc: Daniel Vetter 
> Cc: Kenneth Graunke 
> Cc: Ben Widawsky 
> Cc: Kristian Høgsberg 
> Cc: Chad Versace 
> Reviewed-by: Chad Versace 
> ---
>  src/mesa/drivers/dri/i965/brw_context.c  | 16 
>  src/mesa/drivers/dri/i965/brw_context.h  | 12 +++-
>  src/mesa/drivers/dri/i965/intel_screen.c |  8 
>  3 files changed, 27 insertions(+), 9 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
> b/src/mesa/drivers/dri/i965/brw_context.c
> index 972e458..bfda55f 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.c
> +++ b/src/mesa/drivers/dri/i965/brw_context.c
> @@ -232,8 +232,8 @@ intel_glFlush(struct gl_context *ctx)
>  
> intel_batchbuffer_flush(brw);
> intel_flush_front(ctx);
> -   if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
> -  brw->need_throttle = true;
> +
> +   brw->need_flush_throttle = true;
>  }
>  
>  static void
> @@ -1238,12 +1238,20 @@ intel_prepare_render(struct brw_context *brw)
>  * the swap, and getting our hands on that doesn't seem worth it,
>  * so we just us the first batch we emitted after the last swap.
>  */
> -   if (brw->need_throttle && brw->first_post_swapbuffers_batch) {
> +   if (brw->need_swap_throttle && brw->first_post_swapbuffers_batch) {
>if (!brw->disable_throttling)
>   drm_intel_bo_wait_rendering(brw->first_post_swapbuffers_batch);
>drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
>brw->first_post_swapbuffers_batch = NULL;
> -  brw->need_throttle = false;
> +  brw->need_swap_throttle = false;
> +  /* Throttling here is more precise than the throttle ioctl, so skip it 
> */
> +  brw->need_flush_throttle = false;
> +   }
> +
> +   if (brw->need_flush_throttle) {
> +  __DRIscreen *psp = brw->intelScreen->driScrnPriv;
> +  drmCommandNone(psp->fd, DRM_I915_GEM_THROTTLE);
> +  brw->need_flush_throttle = false;
> }
>  }
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index 682fbe9..7854300 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -1031,7 +1031,17 @@ struct brw_context
>  
> /** Framerate throttling: @{ */
> drm_intel_bo *first_post_swapbuffers_batch;
> -   bool need_throttle;

Blank line here.

> +   /* Limit the number of outstanding SwapBuffers by waiting for an earlier
> +* frame of rendering to complete. This gives a very precise cap to the
> +* latency between input and output such that rendering never gets more
> +* than a frame behind the user. (With the caveat that we technically are
> +* not using the SwapBuffers itself as a barrier but the first batch
> +* submitted afterwards, which may be immediately prior to the next
> +* SwapBuffers.)
> +*/
> +   bool need_swap_throttle;
> +   /** General throttling, not caught by throttling between SwapBuffers */
> +   bool need_flush_throttle;
> /** @} */
>  
> GLuint stats_wm;
> diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
> b/src/mesa/drivers/dri/i965/intel_screen.c
> index cea7ddf..3640b67 100644
> --- a/src/mesa/drivers/dri/i965/intel_screen.c
> +++ b/src/mesa/drivers/dri/i965/intel_screen.c
> @@ -174,10 +174,10 @@ intel_dri2_flush_with_flags(__DRIcontext *cPriv,
> if (flags & __DRI2_FLUSH_DRAWABLE)
>intel_resolve_for_dri2_flush(brw, dPriv);
>  
> -   if (reason == __DRI2_THROTTLE_SWAPBUFFER ||
> 

Re: [Mesa-dev] [PATCH] i965: Throttle rendering to an fbo

2015-03-11 Thread Ian Romanick
On 03/06/2015 06:30 AM, Daniel Vetter wrote:
> On Thu, Mar 05, 2015 at 02:38:44PM -0800, Ian Romanick wrote:
>> On 03/04/2015 10:28 AM, Chad Versace wrote:
>>> That text does not appear in the GL spec. When I read the manpage alongside
>>> the GL spec, to get a more complete context, I think the manpage contains
>>> that phrase simply to contrast with glFinish. In my reading, it does not 
>>> imply that
>>> glFlush may wait for *some* previously issued GL commands to complete.
>>
>> glFlush was invented to support indirect rendering (especially to the
>> front buffer):  it flushes the buffer in libGL to the xserver.  If
>> you're making any other assumptions about what it does or does not do...
>> continue at your own peril.
> 
> Well plan B is the kernel throttles for you. But that's going to cause a
> fuzz since at least for benchmarking the kernel kinda lacks the necessary
> information to make informed calls about when to throttle and how.

Based on my (possibly flawed) understanding of the kernel commit message
the Chris previously cited, we only need to throttle when not throttling
would cause a problem.  Is that correct?  Based on the rest of my
understanding, Mesa doesn't have the right information either.

> So is there no gl entry point in mesa we can abuse and make this happen?
> Citing the spec doesn't make the real world issue go away.

I think we're at a place where real worlds collide.

There are a lot of poorly written apps out there.  Much of it is the
"beat on it until it works" syndrome.  This results in a lot of glFlush
abuse.  This is part of the reason that drivers for deferred rendering
GPUs generally just ignore glFlush.  Unnecessary flushes REALLY hurt
performance on tilers, but they don't matter so much on immediate mode
renderers.

Applications do FBO rendering for a lot of things that don't end up on
the screen.  For example, some applications will load images from disk
and use FBO rendering to generate the mipmaps.  Applications also do
varying amounts of FBO rendering for pre-passes, shadow map generation,
etc.  A few applications also do FBO rendering as a sort of poor man's
compute shader.

Now we have the collision.  You have an game that is going to do a few
thousand draws to FBOs to generate mipmaps at start-up with a bunch of
(probably unnecessary) glFlush calls sprinkled around.  Now somebody
gets a bug report that the game takes too long to load because it's
getting throttled.

I think throttling submission in Mesa is the right place, but I think we
need additional information to know when it is necessary.

> -Daniel

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] mesa: move fpclassify work-arounds into c99_math.h

2015-03-11 Thread Jose Fonseca

On 11/03/15 15:48, Brian Paul wrote:

On 03/11/2015 08:20 AM, Brian Paul wrote:

On 03/11/2015 01:23 AM, Jose Fonseca wrote:

On 11/03/15 01:41, Brian Paul wrote:

---
  include/c99_math.h  | 52
+
  src/mesa/main/querymatrix.c | 51
+---
  2 files changed, 53 insertions(+), 50 deletions(-)

diff --git a/include/c99_math.h b/include/c99_math.h
index 0a49950..f1a6685 100644
--- a/include/c99_math.h
+++ b/include/c99_math.h
@@ -161,4 +161,56 @@ llrintf(float f)
  #endif


+#if defined(fpclassify)
+/* ISO C99 says that fpclassify is a macro.  Assume that any
implementation
+ * of fpclassify, whether it's in a C99 compiler or not, will be a
macro.
+ */
+#elif defined(__cplusplus)
+/* For C++, fpclassify() should be defined in  */


Does MSVC's cmath implement fpclassify?  If not this #elif clause should
be moved after MSC_VER clause.


Looks like the answer is "no".


Actually, I take that back.  This patch works as-is on MSVC.  I also
added an fpclassify() test in a .cpp file and it builds too (both MSVC
and gcc).

The MSVC header #includes files are hard to follow, but somewhere,
fpclassify() and the FP_* tokens are getting defined when compiling C++
code.


It looks like fpclassify is part of C++11 -- 
http://en.cppreference.com/w/cpp/numeric/math/fpclassify  -- and MSVC 
has much better C++11 complaince than C99.


I doubt that the same is true for MSVC 2008 though, as C++11 was first 
added in 2012.


So we might need

  [...]
  #elif defined(__cplusplus) && (!defined(_MSC_VER) || _MSC_VER >= 1700)
  /* For C++11, fpclassify() should be defined in  */
  #elif defined(_MSC_VER)
  [...]

Anyway I doubt this will be enough...

The problem is that for C++11 fpclassify is a function inside std 
namespace  and to get it one needs to include  . But if there's a 
`include ` first the std::fpclassify is never defined. 
Furthermore certain system headers include / internally, 
so often apps don't even have full control over which gets included when.


Honestly, whoever thought of reusing C99 macros and functions in C++ std 
namespace did a huge disservice to everybody. :(



Mesa is not using C++11 by defaut on GCC, which is why it gets away. 
MSVC 2013 enables C++11 support by default.  I recently hit this issue 
(with isnan/isinf and not fpclassify) on Apitrace after enabling C++11 
support [1].



Anyway I don't want to make this any harder for you.  I think it's fine 
for you to commit whatever you have at the moment, and deal with the 
problems later when we actually hit them.  Just warning  that 
fpclassify/isinf/isnan and C++ business will probably come up again (and 
again)...




If I move the  #elif defined(__cplusplus) case after the #elif
defined(_MSC_VER) case then things blow up.

I'll repost the patch with just the #error change.



[1] 
https://github.com/apitrace/apitrace/commit/fc740d702d3a1369603d08444ec8809d1a109160

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] mesa: move fpclassify work-arounds into c99_math.h

2015-03-11 Thread Brian Paul

On 03/11/2015 08:20 AM, Brian Paul wrote:

On 03/11/2015 01:23 AM, Jose Fonseca wrote:

On 11/03/15 01:41, Brian Paul wrote:

---
  include/c99_math.h  | 52
+
  src/mesa/main/querymatrix.c | 51
+---
  2 files changed, 53 insertions(+), 50 deletions(-)

diff --git a/include/c99_math.h b/include/c99_math.h
index 0a49950..f1a6685 100644
--- a/include/c99_math.h
+++ b/include/c99_math.h
@@ -161,4 +161,56 @@ llrintf(float f)
  #endif


+#if defined(fpclassify)
+/* ISO C99 says that fpclassify is a macro.  Assume that any
implementation
+ * of fpclassify, whether it's in a C99 compiler or not, will be a
macro.
+ */
+#elif defined(__cplusplus)
+/* For C++, fpclassify() should be defined in  */


Does MSVC's cmath implement fpclassify?  If not this #elif clause should
be moved after MSC_VER clause.


Looks like the answer is "no".


Actually, I take that back.  This patch works as-is on MSVC.  I also 
added an fpclassify() test in a .cpp file and it builds too (both MSVC 
and gcc).


The MSVC header #includes files are hard to follow, but somewhere, 
fpclassify() and the FP_* tokens are getting defined when compiling C++ 
code.


If I move the  #elif defined(__cplusplus) case after the #elif 
defined(_MSC_VER) case then things blow up.


I'll repost the patch with just the #error change.

-Brian


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v1] vbo: improve the code style by adjust the preprocessing c code directives

2015-03-11 Thread Brian Paul

Looks good.  I'll push this for you.

-Brian

On 03/11/2015 03:25 AM, marius.pre...@intel.com wrote:

From: Marius Predut 

Brian Paul review suggestion: there's more macro use here than necessary.
Removed and redefine some #define preprocessing directives.
Removed the directive input parameter 'T' .
No functional changes.

Signed-off-by: Marius Predut 
Reviewed-by: Brian Paul 
---
  src/mesa/vbo/vbo_attrib_tmp.h |   74 ++---
  src/mesa/vbo/vbo_exec_api.c   |2 +-
  2 files changed, 34 insertions(+), 42 deletions(-)

diff --git a/src/mesa/vbo/vbo_attrib_tmp.h b/src/mesa/vbo/vbo_attrib_tmp.h
index b1c3d98..17e0578 100644
--- a/src/mesa/vbo/vbo_attrib_tmp.h
+++ b/src/mesa/vbo/vbo_attrib_tmp.h
@@ -30,35 +30,30 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.


  /* ATTR */
-#define ATTR( A, N, T, V0, V1, V2, V3 ) \
-ATTR_##T((A), (N), (T), (V0), (V1), (V2), (V3))
-
-#define ATTR_GL_UNSIGNED_INT( A, N, T, V0, V1, V2, V3 ) \
-ATTR_UNION(A, N, T, UINT_AS_UNION(V0), UINT_AS_UNION(V1), \
-UINT_AS_UNION(V2), UINT_AS_UNION(V3))
-#define ATTR_GL_INT( A, N, T, V0, V1, V2, V3 ) \
-ATTR_UNION(A, N, T, INT_AS_UNION(V0), INT_AS_UNION(V1), \
+#define ATTRI( A, N, V0, V1, V2, V3 ) \
+ATTR_UNION(A, N, GL_INT, INT_AS_UNION(V0), INT_AS_UNION(V1), \
  INT_AS_UNION(V2), INT_AS_UNION(V3))
-#define ATTR_GL_FLOAT( A, N, T, V0, V1, V2, V3 ) \
-ATTR_UNION(A, N, T, FLOAT_AS_UNION(V0), FLOAT_AS_UNION(V1),\
+#define ATTRUI( A, N, V0, V1, V2, V3 ) \
+ATTR_UNION(A, N, GL_UNSIGNED_INT, UINT_AS_UNION(V0), UINT_AS_UNION(V1), \
+UINT_AS_UNION(V2), UINT_AS_UNION(V3))
+#define ATTRF( A, N, V0, V1, V2, V3 ) \
+ATTR_UNION(A, N, GL_FLOAT, FLOAT_AS_UNION(V0), FLOAT_AS_UNION(V1),\
  FLOAT_AS_UNION(V2), FLOAT_AS_UNION(V3))


  /* float */
-#define ATTR1FV( A, V ) ATTR( A, 1, GL_FLOAT, (V)[0], 0, 0, 1 )
-#define ATTR2FV( A, V ) ATTR( A, 2, GL_FLOAT, (V)[0], (V)[1], 0, 1 )
-#define ATTR3FV( A, V ) ATTR( A, 3, GL_FLOAT, (V)[0], (V)[1], (V)[2], 1 )
-#define ATTR4FV( A, V ) ATTR( A, 4, GL_FLOAT, (V)[0], (V)[1], (V)[2], (V)[3] )
+#define ATTR1FV( A, V ) ATTRF( A, 1, (V)[0], 0, 0, 1 )
+#define ATTR2FV( A, V ) ATTRF( A, 2, (V)[0], (V)[1], 0, 1 )
+#define ATTR3FV( A, V ) ATTRF( A, 3, (V)[0], (V)[1], (V)[2], 1 )
+#define ATTR4FV( A, V ) ATTRF( A, 4, (V)[0], (V)[1], (V)[2], (V)[3] )

-#define ATTR1F( A, X )  ATTR( A, 1, GL_FLOAT, X, 0, 0, 1 )
-#define ATTR2F( A, X, Y )   ATTR( A, 2, GL_FLOAT, X, Y, 0, 1 )
-#define ATTR3F( A, X, Y, Z )ATTR( A, 3, GL_FLOAT, X, Y, Z, 1 )
-#define ATTR4F( A, X, Y, Z, W ) ATTR( A, 4, GL_FLOAT, X, Y, Z, W )
+#define ATTR1F( A, X )  ATTRF( A, 1, X, 0, 0, 1 )
+#define ATTR2F( A, X, Y )   ATTRF( A, 2, X, Y, 0, 1 )
+#define ATTR3F( A, X, Y, Z )ATTRF( A, 3, X, Y, Z, 1 )
+#define ATTR4F( A, X, Y, Z, W ) ATTRF( A, 4, X, Y, Z, W )

-/* int */
-#define ATTRI( A, N, X, Y, Z, W) ATTR( A, N, GL_INT, \
-   X, Y, Z, W )

+/* int */
  #define ATTR2IV( A, V ) ATTRI( A, 2, (V)[0], (V)[1], 0, 1 )
  #define ATTR3IV( A, V ) ATTRI( A, 3, (V)[0], (V)[1], (V)[2], 1 )
  #define ATTR4IV( A, V ) ATTRI( A, 4, (V)[0], (V)[1], (V)[2], (V)[3] )
@@ -70,9 +65,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.


  /* uint */
-#define ATTRUI( A, N, X, Y, Z, W) ATTR( A, N, GL_UNSIGNED_INT, \
-X, Y, Z, W )
-
  #define ATTR2UIV( A, V ) ATTRUI( A, 2, (V)[0], (V)[1], 0, 1 )
  #define ATTR3UIV( A, V ) ATTRUI( A, 3, (V)[0], (V)[1], (V)[2], 1 )
  #define ATTR4UIV( A, V ) ATTRUI( A, 4, (V)[0], (V)[1], (V)[2], (V)[3] )
@@ -82,7 +74,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
  #define ATTR3UI( A, X, Y, Z )ATTRUI( A, 3, X, Y, Z, 1 )
  #define ATTR4UI( A, X, Y, Z, W ) ATTRUI( A, 4, X, Y, Z, W )

-#define MAT_ATTR( A, N, V ) ATTR( A, N, GL_FLOAT, (V)[0], (V)[1], (V)[2], 
(V)[3] )
+#define MAT_ATTR( A, N, V ) ATTRF( A, N, (V)[0], (V)[1], (V)[2], (V)[3] )

  static inline float conv_ui10_to_norm_float(unsigned ui10)
  {
@@ -94,20 +86,20 @@ static inline float conv_ui2_to_norm_float(unsigned ui2)
 return ui2 / 3.0f;
  }

-#define ATTRUI10_1( A, UI ) ATTR( A, 1, GL_FLOAT, (UI) & 0x3ff, 0, 0, 1 )
-#define ATTRUI10_2( A, UI ) ATTR( A, 2, GL_FLOAT, (UI) & 0x3ff, ((UI) >> 10) & 
0x3ff, 0, 1 )
-#define ATTRUI10_3( A, UI ) ATTR( A, 3, GL_FLOAT, (UI) & 0x3ff, ((UI) >> 10) & 0x3ff, 
((UI) >> 20) & 0x3ff, 1 )
-#define ATTRUI10_4( A, UI ) ATTR( A, 4, GL_FLOAT, (UI) & 0x3ff, ((UI) >> 10) & 0x3ff, ((UI) 
>> 20) & 0x3ff, ((UI) >> 30) & 0x3 )
+#define ATTRUI10_1( A, UI ) ATTRF( A, 1, (UI) & 0x3ff, 0, 0, 1 )
+#define ATTRUI10_2( A, UI ) ATTRF( A, 2, (UI) & 0x3ff, ((UI) >> 10) & 0x3ff, 
0, 1 )
+#define ATTRUI10_3( A, UI ) ATTRF( A, 3, (UI) & 0x3ff, ((UI) >> 10) & 0x3ff, ((UI) 
>> 20) & 0x3ff, 1 )
+#define ATTRUI10_4( A, UI ) ATTRF( A, 4, (UI) & 0x3ff, ((UI) >> 10) & 0x3ff, ((UI) >> 20) 
& 0x3ff, ((UI) >> 30) & 0x3 )

-#define ATTRUI10N_1( A, UI ) ATTR( A, 1, GL_FLOAT, conv_ui10_to_norm_float((UI) 

Re: [Mesa-dev] [PATCH ] vbo: improve the code style by adjust the preprocessing c code directives.

2015-03-11 Thread Predut, Marius
> -Original Message-
> From: mesa-dev [mailto:mesa-dev-boun...@lists.freedesktop.org] On Behalf Of
> Brian Paul
> Sent: Tuesday, March 10, 2015 5:36 PM
> To: Predut, Marius; mesa-dev@lists.freedesktop.org
> Subject: Re: [Mesa-dev] [PATCH ] vbo: improve the code style by adjust the
> preprocessing c code directives.
> 
> On 03/09/2015 01:44 PM, marius.pre...@intel.com wrote:
> > From: Marius Predut 
> >
> > Brain Paul review suggestion: there's more macro use here than necessary.
> 
> "Brian"

Sorry

> 
> > Removed and redefine some #define preprocessing directives.
> > Removed the directive input parameter 'T' .
> > No functional changes.
> >
> > Signed-off-by: Marius Predut 
> 
> Looks good.  Thanks.  I presume you did a piglit test to verify the change.
> 
> Reviewed-by: Brian Paul 

Yes, I done the piglit tests. 

> 
> 
> > ---
> >   src/mesa/vbo/vbo_attrib_tmp.h |   74 ++---
> 
> >   src/mesa/vbo/vbo_exec_api.c   |2 +-
> >   2 files changed, 34 insertions(+), 42 deletions(-)
> >
> > diff --git a/src/mesa/vbo/vbo_attrib_tmp.h
> > b/src/mesa/vbo/vbo_attrib_tmp.h index 80e8aaf..348dd77 100644
> > --- a/src/mesa/vbo/vbo_attrib_tmp.h
> > +++ b/src/mesa/vbo/vbo_attrib_tmp.h
> > @@ -30,35 +30,30 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
> >
> >
> >   /* ATTR */
> > -#define ATTR( A, N, T, V0, V1, V2, V3 ) \
> > -ATTR_##T((A), (N), (T), (V0), (V1), (V2), (V3))
> > -
> > -#define ATTR_GL_UNSIGNED_INT( A, N, T, V0, V1, V2, V3 ) \
> > -ATTR_UNION(A, N, T, UINT_AS_UNION(V0), UINT_AS_UNION(V1), \
> > -UINT_AS_UNION(V2), UINT_AS_UNION(V3))
> > -#define ATTR_GL_INT( A, N, T, V0, V1, V2, V3 ) \
> > -ATTR_UNION(A, N, T, INT_AS_UNION(V0), INT_AS_UNION(V1), \
> > +#define ATTRI( A, N, V0, V1, V2, V3 ) \
> > +ATTR_UNION(A, N, GL_INT, INT_AS_UNION(V0), INT_AS_UNION(V1), \
> >   INT_AS_UNION(V2), INT_AS_UNION(V3)) -#define ATTR_GL_FLOAT(
> > A, N, T, V0, V1, V2, V3 ) \
> > -ATTR_UNION(A, N, T, FLOAT_AS_UNION(V0), FLOAT_AS_UNION(V1),\
> > +#define ATTRUI( A, N, V0, V1, V2, V3 ) \
> > +ATTR_UNION(A, N, GL_UNSIGNED_INT, UINT_AS_UNION(V0), UINT_AS_UNION(V1),
> \
> > +UINT_AS_UNION(V2), UINT_AS_UNION(V3)) #define ATTRF( A, N,
> > +V0, V1, V2, V3 ) \
> > +ATTR_UNION(A, N, GL_FLOAT, FLOAT_AS_UNION(V0),
> > +FLOAT_AS_UNION(V1),\
> >   FLOAT_AS_UNION(V2), FLOAT_AS_UNION(V3))
> >
> >
> >   /* float */
> > -#define ATTR1FV( A, V ) ATTR( A, 1, GL_FLOAT, (V)[0], 0, 0, 1 )
> > -#define ATTR2FV( A, V ) ATTR( A, 2, GL_FLOAT, (V)[0], (V)[1], 0, 1 )
> > -#define ATTR3FV( A, V ) ATTR( A, 3, GL_FLOAT, (V)[0], (V)[1], (V)[2],
> > 1 ) -#define ATTR4FV( A, V ) ATTR( A, 4, GL_FLOAT, (V)[0], (V)[1],
> > (V)[2], (V)[3] )
> > +#define ATTR1FV( A, V ) ATTRF( A, 1, (V)[0], 0, 0, 1 ) #define
> > +ATTR2FV( A, V ) ATTRF( A, 2, (V)[0], (V)[1], 0, 1 ) #define ATTR3FV(
> > +A, V ) ATTRF( A, 3, (V)[0], (V)[1], (V)[2], 1 ) #define ATTR4FV( A, V
> > +) ATTRF( A, 4, (V)[0], (V)[1], (V)[2], (V)[3] )
> >
> > -#define ATTR1F( A, X )  ATTR( A, 1, GL_FLOAT, X, 0, 0, 1 )
> > -#define ATTR2F( A, X, Y )   ATTR( A, 2, GL_FLOAT, X, Y, 0, 1 )
> > -#define ATTR3F( A, X, Y, Z )ATTR( A, 3, GL_FLOAT, X, Y, Z, 1 )
> > -#define ATTR4F( A, X, Y, Z, W ) ATTR( A, 4, GL_FLOAT, X, Y, Z, W )
> > +#define ATTR1F( A, X )  ATTRF( A, 1, X, 0, 0, 1 )
> > +#define ATTR2F( A, X, Y )   ATTRF( A, 2, X, Y, 0, 1 )
> > +#define ATTR3F( A, X, Y, Z )ATTRF( A, 3, X, Y, Z, 1 )
> > +#define ATTR4F( A, X, Y, Z, W ) ATTRF( A, 4, X, Y, Z, W )
> >
> > -/* int */
> > -#define ATTRI( A, N, X, Y, Z, W) ATTR( A, N, GL_INT, \
> > -   X, Y, Z, W )
> >
> > +/* int */
> >   #define ATTR2IV( A, V ) ATTRI( A, 2, (V)[0], (V)[1], 0, 1 )
> >   #define ATTR3IV( A, V ) ATTRI( A, 3, (V)[0], (V)[1], (V)[2], 1 )
> >   #define ATTR4IV( A, V ) ATTRI( A, 4, (V)[0], (V)[1], (V)[2], (V)[3]
> > ) @@ -70,9 +65,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
> >
> >
> >   /* uint */
> > -#define ATTRUI( A, N, X, Y, Z, W) ATTR( A, N, GL_UNSIGNED_INT, \
> > -X, Y, Z, W )
> > -
> >   #define ATTR2UIV( A, V ) ATTRUI( A, 2, (V)[0], (V)[1], 0, 1 )
> >   #define ATTR3UIV( A, V ) ATTRUI( A, 3, (V)[0], (V)[1], (V)[2], 1 )
> >   #define ATTR4UIV( A, V ) ATTRUI( A, 4, (V)[0], (V)[1], (V)[2],
> > (V)[3] ) @@ -82,7 +74,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
> >   #define ATTR3UI( A, X, Y, Z )ATTRUI( A, 3, X, Y, Z, 1 )
> >   #define ATTR4UI( A, X, Y, Z, W ) ATTRUI( A, 4, X, Y, Z, W )
> >
> > -#define MAT_ATTR( A, N, V ) ATTR( A, N, GL_FLOAT, (V)[0], (V)[1],
> > (V)[2], (V)[3] )
> > +#define MAT_ATTR( A, N, V ) ATTRF( A, N, (V)[0], (V)[1], (V)[2],
> > +(V)[3] )
> >
> >   static inline float conv_ui10_to_norm_float(unsigned ui10)
> >   {
> > @@ -94,20 +86,20 @@ static inline float conv_ui2_to_norm_float(unsigned ui2)
> >  return ui2 / 3.0f;
> >   }
> >
> > -#define ATTRUI10_1( A, UI ) ATTR( A, 1, GL_FLOAT, (UI) & 0x3ff, 0

[Mesa-dev] [PATCH v1] vbo: improve the code style by adjust the preprocessing c code directives

2015-03-11 Thread marius . predut
From: Marius Predut 

Brian Paul review suggestion: there's more macro use here than necessary.
Removed and redefine some #define preprocessing directives.
Removed the directive input parameter 'T' .
No functional changes.

Signed-off-by: Marius Predut 
Reviewed-by: Brian Paul 
---
 src/mesa/vbo/vbo_attrib_tmp.h |   74 ++---
 src/mesa/vbo/vbo_exec_api.c   |2 +-
 2 files changed, 34 insertions(+), 42 deletions(-)

diff --git a/src/mesa/vbo/vbo_attrib_tmp.h b/src/mesa/vbo/vbo_attrib_tmp.h
index b1c3d98..17e0578 100644
--- a/src/mesa/vbo/vbo_attrib_tmp.h
+++ b/src/mesa/vbo/vbo_attrib_tmp.h
@@ -30,35 +30,30 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 
 /* ATTR */
-#define ATTR( A, N, T, V0, V1, V2, V3 ) \
-ATTR_##T((A), (N), (T), (V0), (V1), (V2), (V3))
-
-#define ATTR_GL_UNSIGNED_INT( A, N, T, V0, V1, V2, V3 ) \
-ATTR_UNION(A, N, T, UINT_AS_UNION(V0), UINT_AS_UNION(V1), \
-UINT_AS_UNION(V2), UINT_AS_UNION(V3))
-#define ATTR_GL_INT( A, N, T, V0, V1, V2, V3 ) \
-ATTR_UNION(A, N, T, INT_AS_UNION(V0), INT_AS_UNION(V1), \
+#define ATTRI( A, N, V0, V1, V2, V3 ) \
+ATTR_UNION(A, N, GL_INT, INT_AS_UNION(V0), INT_AS_UNION(V1), \
 INT_AS_UNION(V2), INT_AS_UNION(V3))
-#define ATTR_GL_FLOAT( A, N, T, V0, V1, V2, V3 ) \
-ATTR_UNION(A, N, T, FLOAT_AS_UNION(V0), FLOAT_AS_UNION(V1),\
+#define ATTRUI( A, N, V0, V1, V2, V3 ) \
+ATTR_UNION(A, N, GL_UNSIGNED_INT, UINT_AS_UNION(V0), UINT_AS_UNION(V1), \
+UINT_AS_UNION(V2), UINT_AS_UNION(V3))
+#define ATTRF( A, N, V0, V1, V2, V3 ) \
+ATTR_UNION(A, N, GL_FLOAT, FLOAT_AS_UNION(V0), FLOAT_AS_UNION(V1),\
 FLOAT_AS_UNION(V2), FLOAT_AS_UNION(V3))
 
 
 /* float */
-#define ATTR1FV( A, V ) ATTR( A, 1, GL_FLOAT, (V)[0], 0, 0, 1 )
-#define ATTR2FV( A, V ) ATTR( A, 2, GL_FLOAT, (V)[0], (V)[1], 0, 1 )
-#define ATTR3FV( A, V ) ATTR( A, 3, GL_FLOAT, (V)[0], (V)[1], (V)[2], 1 )
-#define ATTR4FV( A, V ) ATTR( A, 4, GL_FLOAT, (V)[0], (V)[1], (V)[2], (V)[3] )
+#define ATTR1FV( A, V ) ATTRF( A, 1, (V)[0], 0, 0, 1 )
+#define ATTR2FV( A, V ) ATTRF( A, 2, (V)[0], (V)[1], 0, 1 )
+#define ATTR3FV( A, V ) ATTRF( A, 3, (V)[0], (V)[1], (V)[2], 1 )
+#define ATTR4FV( A, V ) ATTRF( A, 4, (V)[0], (V)[1], (V)[2], (V)[3] )
 
-#define ATTR1F( A, X )  ATTR( A, 1, GL_FLOAT, X, 0, 0, 1 )
-#define ATTR2F( A, X, Y )   ATTR( A, 2, GL_FLOAT, X, Y, 0, 1 )
-#define ATTR3F( A, X, Y, Z )ATTR( A, 3, GL_FLOAT, X, Y, Z, 1 )
-#define ATTR4F( A, X, Y, Z, W ) ATTR( A, 4, GL_FLOAT, X, Y, Z, W )
+#define ATTR1F( A, X )  ATTRF( A, 1, X, 0, 0, 1 )
+#define ATTR2F( A, X, Y )   ATTRF( A, 2, X, Y, 0, 1 )
+#define ATTR3F( A, X, Y, Z )ATTRF( A, 3, X, Y, Z, 1 )
+#define ATTR4F( A, X, Y, Z, W ) ATTRF( A, 4, X, Y, Z, W )
 
-/* int */
-#define ATTRI( A, N, X, Y, Z, W) ATTR( A, N, GL_INT, \
-   X, Y, Z, W )
 
+/* int */
 #define ATTR2IV( A, V ) ATTRI( A, 2, (V)[0], (V)[1], 0, 1 )
 #define ATTR3IV( A, V ) ATTRI( A, 3, (V)[0], (V)[1], (V)[2], 1 )
 #define ATTR4IV( A, V ) ATTRI( A, 4, (V)[0], (V)[1], (V)[2], (V)[3] )
@@ -70,9 +65,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 
 /* uint */
-#define ATTRUI( A, N, X, Y, Z, W) ATTR( A, N, GL_UNSIGNED_INT, \
-X, Y, Z, W )
-
 #define ATTR2UIV( A, V ) ATTRUI( A, 2, (V)[0], (V)[1], 0, 1 )
 #define ATTR3UIV( A, V ) ATTRUI( A, 3, (V)[0], (V)[1], (V)[2], 1 )
 #define ATTR4UIV( A, V ) ATTRUI( A, 4, (V)[0], (V)[1], (V)[2], (V)[3] )
@@ -82,7 +74,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define ATTR3UI( A, X, Y, Z )ATTRUI( A, 3, X, Y, Z, 1 )
 #define ATTR4UI( A, X, Y, Z, W ) ATTRUI( A, 4, X, Y, Z, W )
 
-#define MAT_ATTR( A, N, V ) ATTR( A, N, GL_FLOAT, (V)[0], (V)[1], (V)[2], 
(V)[3] )
+#define MAT_ATTR( A, N, V ) ATTRF( A, N, (V)[0], (V)[1], (V)[2], (V)[3] )
 
 static inline float conv_ui10_to_norm_float(unsigned ui10)
 {
@@ -94,20 +86,20 @@ static inline float conv_ui2_to_norm_float(unsigned ui2)
return ui2 / 3.0f;
 }
 
-#define ATTRUI10_1( A, UI ) ATTR( A, 1, GL_FLOAT, (UI) & 0x3ff, 0, 0, 1 )
-#define ATTRUI10_2( A, UI ) ATTR( A, 2, GL_FLOAT, (UI) & 0x3ff, ((UI) >> 10) & 
0x3ff, 0, 1 )
-#define ATTRUI10_3( A, UI ) ATTR( A, 3, GL_FLOAT, (UI) & 0x3ff, ((UI) >> 10) & 
0x3ff, ((UI) >> 20) & 0x3ff, 1 )
-#define ATTRUI10_4( A, UI ) ATTR( A, 4, GL_FLOAT, (UI) & 0x3ff, ((UI) >> 10) & 
0x3ff, ((UI) >> 20) & 0x3ff, ((UI) >> 30) & 0x3 )
+#define ATTRUI10_1( A, UI ) ATTRF( A, 1, (UI) & 0x3ff, 0, 0, 1 )
+#define ATTRUI10_2( A, UI ) ATTRF( A, 2, (UI) & 0x3ff, ((UI) >> 10) & 0x3ff, 
0, 1 )
+#define ATTRUI10_3( A, UI ) ATTRF( A, 3, (UI) & 0x3ff, ((UI) >> 10) & 0x3ff, 
((UI) >> 20) & 0x3ff, 1 )
+#define ATTRUI10_4( A, UI ) ATTRF( A, 4, (UI) & 0x3ff, ((UI) >> 10) & 0x3ff, 
((UI) >> 20) & 0x3ff, ((UI) >> 30) & 0x3 )
 
-#define ATTRUI10N_1( A, UI ) ATTR( A, 1, GL_FLOAT, 
conv_ui10_to_norm_float((UI) & 0x3ff), 0, 0, 1 )
-#define ATTRUI10N_2( A, UI ) ATTR( A, 2, GL_FLOAT, \
+#define ATTRUI10N_1( A, UI ) ATTRF

Re: [Mesa-dev] [PATCH] xlib: use strdup() instead of _mesa_strdup()

2015-03-11 Thread Ian Romanick
Assuming that this lands before patch 2 in the previous strdup series
(so that the build doesn't break at any point), all 3 patches are

Reviewed-by: Ian Romanick 

On 03/11/2015 07:12 AM, Brian Paul wrote:
> ---
>  src/mesa/drivers/x11/fakeglx.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/src/mesa/drivers/x11/fakeglx.c b/src/mesa/drivers/x11/fakeglx.c
> index 3869e94..4fd6d75 100644
> --- a/src/mesa/drivers/x11/fakeglx.c
> +++ b/src/mesa/drivers/x11/fakeglx.c
> @@ -40,6 +40,7 @@
>   */
>  
>  
> +#include 
>  #include 
>  #include "glxheader.h"
>  #include "glxapi.h"
> @@ -846,7 +847,7 @@ register_with_display(Display *dpy)
>ext = dpy->ext_procs;  /* new extension is at head of list */
>assert(c->extension == ext->codes.extension);
>(void) c; /* silence warning */
> -  ext->name = _mesa_strdup(extName);
> +  ext->name = strdup(extName);
>ext->close_display = close_display_callback;
> }
>  }
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] clover: use get_device_vendor instead of get_vendor

2015-03-11 Thread Giuseppe Bilotta
The pipe's get_vendor method returns something more akin to a driver
vendor string in most cases, instead of the actual device vendor. Use
get_device_vendor instead, which was introduced specifically for this
purpose.

Signed-off-by: Giuseppe Bilotta 

Reviewed-by: Michel Dänzer 
Reviewed-by: Francisco Jerez 
---
 src/gallium/state_trackers/clover/core/device.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/clover/core/device.cpp 
b/src/gallium/state_trackers/clover/core/device.cpp
index c3f3b4e..42b45b7 100644
--- a/src/gallium/state_trackers/clover/core/device.cpp
+++ b/src/gallium/state_trackers/clover/core/device.cpp
@@ -192,7 +192,7 @@ device::device_name() const {
 
 std::string
 device::vendor_name() const {
-   return pipe->get_vendor(pipe);
+   return pipe->get_device_vendor(pipe);
 }
 
 enum pipe_shader_ir
-- 
2.1.2.766.gaa23a90

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCHv4 0/4] Separate device from driver vendor

2015-03-11 Thread Giuseppe Bilotta
OpenCL (as opposed to OpenGL) has separate vendor strings for the
implementation/driver/platform and the device. CL_PLATFORM_VENDOR
is akin to the GL_VENDOR string, while CL_DEVICE_VENDOR is supposed to
return the actual device vendor.

(For example, the AMD OpenCL platform returns GenuineIntel as
CL_DEVICE_VENDOR for an Intel CPU.)

This patchset separates (where possible/necessary) the device vendor
from the driver vendor in existing gallium drivers, and makes clover use
the newly introduced device vendor for CL_DEVICE_VENDOR queries.

For some drivers, get_device_vendor is mapped to get_vendor: this
is usually done because get_vendor already returns an appropriate string,
except in the case of softpipe and llvmpipe, where the returned vendor
should be the CPU vendor (and I don't know enough about the structure
of these drivers to extract the information 8-P)

Changes since v3:

* fixed the prefix for the shortlogs
* minor docs tuning

Changes since v2:

* freedreno defined its device vendor function, but didn't actually use it
* the get_device_vendor entrypoint has been moved right after get_vendor,
  and RST documentation has been added


Changes since v1:

* vc4's get_device_vendor maps to vc4_screen_get_vendor() instead of the
  non-existing vc4_screen_get_device_vendor();
* freedreno should report Qualcomm, as device vendor, so it needs
  an actual implementation of get_device_vendor distinct from that of
  get_vendor.


Giuseppe Bilotta (4):
  gallium: remove trailing whitespace in p_screen.h
  gallium: introduce get_device_vendor() entrypoint for pipes
  gallium: implement get_device_vendor() for existing drivers
  clover: use get_device_vendor instead of get_vendor

 src/gallium/docs/source/screen.rst|  6 ++
 src/gallium/drivers/freedreno/freedreno_screen.c  |  8 
 src/gallium/drivers/galahad/glhd_screen.c | 10 ++
 src/gallium/drivers/i915/i915_screen.c|  7 +++
 src/gallium/drivers/ilo/ilo_screen.c  |  7 +++
 src/gallium/drivers/llvmpipe/lp_screen.c  |  1 +
 src/gallium/drivers/noop/noop_pipe.c  |  6 ++
 src/gallium/drivers/nouveau/nouveau_screen.c  |  7 +++
 src/gallium/drivers/r300/r300_screen.c|  6 ++
 src/gallium/drivers/radeon/r600_pipe_common.c |  6 ++
 src/gallium/drivers/rbug/rbug_screen.c| 10 ++
 src/gallium/drivers/softpipe/sp_screen.c  |  1 +
 src/gallium/drivers/svga/svga_screen.c|  1 +
 src/gallium/drivers/trace/tr_screen.c | 22 ++
 src/gallium/drivers/vc4/vc4_screen.c  |  1 +
 src/gallium/include/pipe/p_screen.h   | 10 +-
 src/gallium/state_trackers/clover/core/device.cpp |  2 +-
 17 files changed, 109 insertions(+), 2 deletions(-)

-- 
2.1.2.766.gaa23a90

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] gallium: remove trailing whitespace in p_screen.h

2015-03-11 Thread Giuseppe Bilotta
Signed-off-by: Giuseppe Bilotta 

Reviewed-by: Michel Dänzer 
---
 src/gallium/include/pipe/p_screen.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/include/pipe/p_screen.h 
b/src/gallium/include/pipe/p_screen.h
index ac88506..4018f8a 100644
--- a/src/gallium/include/pipe/p_screen.h
+++ b/src/gallium/include/pipe/p_screen.h
@@ -147,7 +147,7 @@ struct pipe_screen {
 */
boolean (*can_create_resource)(struct pipe_screen *screen,
   const struct pipe_resource *templat);
-   
+
/**
 * Create a new texture object, using the given template info.
 */
-- 
2.1.2.766.gaa23a90

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/6] mesa: replace _ASMAPI with __cdecl

2015-03-11 Thread Jose Fonseca

On 11/03/15 14:07, Brian Paul wrote:

On 03/11/2015 01:29 AM, Jose Fonseca wrote:

I don't know the story about this _ASMAPI macro, but __cdecl is also the
default calling convention for WIN32:

   https://msdn.microsoft.com/en-us/library/zkwh89ks.aspx


Yeah, I had read that too actually but I figured it was safer to keep
things as-is in case there was more to it than met the eye.



so it's redundant to add it.  (The other common calling convention --
__stdcall/APIENTRY/etc -- is the one that must always be explcitely
added when needed).

So we should just drop _ASMAPI/__cdecl completely.


Note that we have something similar in gallium (grep PIPE_CDECL).  Maybe
that could be removed too.


I suspect that's because we copied  src/mesa/x86/rtasm -> 
src/gallium/auxiliary/rtasm .


(BTW, it would be a nice beginner project to replace translate_sse to 
use LLVM and get rid of src/gallium/auxiliary/rtasm )



Anyway, I'll redo this patch series (rm all __cdecl) and do some testing
on Windows.

-Brian



Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] mesa: move fpclassify work-arounds into c99_math.h

2015-03-11 Thread Brian Paul

On 03/11/2015 01:23 AM, Jose Fonseca wrote:

On 11/03/15 01:41, Brian Paul wrote:

---
  include/c99_math.h  | 52
+
  src/mesa/main/querymatrix.c | 51
+---
  2 files changed, 53 insertions(+), 50 deletions(-)

diff --git a/include/c99_math.h b/include/c99_math.h
index 0a49950..f1a6685 100644
--- a/include/c99_math.h
+++ b/include/c99_math.h
@@ -161,4 +161,56 @@ llrintf(float f)
  #endif


+#if defined(fpclassify)
+/* ISO C99 says that fpclassify is a macro.  Assume that any
implementation
+ * of fpclassify, whether it's in a C99 compiler or not, will be a
macro.
+ */
+#elif defined(__cplusplus)
+/* For C++, fpclassify() should be defined in  */


Does MSVC's cmath implement fpclassify?  If not this #elif clause should
be moved after MSC_VER clause.


Looks like the answer is "no".  I'll move it.  Though things built fine 
on MSVC probably because we don't use fpclassify in any C++ files.






+#elif defined(_MSC_VER)
+/* Not required on VS2013 and above.  Oddly, the fpclassify() function
+ * doesn't exist in such a form on MSVC.  This is an implementation
using
+ * slightly different lower-level Windows functions.
+ */
+#include 
+
+static inline enum {FP_NAN, FP_INFINITE, FP_ZERO, FP_SUBNORMAL,
FP_NORMAL}
+fpclassify(double x)
+{
+   switch(_fpclass(x)) {
+   case _FPCLASS_SNAN: /* signaling NaN */
+   case _FPCLASS_QNAN: /* quiet NaN */
+  return FP_NAN;
+   case _FPCLASS_NINF: /* negative infinity */
+   case _FPCLASS_PINF: /* positive infinity */
+  return FP_INFINITE;
+   case _FPCLASS_NN:   /* negative normal */
+   case _FPCLASS_PN:   /* positive normal */
+  return FP_NORMAL;
+   case _FPCLASS_ND:   /* negative denormalized */
+   case _FPCLASS_PD:   /* positive denormalized */
+  return FP_SUBNORMAL;
+   case _FPCLASS_NZ:   /* negative zero */
+   case _FPCLASS_PZ:   /* positive zero */
+  return FP_ZERO;
+   default:
+  /* Should never get here; but if we do, this will guarantee
+   * that the pattern is not treated like a number.
+   */
+  return FP_NAN;
+   }
+}
+
+#else


If might be better to simply #error here.  Or leave the code but put a
#warning.


I'll do the #error.  That seems to be the pattern we're using in the 
wrapper headers.


-Brian





Jose


+static inline enum {FP_NAN, FP_INFINITE, FP_ZERO, FP_SUBNORMAL,
FP_NORMAL}
+fpclassify(double x)
+{
+   /* XXX do something better someday */
+   return FP_NORMAL;
+}
+
+#endif
+
+
  #endif /* #define _C99_MATH_H_ */
diff --git a/src/mesa/main/querymatrix.c b/src/mesa/main/querymatrix.c
index ef85175..095817c 100644
--- a/src/mesa/main/querymatrix.c
+++ b/src/mesa/main/querymatrix.c
@@ -13,7 +13,7 @@


  #include 
-#include 
+#include "c99_math.h"
  #include "glheader.h"
  #include "querymatrix.h"
  #include "main/get.h"
@@ -37,55 +37,6 @@
  #define INT_TO_FIXED(x) ((GLfixed) ((x) << 16))
  #define FLOAT_TO_FIXED(x) ((GLfixed) ((x) * 65536.0))

-#if defined(fpclassify)
-/* ISO C99 says that fpclassify is a macro.  Assume that any
implementation
- * of fpclassify, whether it's in a C99 compiler or not, will be a
macro.
- */
-#elif defined(_MSC_VER)
-/* Not required on VS2013 and above. */
-/* Oddly, the fpclassify() function doesn't exist in such a form
- * on MSVC.  This is an implementation using slightly different
- * lower-level Windows functions.
- */
-#include 
-
-enum {FP_NAN, FP_INFINITE, FP_ZERO, FP_SUBNORMAL, FP_NORMAL}
-fpclassify(double x)
-{
-switch(_fpclass(x)) {
-case _FPCLASS_SNAN: /* signaling NaN */
-case _FPCLASS_QNAN: /* quiet NaN */
-return FP_NAN;
-case _FPCLASS_NINF: /* negative infinity */
-case _FPCLASS_PINF: /* positive infinity */
-return FP_INFINITE;
-case _FPCLASS_NN:   /* negative normal */
-case _FPCLASS_PN:   /* positive normal */
-return FP_NORMAL;
-case _FPCLASS_ND:   /* negative denormalized */
-case _FPCLASS_PD:   /* positive denormalized */
-return FP_SUBNORMAL;
-case _FPCLASS_NZ:   /* negative zero */
-case _FPCLASS_PZ:   /* positive zero */
-return FP_ZERO;
-default:
-/* Should never get here; but if we do, this will guarantee
- * that the pattern is not treated like a number.
- */
-return FP_NAN;
-}
-}
-
-#else
-
-enum {FP_NAN, FP_INFINITE, FP_ZERO, FP_SUBNORMAL, FP_NORMAL}
-fpclassify(double x)
-{
-   /* XXX do something better someday */
-   return FP_NORMAL;
-}
-
-#endif

  GLbitfield GLAPIENTRY _mesa_QueryMatrixxOES(GLfixed mantissa[16],
GLint exponent[16])
  {



Otherwise series looks good.

Jose


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >