Makefile.am                                               |    1 
 VERSION                                                   |    2 
 src/amd/common/ac_nir_to_llvm.c                           |    2 
 src/amd/vulkan/radv_cmd_buffer.c                          |    3 
 src/egl/drivers/dri2/platform_wayland.c                   |   15 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp |  147 ++++++++------
 src/gallium/drivers/nouveau/nv50/nv50_program.c           |    3 
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c           |    7 
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c            |   87 ++++----
 src/gallium/drivers/radeonsi/si_shader.c                  |    2 
 src/intel/vulkan/.gitignore                               |    1 
 src/intel/vulkan/Makefile.am                              |   11 -
 src/intel/vulkan/intel_icd.json                           |    7 
 src/intel/vulkan/intel_icd.json.in                        |    7 
 src/mapi/Makefile.am                                      |    3 
 src/mapi/glapi/gen/gl_API.xml                             |   30 +-
 src/mapi/glapi/gen/static_data.py                         |   12 +
 src/mesa/state_tracker/st_draw.c                          |   15 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp                |   35 +++
 src/vulkan/wsi/wsi_common_wayland.c                       |    4 
 20 files changed, 261 insertions(+), 133 deletions(-)

New commits:
commit f623a8be3edc898dd13040cd06f91763b5973e48
Author: Emil Velikov <emil.veli...@collabora.com>
Date:   Mon Oct 24 12:09:15 2016 +0100

    Update version to 13.0.0-rc2
    
    Signed-off-by: Emil Velikov <emil.veli...@collabora.com>

diff --git a/VERSION b/VERSION
index 0ee82ac..4bb1cb3 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-13.0.0-rc1
+13.0.0-rc2

commit af81cdfec045f9e175edd5b5e8fcaa9e91f0dd38
Author: Jonathan Gray <j...@jsg.id.au>
Date:   Sat Oct 22 18:19:53 2016 +1100

    mapi: automake: set VISIBILITY_CFLAGS for shared glapi
    
    shared glapi was previously built without setting CFLAGS for
    AM_CFLAGS and VISIBILITY_CFLAGS.
    
    This resulted in symbols being exported that shouldn't be.
    
    The x86 and sparc assembly versions of the dispatch table partially
    mitigated this by using .hidden.  Otherwise shared_dispatch_stub_*
    were being exported.
    
    Signed-off-by: Jonathan Gray <j...@jsg.id.au>
    Cc: "11.2 12.0 13.0" <mesa-sta...@lists.freedesktop.org>
    Reviewed-by: Emil Velikov <emil.veli...@collabora.com>
    Reviewed-by: Eric Engestrom <eric.engest...@imgtec.com>

diff --git a/src/mapi/Makefile.am b/src/mapi/Makefile.am
index 46afe3b..5013e9a 100644
--- a/src/mapi/Makefile.am
+++ b/src/mapi/Makefile.am
@@ -64,6 +64,9 @@ BUILT_SOURCES += shared-glapi/glapi_mapi_tmp.h
 
 lib_LTLIBRARIES += shared-glapi/libglapi.la
 shared_glapi_libglapi_la_SOURCES = $(MAPI_GLAPI_FILES) 
shared-glapi/glapi_mapi_tmp.h
+shared_glapi_libglapi_la_CFLAGS = \
+       $(AM_CFLAGS) \
+       $(VISIBILITY_CFLAGS)
 shared_glapi_libglapi_la_CPPFLAGS = \
        $(AM_CPPFLAGS) \
        -DMAPI_MODE_GLAPI \

commit 990f395e007c3204639daa34efc3049f350ee819
Author: Emil Velikov <emil.veli...@collabora.com>
Date:   Mon Oct 24 11:27:44 2016 +0100

    anv: automake: cleanup the generated json file during make clean
    
    Signed-off-by: Emil Velikov <emil.veli...@collabora.com>
    (cherry picked from commit 8df581520a823564be0ab5af7dbb7d501b1c9670)
    
    Conflicts:
        src/intel/vulkan/Makefile.am

diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am
index 7bf68a0..4a7bb18 100644
--- a/src/intel/vulkan/Makefile.am
+++ b/src/intel/vulkan/Makefile.am
@@ -147,7 +147,7 @@ anv_timestamp.h:
        $(AM_V_GEN) echo "#define ANV_TIMESTAMP \"$(TIMESTAMP_CMD)\"" > $@
 
 BUILT_SOURCES = $(VULKAN_GENERATED_FILES)
-CLEANFILES = $(BUILT_SOURCES) dev_icd.json
+CLEANFILES = $(BUILT_SOURCES) dev_icd.json intel_icd.@host_cpu@.json
 EXTRA_DIST = \
        $(top_srcdir)/include/vulkan/vk_icd.h \
        anv_entrypoints_gen.py \

commit 19e8270fe0333e1087653c4c1a46ac5052f58670
Author: Stencel, Joanna <joanna.sten...@intel.com>
Date:   Mon Oct 24 09:48:11 2016 +0100

    egl/wayland: add missing destroy_window callback
    
    The original patch by Joanna added the function pointer and callback yet
    things got only partially applied - the infra was added, but the
    implementation was missing.
    
    Cc: "12.0 13.0" <mesa-sta...@lists.freedesktop.org>
    Fixes: 690ead4a135 ("egl/wayland-egl: Fix for segfault in
    dri2_wl_destroy_surface.")
    Signed-off-by: Emil Velikov <emil.l.veli...@gmail.com>
    
    (cherry picked from commit 2e0ab61e29c4b44d349ab433c899b691a9b12f68)

diff --git a/src/egl/drivers/dri2/platform_wayland.c 
b/src/egl/drivers/dri2/platform_wayland.c
index ccab192..789e035 100644
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -118,6 +118,13 @@ resize_callback(struct wl_egl_window *wl_win, void *data)
    (*dri2_dpy->flush->invalidate)(dri2_surf->dri_drawable);
 }
 
+static void
+destroy_window_callback(void *data)
+{
+   struct dri2_egl_surface *dri2_surf = data;
+   dri2_surf->wl_win = NULL;
+}
+
 /**
  * Called via eglCreateWindowSurface(), drv->API.CreateWindowSurface().
  */
@@ -159,6 +166,7 @@ dri2_wl_create_surface(_EGLDriver *drv, _EGLDisplay *disp,
 
    dri2_surf->wl_win->private = dri2_surf;
    dri2_surf->wl_win->resize_callback = resize_callback;
+   dri2_surf->wl_win->destroy_window_callback = destroy_window_callback;
 
    dri2_surf->base.Width =  -1;
    dri2_surf->base.Height = -1;
@@ -254,8 +262,11 @@ dri2_wl_destroy_surface(_EGLDriver *drv, _EGLDisplay 
*disp, _EGLSurface *surf)
    if (dri2_surf->throttle_callback)
       wl_callback_destroy(dri2_surf->throttle_callback);
 
-   dri2_surf->wl_win->private = NULL;
-   dri2_surf->wl_win->resize_callback = NULL;
+   if (dri2_surf->wl_win) {
+      dri2_surf->wl_win->private = NULL;
+      dri2_surf->wl_win->resize_callback = NULL;
+      dri2_surf->wl_win->destroy_window_callback = NULL;
+   }
 
    free(surf);
 

commit cac49ee2cd2998a27c49188101f3f1ba7196fcb5
Author: Emil Velikov <emil.veli...@collabora.com>
Date:   Thu Oct 20 18:41:22 2016 +0100

    automake: don't forget to pick wglext.h in the tarball
    
    Earlier commit reworked the header install rules, to ensure that the
    correct ones are installed only as needed.
    
    By doing so it dropped a wildcard which was effectively including the
    wglext.h header in the tarball.
    
    Add the header to the top-level noinst_HEADERS, since it is not
    meant to be installed (autoconf is not used on Windows platforms).
    
    Fixes: a89faa2022f ("autoconf: Make header install distinct for various
    APIs (v2)")
    Cc: "12.0 13.0" <mesa-sta...@lists.freedesktop.org>
    Cc: Chuck Atkins <chuck.atk...@kitware.com>
    Signed-off-by: Emil Velikov <emil.veli...@collabora.com>
    Reviewed-by: Matt Turner <matts...@gmail.com>
    
    (cherry picked from commit 3511a86111866f7233a337a24c9c6442b9aa05e6)

diff --git a/Makefile.am b/Makefile.am
index 49b99de..e6d1969 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -62,6 +62,7 @@ noinst_HEADERS = \
        include/c99_math.h \
        include/c11 \
        include/D3D9 \
+       include/GL/wglext.h \
        include/HaikuGL \
        include/no_extern_c.h \
        include/pci_ids

commit 0f8b7f90d1e52343e04128bc55948c141a71d0ca
Author: Dave Airlie <airl...@redhat.com>
Date:   Thu Oct 20 12:05:44 2016 +1000

    radv: allow cmask transitions without fast clear
    
    This fixes
    dEQP-VK.pipeline.multisample.sampled_image*
    
    These all render to multisampled image, and then
    sample from it, so we must transition it correctly,
    since we have a cmask and fmask this will cause
    the correct transition.
    
    Cc: "13.0" <mesa-sta...@lists.freedesktop.org>
    Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl>
    Signed-off-by: Dave Airlie <airl...@redhat.com>
    (cherry picked from commit a969548f59342330badf78ec7721a1ead7599a29)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 3f1a6f4..690c739 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -2163,9 +2163,6 @@ static void radv_handle_cmask_image_transition(struct 
radv_cmd_buffer *cmd_buffe
                        radv_initialise_cmask(cmd_buffer, image, 0xffffffffu);
        } else if (radv_layout_has_cmask(image, src_layout) &&
                   !radv_layout_has_cmask(image, dst_layout)) {
-
-               if (!cmd_buffer->device->allow_fast_clears)
-                       return;
                radv_fast_clear_flush_image_inplace(cmd_buffer, image);
        }
 }

commit abf5327b86e53e664f312b3b441d0505d2b40032
Author: Jason Ekstrand <jason.ekstr...@intel.com>
Date:   Thu Oct 20 15:46:21 2016 -0700

    anv: Suffix the intel_icd file with the host CPU
    
    Vulkan has a multi-arch problem... The idea behind the Vulkan loader is
    that you have a little json file on your disk that tells the loader where
    to find drivers.  The loader looks for these json files in standard
    locations, and then goes and loads the my_driver.so's that they specify.
    This allows you as a driver implementer to put your driver wherever on the
    disk you want so long as the ICD points in the right place.
    
    For a multi-arch system, however, you may have multiple libvulkan_intel.so
    files installed that the loader needs to pick depending on architecture.
    Since the ICD file format does not specify any architecture information,
    you can't tell the loader where to find the 32-bit version vs. the 64-bit
    version.  The way that packagers have been dealing with this is to place
    libvulkan_intel.so in the top level lib directory and provide just a name
    (and no path) to the loader.  It will then use the regular system search
    paths and find the correct driver.  While this solution works fine for
    distro-installed Vulkan drivers, it doesn't work so well for user-installed
    drivers because they may put it in /opt or $HOME/.local or some other more
    exotic location.  In this case, you can't use an ICD json file with just a
    library name because it doesn't know where to find it; you also have to add
    that to your library lookup path via LD_LIBRARY_PATH or similar.
    
    This patch handles both use-cases by taking advantage of the fact that the
    loader dlopen()s each of the drivers and, if one dlopen() call fails, it
    silently continues on to open other drivers.  By suffixing the icd file, we
    can provide two different json files: intel_icd.x86_64.json and
    intel_icd.i686.json with different paths.  Since dlopen() will only succeed
    on the libvulkan_intel.so of the right arch, the loader will happily ignore
    the others and load that one.  This allows us to properly handle multi-arch
    while still providing a full path so user installs will work fine.
    
    I tested this on my Fedora 25 machine with 32 and 64-bit builds of our
    Vulkan driver installed and 32 and 64-bit builds of crucible.  It seems to
    work just fine.
    
    Signed-off-by: Jason Ekstrand <ja...@jlekstrand.net>
    Reviewed-by: Emil Velikov <emil.veli...@collabora.com>
    Cc: "13.0" <mesa-sta...@lists.freedesktop.org>
    (cherry picked from commit d96345de989c8f9a0328cdc3588bfe186154c8ea)
    
    Squashed with commit:
    
    anv: Always use the full driver path in the intel_icd.*.json
    
    Signed-off-by: Jason Ekstrand <ja...@jlekstrand.net>
    Reviewed-by: Emil Velikov <emil.veli...@collabora.com>
    Cc: "13.0" <mesa-sta...@lists.freedesktop.org>
    (cherry picked from commit 7ea4ef8849c5cc158adbdff1187b91f591552196)
    
    Squashed with commit:
    
    configure: Get rid of the --disable-vulkan-icd-full-driver-path flag
    
    Signed-off-by: Jason Ekstrand <ja...@jlekstrand.net>
    Reviewed-by: Emil Velikov <emil.veli...@collabora.com>
    Cc: "13.0" <mesa-sta...@lists.freedesktop.org>
    (cherry picked from commit 3f05fc62f924c051bdb883482452fb37650d5768)

diff --git a/src/intel/vulkan/.gitignore b/src/intel/vulkan/.gitignore
index bde5cd8..725a858 100644
--- a/src/intel/vulkan/.gitignore
+++ b/src/intel/vulkan/.gitignore
@@ -3,3 +3,4 @@
 /anv_entrypoints.h
 /anv_timestamp.h
 /dev_icd.json
+/intel_icd.*.json
diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am
index 6e17188..7bf68a0 100644
--- a/src/intel/vulkan/Makefile.am
+++ b/src/intel/vulkan/Makefile.am
@@ -152,7 +152,7 @@ EXTRA_DIST = \
        $(top_srcdir)/include/vulkan/vk_icd.h \
        anv_entrypoints_gen.py \
        dev_icd.json.in \
-       intel_icd.json
+       intel_icd.json.in
 
 libvulkan_intel_la_LIBADD = $(VULKAN_LIB_DEPS)
 
@@ -167,7 +167,7 @@ libvulkan_intel_la_LDFLAGS = \
 
 
 icdconfdir = @VULKAN_ICD_INSTALL_DIR@
-icdconf_DATA = intel_icd.json
+icdconf_DATA = intel_icd.@host_cpu@.json
 # The following is used for development purposes, by setting VK_ICD_FILENAMES.
 noinst_DATA = dev_icd.json
 
@@ -176,6 +176,11 @@ dev_icd.json : dev_icd.json.in
                -e "s#@build_libdir@#${abs_top_builddir}/${LIB_DIR}#" \
                < $(srcdir)/dev_icd.json.in > $@
 
+intel_icd.@host_cpu@.json : intel_icd.json.in
+       $(AM_V_GEN) $(SED) \
+               -e "s#@install_libdir@#${libdir}#" \
+               < $(srcdir)/intel_icd.json.in > $@
+
 # Libvulkan with dummy gem. Used for unit tests.
 libvulkan_test_la_SOURCES = $(VULKAN_GEM_STUB_FILES)
 libvulkan_test_la_LIBADD = $(VULKAN_LIB_DEPS)
diff --git a/src/intel/vulkan/intel_icd.json b/src/intel/vulkan/intel_icd.json
deleted file mode 100644
index 277c14e..0000000
--- a/src/intel/vulkan/intel_icd.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-    "file_format_version": "1.0.0",
-    "ICD": {
-        "library_path": "libvulkan_intel.so",
-        "abi_versions": "1.0.3"
-    }
-}
diff --git a/src/intel/vulkan/intel_icd.json.in 
b/src/intel/vulkan/intel_icd.json.in
new file mode 100644
index 0000000..d9b363a
--- /dev/null
+++ b/src/intel/vulkan/intel_icd.json.in
@@ -0,0 +1,7 @@
+{
+    "file_format_version": "1.0.0",
+    "ICD": {
+        "library_path": "@install_libdir@/libvulkan_intel.so",
+        "abi_versions": "1.0.3"
+    }
+}

commit d0d3e721d02cc49693cc7518de9691bba2a7e471
Author: Francisco Jerez <curroje...@riseup.net>
Date:   Tue Oct 18 20:44:10 2016 -0700

    Revert "Revert "mapi: export all GLES 3.2 functions in libGLESv2.so""
    
    This reverts commit 85e9bbc14d93fa7166c9ae075ee7ae29a8313e3f.  The
    previous commit should help with the scons build failure caused by the
    original commit.
    
    Cc: mesa-sta...@lists.freedesktop.org
    Reviewed-by: Dylan Baker <dy...@pnwbakers.com>
    (cherry picked from commit 811eb7f178b8b85ac299121ac09a3180b9b55da2)

diff --git a/src/mapi/glapi/gen/static_data.py 
b/src/mapi/glapi/gen/static_data.py
index 2f403e9..25e78bf 100644
--- a/src/mapi/glapi/gen/static_data.py
+++ b/src/mapi/glapi/gen/static_data.py
@@ -484,17 +484,22 @@ functions = [
     "BindVertexBuffer",
     "BindVertexBuffers",
     "Bitmap",
+    "BlendBarrier",
     "BlendColor",
     "BlendColorEXT",
     "BlendEquation",
     "BlendEquationEXT",
+    "BlendEquationi",
     "BlendEquationiARB",
     "BlendEquationSeparate",
+    "BlendEquationSeparatei",
     "BlendEquationSeparateiARB",
     "BlendFunc",
+    "BlendFunci",
     "BlendFunciARB",
     "BlendFuncSeparate",
     "BlendFuncSeparateEXT",
+    "BlendFuncSeparatei",
     "BlendFuncSeparateiARB",
     "BlitFramebuffer",
     "BufferData",
@@ -825,6 +830,7 @@ functions = [
     "GetFramebufferAttachmentParameteriv",
     "GetFramebufferAttachmentParameterivEXT",
     "GetFramebufferParameteriv",
+    "GetGraphicsResetStatus",
     "GetGraphicsResetStatusARB",
     "GetHandleARB",
     "GetHistogram",
@@ -864,8 +870,11 @@ functions = [
     "GetnSeparableFilterARB",
     "GetnTexImageARB",
     "GetnUniformdvARB",
+    "GetnUniformfv",
     "GetnUniformfvARB",
+    "GetnUniformiv",
     "GetnUniformivARB",
+    "GetnUniformuiv",
     "GetnUniformuivARB",
     "GetObjectLabel",
     "GetObjectParameterfvARB",
@@ -1160,6 +1169,7 @@ functions = [
     "Orthof",
     "Orthox",
     "PassThrough",
+    "PatchParameteri",
     "PauseTransformFeedback",
     "PixelMapfv",
     "PixelMapuiv",
@@ -1191,6 +1201,7 @@ functions = [
     "PopDebugGroup",
     "PopMatrix",
     "PopName",
+    "PrimitiveBoundingBox",
     "PrimitiveRestartIndex",
     "PrimitiveRestartIndexNV",
     "PrimitiveRestartNV",
@@ -1273,6 +1284,7 @@ functions = [
     "RasterPos4s",
     "RasterPos4sv",
     "ReadBuffer",
+    "ReadnPixels",
     "ReadnPixelsARB",
     "ReadPixels",
     "Rectd",

commit 293e4585587b6e080ad637ec765260782630a872
Author: Francisco Jerez <curroje...@riseup.net>
Date:   Tue Oct 18 14:53:20 2016 -0700

    glapi: Move PrimitiveBoundingBox and BlendBarrier definitions into ES3.2 
category.
    
    These two GLES 3.2 entry points were being defined in the category of
    the ARB_ES3_2_compatibility and KHR_blend_equation_advanced extensions
    respectively instead of in the ES3.2 category.  Defining them in the
    ES3.2 category makes sure that the gl_procs.py generator emits
    declarations in the glprocs.h header file for the unsuffixed GLES-only
    entry points that PrimitiveBoundingBoxARB and BlendBarrierKHR
    respectively alias.  This should avoid a compilation failure during
    scons builds in combination with "mapi: export all GLES 3.2 functions
    in libGLESv2.so".
    
    Cc: mesa-sta...@lists.freedesktop.org
    Reviewed-by: Dylan Baker <dy...@pnwbakers.com>
    (cherry picked from commit 15a084a03998c5c86206137fdaf6f43b5f98485a)

diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
index 5998ccf..00c9bb7 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -8296,6 +8296,23 @@
 <!-- ARB extension 171 -->
 <xi:include href="ARB_pipeline_statistics_query.xml" 
xmlns:xi="http://www.w3.org/2001/XInclude"/>
 
+<category name="es3.2">
+    <!-- This should be in es_EXT, but this file is included first and
+         the alias doesn't work otherwise. -->
+    <function name="PrimitiveBoundingBox" es2="3.2" desktop="false">
+        <param name="minX" type="GLfloat"/>
+        <param name="minY" type="GLfloat"/>
+        <param name="minZ" type="GLfloat"/>
+        <param name="minW" type="GLfloat"/>
+        <param name="maxX" type="GLfloat"/>
+        <param name="maxY" type="GLfloat"/>
+        <param name="maxZ" type="GLfloat"/>
+        <param name="maxW" type="GLfloat"/>
+    </function>
+
+    <function name="BlendBarrier" es2="3.2"/>
+</category>
+
 <category name="KHR_blend_equation_advanced" number="174">
     <enum name="BLEND_ADVANCED_COHERENT_KHR"              value="0x9285"/>
 
@@ -8316,7 +8333,6 @@
     <enum name="HSL_COLOR_KHR"                            value="0x92AF"/>
     <enum name="HSL_LUMINOSITY_KHR"                       value="0x92B0"/>
 
-    <function name="BlendBarrier" es2="3.2"/>
     <function name="BlendBarrierKHR" alias="BlendBarrier" es2="2.0"/>
 </category>
 
@@ -8332,18 +8348,6 @@
         <size name="Get" mode="get"/>
     </enum>
 
-    <!-- This should be in es_EXT, but this file is included first and
-         the alias doesn't work otherwise. -->
-    <function name="PrimitiveBoundingBox" es2="3.2" desktop="false">
-        <param name="minX" type="GLfloat"/>
-        <param name="minY" type="GLfloat"/>
-        <param name="minZ" type="GLfloat"/>
-        <param name="minW" type="GLfloat"/>
-        <param name="maxX" type="GLfloat"/>
-        <param name="maxY" type="GLfloat"/>
-        <param name="maxZ" type="GLfloat"/>
-        <param name="maxW" type="GLfloat"/>
-    </function>
     <function name="PrimitiveBoundingBoxARB" alias="PrimitiveBoundingBox">
         <param name="minX" type="GLfloat"/>
         <param name="minY" type="GLfloat"/>

commit 5798d602e0d7604cef6b9772ce794b6c409ca011
Author: Samuel Pitoiset <samuel.pitoi...@gmail.com>
Date:   Thu Oct 20 00:41:00 2016 +0200

    nvc0: do not break 3D state by pushing MS coordinates on Fermi
    
    Long story short, 3D and CP are aliased on Fermi and initializing
    compute after pushing the MS sample coordinate offsets seems to
    corrupt 3D state for weird reasons.
    
    I still don't have the faintest clue what is going on, but
    this seems to only affect Fermi generation. A possible fix
    could be to use two different channels, one for 3D and one
    for CP.
    
    This fixes a bunch of regressions pinpointed by piglit.
    
    Fixes: "nvc0: fix up image support for allowing multiple samples"
    Cc: "13.0" <mesa-sta...@lists.freedesktop.org>
    Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
    Reviewed-by: Ilia Mirkin <imir...@alum.mit.edu>
    (cherry picked from commit 42273edf79c2500957f51690499aa3405cc689db)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index cfa2f76..2cac3c7 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -1002,49 +1002,6 @@ nvc0_screen_create(struct nouveau_device *dev)
 
    PUSH_REFN (push, screen->uniform_bo, NV_VRAM_DOMAIN(&screen->base) | 
NOUVEAU_BO_WR);
 
-   for (i = 0; i < 5; ++i) {
-      /* TIC and TSC entries for each unit (nve4+ only) */
-      /* auxiliary constants (6 user clip planes, base instance id) */
-      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
-      PUSH_DATA (push, NVC0_CB_AUX_SIZE);
-      PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
-      PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
-      BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
-      PUSH_DATA (push, (15 << 4) | 1);
-      if (screen->eng3d->oclass >= NVE4_3D_CLASS) {
-         unsigned j;
-         BEGIN_1IC0(push, NVC0_3D(CB_POS), 9);
-         PUSH_DATA (push, NVC0_CB_AUX_UNK_INFO);
-         for (j = 0; j < 8; ++j)
-            PUSH_DATA(push, j);
-      } else {
-         BEGIN_NVC0(push, NVC0_3D(TEX_LIMITS(i)), 1);
-         PUSH_DATA (push, 0x54);
-      }
-
-      /* MS sample coordinate offsets: these do not work with _ALT modes ! */
-      BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * 8);
-      PUSH_DATA (push, NVC0_CB_AUX_MS_INFO);
-      PUSH_DATA (push, 0); /* 0 */
-      PUSH_DATA (push, 0);
-      PUSH_DATA (push, 1); /* 1 */
-      PUSH_DATA (push, 0);
-      PUSH_DATA (push, 0); /* 2 */
-      PUSH_DATA (push, 1);
-      PUSH_DATA (push, 1); /* 3 */
-      PUSH_DATA (push, 1);
-      PUSH_DATA (push, 2); /* 4 */
-      PUSH_DATA (push, 0);
-      PUSH_DATA (push, 3); /* 5 */
-      PUSH_DATA (push, 0);
-      PUSH_DATA (push, 2); /* 6 */
-      PUSH_DATA (push, 1);
-      PUSH_DATA (push, 3); /* 7 */
-      PUSH_DATA (push, 1);
-   }
-   BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1);
-   PUSH_DATA (push, 0);
-
    /* return { 0.0, 0.0, 0.0, 0.0 } for out-of-bounds vtxbuf access */
    BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
    PUSH_DATA (push, 256);
@@ -1214,6 +1171,50 @@ nvc0_screen_create(struct nouveau_device *dev)
    if (nvc0_screen_init_compute(screen))
       goto fail;
 
+   /* XXX: Compute and 3D are somehow aliased on Fermi. */
+   for (i = 0; i < 5; ++i) {
+      /* TIC and TSC entries for each unit (nve4+ only) */
+      /* auxiliary constants (6 user clip planes, base instance id) */
+      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+      PUSH_DATA (push, NVC0_CB_AUX_SIZE);
+      PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
+      PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
+      BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
+      PUSH_DATA (push, (15 << 4) | 1);
+      if (screen->eng3d->oclass >= NVE4_3D_CLASS) {
+         unsigned j;
+         BEGIN_1IC0(push, NVC0_3D(CB_POS), 9);
+         PUSH_DATA (push, NVC0_CB_AUX_UNK_INFO);
+         for (j = 0; j < 8; ++j)
+            PUSH_DATA(push, j);
+      } else {
+         BEGIN_NVC0(push, NVC0_3D(TEX_LIMITS(i)), 1);
+         PUSH_DATA (push, 0x54);
+      }
+
+      /* MS sample coordinate offsets: these do not work with _ALT modes ! */
+      BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * 8);
+      PUSH_DATA (push, NVC0_CB_AUX_MS_INFO);
+      PUSH_DATA (push, 0); /* 0 */
+      PUSH_DATA (push, 0);
+      PUSH_DATA (push, 1); /* 1 */
+      PUSH_DATA (push, 0);
+      PUSH_DATA (push, 0); /* 2 */
+      PUSH_DATA (push, 1);
+      PUSH_DATA (push, 1); /* 3 */
+      PUSH_DATA (push, 1);
+      PUSH_DATA (push, 2); /* 4 */
+      PUSH_DATA (push, 0);
+      PUSH_DATA (push, 3); /* 5 */
+      PUSH_DATA (push, 0);
+      PUSH_DATA (push, 2); /* 6 */
+      PUSH_DATA (push, 1);
+      PUSH_DATA (push, 3); /* 7 */
+      PUSH_DATA (push, 1);
+   }
+   BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1);
+   PUSH_DATA (push, 0);
+
    PUSH_KICK (push);
 
    screen->tic.entries = CALLOC(4096, sizeof(void *));

commit 039d1e6f11c69dfe5d380e1cf568ab579507ef07
Author: Nicolai Hähnle <nicolai.haeh...@amd.com>
Date:   Tue Oct 18 18:40:38 2016 +0200

    radeonsi: fix 64-bit loads from LDS
    
    Fixes spec/arb_tessellation_shader/execution/dvec[23]-vs-tcs-tes, among
    others.
    
    Cc: "12.0 13.0" <mesa-sta...@lists.freedesktop.org>
    Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl>
    Reviewed-by: Marek Olšák <marek.ol...@amd.com>
    (cherry picked from commit 4a2dbfff05f7be271c2aa72e783e24b31906db51)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 6a42a8f..0ee760f 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -911,7 +911,7 @@ static LLVMValueRef lds_load(struct lp_build_tgsi_context 
*bld_base,
        if (tgsi_type_is_64bit(type)) {
                LLVMValueRef value2;
                dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
-                                      lp_build_const_int32(gallivm, swizzle + 
1));
+                                      lp_build_const_int32(gallivm, 1));
                value2 = build_indexed_load(ctx, ctx->lds, dw_addr, false);
                return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
        }

commit ba6efd48c3ab36ea532d0b1b2f5493b6d9b1937b
Author: Nicolai Hähnle <nicolai.haeh...@amd.com>
Date:   Wed Oct 19 18:14:48 2016 +0200

    st/mesa: only set primitive_restart when the restart index is in range
    
    Even when enabled, primitive restart has no effect when the restart index
    is larger than the representable values in the index buffer.
    
    Fixes GL45-CTS.gtf31.GL3Tests.primitive_restart.primitive_restart_upconvert
    for radeonsi VI.
    
    v2: add an explanatory comment
    
    Cc: "12.0 13.0" <mesa-sta...@lists.freedesktop.org>
    Reviewed-by: Marek Olšák <marek.ol...@amd.com> (v1)
    (cherry picked from commit bfa50f88cea2ba9f4dc4b825828d2c8f02866fc3)

diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index f4af23d..5dcaff0 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -205,8 +205,19 @@ st_draw_vbo(struct gl_context *ctx,
       /* The VBO module handles restart for the non-indexed GLDrawArrays
        * so we only set these fields for indexed drawing:
        */
-      info.primitive_restart = ctx->Array._PrimitiveRestart;
-      info.restart_index = _mesa_primitive_restart_index(ctx, ib->type);
+      if (ctx->Array._PrimitiveRestart) {
+         info.restart_index = _mesa_primitive_restart_index(ctx, ib->type);
+
+         /* Enable primitive restart only when the restart index can have an
+          * effect. This is required for correctness in radeonsi VI support,
+          * though other hardware may also benefit from taking a faster,
+          * non-restart path when possible.
+          */
+         if ((ibuffer.index_size >= 4) ||
+             (ibuffer.index_size >= 2 && info.restart_index <= 0xffff) ||
+             (info.restart_index <= 0xff))
+            info.primitive_restart = true;
+      }
    }
    else {
       /* Transform feedback drawing is always non-indexed. */

commit 13f685cf11847ceda63d726031b56a7411f1a087
Author: Nicolai Hähnle <nicolai.haeh...@amd.com>
Date:   Tue Oct 18 17:35:45 2016 +0200

    st/glsl_to_tgsi: sort input and output decls by TGSI index
    
    Fixes a regression introduced by commit 777dcf81b.
    
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98307
    Reviewed-by: Marek Olšák <marek.ol...@amd.com>
    Cc: 13.0 <mesa-sta...@lists.freedesktop.org>
    (cherry picked from commit 3d9b57e49304f9690c4a2b50afb8cdd4d253a1df)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 854decc..f376462 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -55,6 +55,7 @@
 #include "st_glsl_types.h"
 #include "st_nir.h"
 
+#include <algorithm>
 
 #define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) |    \
                            (1 << PROGRAM_CONSTANT) |     \
@@ -5873,6 +5874,29 @@ emit_compute_block_size(const struct gl_program *program,
                        cp->LocalSize[2]);
 }
 
+struct sort_inout_decls {
+   bool operator()(const struct inout_decl &a, const struct inout_decl &b) 
const {
+      return mapping[a.mesa_index] < mapping[b.mesa_index];
+   }
+
+   const GLuint *mapping;
+};
+
+/* Sort the given array of decls by the corresponding slot (TGSI file index).
+ *
+ * This is for the benefit of older drivers which are broken when the
+ * declarations aren't sorted in this way.
+ */
+static void
+sort_inout_decls_by_slot(struct inout_decl *decls,
+                         unsigned count,
+                         const GLuint mapping[])
+{
+   sort_inout_decls sorter;
+   sorter.mapping = mapping;
+   std::sort(decls, decls + count, sorter);
+}
+
 /**
  * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
  * \param program  the program to translate
@@ -5945,6 +5969,8 @@ st_translate_program(
    case PIPE_SHADER_GEOMETRY:
    case PIPE_SHADER_TESS_EVAL:
    case PIPE_SHADER_TESS_CTRL:
+      sort_inout_decls_by_slot(program->inputs, program->num_inputs, 
inputMapping);
+
       for (i = 0; i < program->num_inputs; ++i) {
          struct inout_decl *decl = &program->inputs[i];
          unsigned slot = inputMapping[decl->mesa_index];
@@ -5997,6 +6023,8 @@ st_translate_program(
    case PIPE_SHADER_TESS_EVAL:
    case PIPE_SHADER_TESS_CTRL:
    case PIPE_SHADER_VERTEX:
+      sort_inout_decls_by_slot(program->outputs, program->num_outputs, 
outputMapping);
+
       for (i = 0; i < program->num_outputs; ++i) {
          struct inout_decl *decl = &program->outputs[i];
          unsigned slot = outputMapping[decl->mesa_index];

commit 8f807e914f8c7f30f8d57e1829d4cb0bd8836a21
Author: Nicolai Hähnle <nicolai.haeh...@amd.com>
Date:   Sun Oct 16 17:34:33 2016 +0200

    st/glsl_to_tgsi: fix block copies of arrays of structs
    
    Use a full writemask in this case. This is relevant e.g. when a function
    has an inout argument which is an array of structs.
    
    v2: use C-style comment (Timothy Arceri)
    
    Reviewed-by: Marek Olšák <marek.ol...@amd.com> (v1)
    Cc: 13.0 <mesa-sta...@lists.freedesktop.org>
    (cherry picked from commit a1895685f8f341e7facf3c5705bdee99860e3082)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 562587e..854decc 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -2941,10 +2941,12 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
       } else if (ir->write_mask == 0) {
          assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
 
-         if (ir->lhs->type->is_array() || ir->lhs->type->is_matrix()) {
-            unsigned num_elements = ir->lhs->type->without_array()->vector_elements;
+         unsigned num_elements = ir->lhs->type->without_array()->vector_elements;
+
+         if (num_elements) {
             l.writemask = u_bit_consecutive(0, num_elements);
          } else {
+            /* The type is a struct or an array of (array of) structs. */
             l.writemask = WRITEMASK_XYZW;
          }
       } else {

commit 3581e21d5b0c90a3f3858f0b92f7fe58ead95560
Author: Nicolai Hähnle <nicolai.haeh...@amd.com>
Date:   Sun Oct 16 17:33:51 2016 +0200

    st/glsl_to_tgsi: fix block copies of arrays of doubles
    
    Set the type of the left-hand side to the same as the right-hand side,
    so that when the base type is double, the writemask of the MOV instruction
    is properly fixed up.
    
    Reviewed-by: Marek Olšák <marek.ol...@amd.com>
    Cc: 13.0 <mesa-sta...@lists.freedesktop.org>
    (cherry picked from commit ca592af880b71feb8ebbf79f704380d0deb47b33)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 98ebe54..562587e 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -2880,6 +2880,7 @@ glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type *
 
    assert(type->is_scalar() || type->is_vector());
 
+   l->type = type->base_type;
    r->type = type->base_type;
    if (cond) {
       st_src_reg l_src = st_src_reg(*l);

commit 52df379d6b3a3485bf2814f8a0fb54c90b0af6ad
Author: Ilia Mirkin <imir...@alum.mit.edu>
Date:   Wed Oct 19 01:20:03 2016 -0400

    nv50/ir: process texture offset sources as regular sources
    
    With ARB_gpu_shader5, texture offsets can be any source, including TEMPs
    and IN's. Make sure to process them as regular sources so that we pick
    up masks, etc.
    
    This should fix some CTS tests that feed offsets directly to
    textureGatherOffset, and we were not picking up the input use, thus not
    advertising it in the shader header.
    
    Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
    Reviewed-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
    Tested-by: Dave Airlie <airl...@redhat.com>
    Cc: 12.0 13.0 <mesa-sta...@lists.freedesktop.org>
    (cherry picked from commit cd45d758ff87305ceecca899fe7325779bb6755b)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index db03281..01e5808 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -182,6 +182,7 @@ public:
 
    // mask of used components of source s
    unsigned int srcMask(unsigned int s) const;
+   unsigned int texOffsetMask() const;
 
    SrcRegister getSrc(unsigned int s) const
    {
@@ -234,6 +235,35 @@ private:
    const struct tgsi_full_instruction *insn;
 };
 
+unsigned int Instruction::texOffsetMask() const
+{
+   const struct tgsi_instruction_texture *tex = &insn->Texture;
+   assert(insn->Instruction.Texture);
+
+   switch (tex->Texture) {
+   case TGSI_TEXTURE_BUFFER:
+   case TGSI_TEXTURE_1D:
+   case TGSI_TEXTURE_SHADOW1D:
+   case TGSI_TEXTURE_1D_ARRAY:
+   case TGSI_TEXTURE_SHADOW1D_ARRAY:
+      return 0x1;
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_SHADOW2D:
+   case TGSI_TEXTURE_2D_ARRAY:
+   case TGSI_TEXTURE_SHADOW2D_ARRAY:
+   case TGSI_TEXTURE_RECT:
+   case TGSI_TEXTURE_SHADOWRECT:
+   case TGSI_TEXTURE_2D_MSAA:
+   case TGSI_TEXTURE_2D_ARRAY_MSAA:
+      return 0x3;
+   case TGSI_TEXTURE_3D:
+      return 0x7;
+   default:
+      assert(!"Unexpected texture target");
+      return 0xf;
+   }
+}
+
 unsigned int Instruction::srcMask(unsigned int s) const
 {
    unsigned int mask = insn->Dst[0].Register.WriteMask;
@@ -955,6 +985,9 @@ private:
    int inferSysValDirection(unsigned sn) const;
    bool scanDeclaration(const struct tgsi_full_declaration *);
    bool scanInstruction(const struct tgsi_full_instruction *);
+   void scanInstructionSrc(const Instruction& insn,
+                           const Instruction::SrcRegister& src,
+                           unsigned mask);
    void scanProperty(const struct tgsi_full_property *);
    void scanImmediate(const struct tgsi_full_immediate *);
 
@@ -1364,6 +1397,61 @@ inline bool Source::isEdgeFlagPassthrough(const Instruction& insn) const
       insn.getSrc(0).getFile() == TGSI_FILE_INPUT;
 }
 
+void Source::scanInstructionSrc(const Instruction& insn,
+                                const Instruction::SrcRegister& src,
+                                unsigned mask)
+{
+   if (src.getFile() == TGSI_FILE_TEMPORARY) {
+      if (src.isIndirect(0))
+         indirectTempArrays.insert(src.getArrayId());
+   } else
+   if (src.getFile() == TGSI_FILE_BUFFER ||
+       src.getFile() == TGSI_FILE_IMAGE ||
+       (src.getFile() == TGSI_FILE_MEMORY &&
+        memoryFiles[src.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {
+      info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
+         0x1 : 0x2;
+   } else
+   if (src.getFile() == TGSI_FILE_OUTPUT) {
+      if (src.isIndirect(0)) {
+         // We don't know which one is accessed, just mark everything for
+         // reading. This is an extremely unlikely occurrence.
+         for (unsigned i = 0; i < info->numOutputs; ++i)
+            info->out[i].oread = 1;
+      } else {
+         info->out[src.getIndex(0)].oread = 1;
+      }
+   }
+   if (src.getFile() != TGSI_FILE_INPUT)
+      return;
+
+   if (src.isIndirect(0)) {
+      for (unsigned i = 0; i < info->numInputs; ++i)
+         info->in[i].mask = 0xf;
+   } else {
+      const int i = src.getIndex(0);
+      for (unsigned c = 0; c < 4; ++c) {
+         if (!(mask & (1 << c)))
+            continue;
+         int k = src.getSwizzle(c);
+         if (k <= TGSI_SWIZZLE_W)
+            info->in[i].mask |= 1 << k;
+      }
+      switch (info->in[i].sn) {
+      case TGSI_SEMANTIC_PSIZE:
+      case TGSI_SEMANTIC_PRIMID:
+      case TGSI_SEMANTIC_FOG:
+         info->in[i].mask &= 0x1;
+         break;
+      case TGSI_SEMANTIC_PCOORD:
+         info->in[i].mask &= 0x3;
+         break;
+      default:
+         break;
+      }
+   }
+}
+
 bool Source::scanInstruction(const struct tgsi_full_instruction *inst)

Reply via email to