[Mesa-dev] [Bug 107275] NIR segfaults after spirv-opt

2018-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107275

--- Comment #1 from mais...@archlinux.us ---
n->next is NULL here.

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] util/disk_cache: Fix disk_cache_get_function_timestamp with disabled cache.

2018-07-18 Thread Bas Nieuwenhuizen
radv always needs it, so just check the header instead. Also
do not declare the function if the variable is not set, so we
get a nice compile error instead of failing to open a device
at runtime.

Fixes: b87ef9e606a "util: fix MSVC build issue in disk_cache.h"
---
 configure.ac  | 1 +
 meson.build   | 2 +-
 src/util/disk_cache.h | 8 +++-
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/configure.ac b/configure.ac
index c946454cfae..ffb8424a07b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -872,6 +872,7 @@ AC_HEADER_MAJOR
 AC_CHECK_HEADER([xlocale.h], [DEFINES="$DEFINES -DHAVE_XLOCALE_H"])
 AC_CHECK_HEADER([sys/sysctl.h], [DEFINES="$DEFINES -DHAVE_SYS_SYSCTL_H"])
 AC_CHECK_HEADERS([endian.h])
+AC_CHECK_HEADER([dlfcn.h], [DEFINES="$DEFINES -DHAVE_DLFCN_H"])
 AC_CHECK_FUNC([strtof], [DEFINES="$DEFINES -DHAVE_STRTOF"])
 AC_CHECK_FUNC([mkostemp], [DEFINES="$DEFINES -DHAVE_MKOSTEMP"])
 AC_CHECK_FUNC([timespec_get], [DEFINES="$DEFINES -DHAVE_TIMESPEC_GET"])
diff --git a/meson.build b/meson.build
index e05645cbf39..86a4a4ce6da 100644
--- a/meson.build
+++ b/meson.build
@@ -960,7 +960,7 @@ elif cc.has_header_symbol('sys/mkdev.h', 'major')
   pre_args += '-DMAJOR_IN_MKDEV'
 endif
 
-foreach h : ['xlocale.h', 'sys/sysctl.h', 'linux/futex.h', 'endian.h']
+foreach h : ['xlocale.h', 'sys/sysctl.h', 'linux/futex.h', 'endian.h', 
'dlfcn.h']
   if cc.compiles('#include <@0@>'.format(h), name : '@0@'.format(h))
 pre_args += '-DHAVE_@0@'.format(h.to_upper().underscorify())
   endif
diff --git a/src/util/disk_cache.h b/src/util/disk_cache.h
index f84840fb5ca..50bd9f41ac4 100644
--- a/src/util/disk_cache.h
+++ b/src/util/disk_cache.h
@@ -24,7 +24,7 @@
 #ifndef DISK_CACHE_H
 #define DISK_CACHE_H
 
-#ifdef ENABLE_SHADER_CACHE
+#ifdef HAVE_DLFCN_H
 #include <dlfcn.h>
 #endif
 #include 
@@ -88,10 +88,10 @@ disk_cache_format_hex_id(char *buf, const uint8_t *hex_id, 
unsigned size)
return buf;
 }
 
+#ifdef HAVE_DLFCN_H
 static inline bool
 disk_cache_get_function_timestamp(void *ptr, uint32_t* timestamp)
 {
-#ifdef ENABLE_SHADER_CACHE
Dl_info info;
struct stat st;
if (!dladdr(ptr, &info) || !info.dli_fname) {
@@ -102,10 +102,8 @@ disk_cache_get_function_timestamp(void *ptr, uint32_t* 
timestamp)
}
*timestamp = st.st_mtime;
return true;
-#else
-   return false;
-#endif
 }
+#endif
 
 /* Provide inlined stub functions if the shader cache is disabled. */
 
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v6] i965: Fix ETC2/EAC GetCompressed* functions on Gen7 GPUs

2018-07-18 Thread Eleni Maria Stea
Gen 7 GPUs store the compressed EAC/ETC2 images in other non-compressed
formats that can render. When GetCompressed* functions are called, the
pixels are returned in the non-compressed format that is used for the
rendering.

With this patch we store both the compressed and non-compressed versions
of the image, so that both rendering commands and GetCompressed*
commands work.

Also, the assertions for GL_MAP_WRITE_BIT and GL_MAP_INVALIDATE_RANGE_BIT
in intel_miptree_map_etc function have been removed because when the
miptree is mapped for reading (for example from a GetCompressed*
function) the GL_MAP_WRITE_BIT won't be set (and shouldn't be set).

Fixes: the following test in CTS for gen7:
KHR-GL45.direct_state_access.textures_compressed_subimage test

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104272
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=81843

v2: fixes issues:
   a) initialized uninitialized variables (Juan A. Suarez, Andres Gomez)
   b) fixed race condition where mt and cmt were mapped at the same time
   c) fixed indentation issues (Andres Gomez)
v3: adds bugzilla bug with id: 104272
v4: adds bugzilla bug with id: 81843
v5: replaced the flags with a bitfield, refactoring (Kenneth Graunke)
v6: renamed the r8stencil_mt secondary miptree that is now part of the
intel_miptree_struct to shadow_mt and used it to store the compressed
miptree (Nanley Chery)
---
 .../drivers/dri/i965/brw_wm_surface_state.c   |  8 +-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 27 +++---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 14 ++-
 src/mesa/drivers/dri/i965/intel_tex.c | 90 ++-
 src/mesa/drivers/dri/i965/intel_tex_image.c   | 46 +-
 src/mesa/main/texstore.c  | 62 -
 src/mesa/main/texstore.h  |  8 ++
 7 files changed, 209 insertions(+), 46 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 9397b637c7..2097fabaeb 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -563,15 +563,15 @@ static void brw_update_texture_surface(struct gl_context 
*ctx,
 
   if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) 
{
  if (devinfo->gen <= 7) {
-assert(mt->r8stencil_mt && 
!mt->stencil_mt->r8stencil_needs_update);
-mt = mt->r8stencil_mt;
+assert(mt->shadow_mt && !mt->stencil_mt->shadow_needs_update);
+mt = mt->shadow_mt;
  } else {
 mt = mt->stencil_mt;
  }
  format = ISL_FORMAT_R8_UINT;
   } else if (devinfo->gen <= 7 && mt->format == MESA_FORMAT_S_UINT8) {
- assert(mt->r8stencil_mt && !mt->r8stencil_needs_update);
- mt = mt->r8stencil_mt;
+ assert(mt->shadow_mt && !mt->shadow_needs_update);
+ mt = mt->shadow_mt;
  format = ISL_FORMAT_R8_UINT;
   }
 
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 7b1f0896ae..6d07fede52 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -719,8 +719,12 @@ miptree_create(struct brw_context *brw,
   }
}
 
-   mt->etc_format = (_mesa_is_format_color_format(format) && mt_fmt != format) 
?
-format : MESA_FORMAT_NONE;
+   if (!(flags & MIPTREE_CREATE_ETC)) {
+  mt->etc_format = (_mesa_is_format_color_format(format) &&
+mt_fmt != format) ? format : MESA_FORMAT_NONE;
+   } else {
+  mt->etc_format = MESA_FORMAT_NONE;
+   }
 
if (!(flags & MIPTREE_CREATE_NO_AUX))
   intel_miptree_choose_aux_usage(brw, mt);
@@ -1214,7 +1218,7 @@ intel_miptree_release(struct intel_mipmap_tree **mt)
 
   brw_bo_unreference((*mt)->bo);
   intel_miptree_release(&(*mt)->stencil_mt);
-  intel_miptree_release(&(*mt)->r8stencil_mt);
+  intel_miptree_release(&(*mt)->shadow_mt);
   intel_miptree_aux_buffer_free((*mt)->aux_buf);
   free_aux_state_map((*mt)->aux_state);
 
@@ -2426,7 +2430,7 @@ intel_miptree_finish_write(struct brw_context *brw,
switch (mt->aux_usage) {
case ISL_AUX_USAGE_NONE:
   if (mt->format == MESA_FORMAT_S_UINT8 && devinfo->gen <= 7)
- mt->r8stencil_needs_update = true;
+ mt->shadow_needs_update = true;
   break;
 
case ISL_AUX_USAGE_MCS:
@@ -2919,9 +2923,9 @@ intel_update_r8stencil(struct brw_context *brw,
 
assert(src->surf.size > 0);
 
-   if (!mt->r8stencil_mt) {
+   if (!mt->shadow_mt) {
   assert(devinfo->gen > 6); /* Handle MIPTREE_LAYOUT_GEN6_HIZ_STENCIL */
-  mt->r8stencil_mt = make_surface(
+  mt->shadow_mt = make_surface(
 brw,
 src->target,
 MESA_FORMAT_R_UINT8,
@@ -2935,13 +2939,13 @@ intel_update_r8stencil(struct brw_con

[Mesa-dev] [Bug 107275] NIR segfaults after spirv-opt

2018-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107275

--- Comment #2 from Bas Nieuwenhuizen  ---
Does 

https://patchwork.freedesktop.org/patch/238923/

fix this?

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 0/4] Android kms_swrast support

2018-07-18 Thread Rob Herring
On Tue, Jul 17, 2018 at 4:33 AM Robert Foss  wrote:
>
> This series implements kms_swrast support for the Android
> platform. And since having to debug a null pointer dereference,
> simplify that process for the next guy.

So is this working for you now?

> As it stands now, any kernel must have the following ioctls flagged with
> DRM_RENDER_ALLOW[1], which isn't the case in the mainline kernel.
>
> DRM_IOCTL_MODE_CREATE_DUMB
> DRM_IOCTL_MODE_MAP_DUMB

Ah, sorry. I should have mentioned this. We have discussed this issue
in the past, but to no further conclusion.

But as I recall, I thought the issue was also allowing import and
export of dumb buffers?

> While it would be possible to open a non-render node to pass the
> authentication check, this would still cause authentication issues
> when the /dev/dri/cardX node needs to be opened as master by both mesa
> and the compositor.

Right. We've pretty much stripped the support that was there out. Plus
I don't think it will work with Treble.

> I don't know how acceptable this series is for upstreaming, while relying on
> a non-mainline kernel. I think the policy is to not accept changes that
> don't have both a user and kernel space solution in place.
>
> Like I noted yesterday[2] the alternative to using dumb buffers and having
> authentication issues is using VGEM, which is new territory to me, and it 
> would
> take me a little bit of time to figure exactly how it fits into the current
> kms_swrast approach.
> Input, like noted before, is very much welcome.

I'm very much in favor of the former approach. VGEM seems like an
overly complicated solution when there's a very simple solution.

Rob
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107276] radv: OpBitfieldUExtract returns incorrect result when count is zero

2018-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107276

Bug ID: 107276
   Summary: radv: OpBitfieldUExtract returns incorrect result when
count is zero
   Product: Mesa
   Version: git
  Hardware: Other
OS: All
Status: NEW
  Severity: normal
  Priority: medium
 Component: Drivers/Vulkan/radeon
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: philip.rebo...@tu-dortmund.de
QA Contact: mesa-dev@lists.freedesktop.org

Created attachment 140691
  --> https://bugs.freedesktop.org/attachment.cgi?id=140691&action=edit
One of the affected shaders (GLSL version)

Hello,

when building Mesa with LLVM 7.0-svn, OpBitfieldUExtract sometimes returns the
original source operand instead of zero when the bit count is zero. With LLVM
6.0.1, it works as expected.

This causes the following bug in DXVK:
https://github.com/doitsujin/dxvk/issues/497

Here's a renderdoc capture (recorded on Polaris) showing the issue:
https://mega.nz/#!8W5nDTxT!P7PI4BZ_gpmZDIoh1Iziq0PHpLp448hiUqJN9iEhukg

The attached shader is a GLSL version of the vertex shader used in the
bookmarked draw call. I added some comments around line 220 for observed vs.
expected behaviour of the shader code.

I tried to reproduce the problem outside of DXVK, but had no luck so far: A
small demo app testing the GLSL bitfieldExtract function in a compute shader
always returns the correct values.

Handling the count = 0 case explicitly in the NIR->LLVM translation fixes the
issue in Far Cry 5.

Regards,
- Philip

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107276] radv: OpBitfieldUExtract returns incorrect result when count is zero

2018-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107276

--- Comment #1 from Philip Rebohle  ---
Created attachment 140692
  --> https://bugs.freedesktop.org/attachment.cgi?id=140692&action=edit
Workaround that fixes the rendering issue

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107276] radv: OpBitfieldUExtract returns incorrect result when count is zero

2018-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107276

--- Comment #2 from Philip Rebohle  ---
Created attachment 140693
  --> https://bugs.freedesktop.org/attachment.cgi?id=140693&action=edit
Final image (correct)

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107276] radv: OpBitfieldUExtract returns incorrect result when count is zero

2018-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107276

--- Comment #3 from Philip Rebohle  ---
Created attachment 140694
  --> https://bugs.freedesktop.org/attachment.cgi?id=140694&action=edit
Final image (broken)

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107276] radv: OpBitfieldUExtract returns incorrect result when count is zero

2018-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107276

--- Comment #4 from Philip Rebohle  ---
Created attachment 140695
  --> https://bugs.freedesktop.org/attachment.cgi?id=140695&action=edit
Demo app that does not trigger the problem

I used this small sample app in an attempt to reproduce the problem, but for me
it always returns 0 for the count=0 case for some reason.

Please note that the queue family and memory type indices are hardcoded.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] winsys/amdgpu: fix VDPAU interop by having one amdgpu_winsys_bo per BO

2018-07-18 Thread Leo Liu

The series is

Acked-by: Leo Liu 


On 07/16/2018 06:03 PM, Leo Liu wrote:



On 2018-07-16 04:01 PM, Marek Olšák wrote:

From: Marek Olšák 

Dependencies between rings are inserted correctly if a buffer is
represented by only one unique amdgpu_winsys_bo instance.
Use a hash table keyed by amdgpu_bo_handle to have exactly one
amdgpu_winsys_bo per amdgpu_bo_handle.

The series is:
Tested-by: Leo Liu 



v2: return offset and stride properly
---
  src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 36 ---
  src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c |  5 +++
  src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h |  5 +++
  3 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c

index d9192c209e2..80563d3df98 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -21,20 +21,21 @@
   * USE OR OTHER DEALINGS IN THE SOFTWARE.
   *
   * The above copyright notice and this permission notice (including 
the
   * next paragraph) shall be included in all copies or substantial 
portions

   * of the Software.
   */
    #include "amdgpu_cs.h"
    #include "util/os_time.h"
+#include "util/u_hash_table.h"
  #include "state_tracker/drm_driver.h"
  #include 
  #include 
  #include 
  #include 
    #ifndef AMDGPU_GEM_CREATE_VM_ALWAYS_VALID
  #define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6)
  #endif
  @@ -172,20 +173,24 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf)
   assert(bo->bo && "must not be called for slab entries");
   if (ws->debug_all_bos) {
    simple_mtx_lock(&ws->global_bo_list_lock);
    LIST_DEL(&bo->u.real.global_list_item);
    ws->num_buffers--;
    simple_mtx_unlock(&ws->global_bo_list_lock);
 }
  +   simple_mtx_lock(&ws->bo_export_table_lock);
+   util_hash_table_remove(ws->bo_export_table, bo->bo);
+   simple_mtx_unlock(&ws->bo_export_table_lock);
+
 amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, 
AMDGPU_VA_OP_UNMAP);

 amdgpu_va_range_free(bo->u.real.va_handle);
 amdgpu_bo_free(bo->bo);
   amdgpu_bo_remove_fences(bo);
   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
    ws->allocated_vram -= align64(bo->base.size, 
ws->info.gart_page_size);

 else if (bo->initial_domain & RADEON_DOMAIN_GTT)
    ws->allocated_gtt -= align64(bo->base.size, 
ws->info.gart_page_size);
@@ -1278,24 +1283,41 @@ static struct pb_buffer 
*amdgpu_bo_from_handle(struct radeon_winsys *rws,

 case WINSYS_HANDLE_TYPE_SHARED:
    type = amdgpu_bo_handle_type_gem_flink_name;
    break;
 case WINSYS_HANDLE_TYPE_FD:
    type = amdgpu_bo_handle_type_dma_buf_fd;
    break;
 default:
    return NULL;
 }
  +   if (stride)
+  *stride = whandle->stride;
+   if (offset)
+  *offset = whandle->offset;
+
 r = amdgpu_bo_import(ws->dev, type, whandle->handle, &result);
 if (r)
    return NULL;
  +   simple_mtx_lock(&ws->bo_export_table_lock);
+   bo = util_hash_table_get(ws->bo_export_table, result.buf_handle);
+
+   /* If the amdgpu_winsys_bo instance already exists, bump the 
reference

+    * counter and return it.
+    */
+   if (bo) {
+  p_atomic_inc(&bo->base.reference.count);
+  simple_mtx_unlock(&ws->bo_export_table_lock);
+  return &bo->base;
+   }
+
 /* Get initial domains. */
 r = amdgpu_bo_query_info(result.buf_handle, &info);
 if (r)
    goto error;
   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
   result.alloc_size, 1 << 20, 0, &va, 
&va_handle,

   AMDGPU_VA_RANGE_HIGH);
 if (r)
    goto error;
@@ -1319,49 +1341,49 @@ static struct pb_buffer 
*amdgpu_bo_from_handle(struct radeon_winsys *rws,

 bo->bo = result.buf_handle;
 bo->base.size = result.alloc_size;
 bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
 bo->ws = ws;
 bo->va = va;
 bo->u.real.va_handle = va_handle;
 bo->initial_domain = initial;
 bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
 bo->is_shared = true;
  -   if (stride)
-  *stride = whandle->stride;
-   if (offset)
-  *offset = whandle->offset;
-
 if (bo->initial_domain & RADEON_DOMAIN_VRAM)
    ws->allocated_vram += align64(bo->base.size, 
ws->info.gart_page_size);

 else if (bo->initial_domain & RADEON_DOMAIN_GTT)
    ws->allocated_gtt += align64(bo->base.size, 
ws->info.gart_page_size);

   amdgpu_add_buffer_to_global_list(bo);
  +   util_hash_table_set(ws->bo_export_table, bo->bo, bo);
+   simple_mtx_unlock(&ws->bo_export_table_lock);
+
 return &bo->base;
    error:
+   simple_mtx_unlock(&ws->bo_export_table_lock);
 if (bo)
    FREE(bo);
 if (va_handle)
    amdgpu_va_range_free(va_handle);
 amdgpu_bo_free(result.buf_handle);
 return NULL;
  }
    static bool amdgpu_bo_get_handle(struct pb_buffer *buffer,
   

[Mesa-dev] [Bug 107275] NIR segfaults after spirv-opt

2018-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107275

--- Comment #3 from mais...@archlinux.us ---
Yes, doesn't segfault now. Seems to work as expected.

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] radv: make use of radv_subpass_barrier() when resolving subpasses

2018-07-18 Thread Samuel Pitoiset
The goal is to use radv_barrier()/radv_subpass_barrier() as
much as possible for further optimizations.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_cmd_buffer.c  |  3 ++-
 src/amd/vulkan/radv_meta_resolve_cs.c | 16 +---
 src/amd/vulkan/radv_meta_resolve_fs.c | 13 ++---
 src/amd/vulkan/radv_private.h |  3 +++
 4 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 041ebf0ca3..b67f0ffdbe 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -2048,7 +2048,8 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
return flush_bits;
 }
 
-static void radv_subpass_barrier(struct radv_cmd_buffer *cmd_buffer, const 
struct radv_subpass_barrier *barrier)
+void radv_subpass_barrier(struct radv_cmd_buffer *cmd_buffer,
+ const struct radv_subpass_barrier *barrier)
 {
cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, 
barrier->src_access_mask,
  NULL);
diff --git a/src/amd/vulkan/radv_meta_resolve_cs.c 
b/src/amd/vulkan/radv_meta_resolve_cs.c
index daf11e0576..ad02594614 100644
--- a/src/amd/vulkan/radv_meta_resolve_cs.c
+++ b/src/amd/vulkan/radv_meta_resolve_cs.c
@@ -473,6 +473,8 @@ radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer 
*cmd_buffer)
struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
const struct radv_subpass *subpass = cmd_buffer->state.subpass;
struct radv_meta_saved_state saved_state;
+   struct radv_subpass_barrier barrier;
+
/* FINISHME(perf): Skip clears for resolve attachments.
 *
 * From the Vulkan 1.0 spec:
@@ -485,13 +487,13 @@ radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer 
*cmd_buffer)
if (!subpass->has_resolve)
return;
 
-   /* Resolves happen before the end-of-subpass barriers get executed,
-* so we have to make the attachment shader-readable */
-   cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
-   RADV_CMD_FLAG_FLUSH_AND_INV_CB |
-   RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
-   RADV_CMD_FLAG_INV_GLOBAL_L2 |
-   RADV_CMD_FLAG_INV_VMEM_L1;
+   /* Resolves happen before the end-of-subpass barriers get executed, so
+* we have to make the attachment shader-readable.
+*/
+   barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+   barrier.src_access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+   barrier.dst_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
+   radv_subpass_barrier(cmd_buffer, &barrier);
 
radv_decompress_resolve_subpass_src(cmd_buffer);
 
diff --git a/src/amd/vulkan/radv_meta_resolve_fs.c 
b/src/amd/vulkan/radv_meta_resolve_fs.c
index 5f4f241893..0e4957b163 100644
--- a/src/amd/vulkan/radv_meta_resolve_fs.c
+++ b/src/amd/vulkan/radv_meta_resolve_fs.c
@@ -580,6 +580,7 @@ radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer 
*cmd_buffer)
struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
const struct radv_subpass *subpass = cmd_buffer->state.subpass;
struct radv_meta_saved_state saved_state;
+   struct radv_subpass_barrier barrier;
 
/* FINISHME(perf): Skip clears for resolve attachments.
 *
@@ -600,13 +601,11 @@ radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer 
*cmd_buffer)
 
/* Resolves happen before the end-of-subpass barriers get executed,
 * so we have to make the attachment shader-readable */
-   cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
-   RADV_CMD_FLAG_FLUSH_AND_INV_CB |
-   RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
-   RADV_CMD_FLAG_FLUSH_AND_INV_DB |
-   RADV_CMD_FLAG_FLUSH_AND_INV_DB_META |
-   RADV_CMD_FLAG_INV_GLOBAL_L2 |
-   RADV_CMD_FLAG_INV_VMEM_L1;
+   barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+   barrier.src_access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+   barrier.dst_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
+   radv_subpass_barrier(cmd_buffer, &barrier);
 
radv_decompress_resolve_subpass_src(cmd_buffer);
 
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 338cb07b3e..f17efeeccd 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1713,6 +1713,9 @@ struct radv_subpass_barrier {
VkAccessFlagsdst_access_m

[Mesa-dev] [PATCH 4/4] radv: simplify a condition in radv_src_access_flush()

2018-07-18 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_cmd_buffer.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index b67f0ffdbe..4a6b5fdcd9 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1970,10 +1970,12 @@ radv_src_access_flush(struct radv_cmd_buffer 
*cmd_buffer,
enum radv_cmd_flush_bits flush_bits = 0;
uint32_t b;
 
-   if (image && !radv_image_has_CB_metadata(image))
-   flush_CB_meta = false;
-   if (image && !radv_image_has_htile(image))
-   flush_DB_meta = false;
+   if (image) {
+   if (!radv_image_has_CB_metadata(image))
+   flush_CB_meta = false;
+   if (!radv_image_has_htile(image))
+   flush_DB_meta = false;
+   }
 
for_each_bit(b, src_flags) {
switch ((VkAccessFlagBits)(1 << b)) {
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] radv: don't check if a subpass has resolve attachments twice

2018-07-18 Thread Samuel Pitoiset
We already check that in radv_cmd_buffer_resolve_subpass().

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_meta_resolve_cs.c | 12 
 src/amd/vulkan/radv_meta_resolve_fs.c | 12 
 2 files changed, 24 deletions(-)

diff --git a/src/amd/vulkan/radv_meta_resolve_cs.c 
b/src/amd/vulkan/radv_meta_resolve_cs.c
index ad02594614..2d79cb09fe 100644
--- a/src/amd/vulkan/radv_meta_resolve_cs.c
+++ b/src/amd/vulkan/radv_meta_resolve_cs.c
@@ -475,18 +475,6 @@ radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer 
*cmd_buffer)
struct radv_meta_saved_state saved_state;
struct radv_subpass_barrier barrier;
 
-   /* FINISHME(perf): Skip clears for resolve attachments.
-*
-* From the Vulkan 1.0 spec:
-*
-*If the first use of an attachment in a render pass is as a resolve
-*attachment, then the loadOp is effectively ignored as the resolve 
is
-*guaranteed to overwrite all pixels in the render area.
-*/
-
-   if (!subpass->has_resolve)
-   return;
-
/* Resolves happen before the end-of-subpass barriers get executed, so
 * we have to make the attachment shader-readable.
 */
diff --git a/src/amd/vulkan/radv_meta_resolve_fs.c 
b/src/amd/vulkan/radv_meta_resolve_fs.c
index 0e4957b163..3feeb45897 100644
--- a/src/amd/vulkan/radv_meta_resolve_fs.c
+++ b/src/amd/vulkan/radv_meta_resolve_fs.c
@@ -582,18 +582,6 @@ radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer 
*cmd_buffer)
struct radv_meta_saved_state saved_state;
struct radv_subpass_barrier barrier;
 
-   /* FINISHME(perf): Skip clears for resolve attachments.
-*
-* From the Vulkan 1.0 spec:
-*
-*If the first use of an attachment in a render pass is as a resolve
-*attachment, then the loadOp is effectively ignored as the resolve 
is
-*guaranteed to overwrite all pixels in the render area.
-*/
-
-   if (!subpass->has_resolve)
-   return;
-
radv_meta_save(&saved_state, cmd_buffer,
   RADV_META_SAVE_GRAPHICS_PIPELINE |
   RADV_META_SAVE_CONSTANTS |
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] radv: save current state just before resolving with FS

2018-07-18 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_meta_resolve_fs.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/amd/vulkan/radv_meta_resolve_fs.c 
b/src/amd/vulkan/radv_meta_resolve_fs.c
index 3feeb45897..7c6ddf513c 100644
--- a/src/amd/vulkan/radv_meta_resolve_fs.c
+++ b/src/amd/vulkan/radv_meta_resolve_fs.c
@@ -582,11 +582,6 @@ radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer 
*cmd_buffer)
struct radv_meta_saved_state saved_state;
struct radv_subpass_barrier barrier;
 
-   radv_meta_save(&saved_state, cmd_buffer,
-  RADV_META_SAVE_GRAPHICS_PIPELINE |
-  RADV_META_SAVE_CONSTANTS |
-  RADV_META_SAVE_DESCRIPTORS);
-
/* Resolves happen before the end-of-subpass barriers get executed,
 * so we have to make the attachment shader-readable */
barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
@@ -597,6 +592,11 @@ radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer 
*cmd_buffer)
 
radv_decompress_resolve_subpass_src(cmd_buffer);
 
+   radv_meta_save(&saved_state, cmd_buffer,
+  RADV_META_SAVE_GRAPHICS_PIPELINE |
+  RADV_META_SAVE_CONSTANTS |
+  RADV_META_SAVE_DESCRIPTORS);
+
for (uint32_t i = 0; i < subpass->color_count; ++i) {
struct radv_subpass_attachment src_att = 
subpass->color_attachments[i];
struct radv_subpass_attachment dest_att = 
subpass->resolve_attachments[i];
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] intel: tools: dump: remove command execution feature

2018-07-18 Thread Lionel Landwerlin
In commit 86cb05a6d35a52 ("intel: aubinator: remove standard input
processing option") we removed the ability to process aub as an input
stream because we now rely on mmapping the aub file to back the
buffer aubinator is parsing.

intel_aubdump was the provider of the standard input data and since
we've copied/reworked intel_aubdump into intel_dump_gpu within Mesa,
we don't need that code anymore.

Signed-off-by: Lionel Landwerlin 
---
 src/intel/tools/intel_dump_gpu.c  | 73 ---
 src/intel/tools/intel_dump_gpu.in | 27 +---
 2 files changed, 10 insertions(+), 90 deletions(-)

diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c
index 766ba662d91..6758cab13c4 100644
--- a/src/intel/tools/intel_dump_gpu.c
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -254,7 +254,7 @@ static int (*libc_ioctl)(int fd, unsigned long request, 
...) = ioctl_init_helper
 
 static int drm_fd = -1;
 static char *filename = NULL;
-static FILE *files[2] = { NULL, NULL };
+static FILE *aub_file = NULL;
 static struct gen_device_info devinfo = {0};
 static int verbose = 0;
 static bool device_override;
@@ -320,13 +320,8 @@ align_u32(uint32_t v, uint32_t a)
 static void
 dword_out(uint32_t data)
 {
-   for (int i = 0; i < ARRAY_SIZE (files); i++) {
-  if (files[i] == NULL)
- continue;
-
-  fail_if(fwrite(&data, 1, 4, files[i]) == 0,
-  "Writing to output failed\n");
-   }
+   fail_if(fwrite(&data, 1, 4, aub_file) == 0,
+   "Writing to output failed\n");
 }
 
 static void
@@ -335,13 +330,8 @@ data_out(const void *data, size_t size)
if (size == 0)
   return;
 
-   for (int i = 0; i < ARRAY_SIZE (files); i++) {
-  if (files[i] == NULL)
- continue;
-
-  fail_if(fwrite(data, 1, size, files[i]) == 0,
-  "Writing to output failed\n");
-   }
+   fail_if(fwrite(data, 1, size, aub_file) == 0,
+   "Writing to output failed\n");
 }
 
 static uint32_t
@@ -990,10 +980,7 @@ dump_execbuffer2(int fd, struct drm_i915_gem_execbuffer2 
*execbuffer2)
   ring_flag);
}
 
-   for (int i = 0; i < ARRAY_SIZE(files); i++) {
-  if (files[i] != NULL)
- fflush(files[i]);
-   }
+   fflush(aub_file);
 
if (device_override &&
(execbuffer2->flags & I915_EXEC_FENCE_ARRAY) != 0) {
@@ -1044,40 +1031,6 @@ close(int fd)
return libc_close(fd);
 }
 
-static FILE *
-launch_command(char *command)
-{
-   int i = 0, fds[2];
-   char **args = calloc(strlen(command), sizeof(char *));
-   char *iter = command;
-
-   args[i++] = iter = command;
-
-   while ((iter = strstr(iter, ",")) != NULL) {
-  *iter = '\0';
-  iter += 1;
-  args[i++] = iter;
-   }
-
-   if (pipe(fds) == -1)
-  return NULL;
-
-   switch (fork()) {
-   case 0:
-  dup2(fds[0], 0);
-  fail_if(execvp(args[0], args) == -1,
-  "intel_aubdump: failed to launch child command\n");
-  return NULL;
-
-   default:
-  free(args);
-  return fdopen(fds[1], "w");
-
-   case -1:
-  return NULL;
-   }
-}
-
 static void
 maybe_init(void)
 {
@@ -1105,15 +1058,10 @@ maybe_init(void)
  device_override = true;
   } else if (!strcmp(key, "file")) {
  filename = strdup(value);
- files[0] = fopen(filename, "w+");
- fail_if(files[0] == NULL,
+ aub_file = fopen(filename, "w+");
+ fail_if(aub_file == NULL,
  "intel_aubdump: failed to open file '%s'\n",
  filename);
-  } else if (!strcmp(key,  "command")) {
- files[1] = launch_command(value);
- fail_if(files[1] == NULL,
- "intel_aubdump: failed to launch command '%s'\n",
- value);
   } else {
  fprintf(stderr, "intel_aubdump: unknown option '%s'\n", key);
   }
@@ -1285,9 +1233,6 @@ static void __attribute__ ((destructor))
 fini(void)
 {
free(filename);
-   for (int i = 0; i < ARRAY_SIZE(files); i++) {
-  if (files[i] != NULL)
- fclose(files[i]);
-   }
+   fclose(aub_file);
free(bos);
 }
diff --git a/src/intel/tools/intel_dump_gpu.in 
b/src/intel/tools/intel_dump_gpu.in
index b9887f0ed2e..9eea37189db 100755
--- a/src/intel/tools/intel_dump_gpu.in
+++ b/src/intel/tools/intel_dump_gpu.in
@@ -10,9 +10,6 @@ contents and execution of the GEM application.
 
   -o, --output=FILE  Name of AUB file. Defaults to COMMAND.aub
 
-  -c, --command=CMD  Execute CMD and write the AUB file's content to its
- standard input
-
   --device=IDOverride PCI ID of the reported device
 
   -v Enable verbose output
@@ -27,7 +24,6 @@ EOF
 }
 
 args=""
-command=""
 file=""
 
 function add_arg() {
@@ -35,17 +31,6 @@ function add_arg() {
 args="$args$arg\n"
 }
 
-function build_command () {
-command=""
-for i in $1; do
-if [ -z $command ]; then
-command=$i
-else
-command="$command,$i"
-fi;
-done
-}
-
 while true

Re: [Mesa-dev] [PATCH 2/4] intel/tools: Refactor aub dumping to remove singletons

2018-07-18 Thread Lionel Landwerlin

Reviewed-by: Lionel Landwerlin 

On 18/07/18 00:05, Jason Ekstrand wrote:

Instead of having quite so many singletons, we use a struct aub_file to
organize the bits we need for writing an aub file.
---
  src/intel/tools/intel_dump_gpu.c | 498 ++-
  1 file changed, 287 insertions(+), 211 deletions(-)

diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c
index 5a0283960c1..aad34cc24f4 100644
--- a/src/intel/tools/intel_dump_gpu.c
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -255,11 +255,8 @@ static int (*libc_ioctl)(int fd, unsigned long request, 
...) = ioctl_init_helper
  static int drm_fd = -1;
  static char *filename = NULL;
  static FILE *files[2] = { NULL, NULL };
-static struct gen_device_info devinfo = {0};
  static int verbose = 0;
  static bool device_override;
-static uint32_t device;
-static int addr_bits = 0;
  
  #define MAX_BO_COUNT 64 * 1024
  
@@ -280,11 +277,6 @@ static struct bo *bos;

  #define IS_USERPTR(p) ((uintptr_t) (p) & USERPTR_FLAG)
  #define GET_PTR(p) ( (void *) ((uintptr_t) p & ~(uintptr_t) 1) )
  
-static inline bool use_execlists(void)

-{
-   return devinfo.gen >= 8;
-}
-
  static void __attribute__ ((format(__printf__, 2, 3)))
  fail_if(int cond, const char *format, ...)
  {
@@ -317,82 +309,121 @@ align_u32(uint32_t v, uint32_t a)
 return (v + a - 1) & ~(a - 1);
  }
  
+struct aub_ppgtt_table {

+   uint64_t phys_addr;
+   struct aub_ppgtt_table *subtables[512];
+};
+
  static void
-dword_out(uint32_t data)
+aub_ppgtt_table_finish(struct aub_ppgtt_table *table)
  {
-   for (int i = 0; i < ARRAY_SIZE (files); i++) {
-  if (files[i] == NULL)
- continue;
-
-  fail_if(fwrite(&data, 1, 4, files[i]) == 0,
-  "Writing to output failed\n");
+   for (unsigned i = 0; i < ARRAY_SIZE(table->subtables); i++) {
+  aub_ppgtt_table_finish(table->subtables[i]);
+  free(table->subtables[i]);
 }
  }
  
+struct aub_file {

+   FILE *file;
+
+   /* Set if you want extra logging */
+   FILE *verbose_log_file;
+
+   uint16_t pci_id;
+   struct gen_device_info devinfo;
+
+   int addr_bits;
+
+   struct aub_ppgtt_table pml4;
+};
+
  static void
-data_out(const void *data, size_t size)
+aub_file_init(struct aub_file *aub, FILE *file, uint16_t pci_id)
+{
+   memset(aub, 0, sizeof(*aub));
+
+   aub->file = file;
+   aub->pci_id = pci_id;
+   fail_if(!gen_get_device_info(pci_id, &aub->devinfo),
+   "failed to identify chipset=0x%x\n", pci_id);
+   aub->addr_bits = aub->devinfo.gen >= 8 ? 48 : 32;
+
+   aub->pml4.phys_addr = PML4_PHYS_ADDR;
+}
+
+static void
+aub_file_finish(struct aub_file *aub)
+{
+   aub_ppgtt_table_finish(&aub->pml4);
+   fclose(aub->file);
+}
+
+static inline bool aub_use_execlists(const struct aub_file *aub)
+{
+   return aub->devinfo.gen >= 8;
+}
+
+static void
+data_out(struct aub_file *aub, const void *data, size_t size)
  {
 if (size == 0)
return;
  
-   for (int i = 0; i < ARRAY_SIZE (files); i++) {

-  if (files[i] == NULL)
- continue;
+   fail_if(fwrite(data, 1, size, aub->file) == 0,
+   "Writing to output failed\n");
+}
  
-  fail_if(fwrite(data, 1, size, files[i]) == 0,

-  "Writing to output failed\n");
-   }
+static void
+dword_out(struct aub_file *aub, uint32_t data)
+{
+   data_out(aub, &data, sizeof(data));
  }
  
  static uint32_t

-gtt_size(void)
+aub_gtt_size(struct aub_file *aub)
  {
-   return NUM_PT_ENTRIES * (addr_bits > 32 ? GEN8_PTE_SIZE : PTE_SIZE);
+   return NUM_PT_ENTRIES * (aub->addr_bits > 32 ? GEN8_PTE_SIZE : PTE_SIZE);
  }
  
  static void

-mem_trace_memory_write_header_out(uint64_t addr, uint32_t len,
-  uint32_t addr_space)
+mem_trace_memory_write_header_out(struct aub_file *aub, uint64_t addr,
+  uint32_t len, uint32_t addr_space)
  {
 uint32_t dwords = ALIGN(len, sizeof(uint32_t)) / sizeof(uint32_t);
  
-   dword_out(CMD_MEM_TRACE_MEMORY_WRITE | (5 + dwords - 1));

-   dword_out(addr & 0x);   /* addr lo */
-   dword_out(addr >> 32);   /* addr hi */
-   dword_out(addr_space);   /* gtt */
-   dword_out(len);
+   dword_out(aub, CMD_MEM_TRACE_MEMORY_WRITE | (5 + dwords - 1));
+   dword_out(aub, addr & 0x);   /* addr lo */
+   dword_out(aub, addr >> 32);   /* addr hi */
+   dword_out(aub, addr_space);   /* gtt */
+   dword_out(aub, len);
  }
  
  static void

-register_write_out(uint32_t addr, uint32_t value)
+register_write_out(struct aub_file *aub, uint32_t addr, uint32_t value)
  {
 uint32_t dwords = 1;
  
-   dword_out(CMD_MEM_TRACE_REGISTER_WRITE | (5 + dwords - 1));

-   dword_out(addr);
-   dword_out(AUB_MEM_TRACE_REGISTER_SIZE_DWORD |
- AUB_MEM_TRACE_REGISTER_SPACE_MMIO);
-   dword_out(0x);   /* mask lo */
-   dword_out(0x);   /* mask hi */
-   dword_out(value);
+   dword_out(aub, CMD_MEM_TRACE_REGISTER_WRITE | (5 + dwords - 1));
+   dword_out(aub, addr);
+   dword_out(aub, AU

Re: [Mesa-dev] [PATCH 3/4] intel/tools: Break aub file writing into a helper

2018-07-18 Thread Lionel Landwerlin

Reviewed-by: Lionel Landwerlin 

On 18/07/18 00:05, Jason Ekstrand wrote:

---
  src/intel/tools/aub_write.c  | 764 +++
  src/intel/tools/aub_write.h  |  96 
  src/intel/tools/intel_dump_gpu.c | 762 +-
  src/intel/tools/meson.build  |   2 +-
  4 files changed, 862 insertions(+), 762 deletions(-)
  create mode 100644 src/intel/tools/aub_write.c
  create mode 100644 src/intel/tools/aub_write.h

diff --git a/src/intel/tools/aub_write.c b/src/intel/tools/aub_write.c
new file mode 100644
index 000..2ec045830a8
--- /dev/null
+++ b/src/intel/tools/aub_write.c
@@ -0,0 +1,764 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "aub_write.h"
+
+#include 
+#include 
+#include 
+#include 
+
+#include "i915_drm.h"
+#include "intel_aub.h"
+
+#ifndef ALIGN
+#define ALIGN(x, y) (((x) + (y)-1) & ~((y)-1))
+#endif
+
+#define MI_LOAD_REGISTER_IMM_n(n) ((0x22 << 23) | (2 * (n) - 1))
+#define MI_LRI_FORCE_POSTED   (1<<12)
+
+#define MI_BATCH_NON_SECURE_I965 (1 << 8)
+
+#define MI_BATCH_BUFFER_END (0xA << 23)
+
+#define min(a, b) ({\
+ __typeof(a) _a = (a);  \
+ __typeof(b) _b = (b);  \
+ _a < _b ? _a : _b; \
+  })
+
+#define max(a, b) ({\
+ __typeof(a) _a = (a);  \
+ __typeof(b) _b = (b);  \
+ _a > _b ? _a : _b; \
+  })
+
+#define HWS_PGA_RCSUNIT  0x02080
+#define HWS_PGA_VCSUNIT0   0x12080
+#define HWS_PGA_BCSUNIT  0x22080
+
+#define GFX_MODE_RCSUNIT   0x0229c
+#define GFX_MODE_VCSUNIT0   0x1229c
+#define GFX_MODE_BCSUNIT   0x2229c
+
+#define EXECLIST_SUBMITPORT_RCSUNIT   0x02230
+#define EXECLIST_SUBMITPORT_VCSUNIT0   0x12230
+#define EXECLIST_SUBMITPORT_BCSUNIT   0x22230
+
+#define EXECLIST_STATUS_RCSUNIT  0x02234
+#define EXECLIST_STATUS_VCSUNIT0   0x12234
+#define EXECLIST_STATUS_BCSUNIT  0x22234
+
+#define EXECLIST_SQ_CONTENTS0_RCSUNIT   0x02510
+#define EXECLIST_SQ_CONTENTS0_VCSUNIT0   0x12510
+#define EXECLIST_SQ_CONTENTS0_BCSUNIT   0x22510
+
+#define EXECLIST_CONTROL_RCSUNIT   0x02550
+#define EXECLIST_CONTROL_VCSUNIT0   0x12550
+#define EXECLIST_CONTROL_BCSUNIT   0x22550
+
+#define MEMORY_MAP_SIZE (64 /* MiB */ * 1024 * 1024)
+
+#define PTE_SIZE 4
+#define GEN8_PTE_SIZE 8
+
+#define NUM_PT_ENTRIES (ALIGN(MEMORY_MAP_SIZE, 4096) / 4096)
+#define PT_SIZE ALIGN(NUM_PT_ENTRIES * GEN8_PTE_SIZE, 4096)
+
+#define RING_SIZE (1 * 4096)
+#define PPHWSP_SIZE (1 * 4096)
+#define GEN11_LR_CONTEXT_RENDER_SIZE(14 * 4096)
+#define GEN10_LR_CONTEXT_RENDER_SIZE(19 * 4096)
+#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * 4096)
+#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * 4096)
+#define GEN8_LR_CONTEXT_OTHER_SIZE  (2 * 4096)
+
+
+#define STATIC_GGTT_MAP_START 0
+
+#define RENDER_RING_ADDR STATIC_GGTT_MAP_START
+#define RENDER_CONTEXT_ADDR (RENDER_RING_ADDR + RING_SIZE)
+
+#define BLITTER_RING_ADDR (RENDER_CONTEXT_ADDR + PPHWSP_SIZE + 
GEN10_LR_CONTEXT_RENDER_SIZE)
+#define BLITTER_CONTEXT_ADDR (BLITTER_RING_ADDR + RING_SIZE)
+
+#define VIDEO_RING_ADDR (BLITTER_CONTEXT_ADDR + PPHWSP_SIZE + 
GEN8_LR_CONTEXT_OTHER_SIZE)
+#define VIDEO_CONTEXT_ADDR (VIDEO_RING_ADDR + RING_SIZE)
+
+#define STATIC_GGTT_MAP_END (VIDEO_CONTEXT_ADDR + PPHWSP_SIZE + 
GEN8_LR_CONTEXT_OTHER_SIZE)
+#define STATIC_GGTT_MAP_SIZE (STATIC_GGTT_MAP_END - STATIC_GGTT_MAP_START)
+
+#define PML4_PHYS_ADDR ((uint64_t)(STATIC_GGTT_MAP_END))
+
+#define CONTEXT_FLAGS (0x339)   /* Normal Priority | L3-LLC Coherency |
+ * PPGTT Enabled |
+ * Legacy Context with 64 bit VA support |
+ * Valid
+ */
+
+

Re: [Mesa-dev] [PATCH 4/4] intel/tools: Add an error state to aub translator

2018-07-18 Thread Lionel Landwerlin

Awesome tool! And not even too much code!

Reviewed-by: Lionel Landwerlin 

In the interest of consistency and because this is an installable 
binary, I would rename it.

Now for the hard part : intel_error2aub ?

-
Lionel

On 18/07/18 00:05, Jason Ekstrand wrote:

---
  src/intel/tools/error2aub.c | 332 
  src/intel/tools/meson.build |  11 ++
  2 files changed, 343 insertions(+)
  create mode 100644 src/intel/tools/error2aub.c

diff --git a/src/intel/tools/error2aub.c b/src/intel/tools/error2aub.c
new file mode 100644
index 000..ece41d93e6c
--- /dev/null
+++ b/src/intel/tools/error2aub.c
@@ -0,0 +1,332 @@
+/*
+ * Copyright © 2007-2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "aub_write.h"
+#include "i915_drm.h"
+#include "intel_aub.h"
+
+static void __attribute__ ((format(__printf__, 2, 3)))
+fail_if(int cond, const char *format, ...)
+{
+   va_list args;
+
+   if (!cond)
+  return;
+
+   va_start(args, format);
+   vfprintf(stderr, format, args);
+   va_end(args);
+
+   raise(SIGTRAP);
+}
+
+#define fail(...) fail_if(true, __VA_ARGS__)
+
+static int zlib_inflate(uint32_t **ptr, int len)
+{
+   struct z_stream_s zstream;
+   void *out;
+   const uint32_t out_size = 128*4096;  /* approximate obj size */
+
+   memset(&zstream, 0, sizeof(zstream));
+
+   zstream.next_in = (unsigned char *)*ptr;
+   zstream.avail_in = 4*len;
+
+   if (inflateInit(&zstream) != Z_OK)
+  return 0;
+
+   out = malloc(out_size);
+   zstream.next_out = out;
+   zstream.avail_out = out_size;
+
+   do {
+  switch (inflate(&zstream, Z_SYNC_FLUSH)) {
+  case Z_STREAM_END:
+ goto end;
+  case Z_OK:
+ break;
+  default:
+ inflateEnd(&zstream);
+ return 0;
+  }
+
+  if (zstream.avail_out)
+ break;
+
+  out = realloc(out, 2*zstream.total_out);
+  if (out == NULL) {
+ inflateEnd(&zstream);
+ return 0;
+  }
+
+  zstream.next_out = (unsigned char *)out + zstream.total_out;
+  zstream.avail_out = zstream.total_out;
+   } while (1);
+ end:
+   inflateEnd(&zstream);
+   free(*ptr);
+   *ptr = out;
+   return zstream.total_out / 4;
+}
+
+static int ascii85_decode(const char *in, uint32_t **out, bool inflate)
+{
+   int len = 0, size = 1024;
+
+   *out = realloc(*out, sizeof(uint32_t)*size);
+   if (*out == NULL)
+  return 0;
+
+   while (*in >= '!' && *in <= 'z') {
+  uint32_t v = 0;
+
+  if (len == size) {
+ size *= 2;
+ *out = realloc(*out, sizeof(uint32_t)*size);
+ if (*out == NULL)
+return 0;
+  }
+
+  if (*in == 'z') {
+ in++;
+  } else {
+ v += in[0] - 33; v *= 85;
+ v += in[1] - 33; v *= 85;
+ v += in[2] - 33; v *= 85;
+ v += in[3] - 33; v *= 85;
+ v += in[4] - 33;
+ in += 5;
+  }
+  (*out)[len++] = v;
+   }
+
+   if (!inflate)
+  return len;
+
+   return zlib_inflate(out, len);
+}
+
+static void
+print_help(const char *progname, FILE *file)
+{
+   fprintf(file,
+   "Usage: %s [OPTION]... [FILE]\n"
+   "Convert an Intel GPU i915 error state to an aub file.\n"
+   "  -h, --help  display this help and exit\n"
+   "  -o, --output=FILE   the output aub file (default FILE.aub)\n",
+   progname);
+}
+
+int
+main(int argc, char *argv[])
+{
+   int i, c;
+   bool help = false;
+   char *out_filename = NULL, *in_filename = NULL;
+   const struct option aubinator_opts[] = {
+  { "help",   no_argument,   NULL, 'h' },
+  { "output", required_argument, NULL, 'o' },
+  { NULL, 0, NULL, 0 }
+   };
+
+   i = 0;
+   while ((c = getopt_long(argc, argv, "ho:", aubinator_opts, &i)) !=

Re: [Mesa-dev] [PATCH v5] i965: Fix ETC2/EAC GetCompressed* functions on Gen7 GPUs

2018-07-18 Thread Eleni Maria Stea
On 07/10/2018 03:10 AM, Nanley Chery wrote:
> On Thu, Jun 14, 2018 at 10:50:57PM +0300, Eleni Maria Stea wrote:
>> On 06/14/2018 10:27 PM, Nanley Chery wrote:
>>
>>> +Jason, Ken
>>>
>>> Hello,
>>>
>>> I recently did some miptree work relating to the r8stencil_mt and I
>>> think I now have a more informed opinion about how things should be
>>> structured. I'd like to propose an alternative solution.
>>>
>>> I had initially thought we should have a separate miptree to hold the
>>> compressed data, like this patch does, but now I think we should
>>> actually have the compressed data be the main miptree and to store the
>>> decompressed miptree as part of the main one. The reasoning is that we
>>> could reuse this structure to handle the r8stencil workaround and to
>>> eventually handle the ASTC_LDR surfaces that are modified on gen9.
>>>
>>> I'm proposing something like the following:
>>>
>>> 1. Rename r8stencil_mt ->shadow_mt and
>>>r8stencil_needs_update -> shadow_needs_update.
>>> 2. Make shadow_mt hold the decompressed ETC miptree
>>> 3. Update shadow_needs_update whenever the main mt is modified
>>> 4. Add a function to update the shadow_mt using the main mt as a source
>>> 5. Sample from the shadow_mt as appropriate
>>> 6. Make the main miptree hold the compressed data
>>>
>>> This method should also be able to handle the CopyImage functions. What
>>> do you all think?
>>>
>>> -Nanley
>>
>> Hi Nanley,
>>
>> Thank you for your reply. I wasn't aware that there are other cases we
>> might need to store a 2nd image. I agree that it's more reasonable to
>> use one generic purpose miptree that can be accessible from different
>> parts of the i965 code for such cases instead of storing miptrees in
>> different places for different hacks when a feature is not supported.
>>
>> I will search your patch to get a look and I will also get a look at the
>> mesa code to see how easy this fix would be (which parts of the code it
>> might affect) and if everyone agrees that this is a good idea I will
>> modify this patch according to your suggestions.
>>
>> BR :)
>> Eleni
> 
> Hi Eleni,
> 
> I gave this more thought and am now thinking that what you have here is
> fine. Having two different ways of working with a shadow miptree
> suggests a refactor later on, but IMO this is ultimately a step in the
> right direction. Sorry for the noise.
> 
> With code-sharing among shadow miptrees in mind, my two main
> suggestions are 1) to perform mapping operations only with the cmt (if
> it's present) and 2) to update the decompressed mt, on demand. Maybe
> with intel_miptree_copy_slice_sw?
> 
> Regards,
> Nanley
> 

Hi Nanley,

I talked to you on IRC but I reply here as well:

Thank you for the suggestions, I had misunderstood something from our
IRC conversation that followed this e-mail, so the patch v6 has several
issues. I will send a new one soon and I will implement the solution you
suggested earlier (suggestions 1-6) instead. Sorry for the noise with
the patch v6.

Thanks,
Eleni



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] Allow AMD_perfmon on GLES contexts

2018-07-18 Thread Rob Clark
From: Eric Anholt 

Reviewed-by: Rob Clark 
---
Not sure if this ever got sent to list.. but this extension is meant to
be exposed in GLES as well as GL

 .../glapi/gen/AMD_performance_monitor.xml | 22 +--
 src/mesa/main/extensions_table.h  |  2 +-
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/mapi/glapi/gen/AMD_performance_monitor.xml 
b/src/mapi/glapi/gen/AMD_performance_monitor.xml
index b29dc5d9036..785ea076c6f 100644
--- a/src/mapi/glapi/gen/AMD_performance_monitor.xml
+++ b/src/mapi/glapi/gen/AMD_performance_monitor.xml
@@ -5,13 +5,13 @@
 
 
 
-
+
 
 
 
 
 
-
+
 
 
 
@@ -19,14 +19,14 @@
 
 
 
-
+
 
 
 
 
 
 
-
+
 
 
 
@@ -34,24 +34,24 @@
 
 
 
-
+
 
 
 
 
 
 
-
+
 
 
 
 
-
+
 
 
 
 
-
+
 
 
 
@@ -59,15 +59,15 @@
 
 
 
-
+
 
 
 
-
+
 
 
 
-
+
 
 
 
diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index e24287b8581..af1ae73473d 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -10,7 +10,7 @@ EXT(3DFX_texture_compression_FXT1   , 
TDFX_texture_compression_FXT1
 
 EXT(AMD_conservative_depth  , ARB_conservative_depth   
  , GLL, GLC,  x ,  x , 2009)
 EXT(AMD_draw_buffers_blend  , ARB_draw_buffers_blend   
  , GLL, GLC,  x ,  x , 2009)
-EXT(AMD_performance_monitor , AMD_performance_monitor  
  , GLL, GLC,  x ,  x , 2007)
+EXT(AMD_performance_monitor , AMD_performance_monitor  
  , GLL, GLC,  x ,  ES2 , 2007)
 EXT(AMD_pinned_memory   , AMD_pinned_memory
  , GLL, GLC,  x ,  x , 2013)
 EXT(AMD_seamless_cubemap_per_texture, AMD_seamless_cubemap_per_texture 
  , GLL, GLC,  x ,  x , 2009)
 EXT(AMD_shader_stencil_export   , ARB_shader_stencil_export
  , GLL, GLC,  x ,  x , 2009)
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 0/4] Android kms_swrast support

2018-07-18 Thread Robert Foss

Hey Rob,

On 2018-07-18 15:30, Rob Herring wrote:

On Tue, Jul 17, 2018 at 4:33 AM Robert Foss  wrote:


This series implements kms_swrast support for the Android
platform. And, after having to debug a null pointer dereference, it also
simplifies that process for the next guy.


So is this working for you now?


I'm seeing page-flips happen in the logs, but have no graphical output on the 
Qemu-based setup I'm using now.


When using virgl I'm seeing the same page-flipping in the logs, but no graphical 
output.





As it stands now, any kernel must have the following ioctls flagged with
DRM_RENDER_ALLOW[1], which isn't the case in the mainline kernel.

DRM_IOCTL_MODE_CREATE_DUMB
DRM_IOCTL_MODE_MAP_DUMB


Ah, sorry. I should have mentioned this. We have discussed this issue
in the past, but to no further conclusion.

But as I recall, I thought the issue was also allowing import and
export of dumb buffers?


Yeah, it's a two-parter for any AOSP Treble build.
1) Allow dumb buffer ioctls from render nodes
2) Support moving buffers across processes.




While it would be possible to open a non-render node to pass the
authentication check, this would still cause authentication issues
when the /dev/dri/cardX node needs to be opened as master by both mesa
and the compositor.


Right. We've pretty much stripped the support that was there out. Plus
I don't think it will work with Treble.


I don't know how acceptable this series is for upstreaming, while relying on
a non-mainline kernel. I think the policy is to not accept changes that
don't have both a user and kernel space solution in place.

Like I noted yesterday[2] the alternative to using dumb buffers and having
authentication issues is using VGEM, which is new territory to me, and it would
take me a little bit of time to figure exactly how it fits into the current
kms_swrast approach.
Input, like noted before, is very much welcome.


I'm very much in favor of the former approach. VGEM seems like an
overly complicated solution when there's a very simple solution.



The former solution being what we have now, dumb buffers?
I don't think dumb buffers are a viable path due to 2) listed above.

If there are any other options I'm not aware of, I'm very much listening.


Rob.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] mesa: MESA_framebuffer_flip_y extension [v3]

2018-07-18 Thread Fritz Koenig
On Wed, Jul 11, 2018 at 3:54 PM Chad Versace  wrote:
>
> +Ken, I had a question about GLboolean. I call you by name in the
> comments below.
>
> On Fri 29 Jun 2018, Fritz Koenig wrote:
> > Adds an extension to glFramebufferParameteri
> > that will specify if the framebuffer is vertically
> > flipped. Historically system framebuffers are
> > vertically flipped and user framebuffers are not.
> > Checking to see the state was done by looking at
> > the name field.  This adds an explicit field.
> >
> > v2:
> > * updated spec language [for chadv]
> > * correctly specifying ES 3.1 [for chadv]
> > * refactor access to rb->Name [for jason]
> > * handle GetFramebufferParameteriv [for chadv]
> > v3:
> > * correct _mesa_GetMultisamplefv [for kusmabite]
> > ---
>
> >  docs/specs/MESA_framebuffer_flip_y.spec| 84 ++
>
> Use file extension '.txt'. Khronos no longer uses the '.spec' extension.
>
> File docs/specs/enums.txt needs an update too.
>
> >  include/GLES2/gl2ext.h |  5 ++
> >  src/mapi/glapi/registry/gl.xml |  6 ++
> >  src/mesa/drivers/dri/i915/intel_fbo.c  |  7 +-
> >  src/mesa/drivers/dri/i965/intel_fbo.c  |  7 +-
> >  src/mesa/drivers/dri/nouveau/nouveau_fbo.c |  7 +-
> >  src/mesa/drivers/dri/radeon/radeon_fbo.c   |  7 +-
> >  src/mesa/drivers/dri/radeon/radeon_span.c  |  9 ++-
> >  src/mesa/drivers/dri/swrast/swrast.c   |  7 +-
> >  src/mesa/drivers/osmesa/osmesa.c   |  5 +-
> >  src/mesa/drivers/x11/xm_buffer.c   |  3 +-
> >  src/mesa/drivers/x11/xmesaP.h  |  3 +-
> >  src/mesa/main/accum.c  | 17 +++--
> >  src/mesa/main/dd.h |  3 +-
> >  src/mesa/main/extensions_table.h   |  1 +
> >  src/mesa/main/fbobject.c   | 18 -
> >  src/mesa/main/framebuffer.c|  1 +
> >  src/mesa/main/glheader.h   |  3 +
> >  src/mesa/main/mtypes.h |  3 +
> >  src/mesa/main/readpix.c| 20 +++---
> >  src/mesa/state_tracker/st_cb_fbo.c |  7 +-
> >  src/mesa/swrast/s_blit.c   | 17 +++--
> >  src/mesa/swrast/s_clear.c  |  3 +-
> >  src/mesa/swrast/s_copypix.c| 11 +--
> >  src/mesa/swrast/s_depth.c  |  6 +-
> >  src/mesa/swrast/s_drawpix.c| 26 ---
> >  src/mesa/swrast/s_renderbuffer.c   |  6 +-
> >  src/mesa/swrast/s_renderbuffer.h   |  3 +-
> >  src/mesa/swrast/s_stencil.c|  3 +-
> >  29 files changed, 241 insertions(+), 57 deletions(-)
> >  create mode 100644 docs/specs/MESA_framebuffer_flip_y.spec
> >
> > diff --git a/docs/specs/MESA_framebuffer_flip_y.spec 
> > b/docs/specs/MESA_framebuffer_flip_y.spec
> > new file mode 100644
> > index 00..dca77a9541
> > --- /dev/null
> > +++ b/docs/specs/MESA_framebuffer_flip_y.spec
> > @@ -0,0 +1,84 @@
> > +Name
> > +
> > +MESA_framebuffer_flip_y
> > +
> > +Name Strings
> > +
> > +GL_MESA_framebuffer_flip_y
> > +
> > +Contact
> > +
> > +Fritz Koenig 
> > +
> > +Contributors
> > +
> > +Fritz Koenig, Google
> > +Kristian Høgsberg, Google
> > +Chad Versace, Google
> > +
> > +Status
> > +
> > +Proposal
> > +
> > +Version
> > +
> > +Version 1, June 7, 2018
> > +
> > +Number
> > +
> > +TBD
> > +
> > +Dependencies
> > +
> > +OpenGL ES 3.1 is required, for FramebufferParameteri.
> > +
> > +Overview
> > +
> > +Rendered buffers are normally returned right side up, as accessed
> > +top to bottom.  This extension allows those buffers to be upside down
> > +when accessed top to bottom.
> > +
> > +This extension defines a new framebuffer parameter,
> > +GL_FRAMEBUFFER_FLIP_Y_MESA, that changes the behavior of the reads and
> > +writes to the framebuffer attachment points. When 
> > GL_FRAMEBUFFER_FLIP_Y_MESA
> > +is GL_TRUE, render commands and pixel transfer operations access the
> > +backing store of each attachment point with a y-inverted coordinate
> > +system. This y-inversion is relative to the coordinate system set when
> > +GL_FRAMEBUFFER_FLIP_Y_MESA is GL_FALSE.
> > +
> > +Access through TexSubImage2D and similar calls will notice the effect 
> > of
> > +the flip when they are not attached to framebuffer objects because
> > +GL_FRAMEBUFFER_FLIP_Y_MESA is associated with the framebuffer object 
> > and
> > +not the attachment points.
> > +
> > +IP Status
> > +
> > +None
> > +
> > +Issues
> > +
> > +None
> > +
> > +New Procedures and Functions
> > +
> > +None
> > +
> > +New Types
> > +
> > +None
> > +
> > +New Tokens
> > +
> > +Accepted by the <pname> argument of FramebufferParameteri and
> > +GetFramebufferParameteriv:
> > +
> > +GL_FRAMEBUFFER_FLIP_Y_MESA  0x8BBB
> > +
> > +Errors
> > +GL_INVALID_OPERATION is returned from  GetFramebufferParameteriv if 
> > this
> > +is called on a winsys f

Re: [Mesa-dev] [PATCH 1/4] intel/dump_gpu: Fix corner cases in PPGTT range calculations

2018-07-18 Thread Jason Ekstrand
On Wed, Jul 18, 2018 at 3:08 AM Lionel Landwerlin <
lionel.g.landwer...@intel.com> wrote:

> On 18/07/18 00:05, Jason Ekstrand wrote:
> > For large buffers which span an entire l1 page table, we got the range
> > calculations wrong.  In this case, we end up with an l1_start which is
> > the first byte represented by the given l1 table and an l1_end which is
> > the first byte after the range represented by the l1 table.  Then
> > l2_start_index == L2_index(l2_end) due to roll-over.  Instead, compute
> > lN_end using (1Ull << shift) - 1 so that lN_end is the last byte in the
> > range represented by the Nth level page table.  When we do this, we
> > don't need the conditional expression anymore.
> > ---
> >   src/intel/tools/intel_dump_gpu.c | 12 ++--
> >   1 file changed, 6 insertions(+), 6 deletions(-)
> >
> > diff --git a/src/intel/tools/intel_dump_gpu.c
> b/src/intel/tools/intel_dump_gpu.c
> > index 766ba662d91..5a0283960c1 100644
> > --- a/src/intel/tools/intel_dump_gpu.c
> > +++ b/src/intel/tools/intel_dump_gpu.c
> > @@ -457,28 +457,28 @@ map_ppgtt(uint64_t start, uint64_t size)
> >
> >  for (uint64_t l4 = l4_start; l4 < l4_end; l4 += (1ULL << 39)) {
> > uint64_t l3_start = max(l4, start & 0xc000);
> > -  uint64_t l3_end = min(l4 + (1ULL << 39),
> > +  uint64_t l3_end = min(l4 + (1ULL << 39) - 1,
> >   ((start + size - 1) | 0x3fff) &
> 0x);
> > uint64_t l3_start_idx = L3_index(l3_start);
> > -  uint64_t l3_end_idx = L3_index(l3_start) >= l3_start_idx ?
> L3_index(l3_end) : 0x1ff;
> > +  uint64_t l3_end_idx = L3_index(l3_start);
>
> uint64_t l3_end_idx = L3_index(l3_end);
>

Right.  Fixed locally.  Review?


> >
> > populate_ppgtt_table(L3_table(l4), l3_start_idx, l3_end_idx, 3);
> >
> > for (uint64_t l3 = l3_start; l3 < l3_end; l3 += (1ULL << 30)) {
> >uint64_t l2_start = max(l3, start & 0xffe0);
> > - uint64_t l2_end = min(l3 + (1ULL << 30),
> > + uint64_t l2_end = min(l3 + (1ULL << 30) - 1,
> >  ((start + size - 1) | 0x001f) &
> 0x);
> >uint64_t l2_start_idx = L2_index(l2_start);
> > - uint64_t l2_end_idx = L2_index(l2_end) >= l2_start_idx ?
> L2_index(l2_end) : 0x1ff;
> > + uint64_t l2_end_idx = L2_index(l2_end);
> >
> >populate_ppgtt_table(L2_table(l3), l2_start_idx, l2_end_idx,
> 2);
> >
> >for (uint64_t l2 = l2_start; l2 < l2_end; l2 += (1ULL << 21))
> {
> >   uint64_t l1_start = max(l2, start & 0xf000);
> > -uint64_t l1_end = min(l2 + (1ULL << 21),
> > +uint64_t l1_end = min(l2 + (1ULL << 21) - 1,
> > ((start + size - 1) |
> 0x0fff) & 0x);
> >   uint64_t l1_start_idx = L1_index(l1_start);
> > -uint64_t l1_end_idx = L1_index(l1_end) >= l1_start_idx ?
> L1_index(l1_end) : 0x1ff;
> > +uint64_t l1_end_idx = L1_index(l1_end);
> >
> >   populate_ppgtt_table(L1_table(l2), l1_start_idx,
> l1_end_idx, 1);
> >}
>
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Allow AMD_perfmon on GLES contexts

2018-07-18 Thread Ilia Mirkin
On Wed, Jul 18, 2018 at 11:01 AM, Rob Clark  wrote:
> From: Eric Anholt 
>
> Reviewed-by: Rob Clark 
> ---
> Not sure if this ever got sent to list.. but this extension is meant to
> be exposed in GLES as well as GL
>
>  .../glapi/gen/AMD_performance_monitor.xml | 22 +--
>  src/mesa/main/extensions_table.h  |  2 +-
>  2 files changed, 12 insertions(+), 12 deletions(-)
>
> diff --git a/src/mapi/glapi/gen/AMD_performance_monitor.xml 
> b/src/mapi/glapi/gen/AMD_performance_monitor.xml
> index b29dc5d9036..785ea076c6f 100644
> --- a/src/mapi/glapi/gen/AMD_performance_monitor.xml
> +++ b/src/mapi/glapi/gen/AMD_performance_monitor.xml
> @@ -5,13 +5,13 @@
>
>  
>
> -
> +
>  
>  
>  
>  
>
> -
> +
>  
>  
>  
> @@ -19,14 +19,14 @@
>  
>  
>
> -
> +
>  
>  
>  
>  
>  
>
> -
> +
>  
>  
>  
> @@ -34,24 +34,24 @@
>  
>  
>
> -
> +
>  
>  
>  
>  
>  
>
> -
> +
>  
>  
>  
>
> -
> +
>  
>  
>  
>
> -
> +
>  
>  
>  
> @@ -59,15 +59,15 @@
>  
>  
>
> -
> +
>  
>  
>
> -
> +
>  
>  
>
> -
> +
>  
>  
>  
> diff --git a/src/mesa/main/extensions_table.h 
> b/src/mesa/main/extensions_table.h
> index e24287b8581..af1ae73473d 100644
> --- a/src/mesa/main/extensions_table.h
> +++ b/src/mesa/main/extensions_table.h
> @@ -10,7 +10,7 @@ EXT(3DFX_texture_compression_FXT1   , 
> TDFX_texture_compression_FXT1
>
>  EXT(AMD_conservative_depth  , ARB_conservative_depth 
> , GLL, GLC,  x ,  x , 2009)
>  EXT(AMD_draw_buffers_blend  , ARB_draw_buffers_blend 
> , GLL, GLC,  x ,  x , 2009)
> -EXT(AMD_performance_monitor , AMD_performance_monitor
> , GLL, GLC,  x ,  x , 2007)
> +EXT(AMD_performance_monitor , AMD_performance_monitor
> , GLL, GLC,  x ,  ES2 , 2007)

Please keep this aligned, as all the other entries are...

>  EXT(AMD_pinned_memory   , AMD_pinned_memory  
> , GLL, GLC,  x ,  x , 2013)
>  EXT(AMD_seamless_cubemap_per_texture, 
> AMD_seamless_cubemap_per_texture   , GLL, GLC,  x ,  x , 2009)
>  EXT(AMD_shader_stencil_export   , ARB_shader_stencil_export  
> , GLL, GLC,  x ,  x , 2009)
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] intel/dump_gpu: Fix corner cases in PPGTT range calculations

2018-07-18 Thread Lionel Landwerlin

Thanks,

Reviewed-by: Lionel Landwerlin 

On 18/07/18 16:24, Jason Ekstrand wrote:
On Wed, Jul 18, 2018 at 3:08 AM Lionel Landwerlin
<lionel.g.landwer...@intel.com> wrote:


On 18/07/18 00:05, Jason Ekstrand wrote:
> For large buffers which span an entire l1 page table, we got the
range
> calculations wrong.  In this case, we end up with an l1_start
which is
> the first byte represented by the given l1 table and an l1_end
which is
> the first byte after the range represented by the l1 table.  Then
> l2_start_index == L2_index(l2_end) due to roll-over. Instead,
compute
> lN_end using (1ULL << shift) - 1 so that lN_end is the last byte
in the
> range represented by the Nth level page table.  When we do this, we
> don't need the conditional expression anymore.
> ---
>   src/intel/tools/intel_dump_gpu.c | 12 ++--
>   1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/src/intel/tools/intel_dump_gpu.c
b/src/intel/tools/intel_dump_gpu.c
> index 766ba662d91..5a0283960c1 100644
> --- a/src/intel/tools/intel_dump_gpu.c
> +++ b/src/intel/tools/intel_dump_gpu.c
> @@ -457,28 +457,28 @@ map_ppgtt(uint64_t start, uint64_t size)
>
>      for (uint64_t l4 = l4_start; l4 < l4_end; l4 += (1ULL << 39)) {
>         uint64_t l3_start = max(l4, start & 0xc000);
> -      uint64_t l3_end = min(l4 + (1ULL << 39),
> +      uint64_t l3_end = min(l4 + (1ULL << 39) - 1,
>                               ((start + size - 1) |
0x3fff) & 0x);
>         uint64_t l3_start_idx = L3_index(l3_start);
> -      uint64_t l3_end_idx = L3_index(l3_start) >= l3_start_idx
? L3_index(l3_end) : 0x1ff;
> +      uint64_t l3_end_idx = L3_index(l3_start);

uint64_t l3_end_idx = L3_index(l3_end);


Right.  Fixed locally.  Review?

>
>         populate_ppgtt_table(L3_table(l4), l3_start_idx,
l3_end_idx, 3);
>
>         for (uint64_t l3 = l3_start; l3 < l3_end; l3 += (1ULL <<
30)) {
>            uint64_t l2_start = max(l3, start & 0xffe0);
> -         uint64_t l2_end = min(l3 + (1ULL << 30),
> +         uint64_t l2_end = min(l3 + (1ULL << 30) - 1,
>                                  ((start + size - 1) |
0x001f) & 0x);
>            uint64_t l2_start_idx = L2_index(l2_start);
> -         uint64_t l2_end_idx = L2_index(l2_end) >= l2_start_idx
? L2_index(l2_end) : 0x1ff;
> +         uint64_t l2_end_idx = L2_index(l2_end);
>
>            populate_ppgtt_table(L2_table(l3), l2_start_idx,
l2_end_idx, 2);
>
>            for (uint64_t l2 = l2_start; l2 < l2_end; l2 += (1ULL
<< 21)) {
>               uint64_t l1_start = max(l2, start & 0xf000);
> -            uint64_t l1_end = min(l2 + (1ULL << 21),
> +            uint64_t l1_end = min(l2 + (1ULL << 21) - 1,
>                                     ((start + size - 1) |
0x0fff) & 0x);
>               uint64_t l1_start_idx = L1_index(l1_start);
> -            uint64_t l1_end_idx = L1_index(l1_end) >=
l1_start_idx ? L1_index(l1_end) : 0x1ff;
> +            uint64_t l1_end_idx = L1_index(l1_end);
>
>               populate_ppgtt_table(L1_table(l2), l1_start_idx,
l1_end_idx, 1);
>            }




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] mesa: MESA_framebuffer_flip_y extension [v3]

2018-07-18 Thread Chad Versace
On Wed 11 Jul 2018, Chad Versace wrote:
> +Ken, I had a question about GLboolean. I call you by name in the
> comments below.
>
> On Fri 29 Jun 2018, Fritz Koenig wrote:
> > Adds an extension to glFramebufferParameteri
> > that will specify if the framebuffer is vertically
> > flipped. Historically system framebuffers are
> > vertically flipped and user framebuffers are not.
> > Checking to see the state was done by looking at
> > the name field.  This adds an explicit field.
> >
> > v2:
> > * updated spec language [for chadv]
> > * correctly specifying ES 3.1 [for chadv]
> > * refactor access to rb->Name [for jason]
> > * handle GetFramebufferParameteriv [for chadv]
> > v3:
> > * correct _mesa_GetMultisamplefv [for kusmabite]
> > ---
>
> >  docs/specs/MESA_framebuffer_flip_y.spec| 84 ++

[snip]

> > diff --git a/src/mesa/drivers/dri/i915/intel_fbo.c 
> > b/src/mesa/drivers/dri/i915/intel_fbo.c
> > index 827a77f722..31b65fb53b 100644
> > --- a/src/mesa/drivers/dri/i915/intel_fbo.c
> > +++ b/src/mesa/drivers/dri/i915/intel_fbo.c
> > @@ -86,7 +86,8 @@ intel_map_renderbuffer(struct gl_context *ctx,
> >GLuint x, GLuint y, GLuint w, GLuint h,
> >GLbitfield mode,
> >GLubyte **out_map,
> > -  GLint *out_stride)
> > +  GLint *out_stride,
> > +  GLboolean inverted_y)

[snip]

> And I believe the internal APIs should use 'bool' instead of
> 'GLboolean'. See commit 786a6472450b50977e6906e27d5f481e00b05d73 .
>
> Ken, should Fritz also use plain 'bool' in struct gl_framebuffer? That
> is, should it be
>
> struct gl_framebuffer {
> ...
> GLboolean FlipY;
> or
> bool FlipY;

I received feedback on #intel-3d about the GLboolean-vs-bool question.

jekstrand | chadv: The extension?  GLboolean.  Internal calls?  bool.
jekstrand | chadv: GLboolean should die!
   Kayden | chadv: definitely bool, GLboolean is evil
   Kayden | anything not touching the GL API explicitly should be bool
   Kayden | even fields in gl_context should get converted IMO
   anholt | agreed
   Kayden | we've had bugs where somebody returned a pointer as a GLboolean
   Kayden | thinking it would get treated as true/false
   Kayden | and instead it truncated to signed char
   Kayden | so based on the address it would have a random truth value
   Kayden | sorry I neglected to email you back :(
jekstrand | unless you're on a 64-bit system and it just happens to be aligned
  | correctly...

Based on their emphatic feedback, definitely use bool in all driver internal 
APIs.
And you should probably use bool in struct gl_framebuffer too.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] intel/blorp: Add a blorp_filter enum for use in blorp_blit

2018-07-18 Thread Chad Versace
On Thu 12 Jul 2018, Jason Ekstrand wrote:
> On Wed, Jun 27, 2018 at 5:55 PM Chad Versace <[1]chadvers...@chromium.org>
> wrote:
> 
> On Tue 26 Jun 2018, Jason Ekstrand wrote:
> > At the moment, this is entirely internal but we'll expose it to clients
> > of the BLORP API in the next commit.
> > ---
> >  src/intel/blorp/blorp.h      |   8 ++
> >  src/intel/blorp/blorp_blit.c | 212 +++
> >  src/intel/blorp/blorp_priv.h |  12 +-
> >  3 files changed, 123 insertions(+), 109 deletions(-)
> 
> Yup, I still read this list.
> 
> 
> \o/
>  
> 
> This patch makes the code easier to reason about. I like it.
> 
> [snip]
> 
> > +   case BLORP_FILTER_BILINEAR:
> > +      assert(!key->src_tiled_w);
> > +      assert(key->tex_samples == key->src_samples);
> > +      assert(key->tex_layout == key->src_layout);
> 
> What guarantees !key->src_tiled_w ? I can't deduce it from the patch.
> 
> 
> That's stencil and you can't do a filtered scaled blit with stencil, only
> nearest.  I believe this is required/checked fairly high up in the GL API 
> area.
>  
> 
> From my understanding of the patch, the patch allows the deduction
> below. What is the missing step to !key->src_tiled_w? Does GL not allow
> GL_LINEAR on stencil buffers? (If it does, though, then GL is dumb).
> 
> 
> Correct.  Which means that !stencil || LINEAR -> !stencil
>  
> 
> (key.filter == BLORP_FILTER_BILINEAR) <-> ((blend && blit_scaled) ||
> bilinear_filter)
>                                        -> (blend || bilinear_filter)
>                                        -> (!(src_surf.usage &
> ISL_SURF_USAGE_STENCIL_BIT) || (gl_filter == GL_LINEAR))
>                                        ?
>                                        -> !stencil
>                                        -> !key->src_tiled_w
> [snip]
> 
> > +   case BLORP_FILTER_AVERAGE:
> > +      assert(!key->src_tiled_w);
> > +      assert(key->tex_samples == key->src_samples);
> > +      assert(key->tex_layout == key->src_layout);
> > +
> 
> I expected to see assert(key->src_samples > 1) in this case.
> Just an observation.
> 
> [snip]
> 
> > +   /* We are downsampling a non-integer color buffer, so blend.
> 
> This phrase is no longer inside an if.  It should say "If we are...,
> then blend.". Or "Blend if we are...".
> 
> 
> I've changed it to "If we are..."

Ok. This patch is
Reviewed-by: Chad Versace 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] intel/tools: Add an error state to aub translator

2018-07-18 Thread Jason Ekstrand
On Wed, Jul 18, 2018 at 7:33 AM Lionel Landwerlin <
lionel.g.landwer...@intel.com> wrote:

> Awesome tool! And not even too much code!
>
> Reviewed-by: Lionel Landwerlin 
>

Thanks!


> In the interest of consistency and because this is an installable
> binary, I would rename it.
> Now for the hard part : intel_error2aub ?
>

Done.


> -
> Lionel
>
> On 18/07/18 00:05, Jason Ekstrand wrote:
> > ---
> >   src/intel/tools/error2aub.c | 332 
> >   src/intel/tools/meson.build |  11 ++
> >   2 files changed, 343 insertions(+)
> >   create mode 100644 src/intel/tools/error2aub.c
> >
> > diff --git a/src/intel/tools/error2aub.c b/src/intel/tools/error2aub.c
> > new file mode 100644
> > index 000..ece41d93e6c
> > --- /dev/null
> > +++ b/src/intel/tools/error2aub.c
> > @@ -0,0 +1,332 @@
> > +/*
> > + * Copyright © 2007-2017 Intel Corporation
> > + *
> > + * Permission is hereby granted, free of charge, to any person
> obtaining a
> > + * copy of this software and associated documentation files (the
> "Software"),
> > + * to deal in the Software without restriction, including without
> limitation
> > + * the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
> > + * and/or sell copies of the Software, and to permit persons to whom the
> > + * Software is furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice (including the
> next
> > + * paragraph) shall be included in all copies or substantial portions
> of the
> > + * Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
> SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> DEALINGS
> > + * IN THE SOFTWARE.
> > + *
> > + */
> > +
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +
> > +#include "aub_write.h"
> > +#include "i915_drm.h"
> > +#include "intel_aub.h"
> > +
> > +static void __attribute__ ((format(__printf__, 2, 3)))
> > +fail_if(int cond, const char *format, ...)
> > +{
> > +   va_list args;
> > +
> > +   if (!cond)
> > +  return;
> > +
> > +   va_start(args, format);
> > +   vfprintf(stderr, format, args);
> > +   va_end(args);
> > +
> > +   raise(SIGTRAP);
> > +}
> > +
> > +#define fail(...) fail_if(true, __VA_ARGS__)
> > +
> > +static int zlib_inflate(uint32_t **ptr, int len)
> > +{
> > +   struct z_stream_s zstream;
> > +   void *out;
> > +   const uint32_t out_size = 128*4096;  /* approximate obj size */
> > +
> > +   memset(&zstream, 0, sizeof(zstream));
> > +
> > +   zstream.next_in = (unsigned char *)*ptr;
> > +   zstream.avail_in = 4*len;
> > +
> > +   if (inflateInit(&zstream) != Z_OK)
> > +  return 0;
> > +
> > +   out = malloc(out_size);
> > +   zstream.next_out = out;
> > +   zstream.avail_out = out_size;
> > +
> > +   do {
> > +  switch (inflate(&zstream, Z_SYNC_FLUSH)) {
> > +  case Z_STREAM_END:
> > + goto end;
> > +  case Z_OK:
> > + break;
> > +  default:
> > + inflateEnd(&zstream);
> > + return 0;
> > +  }
> > +
> > +  if (zstream.avail_out)
> > + break;
> > +
> > +  out = realloc(out, 2*zstream.total_out);
> > +  if (out == NULL) {
> > + inflateEnd(&zstream);
> > + return 0;
> > +  }
> > +
> > +  zstream.next_out = (unsigned char *)out + zstream.total_out;
> > +  zstream.avail_out = zstream.total_out;
> > +   } while (1);
> > + end:
> > +   inflateEnd(&zstream);
> > +   free(*ptr);
> > +   *ptr = out;
> > +   return zstream.total_out / 4;
> > +}
> > +
> > +static int ascii85_decode(const char *in, uint32_t **out, bool inflate)
> > +{
> > +   int len = 0, size = 1024;
> > +
> > +   *out = realloc(*out, sizeof(uint32_t)*size);
> > +   if (*out == NULL)
> > +  return 0;
> > +
> > +   while (*in >= '!' && *in <= 'z') {
> > +  uint32_t v = 0;
> > +
> > +  if (len == size) {
> > + size *= 2;
> > + *out = realloc(*out, sizeof(uint32_t)*size);
> > + if (*out == NULL)
> > +return 0;
> > +  }
> > +
> > +  if (*in == 'z') {
> > + in++;
> > +  } else {
> > + v += in[0] - 33; v *= 85;
> > + v += in[1] - 33; v *= 85;
> > + v += in[2] - 33; v *= 85;
> > + v += in[3] - 33; v *= 85;
> > + v += in[4] - 33;
> > + in += 5;
> > +  }
> > +  (*out)[len++] = v;
> > +   }
> > +
> > +   if (!inflate)
> > +  return len;
> > +
> > +   return zlib_inflate(out, len);
> > +}
> > +
> > +static void
> > +print_help(const char *pro

Re: [Mesa-dev] [PATCH] intel: tools: dump: remove command execution feature

2018-07-18 Thread Jason Ekstrand
I'm happy for us to do this and it would have made my life easier when
refactoring the aub_write code.  Unfortunately, since I just landed that,
this will need some rebasing.   Sorry, I didn't realize how you'd intended
things to be ordered before I landed my refactors. :-(

On Wed, Jul 18, 2018 at 7:20 AM Lionel Landwerlin <
lionel.g.landwer...@intel.com> wrote:

> In commit 86cb05a6d35a52 ("intel: aubinator: remove standard input
> processing option") we removed the ability to process aub as an input
> stream because we now rely on mmapping the aub file to back the
> buffer aubinator is parsing.
>
> intel_aubdump was the provider of the standard input data and since
> we've copied/reworked intel_aubdump into intel_dump_gpu within Mesa,
> we don't need that code anymore.
>
> Signed-off-by: Lionel Landwerlin 
> ---
>  src/intel/tools/intel_dump_gpu.c  | 73 ---
>  src/intel/tools/intel_dump_gpu.in | 27 +---
>  2 files changed, 10 insertions(+), 90 deletions(-)
>
> diff --git a/src/intel/tools/intel_dump_gpu.c
> b/src/intel/tools/intel_dump_gpu.c
> index 766ba662d91..6758cab13c4 100644
> --- a/src/intel/tools/intel_dump_gpu.c
> +++ b/src/intel/tools/intel_dump_gpu.c
> @@ -254,7 +254,7 @@ static int (*libc_ioctl)(int fd, unsigned long
> request, ...) = ioctl_init_helper
>
>  static int drm_fd = -1;
>  static char *filename = NULL;
> -static FILE *files[2] = { NULL, NULL };
> +static FILE *aub_file = NULL;
>  static struct gen_device_info devinfo = {0};
>  static int verbose = 0;
>  static bool device_override;
> @@ -320,13 +320,8 @@ align_u32(uint32_t v, uint32_t a)
>  static void
>  dword_out(uint32_t data)
>  {
> -   for (int i = 0; i < ARRAY_SIZE (files); i++) {
> -  if (files[i] == NULL)
> - continue;
> -
> -  fail_if(fwrite(&data, 1, 4, files[i]) == 0,
> -  "Writing to output failed\n");
> -   }
> +   fail_if(fwrite(&data, 1, 4, aub_file) == 0,
> +   "Writing to output failed\n");
>  }
>
>  static void
> @@ -335,13 +330,8 @@ data_out(const void *data, size_t size)
> if (size == 0)
>return;
>
> -   for (int i = 0; i < ARRAY_SIZE (files); i++) {
> -  if (files[i] == NULL)
> - continue;
> -
> -  fail_if(fwrite(data, 1, size, files[i]) == 0,
> -  "Writing to output failed\n");
> -   }
> +   fail_if(fwrite(data, 1, size, aub_file) == 0,
> +   "Writing to output failed\n");
>  }
>
>  static uint32_t
> @@ -990,10 +980,7 @@ dump_execbuffer2(int fd, struct
> drm_i915_gem_execbuffer2 *execbuffer2)
>ring_flag);
> }
>
> -   for (int i = 0; i < ARRAY_SIZE(files); i++) {
> -  if (files[i] != NULL)
> - fflush(files[i]);
> -   }
> +   fflush(aub_file);
>
> if (device_override &&
> (execbuffer2->flags & I915_EXEC_FENCE_ARRAY) != 0) {
> @@ -1044,40 +1031,6 @@ close(int fd)
> return libc_close(fd);
>  }
>
> -static FILE *
> -launch_command(char *command)
> -{
> -   int i = 0, fds[2];
> -   char **args = calloc(strlen(command), sizeof(char *));
> -   char *iter = command;
> -
> -   args[i++] = iter = command;
> -
> -   while ((iter = strstr(iter, ",")) != NULL) {
> -  *iter = '\0';
> -  iter += 1;
> -  args[i++] = iter;
> -   }
> -
> -   if (pipe(fds) == -1)
> -  return NULL;
> -
> -   switch (fork()) {
> -   case 0:
> -  dup2(fds[0], 0);
> -  fail_if(execvp(args[0], args) == -1,
> -  "intel_aubdump: failed to launch child command\n");
> -  return NULL;
> -
> -   default:
> -  free(args);
> -  return fdopen(fds[1], "w");
> -
> -   case -1:
> -  return NULL;
> -   }
> -}
> -
>  static void
>  maybe_init(void)
>  {
> @@ -1105,15 +1058,10 @@ maybe_init(void)
>   device_override = true;
>} else if (!strcmp(key, "file")) {
>   filename = strdup(value);
> - files[0] = fopen(filename, "w+");
> - fail_if(files[0] == NULL,
> + aub_file = fopen(filename, "w+");
> + fail_if(aub_file == NULL,
>   "intel_aubdump: failed to open file '%s'\n",
>   filename);
> -  } else if (!strcmp(key,  "command")) {
> - files[1] = launch_command(value);
> - fail_if(files[1] == NULL,
> - "intel_aubdump: failed to launch command '%s'\n",
> - value);
>} else {
>   fprintf(stderr, "intel_aubdump: unknown option '%s'\n", key);
>}
> @@ -1285,9 +1233,6 @@ static void __attribute__ ((destructor))
>  fini(void)
>  {
> free(filename);
> -   for (int i = 0; i < ARRAY_SIZE(files); i++) {
> -  if (files[i] != NULL)
> - fclose(files[i]);
> -   }
> +   fclose(aub_file);
> free(bos);
>  }
> diff --git a/src/intel/tools/intel_dump_gpu.in b/src/intel/tools/
> intel_dump_gpu.in
> index b9887f0ed2e..9eea37189db 100755
> --- a/src/intel/tools/intel_dump_gpu.in
> +++ b/src/intel/tools/intel_dump_gpu.in
> @@ -10,9 +10,6 @@ contents and execution of the GEM application.

Re: [Mesa-dev] [PATCH] intel: tools: dump: remove command execution feature

2018-07-18 Thread Lionel Landwerlin

No worries, I should have removed it when I included the tool.

Sending a v2.

On 18/07/18 17:13, Jason Ekstrand wrote:
I'm happy for us to do this and it would have made my life easier when 
refactoring the aub_write code. Unfortunately, since I just landed 
that, this will need some rebasing.   Sorry, I didn't realize how 
you'd intended things to be ordered before I landed my refactors. :-(


On Wed, Jul 18, 2018 at 7:20 AM Lionel Landwerlin
<lionel.g.landwer...@intel.com> wrote:


In commit 86cb05a6d35a52 ("intel: aubinator: remove standard input
processing option") we removed the ability to process aub as an input
stream because we now rely on mmapping the aub file to back the
buffer aubinator is parsing.

intel_aubdump was the provider of the standard input data and since
we've copied/reworked intel_aubdump into intel_dump_gpu within Mesa,
we don't need that code anymore.

Signed-off-by: Lionel Landwerlin <lionel.g.landwer...@intel.com>
---
 src/intel/tools/intel_dump_gpu.c  | 73
---
 src/intel/tools/intel_dump_gpu.in  | 27
+---
 2 files changed, 10 insertions(+), 90 deletions(-)

diff --git a/src/intel/tools/intel_dump_gpu.c
b/src/intel/tools/intel_dump_gpu.c
index 766ba662d91..6758cab13c4 100644
--- a/src/intel/tools/intel_dump_gpu.c
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -254,7 +254,7 @@ static int (*libc_ioctl)(int fd, unsigned long
request, ...) = ioctl_init_helper

 static int drm_fd = -1;
 static char *filename = NULL;
-static FILE *files[2] = { NULL, NULL };
+static FILE *aub_file = NULL;
 static struct gen_device_info devinfo = {0};
 static int verbose = 0;
 static bool device_override;
@@ -320,13 +320,8 @@ align_u32(uint32_t v, uint32_t a)
 static void
 dword_out(uint32_t data)
 {
-   for (int i = 0; i < ARRAY_SIZE (files); i++) {
-      if (files[i] == NULL)
-         continue;
-
-      fail_if(fwrite(&data, 1, 4, files[i]) == 0,
-              "Writing to output failed\n");
-   }
+   fail_if(fwrite(&data, 1, 4, aub_file) == 0,
+           "Writing to output failed\n");
 }

 static void
@@ -335,13 +330,8 @@ data_out(const void *data, size_t size)
    if (size == 0)
       return;

-   for (int i = 0; i < ARRAY_SIZE (files); i++) {
-      if (files[i] == NULL)
-         continue;
-
-      fail_if(fwrite(data, 1, size, files[i]) == 0,
-              "Writing to output failed\n");
-   }
+   fail_if(fwrite(data, 1, size, aub_file) == 0,
+           "Writing to output failed\n");
 }

 static uint32_t
@@ -990,10 +980,7 @@ dump_execbuffer2(int fd, struct
drm_i915_gem_execbuffer2 *execbuffer2)
                           ring_flag);
    }

-   for (int i = 0; i < ARRAY_SIZE(files); i++) {
-      if (files[i] != NULL)
-         fflush(files[i]);
-   }
+   fflush(aub_file);

    if (device_override &&
        (execbuffer2->flags & I915_EXEC_FENCE_ARRAY) != 0) {
@@ -1044,40 +1031,6 @@ close(int fd)
    return libc_close(fd);
 }

-static FILE *
-launch_command(char *command)
-{
-   int i = 0, fds[2];
-   char **args = calloc(strlen(command), sizeof(char *));
-   char *iter = command;
-
-   args[i++] = iter = command;
-
-   while ((iter = strstr(iter, ",")) != NULL) {
-      *iter = '\0';
-      iter += 1;
-      args[i++] = iter;
-   }
-
-   if (pipe(fds) == -1)
-      return NULL;
-
-   switch (fork()) {
-   case 0:
-      dup2(fds[0], 0);
-      fail_if(execvp(args[0], args) == -1,
-              "intel_aubdump: failed to launch child command\n");
-      return NULL;
-
-   default:
-      free(args);
-      return fdopen(fds[1], "w");
-
-   case -1:
-      return NULL;
-   }
-}
-
 static void
 maybe_init(void)
 {
@@ -1105,15 +1058,10 @@ maybe_init(void)
          device_override = true;
       } else if (!strcmp(key, "file")) {
          filename = strdup(value);
-         files[0] = fopen(filename, "w+");
-         fail_if(files[0] == NULL,
+         aub_file = fopen(filename, "w+");
+         fail_if(aub_file == NULL,
                  "intel_aubdump: failed to open file '%s'\n",
                  filename);
-      } else if (!strcmp(key,  "command")) {
-         files[1] = launch_command(value);
-         fail_if(files[1] == NULL,
-                 "intel_aubdump: failed to launch command '%s'\n",
-                 value);
       } else {
          fprintf(stderr, "intel_aubdump: unknown option '%s'\n",
key);
       }
@@ -1285,9 +1233,6 @@ static void __attribute__ ((destructor))
 fini(void)
 {
    free(f

Re: [Mesa-dev] [PATCH v5] i965: Fix ETC2/EAC GetCompressed* functions on Gen7 GPUs

2018-07-18 Thread Nanley Chery
On Wed, Jul 18, 2018 at 05:34:13PM +0300, Eleni Maria Stea wrote:
> On 07/10/2018 03:10 AM, Nanley Chery wrote:
> > On Thu, Jun 14, 2018 at 10:50:57PM +0300, Eleni Maria Stea wrote:
> >> On 06/14/2018 10:27 PM, Nanley Chery wrote:
> >>
> >>> +Jason, Ken
> >>>
> >>> Hello,
> >>>
> >>> I recently did some miptree work relating to the r8stencil_mt and I
> >>> think I now have a more informed opinion about how things should be
> >>> structured. I'd like to propose an alternative solution.
> >>>
> >>> I had initially thought we should have a separate miptree to hold the
> >>> compressed data, like this patch does, but now I think we should
> >>> actually have the compressed data be the main miptree and to store the
> >>> decompressed miptree as part of the main one. The reasoning is that we
> >>> could reuse this structure to handle the r8stencil workaround and to
> >>> eventually handle the ASTC_LDR surfaces that are modified on gen9.
> >>>
> >>> I'm proposing something like the following:
> >>>
> >>> 1. Rename r8stencil_mt ->shadow_mt and
> >>>r8stencil_needs_update -> shadow_needs_update.
> >>> 2. Make shadow_mt hold the decompressed ETC miptree
> >>> 3. Update shadow_needs_update whenever the main mt is modified
> >>> 4. Add a function to update the shadow_mt using the main mt as a source
> >>> 5. Sample from the shadow_mt as appropriate
> >>> 6. Make the main miptree hold the compressed data
> >>>
> >>> This method should also be able to handle the CopyImage functions. What
> >>> do you all think?
> >>>
> >>> -Nanley
> >>
> >> Hi Nanley,
> >>
> >> Thank you for your reply. I wasn't aware that there are other cases we
> >> might need to store a 2nd image. I agree that it's more reasonable to
> >> use one generic purpose miptree that can be accessible from different
> >> parts of the i965 code for such cases instead of storing miptrees in
> >> different places for different hacks when a feature is not supported.
> >>
> >> I will find your patch to take a look and I will also take a look at the
> >> mesa code to see how easy this fix would be (which parts of the code it
> >> might affect) and if everyone agrees that this is a good idea I will
> >> modify this patch according to your suggestions.
> >>
> >> BR :)
> >> Eleni
> > 
> > Hi Eleni,
> > 
> > I gave this more thought and am now thinking that what you have here is
> > fine. Having two different ways of working with a shadow miptree
> > suggests a refactor later on, but IMO this is ultimately a step in the
> > right direction. Sorry for the noise.
> > 
> > With code-sharing among shadow miptrees in mind, my two main
> > suggestions are 1) to perform mapping operations only with the cmt (if
> > it's present) and 2) to update the decompressed mt, on demand. Maybe
> > with intel_miptree_copy_slice_sw?
> > 
> > Regards,
> > Nanley
> > 
> 
> Hi Nanley,
> 
> I talked to you on IRC but I reply here as well:
> 
> Thank you for the suggestions, I had misunderstood something from our
> IRC conversation that followed this e-mail, so the patch v6 has several
> issues. I will send a new one soon and I will implement the solution you
> suggested earlier (suggestions 1-6) instead. Sorry for the noise with
> the patch v6.
> 

Sounds good. By the way, I think it'd be helpful if you sent out the
solution as a series of patches (see git format-patch - for example).
That way it's easier to confirm each step of the solution is correct.

-Nanley

> Thanks,
> Eleni
> 
> 
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107276] radv: OpBitfieldUExtract returns incorrect result when count is zero

2018-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107276

soredake  changed:

   What|Removed |Added

 CC||fds...@krutt.org

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v5 3/3] r600: Correct evaluation of cube array index and face

2018-07-18 Thread Roland Scheidegger
Am 17.07.2018 um 19:04 schrieb Gert Wollny:
> From: Gert Wollny 
> 
> The array index needs to be corrected and it must be ensured that it is
> rounded and its value is non-negative before it is combined with the
> face id.
> 
> v5: Use RNDNE instead of ADD 0.5 and FLOOR (Ilia Mirkin)
> 
> Fixes 182 from android/cts/master/gles31-master.txt:
>   dEQP-GLES31.functional.texture.filtering.cube_array.formats.*
>   dEQP-GLES31.functional.texture.filtering.cube_array.sizes.*
>   
> dEQP-GLES31.functional.texture.filtering.cube_array.combinations.nearest_mipmap_*
>   
> dEQP-GLES31.functional.texture.filtering.cube_array.combinations.linear_mipmap_*
>   dEQP-GLES31.functional.texture.filtering.cube_array.no_edges_visible.*
> 
> Signed-off-by: Gert Wollny 
> ---
>  src/gallium/drivers/r600/r600_shader.c | 34 
> +-
>  1 file changed, 33 insertions(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/r600/r600_shader.c 
> b/src/gallium/drivers/r600/r600_shader.c
> index 15e35f006c..f170b30aee 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -7720,11 +7720,43 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
>   if (r)
>   return r;
>  
> + /* Evaluate the array index according to 
> floor(idx + 0.5). This
> +  * needs to be done before merging the face 
> select value, because
> +  * otherwise the fractional part of the array 
> indes will interfere
index

> +  * with the face select value */
> + memset(&alu, 0, sizeof(struct 
> r600_bytecode_alu));
> + r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
> + alu.op = ALU_OP1_RNDNE;
> + alu.dst.sel = ctx->temp_reg;
> + alu.dst.chan = 3;
> + alu.dst.write = 1;
> + alu.last = 1;
> + r = r600_bytecode_add_alu(ctx->bc, &alu);
> + if (r)
> + return r;
> +
> + /* Because the array slice index and the cube 
> face index are merged
> +  * into one value we have to make sure the 
> array slice index is >= 0,
> +  * otherwise the face selection will fail */
> + memset(&alu, 0, sizeof(struct 
> r600_bytecode_alu));
> + alu.op = ALU_OP2_MAX;
> + alu.src[0].sel = ctx->temp_reg;
> + alu.src[0].chan = 3;
> + alu.src[1].sel = V_SQ_ALU_SRC_0;
> + alu.dst.sel = ctx->temp_reg;
> + alu.dst.chan = 3;
> + alu.dst.write = 1;
> + alu.last = 1;
> + r = r600_bytecode_add_alu(ctx->bc, &alu);
> + if (r)
> + return r;
> +
Kind of lame that we have to do shader workarounds for a corner case
(who in their right mind relies on negative array indices getting
clamped to 0...), and worse it's even all dependent alu instructions,
but I don't see another way.

For the series:
Reviewed-by: Roland Scheidegger 

/* have to multiply original layer by 8 and add 
to face id (temp.w)
in Z */
>   memset(&alu, 0, sizeof(struct 
> r600_bytecode_alu));
>   alu.op = ALU_OP3_MULADD;
>   alu.is_op3 = 1;
> - r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
> + alu.src[0].sel = ctx->temp_reg;
> + alu.src[0].chan = 3;
>   alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
>   alu.src[1].chan = 0;
>   alu.src[1].value = u_bitcast_f2u(8.0f);
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] libGLw: Use newly introduced GLAPIVAR for variables

2018-07-18 Thread Brian Paul

On 07/17/2018 06:47 PM, Stefan Dirsch wrote:

On Tue, Jul 17, 2018 at 04:57:26PM -0600, Brian Paul wrote:

Reviewed-by: Brian Paul 

Do you need me to push this for you?


I'm afraid the answer is yes. Tried it but push hangs forever after this

# git push --verbose
Pushing to ssh://git.freedesktop.org/git/mesa/glw.git
Counting objects: 4, done.
Delta compression using up to 8 threads.
Compressing objects: 100% (4/4), done.
Writing objects: 100% (4/4), 700 bytes | 350.00 KiB/s, done.
Total 4 (delta 3), reused 0 (delta 0)


Worked for me.

My first guess was glw was migrated to gitlab.freedesktop.org, but it 
looks like it has not.


-Brian
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 5/5] nv50/ir: further optimize multiplication by immediates

2018-07-18 Thread Rhys Perry
Strongly mitigates the harm from the previous commit, which made many
integer multiplications much more heavy on the register and instruction
count.

total instructions in shared programs : 5294693 -> 5268293 (-0.50%)
total gprs used in shared programs: 624962 -> 624196 (-0.12%)
total shared used in shared programs  : 360704 -> 360704 (0.00%)
total local used in shared programs   : 21048 -> 20952 (-0.46%)

                local     shared        gpr       inst      bytes
    helped          1          0        368       1772       1772
      hurt          0          0         74         23         23

Signed-off-by: Rhys Perry 
---
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 135 ++---
 1 file changed, 121 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 14cc4b32d4..5b23b816b3 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -378,6 +378,10 @@ private:
 
CmpInstruction *findOriginForTestWithZero(Value *);
 
+   Value *createMulMethod1(Value *a, unsigned b, Value *c);
+   Value *createMulMethod2(Value *a, unsigned b, Value *c);
+   Value *createMul(Value *a, unsigned b, Value *c);
+
unsigned int foldCount;
 
BuildUtil bld;
@@ -952,6 +956,97 @@ ConstantFolding::opnd3(Instruction *i, ImmediateValue 
&imm2)
}
 }
 
+Value *
+ConstantFolding::createMulMethod1(Value *a, unsigned b, Value *c)
+{
+   if (b == 1)
+  return a;
+
+   // Basically constant folded shift and add multiplication.
+   Value *res = c ? c : bld.loadImm(NULL, 0u);
+   bool resZero = !c;
+   unsigned ashift = 0;
+   while (b) {
+  if ((b & 1) && ashift) {
+ if (resZero)
+res = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), a, 
bld.mkImm(ashift));
+ else
+res = bld.mkOp3v(OP_SHLADD, TYPE_U32, bld.getSSA(), a, 
bld.mkImm(ashift), res);
+ resZero = false;
+  } else if (b & 1) {
+ if (resZero)
+res = a;
+ else
+res = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), res, a);
+ resZero = false;
+  }
+  b >>= 1;
+  ashift++;
+   }
+   return res;
+}
+
+Value *
+ConstantFolding::createMulMethod2(Value *a, unsigned b, Value *c)
+{
+   uint64_t b2 = u_next_power_of_two(b);
+   unsigned b2shift = ffsll(b2) - 1;
+   if (b2 != b) { // a * b2 - a * (b2 - b)
+  // mul1 = a * (b2 - b)
+  Value *mul1 = createMulMethod1(a, b2 - b, NULL);
+
+  if (b2shift < 32 && c) { // a * b2 - mul1 + c (implemented as a * b2 + c 
- mul1)
+ return bld.mkOp2v(OP_SUB, TYPE_U32, bld.getSSA(),
+   bld.mkOp3v(OP_SHLADD, TYPE_U32, bld.getSSA(),
+  a, bld.mkImm(b2shift), c),
+   mul1);
+  } else
+  if (b2shift < 32) { // a * b2 - mul1
+ Value *res = bld.getSSA();
+ Instruction *i = bld.mkOp3(OP_SHLADD, TYPE_U32, res, a, 
bld.mkImm(b2shift), mul1);
+ if (bld.getProgram()->getTarget()->isModSupported(i, 2, 
NV50_IR_MOD_NEG))
+i->src(2).mod *= Modifier(NV50_IR_MOD_NEG);
+ else
+i->setSrc(2, bld.mkOp1v(OP_NEG, TYPE_U32, bld.getSSA(), mul1));
+ return res;
+  } else
+  if (c) { // - mul1 + c (implemented as c - mul1)
+ return bld.mkOp2v(OP_SUB, TYPE_U32, bld.getSSA(), c, mul1);
+  } else { // - mul1
+ return bld.mkOp1v(OP_NEG, TYPE_U32, bld.getSSA(), mul1);
+  }
+   } else {
+  if (c) // a * b2 + c
+ return bld.mkOp3v(OP_SHLADD, TYPE_U32, bld.getSSA(), a, 
bld.mkImm(b2shift), c);
+  else // a * b2
+ return bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), a, 
bld.loadImm(NULL, b2shift));
+   }
+}
+
+Value *
+ConstantFolding::createMul(Value *a, unsigned b, Value *c)
+{
+   unsigned cost[2];
+
+   // Estimate cost for first method (a << i) + (b << j) + ...
+   cost[0] = u_bit_count64(b >> 1);
+
+   // Estimate cost for second method (a << i) - ((a << j) + (a << k) + ...)
+   uint64_t rounded_b = u_next_power_of_two(b);
+   cost[1] = rounded_b == b ? 1 : (u_bit_count64((rounded_b - b) >> 1) + 2);
+   if (c) cost[1]++;
+
+   // The general method, multiplication by XMADs, costs three instructions.
+   // So nothing larger than that or it could be making things worse.
+   if (cost[0] > 3 && cost[1] > 3)
+  return NULL;
+
+   if (cost[0] < cost[1])
+  return createMulMethod1(a, b, c);
+   else
+  return createMulMethod2(a, b, c);
+}
+
 void
 ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
 {
@@ -1039,13 +1134,25 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
&imm0, int s)
  i->setSrc(s, i->getSrc(t));
  i->src(s).mod = i->src(t).mod;
   } else
-  if (!isFloatType(i->sType) && !imm0.isNegative() && imm0.isPow2()) {
- i->op = OP_SHL;
-  

[Mesa-dev] [PATCH v2 3/5] nv50/ir: optimize imul/imad to xmads

2018-07-18 Thread Rhys Perry
This hits the shader-db numbers a good bit, though a few xmads are way
faster than an imul or imad, and the cost is mitigated by the next commit,
which optimizes many multiplications by immediates into shorter and less
register heavy instructions than the xmads.

total instructions in shared programs : 5256901 -> 5294693 (0.72%)
total gprs used in shared programs: 624328 -> 624962 (0.10%)
total shared used in shared programs  : 360704 -> 360704 (0.00%)
total local used in shared programs   : 20952 -> 21048 (0.46%)

         local  shared     gpr    inst   bytes
helped       0       0      39       0       0
  hurt       1       0     334    2277    2277

Signed-off-by: Rhys Perry 
---
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 51 ++
 .../nouveau/codegen/nv50_ir_target_gm107.cpp   |  1 -
 2 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 5fc1fba970..14cc4b32d4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -2291,13 +2291,18 @@ AlgebraicOpt::visit(BasicBlock *bb)
 // 
=
 
 // ADD(SHL(a, b), c) -> SHLADD(a, b, c)
+// MUL(a, b) -> a few XMADs
+// MAD/FMA(a, b, c) -> a few XMADs
 class LateAlgebraicOpt : public Pass
 {
 private:
virtual bool visit(Instruction *);
 
void handleADD(Instruction *);
+   void handleMULMAD(Instruction *);
bool tryADDToSHLADD(Instruction *);
+
+   BuildUtil bld;
 };
 
 void
@@ -2357,6 +2362,47 @@ LateAlgebraicOpt::tryADDToSHLADD(Instruction *add)
 
return true;
 }
+ 
+// MUL(a, b) -> a few XMADs
+// MAD/FMA(a, b, c) -> a few XMADs
+void
+LateAlgebraicOpt::handleMULMAD(Instruction *i)
+{
+   // TODO: handle NV50_IR_SUBOP_MUL_HIGH
+   if (!prog->getTarget()->isOpSupported(OP_XMAD, TYPE_U32))
+  return;
+   if (isFloatType(i->dType) || typeSizeof(i->dType) != 4)
+  return;
+   if (i->subOp || i->usesFlags() || i->flagsDef >= 0)
+  return;
+
+   assert(!i->src(0).mod);
+   assert(!i->src(1).mod);
+   assert(i->op == OP_MUL ? 1 : !i->src(2).mod);
+
+   bld.setPosition(i, true);
+
+   Value *a = i->getSrc(0);
+   Value *b = i->getSrc(1);
+   Value *c = i->op == OP_MUL ? bld.mkImm(0) : i->getSrc(2);
+
+   Value *tmp0 = bld.getSSA();
+   Value *tmp1 = bld.getSSA();
+
+   Instruction *insn = bld.mkOp3(OP_XMAD, TYPE_U32, tmp0, b, a, c);
+   insn->setPredicate(i->cc, i->getPredicate());
+
+   insn = bld.mkOp3(OP_XMAD, TYPE_U32, tmp1, b, a, bld.mkImm(0));
+   insn->setPredicate(i->cc, i->getPredicate());
+   insn->subOp = NV50_IR_SUBOP_XMAD_MRG | NV50_IR_SUBOP_XMAD_H1(1);
+
+   insn = bld.mkOp3(OP_XMAD, TYPE_U32, i->getDef(0), b, tmp1, tmp0);
+   insn->setPredicate(i->cc, i->getPredicate());
+   insn->subOp = NV50_IR_SUBOP_XMAD_PSL | NV50_IR_SUBOP_XMAD_CBCC;
+   insn->subOp |= NV50_IR_SUBOP_XMAD_H1(0) | NV50_IR_SUBOP_XMAD_H1(1);
+
+   delete_Instruction(prog, i);
+}
 
 bool
 LateAlgebraicOpt::visit(Instruction *i)
@@ -2365,6 +2411,11 @@ LateAlgebraicOpt::visit(Instruction *i)
case OP_ADD:
   handleADD(i);
   break;
+   case OP_MUL:
+   case OP_MAD:
+   case OP_FMA:
+  handleMULMAD(i);
+  break;
default:
   break;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
index f918fbfdd3..571d8a67c2 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
@@ -165,7 +165,6 @@ TargetGM107::isBarrierRequired(const Instruction *insn) 
const
   }
   break;
case OPCLASS_ARITH:
-  // TODO: IMUL/IMAD require barriers too, use of XMAD instead!
   if ((insn->op == OP_MUL || insn->op == OP_MAD) &&
   !isFloatType(insn->dType))
  return true;
-- 
2.14.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 1/5] nv50/ir: add preliminary support for OP_XMAD

2018-07-18 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir.h  | 23 ++
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 17 ++--
 .../drivers/nouveau/codegen/nv50_ir_print.cpp  | 18 +
 .../drivers/nouveau/codegen/nv50_ir_target.cpp |  7 ---
 .../nouveau/codegen/nv50_ir_target_gm107.cpp   |  1 +
 .../nouveau/codegen/nv50_ir_target_nv50.cpp|  1 +
 .../nouveau/codegen/nv50_ir_target_nvc0.cpp| 15 ++
 7 files changed, 77 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index 0b220cc48d..9798e98a1a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -58,6 +58,9 @@ enum operation
OP_FMA,
OP_SAD, // abs(src0 - src1) + src2
OP_SHLADD,
+   // extended multiply-add (GM107+), does a lot of things.
+   // see envytools for detailed documentation
+   OP_XMAD,
OP_ABS,
OP_NEG,
OP_NOT,
@@ -256,6 +259,26 @@ enum operation
 #define NV50_IR_SUBOP_MINMAX_MED  2
 #define NV50_IR_SUBOP_MINMAX_HIGH 3
 
+// xmad(src0, src1, 0) << 16 + src2
+#define NV50_IR_SUBOP_XMAD_PSL (1 << 0)
+// (xmad(src0, src1, src2) & 0xffff) | (src1 << 16)
+#define NV50_IR_SUBOP_XMAD_MRG (1 << 1)
+// xmad(src0, src1, src2.lo)
+#define NV50_IR_SUBOP_XMAD_CLO (1 << 2)
+// xmad(src0, src1, src2.hi)
+#define NV50_IR_SUBOP_XMAD_CHI (2 << 2)
+// if both operands to the multiplication are non-zero, subtract 65536 for each
+// negative operand
+#define NV50_IR_SUBOP_XMAD_CSFU (3 << 2)
+// xmad(src0, src1, src2) + src1 << 16
+#define NV50_IR_SUBOP_XMAD_CBCC (4 << 2)
+#define NV50_IR_SUBOP_XMAD_CMODE_MASK (0x7 << 2)
+
+// use the high 16 bits instead of the low 16 bits for the multiplication.
+// if the instruction's sType is signed, sign extend the operand from 16 bits
+// to 32 before multiplication.
+#define NV50_IR_SUBOP_XMAD_H1(i) (1 << (6 + (i)))
+
 enum DataType
 {
TYPE_NONE,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 4e08cfadec..5fc1fba970 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -191,9 +191,16 @@ void
 LoadPropagation::checkSwapSrc01(Instruction *insn)
 {
const Target *targ = prog->getTarget();
-   if (!targ->getOpInfo(insn).commutative)
-  if (insn->op != OP_SET && insn->op != OP_SLCT && insn->op != OP_SUB)
+   if (!targ->getOpInfo(insn).commutative) {
+  if (insn->op != OP_SET && insn->op != OP_SLCT &&
+  insn->op != OP_SUB && insn->op != OP_XMAD)
  return;
+  // XMAD is only commutative if both the CBCC and MRG flags are not set.
+  if (insn->op == OP_XMAD && (insn->subOp & 0x1c) == 
NV50_IR_SUBOP_XMAD_CBCC)
+ return;
+  if (insn->op == OP_XMAD && (insn->subOp & NV50_IR_SUBOP_XMAD_MRG))
+ return;
+   }
if (insn->src(1).getFile() != FILE_GPR)
   return;
// This is the special OP_SET used for alphatesting, we can't reverse its
@@ -236,6 +243,12 @@ LoadPropagation::checkSwapSrc01(Instruction *insn)
if (insn->op == OP_SUB) {
   insn->src(0).mod = insn->src(0).mod ^ Modifier(NV50_IR_MOD_NEG);
   insn->src(1).mod = insn->src(1).mod ^ Modifier(NV50_IR_MOD_NEG);
+   } else
+   if (insn->op == OP_XMAD) {
+  // swap h1 flags
+  uint16_t h1 = (insn->subOp >> 6) & 0x3;
+  h1 = (h1 >> 1 & 0x1) | (h1 << 1 & 0x2);
+  insn->subOp = (insn->subOp & ~uint16_t(0x3 << 6)) | (h1 << 6);
}
 }
 
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index ee3506fbae..dc27674369 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -86,6 +86,7 @@ const char *operationStr[OP_LAST + 1] =
"fma",
"sad",
"shladd",
+   "xmad",
"abs",
"neg",
"not",
@@ -239,6 +240,11 @@ static const char *barOpStr[] =
 {
"sync", "arrive", "red and", "red or", "red popc"
 };
+ 
+static const char *xmadOpCModeStr[] =
+{
+   "clo", "chi", "csfu", "cbcc"
+};
 
 static const char *DataTypeStr[] =
 {
@@ -625,6 +631,18 @@ void Instruction::print() const
  if (subOp < ARRAY_SIZE(barOpStr))
 PRINT("%s ", barOpStr[subOp]);
  break;
+  case OP_XMAD: {
+ if (subOp & NV50_IR_SUBOP_XMAD_PSL)
+PRINT("psl ");
+ if (subOp & NV50_IR_SUBOP_XMAD_MRG)
+PRINT("mrg ");
+ unsigned cmode = (subOp >> 2) & 0x7;
+ if (cmode && cmode <= ARRAY_SIZE(xmadOpCModeStr))
+PRINT("%s ", xmadOpCModeStr[cmode - 1]);
+ for (int i = 0; i < 2; i++)
+PRINT("h%d ", (subOp & NV50_IR_SUBOP_XMAD_H1(i)) ? 1 : 0);
+ break;
+  }
   default:
  if (subOp)
 PRI

[Mesa-dev] [PATCH v2 2/5] gm107/ir: add support for OP_XMAD on GM107+

2018-07-18 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 63 ++
 .../nouveau/codegen/nv50_ir_target_gm107.cpp   |  6 ++-
 .../nouveau/codegen/nv50_ir_target_nvc0.cpp|  1 +
 3 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index 694d1b10a3..caf853d58b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -155,6 +155,7 @@ private:
void emitIMUL();
void emitIMAD();
void emitISCADD();
+   void emitXMAD();
void emitIMNMX();
void emitICMP();
void emitISET();
@@ -1882,6 +1883,65 @@ CodeEmitterGM107::emitISCADD()
emitGPR (0x08, insn->src(0));
emitGPR (0x00, insn->def(0));
 }
+ 
+void
+CodeEmitterGM107::emitXMAD()
+{
+   assert(insn->src(0).getFile() == FILE_GPR);
+
+   bool constbuf = false;
+   bool psl_mrg = true;
+   bool immediate = false;
+   if (insn->src(2).getFile() == FILE_MEMORY_CONST) {
+  assert(insn->src(1).getFile() == FILE_GPR);
+  constbuf = true;
+  psl_mrg = false;
+  emitInsn(0x5100);
+  emitGPR(0x27, insn->src(1));
+  emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
+   } else if (insn->src(1).getFile() == FILE_MEMORY_CONST) {
+  assert(insn->src(2).getFile() == FILE_GPR);
+  constbuf = true;
+  emitInsn(0x4e00);
+  emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
+  emitGPR(0x27, insn->src(2));
+   } else if (insn->src(1).getFile() == FILE_IMMEDIATE) {
+  assert(insn->src(2).getFile() == FILE_GPR);
+  assert(!(insn->subOp & NV50_IR_SUBOP_XMAD_H1(1)));
+  immediate = false;
+  emitInsn(0x3600);
+  emitIMMD(0x14, 19, insn->src(1));
+  emitGPR(0x27, insn->src(2));
+   } else {
+  assert(insn->src(1).getFile() == FILE_GPR);
+  assert(insn->src(2).getFile() == FILE_GPR);
+  emitInsn(0x5b00);
+  emitGPR(0x14, insn->src(1));
+  emitGPR(0x27, insn->src(2));
+   }
+
+   if (psl_mrg)
+  emitField(constbuf ? 0x37 : 0x24, 2, insn->subOp & 0x3);
+   emitField(0x32, constbuf ? 2 : 3, (insn->subOp >> 2) & 0x7);
+
+   emitX(constbuf ? 0x36 : 0x26);
+   emitCC(0x2f);
+
+   emitGPR(0x0, insn->def(0));
+   emitGPR(0x8, insn->src(0));
+
+   // source flags
+   bool h1[2];
+   h1[0] = insn->subOp & NV50_IR_SUBOP_XMAD_H1(0);
+   h1[1] = insn->subOp & NV50_IR_SUBOP_XMAD_H1(1);
+   bool isSigned = isSignedType(insn->sType);
+   bool sext[2] = {h1[0] && isSigned, h1[1] && isSigned};
+
+   emitField(0x30, 2, sext[0] | (sext[1] << 1));
+   emitField(0x35, 1, h1[0]);
+   if (!immediate)
+  emitField(constbuf ? 0x34 : 0x23, 1, h1[1]);
+}
 
 void
 CodeEmitterGM107::emitIMNMX()
@@ -3254,6 +3314,9 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
case OP_SHLADD:
   emitISCADD();
   break;
+   case OP_XMAD:
+  emitXMAD();
+  break;
case OP_MIN:
case OP_MAX:
   if (isFloatType(insn->dType)) {
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
index 24a1cbb8da..f918fbfdd3 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
@@ -60,8 +60,11 @@ TargetGM107::isOpSupported(operation op, DataType ty) const
case OP_SQRT:
case OP_DIV:
case OP_MOD:
-   case OP_XMAD:
   return false;
+   case OP_XMAD:
+  if (isFloatType(ty))
+ return false;
+  break;
default:
   break;
}
@@ -230,6 +233,7 @@ TargetGM107::getLatency(const Instruction *insn) const
case OP_SUB:
case OP_VOTE:
case OP_XOR:
+   case OP_XMAD:
   if (insn->dType != TYPE_F64)
  return 6;
   break;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
index 94e98ada5e..39869e3145 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
@@ -161,6 +161,7 @@ static const struct opProperties _initPropsGM107[] = {
{ OP_SUSTP,   0x0, 0x0, 0x0, 0x0, 0x0, 0x4 },
{ OP_SUREDB,  0x0, 0x0, 0x0, 0x0, 0x0, 0x4 },
{ OP_SUREDP,  0x0, 0x0, 0x0, 0x0, 0x0, 0x4 },
+   { OP_XMAD,0x0, 0x0, 0x0, 0x0, 0x6, 0x2 },
 };
 
 void TargetNVC0::initProps(const struct opProperties *props, int size)
-- 
2.14.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 0/5] nv50/ir: Improve Performance of Integer Multiplication

2018-07-18 Thread Rhys Perry
Changes in v2:
- rebase
- bring back constant folding for multiplication by power-of-twos for nv50
- remove TODO in nv50_ir_target_gm107.cpp
- document XMAD's flags
- change how XMAD's per-operand flags are represented
- move util/bitscan.h stuff into a new patch
- stylistic changes

This series improves the performance of integer multiplication by removing
much usage of the very slow IMAD and IMUL on Maxwell+ and improving
multiplication by immediates on Fermi+. It depends on the
SHLADD/IndirectPropagation patches.

The first and second patches add support for the XMAD instruction in codegen.

The third patch replaces most IMADs and IMULs with a sequence of XMADs on
Maxwell+. This is far faster but increases the total instructions in the
shader-db by 0.72%.

This number is significantly lowered with the next patch. It replaces many
multiplications by immediates with instructions that should be as fast or
faster than the XMAD approach. They are also typically smaller and less
register heavy, so they decrease the total instruction count by -0.50%.

This series gives a ~50% speedup in fragment-heavy scenarios with
Dolphin 5.0 on my GTX 1060. All timings were made with interesting looking
fifos from Dolphin's bugtracker:
 Wind Waker: 18 FPS -> 26 FPS at 3x internal resolution
 Wind Waker:  8 FPS -> 11 FPS at 5x internal resolution
   Paper Mario?: 26 FPS -> 42 FPS at 5x internal resolution
SpongeBob Movie: 19 FPS -> 30 FPS at 5x internal resolution

Unigine Heaven and Unigine Valley seem to run the same at low quality with
no anti-aliasing and no tessellation. SuperTuxKart and 0 A.D. also show no
change.

It's possible these patches may break something. Piglit shows no functional
regressions, though the patches should probably be tested for improvements or
breakage with actual applications.

These patches can also be found on my github:
https://github.com/pendingchaos/mesa/tree/nv-xmad-v2

The final changes in shader-db are as follows:

total instructions in shared programs : 5256901 -> 5268293 (0.22%)
total gprs used in shared programs: 624328 -> 624196 (-0.02%)
total shared used in shared programs  : 360704 -> 360704 (0.00%)
total local used in shared programs   : 20952 -> 20952 (0.00%)

         local  shared     gpr    inst   bytes
helped       0       0     255     680     680
  hurt       0       0     128    1484    1484

Rhys Perry (5):
  nv50/ir: add preliminary support for OP_XMAD
  gm107/ir: add support for OP_XMAD on GM107+
  nv50/ir: optimize imul/imad to xmads
  util: Add u_bit_count64 and u_next_power_of_two
  nv50/ir: further optimize multiplication by immediates

 src/gallium/drivers/nouveau/codegen/nv50_ir.h  |  23 +++
 .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp |  63 +++
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 203 +++--
 .../drivers/nouveau/codegen/nv50_ir_print.cpp  |  18 ++
 .../drivers/nouveau/codegen/nv50_ir_target.cpp |   7 +-
 .../nouveau/codegen/nv50_ir_target_gm107.cpp   |   6 +-
 .../nouveau/codegen/nv50_ir_target_nv50.cpp|   1 +
 .../nouveau/codegen/nv50_ir_target_nvc0.cpp|  16 ++
 src/util/bitscan.h |  28 +++
 9 files changed, 345 insertions(+), 20 deletions(-)

-- 
2.14.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 4/5] util: Add u_bit_count64 and u_next_power_of_two

2018-07-18 Thread Rhys Perry
Signed-off-by: Rhys Perry 
---
 src/util/bitscan.h | 28 
 1 file changed, 28 insertions(+)

diff --git a/src/util/bitscan.h b/src/util/bitscan.h
index dc89ac93f2..cae61d3f71 100644
--- a/src/util/bitscan.h
+++ b/src/util/bitscan.h
@@ -286,6 +286,34 @@ u_bit_consecutive64(unsigned start, unsigned count)
return (((uint64_t)1 << count) - 1) << start;
 }
 
+/* Returns the number of bits set.
+ *
+ * based on
+ * http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetKernighan
+ */
+static inline unsigned
+u_bit_count64(uint64_t val)
+{
+#ifdef __POPCNT__
+   return _mm_popcnt_u64(v);
+#else
+   unsigned result;
+   for (result = 0; val; result++)
+  val &= val - 1; /* clear the least significant bit set */
+   return result;
+#endif
+}
+
+/* Round the input to the next power of two.
+ * Zero is rounded to one.
+ */
+static inline uint64_t
+u_next_power_of_two(unsigned val)
+{
+   bool power_of_two_nonzero = util_is_power_of_two_or_zero64(val) && val;
+   return power_of_two_nonzero ? val : ((uint64_t)1 << util_last_bit64(val));
+}
+
 
 #ifdef __cplusplus
 }
-- 
2.14.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 04/13] gallium: add PIPE_SHADER_CAP_MAX_VARYINGS

2018-07-18 Thread Marek Olšák
Shouldn't this be a non-shader CAP? Because the value is not different
between shader stages and doesn't make sense for non-fragment shaders.

Marek

On Sun, Jul 15, 2018 at 2:15 PM, Karol Herbst  wrote:
> From: Karol Herbst 
>
> this way Nouveau can report 128 inputs, but only 124 varyings.
>
> Fixes: 'KHR-GL45.limits.max_fragment_input_components'
>
> Signed-off-by: Karol Herbst 
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_limits.h|  1 +
>  src/gallium/auxiliary/tgsi/tgsi_exec.h   |  1 +
>  src/gallium/docs/source/screen.rst   |  1 +
>  src/gallium/drivers/etnaviv/etnaviv_screen.c |  1 +
>  src/gallium/drivers/freedreno/freedreno_screen.c |  1 +
>  src/gallium/drivers/i915/i915_screen.c   |  1 +
>  src/gallium/drivers/nouveau/nv30/nv30_screen.c   |  2 ++
>  src/gallium/drivers/nouveau/nv50/nv50_screen.c   |  1 +
>  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   | 11 +++
>  src/gallium/drivers/r300/r300_screen.c   |  2 ++
>  src/gallium/drivers/r600/r600_pipe.c |  1 +
>  src/gallium/drivers/svga/svga_screen.c   |  3 +++
>  src/gallium/drivers/vc4/vc4_screen.c |  1 +
>  src/gallium/drivers/virgl/virgl_screen.c |  1 +
>  src/gallium/include/pipe/p_defines.h |  1 +
>  src/mesa/state_tracker/st_extensions.c   |  4 +---
>  16 files changed, 22 insertions(+), 11 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h 
> b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
> index 7b66b758729..17c23a342c8 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
> @@ -98,6 +98,7 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
> case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
>return LP_MAX_TGSI_NESTING;
> case PIPE_SHADER_CAP_MAX_INPUTS:
> +   case PIPE_SHADER_CAP_MAX_VARYINGS:
>return 32;
> case PIPE_SHADER_CAP_MAX_OUTPUTS:
>return 32;
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h 
> b/src/gallium/auxiliary/tgsi/tgsi_exec.h
> index ed8b9e88693..eab75c06c2f 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
> @@ -500,6 +500,7 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
> case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
>return TGSI_EXEC_MAX_NESTING;
> case PIPE_SHADER_CAP_MAX_INPUTS:
> +   case PIPE_SHADER_CAP_MAX_VARYINGS:
>return TGSI_EXEC_MAX_INPUT_ATTRIBS;
> case PIPE_SHADER_CAP_MAX_OUTPUTS:
>return 32;
> diff --git a/src/gallium/docs/source/screen.rst 
> b/src/gallium/docs/source/screen.rst
> index d7ce521c2c3..a46b255031f 100644
> --- a/src/gallium/docs/source/screen.rst
> +++ b/src/gallium/docs/source/screen.rst
> @@ -480,6 +480,7 @@ support different features.
>  * ``PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS``: The maximum number of texture 
> indirections.
>  * ``PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH``: The maximum nested control 
> flow depth.
>  * ``PIPE_SHADER_CAP_MAX_INPUTS``: The maximum number of input registers.
> +* ``PIPE_SHADER_CAP_MAX_VARYINGS``: The maximum number of varyings.
>  * ``PIPE_SHADER_CAP_MAX_OUTPUTS``: The maximum number of output registers.
>This is valid for all shaders except the fragment shader.
>  * ``PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE``: The maximum size per constant 
> buffer in bytes.
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c 
> b/src/gallium/drivers/etnaviv/etnaviv_screen.c
> index 04c7a873de6..d060e27397d 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
> @@ -422,6 +422,7 @@ etna_screen_get_shader_param(struct pipe_screen *pscreen,
> case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
>return ETNA_MAX_DEPTH; /* XXX */
> case PIPE_SHADER_CAP_MAX_INPUTS:
> +   case PIPE_SHADER_CAP_MAX_VARYINGS:
>/* Maximum number of inputs for the vertex shader is the number
> * of vertex elements - each element defines one vertex shader
> * input register.  For the fragment shader, this is the number
> diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c 
> b/src/gallium/drivers/freedreno/freedreno_screen.c
> index 1baebd71789..1c1c11944c3 100644
> --- a/src/gallium/drivers/freedreno/freedreno_screen.c
> +++ b/src/gallium/drivers/freedreno/freedreno_screen.c
> @@ -535,6 +535,7 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen,
> return 8; /* XXX */
> case PIPE_SHADER_CAP_MAX_INPUTS:
> case PIPE_SHADER_CAP_MAX_OUTPUTS:
> +   case PIPE_SHADER_CAP_MAX_VARYINGS:
> return 16;
> case PIPE_SHADER_CAP_MAX_TEMPS:
> return 64; /* Max native temporaries. */
> diff --git a/src/gallium/drivers/i915/i915_screen.c 
> b/src/gallium/drivers/i915/i915_screen.c
> index a03840a3aa4..f4a58c3272e 100644
> --- a/src/gallium/drivers/i915/i915_screen.c
> +++ b/src/gallium/driver

[Mesa-dev] [PATCH v2 4/4] intel: tools: dump: trace memory writes

2018-07-18 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
---
 src/intel/tools/aub_write.c | 45 ++---
 1 file changed, 32 insertions(+), 13 deletions(-)

diff --git a/src/intel/tools/aub_write.c b/src/intel/tools/aub_write.c
index de4ce33..9c140553542 100644
--- a/src/intel/tools/aub_write.c
+++ b/src/intel/tools/aub_write.c
@@ -313,10 +313,17 @@ dword_out(struct aub_file *aub, uint32_t data)
 
 static void
 mem_trace_memory_write_header_out(struct aub_file *aub, uint64_t addr,
-  uint32_t len, uint32_t addr_space)
+  uint32_t len, uint32_t addr_space,
+  const char *desc)
 {
uint32_t dwords = ALIGN(len, sizeof(uint32_t)) / sizeof(uint32_t);
 
+   if (aub->verbose_log_file) {
+  fprintf(aub->verbose_log_file,
+  "  MEM WRITE (0x%016" PRIx64 "-0x%016" PRIx64 ")\n",
+  addr, addr + len);
+   }
+
dword_out(aub, CMD_MEM_TRACE_MEMORY_WRITE | (5 + dwords - 1));
dword_out(aub, addr & 0x);   /* addr lo */
dword_out(aub, addr >> 32);   /* addr hi */
@@ -387,7 +394,8 @@ populate_ppgtt_table(struct aub_file *aub, struct 
aub_ppgtt_table *table,
   uint64_t write_size = (dirty_end - dirty_start + 1) *
  sizeof(uint64_t);
   mem_trace_memory_write_header_out(aub, write_addr, write_size,
-
AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_PHYSICAL);
+
AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_PHYSICAL,
+"PPGTT update");
   data_out(aub, entries + dirty_start, write_size);
}
 }
@@ -476,7 +484,8 @@ write_execlists_header(struct aub_file *aub, const char 
*name)
 
mem_trace_memory_write_header_out(aub, STATIC_GGTT_MAP_START >> 12,
  ggtt_ptes * GEN8_PTE_SIZE,
- 
AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY);
+ 
AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY,
+ "GGTT PT");
for (uint32_t i = 0; i < ggtt_ptes; i++) {
   dword_out(aub, 1 + 0x1000 * i + STATIC_GGTT_MAP_START);
   dword_out(aub, 0);
@@ -484,7 +493,8 @@ write_execlists_header(struct aub_file *aub, const char 
*name)
 
/* RENDER_RING */
mem_trace_memory_write_header_out(aub, RENDER_RING_ADDR, RING_SIZE,
- AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT);
+ AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
+ "RENDER RING");
for (uint32_t i = 0; i < RING_SIZE; i += sizeof(uint32_t))
   dword_out(aub, 0);
 
@@ -492,7 +502,8 @@ write_execlists_header(struct aub_file *aub, const char 
*name)
mem_trace_memory_write_header_out(aub, RENDER_CONTEXT_ADDR,
  PPHWSP_SIZE +
  sizeof(render_context_init),
- AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT);
+ AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
+ "RENDER PPHWSP");
for (uint32_t i = 0; i < PPHWSP_SIZE; i += sizeof(uint32_t))
   dword_out(aub, 0);
 
@@ -501,7 +512,8 @@ write_execlists_header(struct aub_file *aub, const char 
*name)
 
/* BLITTER_RING */
mem_trace_memory_write_header_out(aub, BLITTER_RING_ADDR, RING_SIZE,
- AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT);
+ AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
+ "BLITTER RING");
for (uint32_t i = 0; i < RING_SIZE; i += sizeof(uint32_t))
   dword_out(aub, 0);
 
@@ -509,7 +521,8 @@ write_execlists_header(struct aub_file *aub, const char 
*name)
mem_trace_memory_write_header_out(aub, BLITTER_CONTEXT_ADDR,
  PPHWSP_SIZE +
  sizeof(blitter_context_init),
- AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT);
+ AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
+ "BLITTER PPHWSP");
for (uint32_t i = 0; i < PPHWSP_SIZE; i += sizeof(uint32_t))
   dword_out(aub, 0);
 
@@ -518,7 +531,8 @@ write_execlists_header(struct aub_file *aub, const char 
*name)
 
/* VIDEO_RING */
mem_trace_memory_write_header_out(aub, VIDEO_RING_ADDR, RING_SIZE,
- AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT);
+ AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
+ "VIDEO RING");
for (uint32_t i = 0; i < RING_SIZE; i += sizeof(uint32_t))
   dword_out(aub, 0);
 
@@ -526,7 +540,8 @@ write_execlists_header(struct aub_file *aub, const char 
*name)
mem_trace_memory_write_header_out(aub, VIDEO_CONTEXT_ADD

[Mesa-dev] [PATCH v2 3/4] intel: tools: aubwrite: fix invalid frees on finish

2018-07-18 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
---
 src/intel/tools/aub_write.c | 13 +
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/intel/tools/aub_write.c b/src/intel/tools/aub_write.c
index 1224e8f6b7f..de4ce33 100644
--- a/src/intel/tools/aub_write.c
+++ b/src/intel/tools/aub_write.c
@@ -255,11 +255,16 @@ align_u32(uint32_t v, uint32_t a)
 }
 
 static void
-aub_ppgtt_table_finish(struct aub_ppgtt_table *table)
+aub_ppgtt_table_finish(struct aub_ppgtt_table *table, int level)
 {
+   if (level == 1)
+  return;
+
for (unsigned i = 0; i < ARRAY_SIZE(table->subtables); i++) {
-  aub_ppgtt_table_finish(table->subtables[i]);
-  free(table->subtables[i]);
+  if (table->subtables[i]) {
+ aub_ppgtt_table_finish(table->subtables[i], level - 1);
+ free(table->subtables[i]);
+  }
}
 }
 
@@ -280,7 +285,7 @@ aub_file_init(struct aub_file *aub, FILE *file, uint16_t 
pci_id)
 void
 aub_file_finish(struct aub_file *aub)
 {
-   aub_ppgtt_table_finish(&aub->pml4);
+   aub_ppgtt_table_finish(&aub->pml4, 4);
fclose(aub->file);
 }
 
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/4] intel: tools: dump: remove mentions of intel_aubdump

2018-07-18 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
---
 src/intel/tools/intel_dump_gpu.c | 30 +++---
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c
index 5fd2c8ea723..6ce7d452afb 100644
--- a/src/intel/tools/intel_dump_gpu.c
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -86,6 +86,7 @@ fail_if(int cond, const char *format, ...)
   return;
 
va_start(args, format);
+   fprintf(stderr, "intel_dump_gpu: ");
vfprintf(stderr, format, args);
va_end(args);
 
@@ -125,10 +126,10 @@ relocate_bo(struct bo *bo, const struct 
drm_i915_gem_execbuffer2 *execbuffer2,
int handle;
 
relocated = malloc(bo->size);
-   fail_if(relocated == NULL, "intel_aubdump: out of memory\n");
+   fail_if(relocated == NULL, "out of memory\n");
memcpy(relocated, GET_PTR(bo->map), bo->size);
for (size_t i = 0; i < obj->relocation_count; i++) {
-  fail_if(relocs[i].offset >= bo->size, "intel_aubdump: reloc outside 
bo\n");
+  fail_if(relocs[i].offset >= bo->size, "reloc outside bo\n");
 
   if (execbuffer2->flags & I915_EXEC_HANDLE_LUT)
  handle = exec_objects[relocs[i].target_handle].handle;
@@ -211,8 +212,7 @@ dump_execbuffer2(int fd, struct drm_i915_gem_execbuffer2 
*execbuffer2)
   aub_write_header(&aub_file, program_invocation_short_name);
 
   if (verbose)
- printf("[intel_aubdump: running, "
-"output file %s, chipset id 0x%04x, gen %d]\n",
+ printf("[running, output file %s, chipset id 0x%04x, gen %d]\n",
 output_filename, device, devinfo.gen);
}
 
@@ -254,7 +254,7 @@ dump_execbuffer2(int fd, struct drm_i915_gem_execbuffer2 
*execbuffer2)
 
   if (bo->map == NULL && bo->size > 0)
  bo->map = gem_mmap(fd, obj->handle, 0, bo->size);
-  fail_if(bo->map == MAP_FAILED, "intel_aubdump: bo mmap failed\n");
+  fail_if(bo->map == MAP_FAILED, "bo mmap failed\n");
 
   if (aub_use_execlists(&aub_file))
  aub_map_ppgtt(&aub_file, bo->offset, bo->size);
@@ -310,8 +310,8 @@ add_new_bo(int handle, uint64_t size, void *map)
 {
struct bo *bo = &bos[handle];
 
-   fail_if(handle >= MAX_BO_COUNT, "intel_aubdump: bo handle out of range\n");
-   fail_if(size == 0, "intel_aubdump: bo size is invalid\n");
+   fail_if(handle >= MAX_BO_COUNT, "bo handle out of range\n");
+   fail_if(size == 0, "bo size is invalid\n");
 
bo->size = size;
bo->map = map;
@@ -359,17 +359,17 @@ maybe_init(void)
  }
   } else if (!strcmp(key, "device")) {
  fail_if(sscanf(value, "%i", &device) != 1,
- "intel_aubdump: failed to parse device id '%s'",
+ "failed to parse device id '%s'",
  value);
  device_override = true;
   } else if (!strcmp(key, "file")) {
  output_filename = strdup(value);
  output_file = fopen(output_filename, "w+");
  fail_if(output_file == NULL,
- "intel_aubdump: failed to open file '%s'\n",
+ "failed to open file '%s'\n",
  output_filename);
   } else {
- fprintf(stderr, "intel_aubdump: unknown option '%s'\n", key);
+ fprintf(stderr, "unknown option '%s'\n", key);
   }
 
   free(key);
@@ -378,7 +378,7 @@ maybe_init(void)
fclose(config);
 
bos = calloc(MAX_BO_COUNT, sizeof(bos[0]));
-   fail_if(bos == NULL, "intel_aubdump: out of memory\n");
+   fail_if(bos == NULL, "out of memory\n");
 }
 
 __attribute__ ((visibility ("default"))) int
@@ -398,7 +398,7 @@ ioctl(int fd, unsigned long request, ...)
(buf.st_mode & S_IFMT) == S_IFCHR && major(buf.st_rdev) == DRM_MAJOR) {
   drm_fd = fd;
   if (verbose)
- printf("[intel_aubdump: intercept drm ioctl on fd %d]\n", fd);
+ printf("[intercept drm ioctl on fd %d]\n", fd);
}
 
if (fd == drm_fd) {
@@ -428,7 +428,7 @@ ioctl(int fd, unsigned long request, ...)
   case DRM_IOCTL_I915_GEM_EXECBUFFER: {
  static bool once;
  if (!once) {
-fprintf(stderr, "intel_aubdump: "
+fprintf(stderr,
 "application uses DRM_IOCTL_I915_GEM_EXECBUFFER, not 
handled\n");
 once = true;
  }
@@ -490,7 +490,7 @@ ioctl(int fd, unsigned long request, ...)
 off_t size;
 
 size = lseek(prime->fd, 0, SEEK_END);
-fail_if(size == -1, "intel_aubdump: failed to get prime bo 
size\n");
+fail_if(size == -1, "failed to get prime bo size\n");
 add_new_bo(prime->handle, size, NULL);
  }
 
@@ -511,7 +511,7 @@ init(void)
libc_close = dlsym(RTLD_NEXT, "close");
libc_ioctl = dlsym(RTLD_NEXT, "ioctl");
fail_if(libc_close == NULL || libc_ioctl == NULL,
-   "intel_aubdump: failed to get libc ioctl or close\n");
+   "failed to get libc ioctl or close\n");
 }
 
 static int
-- 
2.18.0

___

[Mesa-dev] [PATCH v2 1/4] intel: tools: dump: remove command execution feature

2018-07-18 Thread Lionel Landwerlin
In commit 86cb05a6d35a52 ("intel: aubinator: remove standard input
processing option") we removed the ability to process aub as an input
stream because we now rely on mmapping the aub file to back the
buffers aubinator is parsing.

intel_aubdump was the provider of the standard input data and since
we've copied/reworked intel_aubdump into intel_dump_gpu within Mesa,
we don't need that code anymore.

Signed-off-by: Lionel Landwerlin 
---
 src/intel/tools/intel_dump_gpu.c  | 121 +++---
 src/intel/tools/intel_dump_gpu.in |  27 +--
 2 files changed, 29 insertions(+), 119 deletions(-)

diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c
index 6d2c4b7f983..5fd2c8ea723 100644
--- a/src/intel/tools/intel_dump_gpu.c
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -53,8 +53,8 @@ static int (*libc_close)(int fd) = close_init_helper;
 static int (*libc_ioctl)(int fd, unsigned long request, ...) = 
ioctl_init_helper;
 
 static int drm_fd = -1;
-static char *filename = NULL;
-static FILE *files[2] = { NULL, NULL };
+static char *output_filename = NULL;
+static FILE *output_file = NULL;
 static int verbose = 0;
 static bool device_override;
 
@@ -111,7 +111,7 @@ align_u32(uint32_t v, uint32_t a)
 
 static struct gen_device_info devinfo = {0};
 static uint32_t device;
-static struct aub_file aubs[2];
+static struct aub_file aub_file;
 
 static void *
 relocate_bo(struct bo *bo, const struct drm_i915_gem_execbuffer2 *execbuffer2,
@@ -205,28 +205,21 @@ dump_execbuffer2(int fd, struct drm_i915_gem_execbuffer2 
*execbuffer2)
   fail_if(!gen_get_device_info(device, &devinfo),
   "failed to identify chipset=0x%x\n", device);
 
-  for (int i = 0; i < ARRAY_SIZE(files); i++) {
- if (files[i] != NULL) {
-aub_file_init(&aubs[i], files[i], device);
-if (verbose == 2)
-   aubs[i].verbose_log_file = stdout;
-aub_write_header(&aubs[i], program_invocation_short_name);
- }
-  }
+  aub_file_init(&aub_file, output_file, device);
+  if (verbose == 2)
+ aub_file.verbose_log_file = stdout;
+  aub_write_header(&aub_file, program_invocation_short_name);
 
   if (verbose)
  printf("[intel_aubdump: running, "
 "output file %s, chipset id 0x%04x, gen %d]\n",
-filename, device, devinfo.gen);
+output_filename, device, devinfo.gen);
}
 
-   /* Any aub */
-   struct aub_file *any_aub = files[0] ? &aubs[0] : &aubs[1];;
-
-   if (aub_use_execlists(any_aub))
+   if (aub_use_execlists(&aub_file))
   offset = 0x1000;
else
-  offset = aub_gtt_size(any_aub);
+  offset = aub_gtt_size(&aub_file);
 
if (verbose)
   printf("Dumping execbuffer2:\n");
@@ -263,13 +256,8 @@ dump_execbuffer2(int fd, struct drm_i915_gem_execbuffer2 
*execbuffer2)
  bo->map = gem_mmap(fd, obj->handle, 0, bo->size);
   fail_if(bo->map == MAP_FAILED, "intel_aubdump: bo mmap failed\n");
 
-  for (int i = 0; i < ARRAY_SIZE(files); i++) {
- if (files[i] == NULL)
-continue;
-
- if (aub_use_execlists(&aubs[i]))
-aub_map_ppgtt(&aubs[i], bo->offset, bo->size);
-  }
+  if (aub_use_execlists(&aub_file))
+ aub_map_ppgtt(&aub_file, bo->offset, bo->size);
}
 
batch_index = (execbuffer2->flags & I915_EXEC_BATCH_FIRST) ? 0 :
@@ -284,30 +272,21 @@ dump_execbuffer2(int fd, struct drm_i915_gem_execbuffer2 
*execbuffer2)
   else
  data = bo->map;
 
-  for (int i = 0; i < ARRAY_SIZE(files); i++) {
- if (files[i] == NULL)
-continue;
-
- if (bo == batch_bo) {
-aub_write_trace_block(&aubs[i], AUB_TRACE_TYPE_BATCH,
-  GET_PTR(data), bo->size, bo->offset);
- } else {
-aub_write_trace_block(&aubs[i], AUB_TRACE_TYPE_NOTYPE,
-  GET_PTR(data), bo->size, bo->offset);
- }
+  if (bo == batch_bo) {
+ aub_write_trace_block(&aub_file, AUB_TRACE_TYPE_BATCH,
+   GET_PTR(data), bo->size, bo->offset);
+  } else {
+ aub_write_trace_block(&aub_file, AUB_TRACE_TYPE_NOTYPE,
+   GET_PTR(data), bo->size, bo->offset);
   }
+
   if (data != bo->map)
  free(data);
}
 
-   for (int i = 0; i < ARRAY_SIZE(files); i++) {
-  if (files[i] != NULL)
- continue;
-
-  aub_write_exec(&aubs[i],
- batch_bo->offset + execbuffer2->batch_start_offset,
- offset, ring_flag);
-   }
+   aub_write_exec(&aub_file,
+  batch_bo->offset + execbuffer2->batch_start_offset,
+  offset, ring_flag);
 
if (device_override &&
(execbuffer2->flags & I915_EXEC_FENCE_ARRAY) != 0) {
@@ -358,40 +337,6 @@ close(int fd)
return libc_close(fd);
 }
 
-static FILE *
-launch_command(char *command)
-{
-   int i

Re: [Mesa-dev] [PATCH v2 4/5] util: Add u_bit_count64 and u_next_power_of_two

2018-07-18 Thread Marek Olšák
On Wed, Jul 18, 2018 at 1:05 PM, Rhys Perry  wrote:
> Signed-off-by: Rhys Perry 
> ---
>  src/util/bitscan.h | 28 
>  1 file changed, 28 insertions(+)
>
> diff --git a/src/util/bitscan.h b/src/util/bitscan.h
> index dc89ac93f2..cae61d3f71 100644
> --- a/src/util/bitscan.h
> +++ b/src/util/bitscan.h
> @@ -286,6 +286,34 @@ u_bit_consecutive64(unsigned start, unsigned count)
> return (((uint64_t)1 << count) - 1) << start;
>  }
>
> +/* Returns the number of bits set.
> + *
> + * based on
> + * http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetKernighan
> + */
> +static inline unsigned
> +u_bit_count64(uint64_t val)
> +{
> +#ifdef __POPCNT__
> +   return _mm_popcnt_u64(v);
> +#else
> +   unsigned result;
> +   for (result = 0; val; result++)
> +  val &= val - 1; /* clear the least significant bit set */
> +   return result;
> +#endif
> +}

There is also util_bitcount64.

> +
> +/* Round the input to the next power of two.
> + * Zero is rounded to one.
> + */
> +static inline uint64_t
> +u_next_power_of_two(unsigned val)
> +{
> +   bool power_of_two_nonzero = util_is_power_of_two_or_zero64(val) && val;
> +   return power_of_two_nonzero ? val : ((uint64_t)1 << util_last_bit64(val));
> +}
> +

val is unsigned (32 bits), you are treating it as 64-bit.

There are also util_next_power_of_two and util_next_power_of_two64.

Marek

>
>  #ifdef __cplusplus
>  }
> --
> 2.14.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Allow AMD_perfmon on GLES contexts

2018-07-18 Thread Marek Olšák
With the alignment fixed, this is:

Reviewed-by: Marek Olšák 

Marek

On Wed, Jul 18, 2018 at 11:01 AM, Rob Clark  wrote:
> From: Eric Anholt 
>
> Reviewed-by: Rob Clark 
> ---
> Not sure if this ever got sent to list.. but this extension is meant to
> be exposed in GLES as well as GL
>
>  .../glapi/gen/AMD_performance_monitor.xml | 22 +--
>  src/mesa/main/extensions_table.h  |  2 +-
>  2 files changed, 12 insertions(+), 12 deletions(-)
>
> diff --git a/src/mapi/glapi/gen/AMD_performance_monitor.xml 
> b/src/mapi/glapi/gen/AMD_performance_monitor.xml
> index b29dc5d9036..785ea076c6f 100644
> --- a/src/mapi/glapi/gen/AMD_performance_monitor.xml
> +++ b/src/mapi/glapi/gen/AMD_performance_monitor.xml
> @@ -5,13 +5,13 @@
>
>  
>
> -
> +
>  
>  
>  
>  
>
> -
> +
>  
>  
>  
> @@ -19,14 +19,14 @@
>  
>  
>
> -
> +
>  
>  
>  
>  
>  
>
> -
> +
>  
>  
>  
> @@ -34,24 +34,24 @@
>  
>  
>
> -
> +
>  
>  
>  
>  
>  
>
> -
> +
>  
>  
>  
>
> -
> +
>  
>  
>  
>
> -
> +
>  
>  
>  
> @@ -59,15 +59,15 @@
>  
>  
>
> -
> +
>  
>  
>
> -
> +
>  
>  
>
> -
> +
>  
>  
>  
> diff --git a/src/mesa/main/extensions_table.h 
> b/src/mesa/main/extensions_table.h
> index e24287b8581..af1ae73473d 100644
> --- a/src/mesa/main/extensions_table.h
> +++ b/src/mesa/main/extensions_table.h
> @@ -10,7 +10,7 @@ EXT(3DFX_texture_compression_FXT1   , 
> TDFX_texture_compression_FXT1
>
>  EXT(AMD_conservative_depth  , ARB_conservative_depth 
> , GLL, GLC,  x ,  x , 2009)
>  EXT(AMD_draw_buffers_blend  , ARB_draw_buffers_blend 
> , GLL, GLC,  x ,  x , 2009)
> -EXT(AMD_performance_monitor , AMD_performance_monitor
> , GLL, GLC,  x ,  x , 2007)
> +EXT(AMD_performance_monitor , AMD_performance_monitor
> , GLL, GLC,  x ,  ES2 , 2007)
>  EXT(AMD_pinned_memory   , AMD_pinned_memory  
> , GLL, GLC,  x ,  x , 2013)
>  EXT(AMD_seamless_cubemap_per_texture, 
> AMD_seamless_cubemap_per_texture   , GLL, GLC,  x ,  x , 2009)
>  EXT(AMD_shader_stencil_export   , ARB_shader_stencil_export  
> , GLL, GLC,  x ,  x , 2009)
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium: surface remove width/height removal comment

2018-07-18 Thread Marek Olšák
On Tue, Jul 17, 2018 at 9:44 AM, Roland Scheidegger  wrote:
> Initially, width/height were actually needed because not all
> pipe_surface objects were backed by pipe_resource objects (that was ages
> ago...). Hence the comment when that was fixed, since it was always
> actually possible to derive this from the resource (but a bit too
> complex to change all the code...).
> Are there now situations when you can't figure this out from the
> resource? Or do you think it's actually useful as a convenience member?

It's useful and it would be complicated to remove.

Marek

>
> Roland
>
> Am 17.07.2018 um 07:37 schrieb Dave Airlie:
>> From: Dave Airlie 
>>
>> We do need width/height for things, remove the outdated
>> comment.
>> ---
>>  src/gallium/include/pipe/p_state.h | 1 -
>>  1 file changed, 1 deletion(-)
>>
>> diff --git a/src/gallium/include/pipe/p_state.h 
>> b/src/gallium/include/pipe/p_state.h
>> index 809aa087ce0..b55624d698d 100644
>> --- a/src/gallium/include/pipe/p_state.h
>> +++ b/src/gallium/include/pipe/p_state.h
>> @@ -435,7 +435,6 @@ struct pipe_surface
>> struct pipe_resource *texture; /**< resource into which this is a view  
>> */
>> struct pipe_context *context; /**< context this surface belongs to */
>>
>> -   /* XXX width/height should be removed */
>> uint16_t width;   /**< logical width in pixels */
>> uint16_t height;  /**< logical height in pixels */
>>
>>
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeonsi: Refuse to accept code with unhandled relocations

2018-07-18 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Tue, Jul 17, 2018 at 10:36 AM, Jan Vesely  wrote:
> They might lead to unrecoverable GPU hang.
> Signed-off-by: Jan Vesely 
> ---
>  src/gallium/drivers/radeonsi/si_compute.c | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
> b/src/gallium/drivers/radeonsi/si_compute.c
> index 5d3341ff61..2349be9584 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -238,6 +238,12 @@ static void *si_create_compute_state(
> const amd_kernel_code_t *code_object =
> si_compute_get_code_object(program, 0);
> code_object_to_config(code_object, 
> &program->shader.config);
> +   if (program->shader.binary.reloc_count != 0) {
> +   fprintf(stderr, "Error: %d unsupported 
> relocations\n",
> +   program->shader.binary.reloc_count);
> +   FREE(program);
> +   return NULL;
> +   }
> } else {
> si_shader_binary_read_config(&program->shader.binary,
>  &program->shader.config, 0);
> --
> 2.16.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeonsi: Use signed char for color_interp_vgpr_index

2018-07-18 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Mon, Jul 16, 2018 at 3:20 PM, Timothy Pearson
 wrote:
>
> color_interp_vgpr_index was declared as a generic char value.
> Because signed values are used in this variable, the result
> was not safe across architectures and crashed on ppc64[el]
> and arm.
>
> Declare color_interp_vgpr_index as a signed type.
>
> Signed-off-by: Timothy Pearson 
> ---
>  src/gallium/drivers/radeonsi/si_shader.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
> b/src/gallium/drivers/radeonsi/si_shader.h
> index 9e8d573625..791ed27324 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -507,7 +507,7 @@ union si_shader_part_key {
> unsignedancillary_vgpr_index:5;
> unsignedwqm:1;
> charcolor_attr_index[2];
> -   charcolor_interp_vgpr_index[2]; /* -1 == constant 
> */
> +   signed char color_interp_vgpr_index[2]; /* -1 == constant 
> */
> } ps_prolog;
> struct {
> struct si_ps_epilog_bits states;
> --
> 2.17.1
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeonsi: Use signed char for color_interp_vgpr_index

2018-07-18 Thread Marek Olšák
I pushed the patch. Thanks!

Marek

On Wed, Jul 18, 2018 at 1:30 PM, Marek Olšák  wrote:
> Reviewed-by: Marek Olšák 
>
> Marek
>
> On Mon, Jul 16, 2018 at 3:20 PM, Timothy Pearson
>  wrote:
>>
>> color_interp_vgpr_index was declared as a generic char value.
>> Because signed values are used in this variable, the result
>> was not safe across architectures and crashed on ppc64[el]
>> and arm.
>>
>> Declare color_interp_vgpr_index as a signed type.
>>
>> Signed-off-by: Timothy Pearson 
>> ---
>>  src/gallium/drivers/radeonsi/si_shader.h | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
>> b/src/gallium/drivers/radeonsi/si_shader.h
>> index 9e8d573625..791ed27324 100644
>> --- a/src/gallium/drivers/radeonsi/si_shader.h
>> +++ b/src/gallium/drivers/radeonsi/si_shader.h
>> @@ -507,7 +507,7 @@ union si_shader_part_key {
>> unsignedancillary_vgpr_index:5;
>> unsignedwqm:1;
>> charcolor_attr_index[2];
>> -   charcolor_interp_vgpr_index[2]; /* -1 == 
>> constant */
>> +   signed char color_interp_vgpr_index[2]; /* -1 == 
>> constant */
>> } ps_prolog;
>> struct {
>> struct si_ps_epilog_bits states;
>> --
>> 2.17.1
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] radeonsi/nir: make use of nir_lower_load_const_to_scalar()

2018-07-18 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Mon, Jul 16, 2018 at 12:01 AM, Timothy Arceri  wrote:
> This allows NIR to CSE more operations. LLVM does this also so the
> impact is limited, however doing this in NIR allows other opts to
> make progress. For example some loops in Civilization Beyond Earth
> shaders are unrolled.
> ---
>  src/gallium/drivers/radeonsi/si_shader_nir.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c 
> b/src/gallium/drivers/radeonsi/si_shader_nir.c
> index 6eb114ad5c0..4ae5b006593 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_nir.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
> @@ -802,6 +802,8 @@ si_lower_nir(struct si_shader_selector* sel)
>
> ac_lower_indirect_derefs(sel->nir, sel->screen->info.chip_class);
>
> +   NIR_PASS_V(sel->nir, nir_lower_load_const_to_scalar);
> +
> bool progress;
> do {
> progress = false;
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Allow AMD_perfmon on GLES contexts

2018-07-18 Thread Eric Anholt
Rob Clark  writes:

> From: Eric Anholt 
>
> Reviewed-by: Rob Clark 
> ---
> Not sure if this ever got sent to list.. but this extension is meant to
> be exposed in GLES as well as GL

Did the trivial whitespace fix and pushed.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium: surface remove width/height removal comment

2018-07-18 Thread Roland Scheidegger
Am 18.07.2018 um 19:29 schrieb Marek Olšák:
> On Tue, Jul 17, 2018 at 9:44 AM, Roland Scheidegger  
> wrote:
>> Initially, width/height were actually needed because not all
>> pipe_surface objects were backed by pipe_resource objects (that was ages
>> ago...). Hence the comment when that was fixed, since it was always
>> actually possible to derive this from the resource (but a bit too
>> complex to change all the code...).
>> Are there now situations when you can't figure this out from the
>> resource? Or do you think it's actually useful as a convenience member?
> 
> It's useful and it would be complicated to remove.
Alright. I was really just more wondering if the values couldn't be
recalculated always nowadays for some reason. I'm ok with removing the
comment, even if it's just convenience members.

Roland


> 
> Marek
> 
>>
>> Roland
>>
>> Am 17.07.2018 um 07:37 schrieb Dave Airlie:
>>> From: Dave Airlie 
>>>
>>> We do need width/height for things, remove the outdated
>>> comment.
>>> ---
>>>  src/gallium/include/pipe/p_state.h | 1 -
>>>  1 file changed, 1 deletion(-)
>>>
>>> diff --git a/src/gallium/include/pipe/p_state.h 
>>> b/src/gallium/include/pipe/p_state.h
>>> index 809aa087ce0..b55624d698d 100644
>>> --- a/src/gallium/include/pipe/p_state.h
>>> +++ b/src/gallium/include/pipe/p_state.h
>>> @@ -435,7 +435,6 @@ struct pipe_surface
>>> struct pipe_resource *texture; /**< resource into which this is a view  
>>> */
>>> struct pipe_context *context; /**< context this surface belongs to */
>>>
>>> -   /* XXX width/height should be removed */
>>> uint16_t width;   /**< logical width in pixels */
>>> uint16_t height;  /**< logical height in pixels */
>>>
>>>
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fmesa-dev&data=02%7C01%7Csroland%40vmware.com%7C452ce5815db4453ae7b908d5ecd42871%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C1%7C0%7C636675318315245680&sdata=P59Ow0j2JORvL0GySNtJZJEIon5%2BEnBAfAFXZa2%2FTYQ%3D&reserved=0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] gallium/u_vbuf: handle indirect multidraws correctly and efficiently

2018-07-18 Thread Marek Olšák
I think draw_count won't be very large. I'll send v2, because I made
some mistakes there.

Marek

On Tue, Jul 17, 2018 at 3:09 PM, Eric Anholt  wrote:
> Marek Olšák  writes:
>
>> From: Marek Olšák 
>>
>> ---
>>  src/gallium/auxiliary/util/u_vbuf.c | 189 
>>  1 file changed, 165 insertions(+), 24 deletions(-)
>>
>> diff --git a/src/gallium/auxiliary/util/u_vbuf.c 
>> b/src/gallium/auxiliary/util/u_vbuf.c
>> index 87b159ec1bf..b0b92f7e966 100644
>> --- a/src/gallium/auxiliary/util/u_vbuf.c
>> +++ b/src/gallium/auxiliary/util/u_vbuf.c
>> @@ -1132,20 +1132,45 @@ static void u_vbuf_set_driver_vertex_buffers(struct 
>> u_vbuf *mgr)
>> unsigned start_slot, count;
>>
>> start_slot = ffs(mgr->dirty_real_vb_mask) - 1;
>> count = util_last_bit(mgr->dirty_real_vb_mask >> start_slot);
>>
>> pipe->set_vertex_buffers(pipe, start_slot, count,
>>  mgr->real_vertex_buffer + start_slot);
>> mgr->dirty_real_vb_mask = 0;
>>  }
>>
>> +static void
>> +u_vbuf_split_indexed_multidraw(struct u_vbuf *mgr, struct pipe_draw_info 
>> *info,
>> +   unsigned *indirect_data, unsigned stride,
>> +   unsigned draw_count)
>> +{
>> +   assert(info->index_size);
>> +   info->indirect = NULL;
>> +
>> +   for (unsigned i = 0; i < draw_count; i++) {
>> +  unsigned offset = i * stride / 4;
>> +
>> +  info->count = indirect_data[offset + 0];
>> +  info->instance_count = indirect_data[offset + 1];
>> +
>> +  if (!info->count || !info->instance_count)
>> + continue;
>> +
>> +  info->start = indirect_data[offset + 2];
>> +  info->index_bias = indirect_data[offset + 3];
>> +  info->start_instance = indirect_data[offset + 4];
>> +
>> +  u_vbuf_draw_vbo(mgr, info);
>> +   }
>> +}
>> +
>>  void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info)
>>  {
>> struct pipe_context *pipe = mgr->pipe;
>> int start_vertex, min_index;
>> unsigned num_vertices;
>> boolean unroll_indices = FALSE;
>> const uint32_t used_vb_mask = mgr->ve->used_vb_mask;
>> uint32_t user_vb_mask = mgr->user_vb_mask & used_vb_mask;
>> const uint32_t incompatible_vb_mask =
>>mgr->incompatible_vb_mask & used_vb_mask;
>> @@ -1160,47 +1185,162 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const 
>> struct pipe_draw_info *info)
>>if (mgr->dirty_real_vb_mask & used_vb_mask) {
>>   u_vbuf_set_driver_vertex_buffers(mgr);
>>}
>>
>>pipe->draw_vbo(pipe, info);
>>return;
>> }
>>
>> new_info = *info;
>>
>> -   /* Fallback. We need to know all the parameters. */
>> +   /* Handle indirect (multi)draws. */
>> if (new_info.indirect) {
>> -  struct pipe_transfer *transfer = NULL;
>> -  int *data;
>> -
>> -  if (new_info.index_size) {
>> - data = pipe_buffer_map_range(pipe, new_info.indirect->buffer,
>> -  new_info.indirect->offset, 20,
>> -  PIPE_TRANSFER_READ, &transfer);
>> - new_info.index_bias = data[3];
>> - new_info.start_instance = data[4];
>> -  }
>> -  else {
>> - data = pipe_buffer_map_range(pipe, new_info.indirect->buffer,
>> -  new_info.indirect->offset, 16,
>> -  PIPE_TRANSFER_READ, &transfer);
>> - new_info.start_instance = data[3];
>> +  const struct pipe_draw_indirect_info *indirect = new_info.indirect;
>> +  unsigned draw_count = 0;
>> +
>> +  /* Get the number of draws. */
>> +  if (indirect->indirect_draw_count) {
>> + pipe_buffer_read(pipe, indirect->indirect_draw_count,
>> +  indirect->indirect_draw_count_offset,
>> +  4, &draw_count);
>> +  } else {
>> + draw_count = indirect->draw_count;
>>}
>>
>> -  new_info.count = data[0];
>> -  new_info.instance_count = data[1];
>> -  new_info.start = data[2];
>> -  pipe_buffer_unmap(pipe, transfer);
>> -  new_info.indirect = NULL;
>> -
>> -  if (!new_info.count)
>> +  if (!draw_count)
>>   return;
>> +
>> +  unsigned data_size = (draw_count - 1) * indirect->stride +
>> +   (new_info.index_size ? 20 : 16);
>> +  unsigned *data = alloca(data_size);
>
> draw_count can be potentially huge, right?  This should be a malloc, I
> think.
>
> Other than that, wow.  I don't think I would have gone to this effort
> and would have just done the split path instead.  Still, with the alloca
> change:
>
> Reviewed-by: Eric Anholt 
>
> Thanks for the "The driver will not look at these values because
> indirect != NULL" comments -- the code would be really surprising
> without that!
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] gallium: surface remove width/height removal comment

2018-07-18 Thread Marek Olšák
On Wed, Jul 18, 2018 at 1:46 PM, Roland Scheidegger  wrote:
> Am 18.07.2018 um 19:29 schrieb Marek Olšák:
>> On Tue, Jul 17, 2018 at 9:44 AM, Roland Scheidegger  
>> wrote:
>>> Initially, width/height were actually needed because not all
>>> pipe_surface objects were backed by pipe_resource objects (that was ages
>>> ago...). Hence the comment when that was fixed, since it was always
>>> actually possible to derive this from the resource (but a bit too
>>> complex to change all the code...).
>>> Are there now situations when you can't figure this out from the
>>> resource? Or do you think it's actually useful as a convenience member?
>>
>> It's useful and it would be complicated to remove.
> Alright. I was really just more wondering if the values couldn't be
> recalculated always nowadays for some reason. I'm ok with removing the
> comment, even if it's just convenience members.

They can be recalculated, but too many drivers and u_blitter already
rely on the variables and u_blitter expects different values for
blitting compressed textures as integer.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] egl: Refuse EGL_MESA_image_dma_buf_export if we don't have a DRM fourcc.

2018-07-18 Thread Eric Anholt
The EGL CTS expects that you can make images from all sorts of things,
including things like z16 and s8, which we don't have DRM fourccs for.
Just return an error when trying to export one of those.
---
 src/egl/drivers/dri2/egl_dri2.c | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index 63679566c1ea..c3024795a100 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -2568,6 +2568,28 @@ dri2_export_drm_image_mesa(_EGLDriver *drv, _EGLDisplay 
*disp, _EGLImage *img,
return EGL_TRUE;
 }
 
+/**
+ * Checks if we can support EGL_MESA_image_dma_buf_export on this image.
+
+ * The spec provides a boolean return for the driver to reject exporting for
+ * basically any reason, but doesn't specify any particular error cases.  For
+ * now, we just fail if we don't have a DRM fourcc for the format.
+ */
+static bool
+dri2_can_export_dma_buf_image(_EGLDisplay *disp, _EGLImage *img)
+{
+   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+   struct dri2_egl_image *dri2_img = dri2_egl_image(img);
+   EGLint fourcc;
+
+   if (!dri2_dpy->image->queryImage(dri2_img->dri_image,
+__DRI_IMAGE_ATTRIB_FOURCC, &fourcc)) {
+  return false;
+   }
+
+   return true;
+}
+
 static EGLBoolean
 dri2_export_dma_buf_image_query_mesa(_EGLDriver *drv, _EGLDisplay *disp,
  _EGLImage *img,
@@ -2579,6 +2601,8 @@ dri2_export_dma_buf_image_query_mesa(_EGLDriver *drv, 
_EGLDisplay *disp,
 
(void) drv;
 
+   if (!dri2_can_export_dma_buf_image(disp, img))
+  return EGL_FALSE;
 
if (nplanes)
   dri2_dpy->image->queryImage(dri2_img->dri_image,
@@ -2602,6 +2626,9 @@ dri2_export_dma_buf_image_mesa(_EGLDriver *drv, 
_EGLDisplay *disp, _EGLImage *im
 
(void) drv;
 
+   if (!dri2_can_export_dma_buf_image(disp, img))
+  return EGL_FALSE;
+
/* rework later to provide multiple fds/strides/offsets */
if (fds)
   dri2_dpy->image->queryImage(dri2_img->dri_image,
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] st/dri: Don't require a dri_format for image creation.

2018-07-18 Thread Eric Anholt
Nothing in EGL_KHR_gl_image.txt seems to let us deny creation based on
formats, and doing so causes many failures in
dEQP-EGL.functional.image.api.*

The NONE value we were protecting from only gets looked at in the
__DRI_IMAGE_ATTRIB_FORMAT and __DRI_IMAGE_ATTRIB_FOURCC queries, which are
used from wayland and gbm (which throw an error cleanly on unknown format)
and DMABUF export.
---
 src/gallium/state_trackers/dri/dri2.c|  2 +-
 src/gallium/state_trackers/dri/dri_helpers.c | 12 
 2 files changed, 1 insertion(+), 13 deletions(-)

diff --git a/src/gallium/state_trackers/dri/dri2.c 
b/src/gallium/state_trackers/dri/dri2.c
index e980698cfb6f..34205853335c 100644
--- a/src/gallium/state_trackers/dri/dri2.c
+++ b/src/gallium/state_trackers/dri/dri2.c
@@ -1318,7 +1318,7 @@ dri2_query_image(__DRIimage *image, int attrib, int 
*value)
   return GL_TRUE;
case __DRI_IMAGE_ATTRIB_FOURCC:
   *value = convert_to_fourcc(image->dri_format);
-  return GL_TRUE;
+  return *value != -1;
case __DRI_IMAGE_ATTRIB_NUM_PLANES:
   *value = 1;
   return GL_TRUE;
diff --git a/src/gallium/state_trackers/dri/dri_helpers.c 
b/src/gallium/state_trackers/dri/dri_helpers.c
index 5d42873a208a..25095bbce890 100644
--- a/src/gallium/state_trackers/dri/dri_helpers.c
+++ b/src/gallium/state_trackers/dri/dri_helpers.c
@@ -296,12 +296,6 @@ dri2_create_image_from_renderbuffer2(__DRIcontext *context,
img->dri_format = driGLFormatToImageFormat(rb->Format);
img->loader_private = loaderPrivate;
 
-   if (img->dri_format == __DRI_IMAGE_FORMAT_NONE) {
-  *error = __DRI_IMAGE_ERROR_BAD_PARAMETER;
-  free(img);
-  return NULL;
-   }
-
pipe_resource_reference(&img->texture, tex);
 
*error = __DRI_IMAGE_ERROR_SUCCESS;
@@ -379,12 +373,6 @@ dri2_create_from_texture(__DRIcontext *context, int 
target, unsigned texture,
 
img->loader_private = loaderPrivate;
 
-   if (img->dri_format == __DRI_IMAGE_FORMAT_NONE) {
-  *error = __DRI_IMAGE_ERROR_BAD_PARAMETER;
-  free(img);
-  return NULL;
-   }
-
pipe_resource_reference(&img->texture, tex);
 
*error = __DRI_IMAGE_ERROR_SUCCESS;
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 1/5] nv50/ir: add preliminary support for OP_XMAD

2018-07-18 Thread Karol Herbst
uint16_t

On Wed, Jul 18, 2018 at 7:05 PM, Rhys Perry  wrote:
> Signed-off-by: Rhys Perry 
> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir.h  | 23 
> ++
>  .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 17 ++--
>  .../drivers/nouveau/codegen/nv50_ir_print.cpp  | 18 +
>  .../drivers/nouveau/codegen/nv50_ir_target.cpp |  7 ---
>  .../nouveau/codegen/nv50_ir_target_gm107.cpp   |  1 +
>  .../nouveau/codegen/nv50_ir_target_nv50.cpp|  1 +
>  .../nouveau/codegen/nv50_ir_target_nvc0.cpp| 15 ++
>  7 files changed, 77 insertions(+), 5 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> index 0b220cc48d..9798e98a1a 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> @@ -58,6 +58,9 @@ enum operation
> OP_FMA,
> OP_SAD, // abs(src0 - src1) + src2
> OP_SHLADD,
> +   // extended multiply-add (GM107+), does a lot of things.
> +   // see envytools for detailed documentation
> +   OP_XMAD,
> OP_ABS,
> OP_NEG,
> OP_NOT,
> @@ -256,6 +259,26 @@ enum operation
>  #define NV50_IR_SUBOP_MINMAX_MED  2
>  #define NV50_IR_SUBOP_MINMAX_HIGH 3
>
> +// xmad(src0, src1, 0) << 16 + src2
> +#define NV50_IR_SUBOP_XMAD_PSL (1 << 0)
> +// (xmad(src0, src1, src2) & 0xffff) | (src1 << 16)
> +#define NV50_IR_SUBOP_XMAD_MRG (1 << 1)
> +// xmad(src0, src1, src2.lo)
> +#define NV50_IR_SUBOP_XMAD_CLO (1 << 2)
> +// xmad(src0, src1, src2.hi)
> +#define NV50_IR_SUBOP_XMAD_CHI (2 << 2)
> +// if both operands to the multiplication are non-zero, subtract 65536 for 
> each
> +// negative operand
> +#define NV50_IR_SUBOP_XMAD_CSFU (3 << 2)
> +// xmad(src0, src1, src2) + src1 << 16
> +#define NV50_IR_SUBOP_XMAD_CBCC (4 << 2)
> +#define NV50_IR_SUBOP_XMAD_CMODE_MASK (0x7 << 2)
> +

yeah, this looks nice and quite understandable!

> +// use the high 16 bits instead of the low 16 bits for the multiplication.
> +// if the instruction's sType is signed, sign extend the operand from 16 bits
> +// to 32 before multiplication.
> +#define NV50_IR_SUBOP_XMAD_H1(i) (1 << (6 + (i)))

wouldn't it be enough to do (1 << (5 + (i)))?

> +
>  enum DataType
>  {
> TYPE_NONE,
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> index 4e08cfadec..5fc1fba970 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> @@ -191,9 +191,16 @@ void
>  LoadPropagation::checkSwapSrc01(Instruction *insn)
>  {
> const Target *targ = prog->getTarget();
> -   if (!targ->getOpInfo(insn).commutative)
> -  if (insn->op != OP_SET && insn->op != OP_SLCT && insn->op != OP_SUB)
> +   if (!targ->getOpInfo(insn).commutative) {
> +  if (insn->op != OP_SET && insn->op != OP_SLCT &&
> +  insn->op != OP_SUB && insn->op != OP_XMAD)
>   return;
> +  // XMAD is only commutative if both the CBCC and MRG flags are not set.
> +  if (insn->op == OP_XMAD && (insn->subOp & 0x1c) == 
> NV50_IR_SUBOP_XMAD_CBCC)
> + return;
> +  if (insn->op == OP_XMAD && (insn->subOp & NV50_IR_SUBOP_XMAD_MRG))
> + return;

maybe it makes sense to make both checks look alike. And I think
NV50_IR_SUBOP_XMAD_CMODE_MASK should be used instead of 0x1c.

> +   }
> if (insn->src(1).getFile() != FILE_GPR)
>return;
> // This is the special OP_SET used for alphatesting, we can't reverse its
> @@ -236,6 +243,12 @@ LoadPropagation::checkSwapSrc01(Instruction *insn)
> if (insn->op == OP_SUB) {
>insn->src(0).mod = insn->src(0).mod ^ Modifier(NV50_IR_MOD_NEG);
>insn->src(1).mod = insn->src(1).mod ^ Modifier(NV50_IR_MOD_NEG);
> +   } else
> +   if (insn->op == OP_XMAD) {
> +  // swap h1 flags
> +  uint16_t h1 = (insn->subOp >> 6) & 0x3;
> +  h1 = (h1 >> 1 & 0x1) | (h1 << 1 & 0x2);
> +  insn->subOp = (insn->subOp & ~uint16_t(0x3 << 6)) | (h1 << 6);

I think I would prefer this code to use fewer magic numbers and more
constants, but I am not able to come up with a nice way of doing it with
macros either :/

but I think you can skip moving the bits altogether:

uint16_t h1 = insn->subOp & 0xc0;
h1 = (h1 >> 1 & 0x40) | (h1 << 1 & 0x80);
insn->subOp = (insn->subOp & ~(0x3 << 6)) | h1;

and then we can probably get rid of the first mask as well:

uint16_t h1 = (insn->subOp >> 1 & 0x40) | (insn->subOp << 1 & 0x80);
insn->subOp = (insn->subOp & ~(0x3 << 6)) | h1;

and maybe use the above macros?

uint16_t h1 = (insn->subOp >> 1 & NV50_IR_SUBOP_XMAD_H1(0)) |
(insn->subOp << 1 & NV50_IR_SUBOP_XMAD_H1(1));
insn->subOp = (insn->subOp & ~(0x3 << 6)) | h1;

still looks a bit weird with the 0x3 << 6 thing though, but I prefer
to have no magic numbers here. Maybe have a macro for selecting all H1
bits as w

Re: [Mesa-dev] [PATCH v2 2/5] gm107/ir: add support for OP_XMAD on GM107+

2018-07-18 Thread Karol Herbst
On Wed, Jul 18, 2018 at 7:05 PM, Rhys Perry  wrote:
> Signed-off-by: Rhys Perry 
> ---
>  .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 63 
> ++
>  .../nouveau/codegen/nv50_ir_target_gm107.cpp   |  6 ++-
>  .../nouveau/codegen/nv50_ir_target_nvc0.cpp|  1 +
>  3 files changed, 69 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
> index 694d1b10a3..caf853d58b 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
> @@ -155,6 +155,7 @@ private:
> void emitIMUL();
> void emitIMAD();
> void emitISCADD();
> +   void emitXMAD();
> void emitIMNMX();
> void emitICMP();
> void emitISET();
> @@ -1882,6 +1883,65 @@ CodeEmitterGM107::emitISCADD()
> emitGPR (0x08, insn->src(0));
> emitGPR (0x00, insn->def(0));
>  }
> +
> +void
> +CodeEmitterGM107::emitXMAD()
> +{
> +   assert(insn->src(0).getFile() == FILE_GPR);
> +
> +   bool constbuf = false;
> +   bool psl_mrg = true;
> +   bool immediate = false;
> +   if (insn->src(2).getFile() == FILE_MEMORY_CONST) {
> +  assert(insn->src(1).getFile() == FILE_GPR);
> +  constbuf = true;
> +  psl_mrg = false;
> +  emitInsn(0x5100);
> +  emitGPR(0x27, insn->src(1));
> +  emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
> +   } else if (insn->src(1).getFile() == FILE_MEMORY_CONST) {
> +  assert(insn->src(2).getFile() == FILE_GPR);
> +  constbuf = true;
> +  emitInsn(0x4e00);
> +  emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
> +  emitGPR(0x27, insn->src(2));
> +   } else if (insn->src(1).getFile() == FILE_IMMEDIATE) {
> +  assert(insn->src(2).getFile() == FILE_GPR);
> +  assert(!(insn->subOp & NV50_IR_SUBOP_XMAD_H1(1)));
> +  immediate = false;
> +  emitInsn(0x3600);
> +  emitIMMD(0x14, 19, insn->src(1));
> +  emitGPR(0x27, insn->src(2));
> +   } else {
> +  assert(insn->src(1).getFile() == FILE_GPR);
> +  assert(insn->src(2).getFile() == FILE_GPR);
> +  emitInsn(0x5b00);
> +  emitGPR(0x14, insn->src(1));
> +  emitGPR(0x27, insn->src(2));
> +   }
> +
> +   if (psl_mrg)
> +  emitField(constbuf ? 0x37 : 0x24, 2, insn->subOp & 0x3);
> +   emitField(0x32, constbuf ? 2 : 3, (insn->subOp >> 2) & 0x7);
> +
> +   emitX(constbuf ? 0x36 : 0x26);
> +   emitCC(0x2f);
> +
> +   emitGPR(0x0, insn->def(0));
> +   emitGPR(0x8, insn->src(0));
> +
> +   // source flags
> +   bool h1[2];
> +   h1[0] = insn->subOp & NV50_IR_SUBOP_XMAD_H1(0);
> +   h1[1] = insn->subOp & NV50_IR_SUBOP_XMAD_H1(1);
> +   bool isSigned = isSignedType(insn->sType);
> +   bool sext[2] = {h1[0] && isSigned, h1[1] && isSigned};
> +
> +   emitField(0x30, 2, sext[0] | (sext[1] << 1));

maybe better do this instead?:
if (isSignedType(insn->sType)) {
   uint16_t h1s = insn->subOp & NV50_IR_SUBOP_XMAD_H1_MASK;
   emitField(0x30, 2, h1s >> NV50_IR_SUBOP_XMAD_H1_SHIFT);
}

> +   emitField(0x35, 1, h1[0]);
> +   if (!immediate)
> +  emitField(constbuf ? 0x34 : 0x23, 1, h1[1]);
> +}
>
>  void
>  CodeEmitterGM107::emitIMNMX()
> @@ -3254,6 +3314,9 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
> case OP_SHLADD:
>emitISCADD();
>break;
> +   case OP_XMAD:
> +  emitXMAD();
> +  break;
> case OP_MIN:
> case OP_MAX:
>if (isFloatType(insn->dType)) {
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
> index 24a1cbb8da..f918fbfdd3 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
> @@ -60,8 +60,11 @@ TargetGM107::isOpSupported(operation op, DataType ty) const
> case OP_SQRT:
> case OP_DIV:
> case OP_MOD:
> -   case OP_XMAD:
>return false;
> +   case OP_XMAD:
> +  if (isFloatType(ty))
> + return false;
> +  break;
> default:
>break;
> }
> @@ -230,6 +233,7 @@ TargetGM107::getLatency(const Instruction *insn) const
> case OP_SUB:
> case OP_VOTE:
> case OP_XOR:
> +   case OP_XMAD:
>if (insn->dType != TYPE_F64)
>   return 6;
>break;
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
> index 94e98ada5e..39869e3145 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
> @@ -161,6 +161,7 @@ static const struct opProperties _initPropsGM107[] = {
> { OP_SUSTP,   0x0, 0x0, 0x0, 0x0, 0x0, 0x4 },
> { OP_SUREDB,  0x0, 0x0, 0x0, 0x0, 0x0, 0x4 },
> { OP_SUREDP,  0x0, 0x0, 0x0, 0x0, 0x0, 0x4 },
> +   { OP_XMAD,0x0, 0x0, 0x0, 0x0, 0x6, 0x2 },
>  };
>
>  void TargetNVC0::initPr

[Mesa-dev] [PATCH] radv: fix wmaybe-uninitialized in radv_meta_fast_clear.c

2018-07-18 Thread Andres Rodriguez
Assignment and usage of this variable both happen inside
if (radv_image_has_dcc()) {} blocks. It seems gcc plays it safe and
assumes that both function calls could have different return values.

But in this case we should be safe.
---
 src/amd/vulkan/radv_meta_fast_clear.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_meta_fast_clear.c 
b/src/amd/vulkan/radv_meta_fast_clear.c
index 932a6c93aa2..b42a6783fd2 100644
--- a/src/amd/vulkan/radv_meta_fast_clear.c
+++ b/src/amd/vulkan/radv_meta_fast_clear.c
@@ -586,7 +586,7 @@ radv_emit_color_decompress(struct radv_cmd_buffer 
*cmd_buffer,
VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
uint32_t layer_count = radv_get_layerCount(image, subresourceRange);
-   bool old_predicating;
+   bool old_predicating = false;
VkPipeline pipeline;
 
assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radv: fix wmaybe-uninitialized in radv_meta_fast_clear.c

2018-07-18 Thread Andres Rodriguez
Assignment and usage of this variable both happen inside
if (radv_image_has_dcc()) {} blocks. It seems gcc plays it safe and
assumes that both function calls could have different return values.

But in this case we should be safe.
---
 src/amd/vulkan/radv_meta_fast_clear.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_meta_fast_clear.c 
b/src/amd/vulkan/radv_meta_fast_clear.c
index 932a6c93aa2..b42a6783fd2 100644
--- a/src/amd/vulkan/radv_meta_fast_clear.c
+++ b/src/amd/vulkan/radv_meta_fast_clear.c
@@ -586,7 +586,7 @@ radv_emit_color_decompress(struct radv_cmd_buffer 
*cmd_buffer,
VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
uint32_t layer_count = radv_get_layerCount(image, subresourceRange);
-   bool old_predicating;
+   bool old_predicating = false;
VkPipeline pipeline;
 
assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 3/5] nv50/ir: optimize imul/imad to xmads

2018-07-18 Thread Karol Herbst
some nitpicks, but with those fixed:

Reviewed-by: Karol Herbst 

On Wed, Jul 18, 2018 at 7:05 PM, Rhys Perry  wrote:
> This hits the shader-db numbers a good bit, though a few xmads is way
> faster than an imul or imad and the cost is mitigated by the next commit,
> which optimizes many multiplications by immediates into shorter and less
> register heavy instructions than the xmads.
>
> total instructions in shared programs : 5256901 -> 5294693 (0.72%)
> total gprs used in shared programs: 624328 -> 624962 (0.10%)
> total shared used in shared programs  : 360704 -> 360704 (0.00%)
> total local used in shared programs   : 20952 -> 21048 (0.46%)
>
>          local  shared  gpr   inst  bytes
> helped   0      0       39    0     0
>   hurt   1      0       334   2277  2277
>
> Signed-off-by: Rhys Perry 
> ---
>  .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 51 
> ++
>  .../nouveau/codegen/nv50_ir_target_gm107.cpp   |  1 -
>  2 files changed, 51 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> index 5fc1fba970..14cc4b32d4 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> @@ -2291,13 +2291,18 @@ AlgebraicOpt::visit(BasicBlock *bb)
>  // 
> =
>
>  // ADD(SHL(a, b), c) -> SHLADD(a, b, c)
> +// MUL(a, b) -> a few XMADs
> +// MAD/FMA(a, b, c) -> a few XMADs
>  class LateAlgebraicOpt : public Pass
>  {
>  private:
> virtual bool visit(Instruction *);
>
> void handleADD(Instruction *);
> +   void handleMULMAD(Instruction *);
> bool tryADDToSHLADD(Instruction *);
> +
> +   BuildUtil bld;
>  };
>
>  void
> @@ -2357,6 +2362,47 @@ LateAlgebraicOpt::tryADDToSHLADD(Instruction *add)
>
> return true;
>  }
> +
> +// MUL(a, b) -> a few XMADs
> +// MAD/FMA(a, b, c) -> a few XMADs
> +void
> +LateAlgebraicOpt::handleMULMAD(Instruction *i)
> +{
> +   // TODO: handle NV50_IR_SUBOP_MUL_HIGH
> +   if (!prog->getTarget()->isOpSupported(OP_XMAD, TYPE_U32))
> +  return;
> +   if (isFloatType(i->dType) || typeSizeof(i->dType) != 4)
> +  return;
> +   if (i->subOp || i->usesFlags() || i->flagsDef >= 0)
> +  return;
> +
> +   assert(!i->src(0).mod);
> +   assert(!i->src(1).mod);
> +   assert(i->op == OP_MUL ? 1 : !i->src(2).mod);
> +
> +   bld.setPosition(i, true);
> +
> +   Value *a = i->getSrc(0);
> +   Value *b = i->getSrc(1);
> +   Value *c = i->op == OP_MUL ? bld.mkImm(0) : i->getSrc(2);
> +
> +   Value *tmp0 = bld.getSSA();
> +   Value *tmp1 = bld.getSSA();
> +
> +   Instruction *insn = bld.mkOp3(OP_XMAD, TYPE_U32, tmp0, b, a, c);
> +   insn->setPredicate(i->cc, i->getPredicate());
> +
> +   insn = bld.mkOp3(OP_XMAD, TYPE_U32, tmp1, b, a, bld.mkImm(0));
> +   insn->setPredicate(i->cc, i->getPredicate());
> +   insn->subOp = NV50_IR_SUBOP_XMAD_MRG | NV50_IR_SUBOP_XMAD_H1(1);
> +
> +   insn = bld.mkOp3(OP_XMAD, TYPE_U32, i->getDef(0), b, tmp1, tmp0);
> +   insn->setPredicate(i->cc, i->getPredicate());
> +   insn->subOp = NV50_IR_SUBOP_XMAD_PSL | NV50_IR_SUBOP_XMAD_CBCC;
> +   insn->subOp |= NV50_IR_SUBOP_XMAD_H1(0) | NV50_IR_SUBOP_XMAD_H1(1);
> +
> +   delete_Instruction(prog, i);

I think you can simply adjust the current op instead, then we don't
need to set the predicate, create and delete an instruction:

insn->op = OP_XMAD;
insn->setSrc(0, b);
insn->setSrc(1, tmp1);
insn->setSrc(2, tmp0);
insn->subOp = NV50_IR_SUBOP_XMAD_PSL | NV50_IR_SUBOP_XMAD_CBCC;
insn->subOp |= NV50_IR_SUBOP_XMAD_H1(0) | NV50_IR_SUBOP_XMAD_H1(1);

> +}
>
>  bool
>  LateAlgebraicOpt::visit(Instruction *i)
> @@ -2365,6 +2411,11 @@ LateAlgebraicOpt::visit(Instruction *i)
> case OP_ADD:
>handleADD(i);
>break;
> +   case OP_MUL:
> +   case OP_MAD:
> +   case OP_FMA:
> +  handleMULMAD(i);
> +  break;
> default:
>break;
> }
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
> index f918fbfdd3..571d8a67c2 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
> @@ -165,7 +165,6 @@ TargetGM107::isBarrierRequired(const Instruction *insn) 
> const
>}
>break;
> case OPCLASS_ARITH:
> -  // TODO: IMUL/IMAD require barriers too, use of XMAD instead!
>if ((insn->op == OP_MUL || insn->op == OP_MAD) &&
>!isFloatType(insn->dType))
>   return true;
> --
> 2.14.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-de

Re: [Mesa-dev] [PATCH] swrast: remove unneeded _glapi_check_multithread() call

2018-07-18 Thread Eric Anholt
Emil Velikov  writes:

> From: Emil Velikov 
>
> With commit c6c0f947142, back in 2006 Brian removed the
> _glapi_check_multithread() call from core mesa - _mesa_make_current.
>
> He moved it to a DRI loader (type of) decision - with equivalents in
> the monolithic OSMesa and libGL-Xlib.
>
> Some time afterwards, swrast (a DRI driver) was introduced as a copy
> paste of the latter two. Remove the call since the DRI loader handles
> that for us.
>
> Signed-off-by: Emil Velikov 

Does anything make sure it gets called for classic drivers under EGL?

Actually, wouldn't it make sense to move this back to the core, at this
point?


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv: fix wmaybe-uninitialized in radv_meta_fast_clear.c

2018-07-18 Thread Samuel Pitoiset

Reviewed-by: Samuel Pitoiset 

On 07/18/2018 08:24 PM, Andres Rodriguez wrote:

Assignment and usage of this variable both happen inside an
if(rad_image_has_dcc()) {} blocks. It seems gcc plays it safe and
assumes that both function calls could have different return values.

But in this case we should be safe.
---
  src/amd/vulkan/radv_meta_fast_clear.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_meta_fast_clear.c 
b/src/amd/vulkan/radv_meta_fast_clear.c
index 932a6c93aa2..b42a6783fd2 100644
--- a/src/amd/vulkan/radv_meta_fast_clear.c
+++ b/src/amd/vulkan/radv_meta_fast_clear.c
@@ -586,7 +586,7 @@ radv_emit_color_decompress(struct radv_cmd_buffer 
*cmd_buffer,
VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
uint32_t layer_count = radv_get_layerCount(image, subresourceRange);
-   bool old_predicating;
+   bool old_predicating = false;
VkPipeline pipeline;
  
  	assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] st/dri: Don't require a dri_format for image creation.

2018-07-18 Thread Marek Olšák
For the series:

Reviewed-by: Marek Olšák 

Marek

On Wed, Jul 18, 2018 at 2:02 PM, Eric Anholt  wrote:
> Nothing in EGL_KHR_gl_image.txt seems to let us deny creation based on
> formats, and doing so causes many failures in
> dEQP-EGL.functional.image.api.*
>
> The NONE value we were protecting from only gets looked at in the
> __DRI_IMAGE_ATTRIB_FORMAT and __DRI_IMAGE_ATTRIB_FOURCC queries, which are
> used from wayland and gbm (which throw an error cleanly on unknown format)
> and DMABUF export.
> ---
>  src/gallium/state_trackers/dri/dri2.c|  2 +-
>  src/gallium/state_trackers/dri/dri_helpers.c | 12 
>  2 files changed, 1 insertion(+), 13 deletions(-)
>
> diff --git a/src/gallium/state_trackers/dri/dri2.c 
> b/src/gallium/state_trackers/dri/dri2.c
> index e980698cfb6f..34205853335c 100644
> --- a/src/gallium/state_trackers/dri/dri2.c
> +++ b/src/gallium/state_trackers/dri/dri2.c
> @@ -1318,7 +1318,7 @@ dri2_query_image(__DRIimage *image, int attrib, int 
> *value)
>return GL_TRUE;
> case __DRI_IMAGE_ATTRIB_FOURCC:
>*value = convert_to_fourcc(image->dri_format);
> -  return GL_TRUE;
> +  return *value != -1;
> case __DRI_IMAGE_ATTRIB_NUM_PLANES:
>*value = 1;
>return GL_TRUE;
> diff --git a/src/gallium/state_trackers/dri/dri_helpers.c 
> b/src/gallium/state_trackers/dri/dri_helpers.c
> index 5d42873a208a..25095bbce890 100644
> --- a/src/gallium/state_trackers/dri/dri_helpers.c
> +++ b/src/gallium/state_trackers/dri/dri_helpers.c
> @@ -296,12 +296,6 @@ dri2_create_image_from_renderbuffer2(__DRIcontext 
> *context,
> img->dri_format = driGLFormatToImageFormat(rb->Format);
> img->loader_private = loaderPrivate;
>
> -   if (img->dri_format == __DRI_IMAGE_FORMAT_NONE) {
> -  *error = __DRI_IMAGE_ERROR_BAD_PARAMETER;
> -  free(img);
> -  return NULL;
> -   }
> -
> pipe_resource_reference(&img->texture, tex);
>
> *error = __DRI_IMAGE_ERROR_SUCCESS;
> @@ -379,12 +373,6 @@ dri2_create_from_texture(__DRIcontext *context, int 
> target, unsigned texture,
>
> img->loader_private = loaderPrivate;
>
> -   if (img->dri_format == __DRI_IMAGE_FORMAT_NONE) {
> -  *error = __DRI_IMAGE_ERROR_BAD_PARAMETER;
> -  free(img);
> -  return NULL;
> -   }
> -
> pipe_resource_reference(&img->texture, tex);
>
> *error = __DRI_IMAGE_ERROR_SUCCESS;
> --
> 2.18.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] radeonsi: emit_spi_map packets optimization

2018-07-18 Thread Marek Olšák
Hi Sonny,

I've pushed the first 2 patches.

See below for feedback on this patch.

On Tue, Jul 17, 2018 at 10:22 AM, Sonny Jiang  wrote:
> Signed-off-by: Sonny Jiang 
> ---
>  src/gallium/drivers/radeonsi/si_build_pm4.h | 23 +++
>  src/gallium/drivers/radeonsi/si_gfx_cs.c|  2 ++
>  src/gallium/drivers/radeonsi/si_state.h |  1 +
>  src/gallium/drivers/radeonsi/si_state_shaders.c | 20 
>  4 files changed, 38 insertions(+), 8 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_build_pm4.h 
> b/src/gallium/drivers/radeonsi/si_build_pm4.h
> index b339cd5..4fa44f2 100644
> --- a/src/gallium/drivers/radeonsi/si_build_pm4.h
> +++ b/src/gallium/drivers/radeonsi/si_build_pm4.h
> @@ -214,4 +214,27 @@ static inline void radeon_opt_set_context_reg4(struct 
> si_context *sctx, unsigned
> }
>  }
>
> +/**
> + * Set consecutive registers if any register's value is different.
> + */
> +static inline void radeon_opt_set_context_regn(struct si_context *sctx, 
> unsigned offset,
> +  unsigned * value, unsigned num)
> +{
> +   struct radeon_cmdbuf *cs = sctx->gfx_cs;
> +   int i, j;
> +
> +   for (i = 0; i < num; i++) {
> +   if (sctx->tracked_regs.reg_val_seq[i] != value[i]) {
> +   radeon_set_context_reg_seq(cs, offset, num);
> +   for (j = 0; j < num; j++) {
> +   radeon_emit(cs, value[j]);
> +   }
> +
> +   memcpy(sctx->tracked_regs.reg_val_seq, value, 
> sizeof(uint32_t) * num);

This function is not reusable in other places, because it always uses
the same reg_val_seq array.

> +
> +   break;
> +   }
> +   }
> +}
> +
>  #endif
> diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c 
> b/src/gallium/drivers/radeonsi/si_gfx_cs.c
> index 628b6c5..cfbcfb0 100644
> --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
> +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
> @@ -353,4 +353,6 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
> /* Set all saved registers state to unknown. */
> ctx->tracked_regs.reg_saved = 0;
> }
> +
> +   memset(ctx->tracked_regs.reg_val_seq, 0xff, sizeof(uint32_t) * 32);

A comment explaining why clearing to 0xff is correct would be useful,
because it's not obvious at first glance.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 2/2] egl/surfaceless: Allow DRMless fallback.

2018-07-18 Thread Chad Versace
The two patches look correct to me.
Reviewed-by: Chad Versace 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC][PATCH] gallivm: Clean up llvm data structures upon destruction.

2018-07-18 Thread David Riley
Without this change, dlopen()/dlclose() of any driver results in memory
being leaked which becomes problematic if things are being reinitialized
repeatedly (eg from a fuzzer).

Even with this change, repeated dlopen()/dlclose() results in a single
LLVM mutex being allocated and never freed (used to synchronize 
ManagedStatic).  I've spoken to some LLVM folks and haven't come up with
a great answer on avoiding that memory leak and not running into issues
with signal handlers and/or global destructors being called after
llvm_shutdown().

With regard to the RFC, there are potentially some issues here with
multiple drivers being loaded with separate LLVM instances and being
shared due to the dlopen() being done with RTLD_GLOBAL, but this falls
squarely outside my realm of confidence.

---
 src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp 
b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index 79dbedbb56..d537ae6029 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -68,6 +68,7 @@
 #endif
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -813,3 +814,12 @@ lp_is_function(LLVMValueRef v)
return llvm::isa(llvm::unwrap(v));
 #endif
 }
+
+/*
+ * Attempt to clean up to allow drivers to be loaded/unloaded without
+ * leaking excessive amounts of memory.
+ */
+__attribute__((destructor)) static void llvm_fini()
+{
+   llvm::llvm_shutdown();
+}
-- 
2.18.0.203.gfac676dfb9-goog

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 1/4] intel: tools: dump: remove command execution feature

2018-07-18 Thread Jason Ekstrand
Very sketchily

Reviewed-by: Jason Ekstrand 

On Wed, Jul 18, 2018 at 10:21 AM Lionel Landwerlin <
lionel.g.landwer...@intel.com> wrote:

> In commit 86cb05a6d35a52 ("intel: aubinator: remove standard input
> processing option") we removed the ability to process aub as an input
> stream because we now rely on mmapping the aub file to back the
> buffers aubinator is parsing.
>
> intel_aubdump was the provider of the standard input data and since
> we've copied/reworked intel_aubdump into intel_dump_gpu within Mesa,
> we don't need that code anymore.
>
> Signed-off-by: Lionel Landwerlin 
> ---
>  src/intel/tools/intel_dump_gpu.c  | 121 +++---
>  src/intel/tools/intel_dump_gpu.in |  27 +--
>  2 files changed, 29 insertions(+), 119 deletions(-)
>
> diff --git a/src/intel/tools/intel_dump_gpu.c
> b/src/intel/tools/intel_dump_gpu.c
> index 6d2c4b7f983..5fd2c8ea723 100644
> --- a/src/intel/tools/intel_dump_gpu.c
> +++ b/src/intel/tools/intel_dump_gpu.c
> @@ -53,8 +53,8 @@ static int (*libc_close)(int fd) = close_init_helper;
>  static int (*libc_ioctl)(int fd, unsigned long request, ...) =
> ioctl_init_helper;
>
>  static int drm_fd = -1;
> -static char *filename = NULL;
> -static FILE *files[2] = { NULL, NULL };
> +static char *output_filename = NULL;
> +static FILE *output_file = NULL;
>  static int verbose = 0;
>  static bool device_override;
>
> @@ -111,7 +111,7 @@ align_u32(uint32_t v, uint32_t a)
>
>  static struct gen_device_info devinfo = {0};
>  static uint32_t device;
> -static struct aub_file aubs[2];
> +static struct aub_file aub_file;
>
>  static void *
>  relocate_bo(struct bo *bo, const struct drm_i915_gem_execbuffer2
> *execbuffer2,
> @@ -205,28 +205,21 @@ dump_execbuffer2(int fd, struct
> drm_i915_gem_execbuffer2 *execbuffer2)
>fail_if(!gen_get_device_info(device, &devinfo),
>"failed to identify chipset=0x%x\n", device);
>
> -  for (int i = 0; i < ARRAY_SIZE(files); i++) {
> - if (files[i] != NULL) {
> -aub_file_init(&aubs[i], files[i], device);
> -if (verbose == 2)
> -   aubs[i].verbose_log_file = stdout;
> -aub_write_header(&aubs[i], program_invocation_short_name);
> - }
> -  }
> +  aub_file_init(&aub_file, output_file, device);
> +  if (verbose == 2)
> + aub_file.verbose_log_file = stdout;
> +  aub_write_header(&aub_file, program_invocation_short_name);
>
>if (verbose)
>   printf("[intel_aubdump: running, "
>  "output file %s, chipset id 0x%04x, gen %d]\n",
> -filename, device, devinfo.gen);
> +output_filename, device, devinfo.gen);
> }
>
> -   /* Any aub */
> -   struct aub_file *any_aub = files[0] ? &aubs[0] : &aubs[1];;
> -
> -   if (aub_use_execlists(any_aub))
> +   if (aub_use_execlists(&aub_file))
>offset = 0x1000;
> else
> -  offset = aub_gtt_size(any_aub);
> +  offset = aub_gtt_size(&aub_file);
>
> if (verbose)
>printf("Dumping execbuffer2:\n");
> @@ -263,13 +256,8 @@ dump_execbuffer2(int fd, struct
> drm_i915_gem_execbuffer2 *execbuffer2)
>   bo->map = gem_mmap(fd, obj->handle, 0, bo->size);
>fail_if(bo->map == MAP_FAILED, "intel_aubdump: bo mmap failed\n");
>
> -  for (int i = 0; i < ARRAY_SIZE(files); i++) {
> - if (files[i] == NULL)
> -continue;
> -
> - if (aub_use_execlists(&aubs[i]))
> -aub_map_ppgtt(&aubs[i], bo->offset, bo->size);
> -  }
> +  if (aub_use_execlists(&aub_file))
> + aub_map_ppgtt(&aub_file, bo->offset, bo->size);
> }
>
> batch_index = (execbuffer2->flags & I915_EXEC_BATCH_FIRST) ? 0 :
> @@ -284,30 +272,21 @@ dump_execbuffer2(int fd, struct
> drm_i915_gem_execbuffer2 *execbuffer2)
>else
>   data = bo->map;
>
> -  for (int i = 0; i < ARRAY_SIZE(files); i++) {
> - if (files[i] == NULL)
> -continue;
> -
> - if (bo == batch_bo) {
> -aub_write_trace_block(&aubs[i], AUB_TRACE_TYPE_BATCH,
> -  GET_PTR(data), bo->size, bo->offset);
> - } else {
> -aub_write_trace_block(&aubs[i], AUB_TRACE_TYPE_NOTYPE,
> -  GET_PTR(data), bo->size, bo->offset);
> - }
> +  if (bo == batch_bo) {
> + aub_write_trace_block(&aub_file, AUB_TRACE_TYPE_BATCH,
> +   GET_PTR(data), bo->size, bo->offset);
> +  } else {
> + aub_write_trace_block(&aub_file, AUB_TRACE_TYPE_NOTYPE,
> +   GET_PTR(data), bo->size, bo->offset);
>}
> +
>if (data != bo->map)
>   free(data);
> }
>
> -   for (int i = 0; i < ARRAY_SIZE(files); i++) {
> -  if (files[i] != NULL)
> - continue;
> -
> -  aub_write_exec(&aubs[i],
> - batch_bo->offset + execbuffer2->batch_start_offset,
> - offset, ring

Re: [Mesa-dev] [PATCH v2 4/4] intel: tools: dump: trace memory writes

2018-07-18 Thread Rafael Antognolli
On Wed, Jul 18, 2018 at 06:21:32PM +0100, Lionel Landwerlin wrote:
> Signed-off-by: Lionel Landwerlin 
> ---
>  src/intel/tools/aub_write.c | 45 ++---
>  1 file changed, 32 insertions(+), 13 deletions(-)
> 
> diff --git a/src/intel/tools/aub_write.c b/src/intel/tools/aub_write.c
> index de4ce33..9c140553542 100644
> --- a/src/intel/tools/aub_write.c
> +++ b/src/intel/tools/aub_write.c
> @@ -313,10 +313,17 @@ dword_out(struct aub_file *aub, uint32_t data)
>  
>  static void
>  mem_trace_memory_write_header_out(struct aub_file *aub, uint64_t addr,
> -  uint32_t len, uint32_t addr_space)
> +  uint32_t len, uint32_t addr_space,
> +  const char *desc)

Looks like you are not using desc anywhere...

Other than that, things look good.

>  {
> uint32_t dwords = ALIGN(len, sizeof(uint32_t)) / sizeof(uint32_t);
>  
> +   if (aub->verbose_log_file) {
> +  fprintf(aub->verbose_log_file,
> +  "  MEM WRITE (0x%016" PRIx64 "-0x%016" PRIx64 ")\n",
> +  addr, addr + len);
> +   }
> +
> dword_out(aub, CMD_MEM_TRACE_MEMORY_WRITE | (5 + dwords - 1));
> dword_out(aub, addr & 0x);   /* addr lo */
> dword_out(aub, addr >> 32);   /* addr hi */
> @@ -387,7 +394,8 @@ populate_ppgtt_table(struct aub_file *aub, struct 
> aub_ppgtt_table *table,
>uint64_t write_size = (dirty_end - dirty_start + 1) *
>   sizeof(uint64_t);
>mem_trace_memory_write_header_out(aub, write_addr, write_size,
> -
> AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_PHYSICAL);
> +
> AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_PHYSICAL,
> +"PPGTT update");
>data_out(aub, entries + dirty_start, write_size);
> }
>  }
> @@ -476,7 +484,8 @@ write_execlists_header(struct aub_file *aub, const char 
> *name)
>  
> mem_trace_memory_write_header_out(aub, STATIC_GGTT_MAP_START >> 12,
>   ggtt_ptes * GEN8_PTE_SIZE,
> - 
> AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY);
> + 
> AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY,
> + "GGTT PT");
> for (uint32_t i = 0; i < ggtt_ptes; i++) {
>dword_out(aub, 1 + 0x1000 * i + STATIC_GGTT_MAP_START);
>dword_out(aub, 0);
> @@ -484,7 +493,8 @@ write_execlists_header(struct aub_file *aub, const char 
> *name)
>  
> /* RENDER_RING */
> mem_trace_memory_write_header_out(aub, RENDER_RING_ADDR, RING_SIZE,
> - 
> AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT);
> + AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
> + "RENDER RING");
> for (uint32_t i = 0; i < RING_SIZE; i += sizeof(uint32_t))
>dword_out(aub, 0);
>  
> @@ -492,7 +502,8 @@ write_execlists_header(struct aub_file *aub, const char 
> *name)
> mem_trace_memory_write_header_out(aub, RENDER_CONTEXT_ADDR,
>   PPHWSP_SIZE +
>   sizeof(render_context_init),
> - 
> AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT);
> + AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
> + "RENDER PPHWSP");
> for (uint32_t i = 0; i < PPHWSP_SIZE; i += sizeof(uint32_t))
>dword_out(aub, 0);
>  
> @@ -501,7 +512,8 @@ write_execlists_header(struct aub_file *aub, const char 
> *name)
>  
> /* BLITTER_RING */
> mem_trace_memory_write_header_out(aub, BLITTER_RING_ADDR, RING_SIZE,
> - 
> AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT);
> + AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
> + "BLITTER RING");
> for (uint32_t i = 0; i < RING_SIZE; i += sizeof(uint32_t))
>dword_out(aub, 0);
>  
> @@ -509,7 +521,8 @@ write_execlists_header(struct aub_file *aub, const char 
> *name)
> mem_trace_memory_write_header_out(aub, BLITTER_CONTEXT_ADDR,
>   PPHWSP_SIZE +
>   sizeof(blitter_context_init),
> - 
> AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT);
> + AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
> + "BLITTER PPHWSP");
> for (uint32_t i = 0; i < PPHWSP_SIZE; i += sizeof(uint32_t))
>dword_out(aub, 0);
>  
> @@ -518,7 +531,8 @@ write_execlists_header(struct aub_file *aub, const char 
> *name)
>  
> /* VIDEO_RING */
> mem_trace_memory_write_header_out(aub, VIDEO_RING_ADDR, RING_SIZE,
> - 
> AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_

Re: [Mesa-dev] [PATCH] glsl: Allow ES2 function parameters to be hidden by variable declarations.

2018-07-18 Thread Ian Romanick
On 07/16/2018 02:46 PM, Eric Anholt wrote:
> This fixes dEQP case:
> 
> dEQP-GLES2.functional.shaders.scoping.valid.local_variable_hides_function_parameter_fragment

Are we sure that test is correct?  I'm sure I already know the answer,
but does the test contain any justification or spec references?  I just
re-read section 4.2 (Scoping) of the ESSL 1.00 spec, and I don't see
anything to support this.  Did I miss something?

In fact, the grammar says:

function_definition:
function_prototype compound_statement_no_new_scope

So... I think this test is just wrong.

> without breaking
> 
> dEQP-GLES3.functional.shaders.scoping.invalid.local_variable_hides_function_parameter_fragment
> ---
>  src/compiler/glsl/ast_to_hir.cpp | 9 +
>  1 file changed, 9 insertions(+)
> 
> diff --git a/src/compiler/glsl/ast_to_hir.cpp 
> b/src/compiler/glsl/ast_to_hir.cpp
> index dd60a2a87fd5..28f074ca8a39 100644
> --- a/src/compiler/glsl/ast_to_hir.cpp
> +++ b/src/compiler/glsl/ast_to_hir.cpp
> @@ -6180,10 +6180,19 @@ ast_function_definition::hir(exec_list *instructions,
>}
> }
>  
> +   /* On ES2, function parameters may be redeclared to be hidden within the
> +* function.  Do this by creating a new scope inside the function.
> +*/
> +   if (state->is_version(0, 100) && !state->is_version(0, 300))
> +  state->symbols->push_scope();
> +
> /* Convert the body of the function to HIR. */
> this->body->hir(&signature->body, state);
> signature->is_defined = true;
>  
> +   if (state->is_version(0, 100) && !state->is_version(0, 300))
> +  state->symbols->pop_scope();
> +
> state->symbols->pop_scope();
>  
> assert(state->current_function == signature);
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 94747] Convert phi nodes to logical operations

2018-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=94747

--- Comment #3 from Ian Romanick  ---
(In reply to Timothy Arceri from comment #2)
> (In reply to Jason Ekstrand from comment #1)
> > The code for this already exists.  It's called nir_opt_peephole_select.  The
> > only problem is that it only triggers if both sides of the if are empty. 
> > I've been wanting to add some sort of heuristic to it for some time now. 
> > The only problem is that it's really back-end specific.
> 
> Although i965 doesn't use it I believe this is otherwise fixed by the
> following commit. We should probably close this bug:

Thanks for reminding me about these "don't forget" bugs that I filed. :)  I
clearly forgot about them anyway.

I have a patch for this that has been on the list for almost 3 weeks.

https://patchwork.freedesktop.org/patch/233182/

That will just eliminate the flow control (but it might not since there are 2
instructions... I wish I could remember which shader this was).  Even then
we'll get something like

ssa_177 = ssa_153 ? true : abs(ssa_171 - ssa_135) >= ssa_145

when we really want (ssa_153 || (abs(ssa_171 - ssa_135) >= ssa_145)).  I have
another patch series in progress that will take care of that.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] radeonsi: emit_spi_map packets optimization

2018-07-18 Thread Sonny Jiang
Signed-off-by: Sonny Jiang 
---
 src/gallium/drivers/radeonsi/si_build_pm4.h | 24 
 src/gallium/drivers/radeonsi/si_gfx_cs.c|  3 +++
 src/gallium/drivers/radeonsi/si_state.h |  1 +
 src/gallium/drivers/radeonsi/si_state_shaders.c | 21 +
 4 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_build_pm4.h 
b/src/gallium/drivers/radeonsi/si_build_pm4.h
index b339cd5..8fc08f7 100644
--- a/src/gallium/drivers/radeonsi/si_build_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_build_pm4.h
@@ -214,4 +214,28 @@ static inline void radeon_opt_set_context_reg4(struct 
si_context *sctx, unsigned
}
 }
 
+/**
+ * Set consecutive registers if any register's value is different.
+ */
+static inline void radeon_opt_set_context_regn(struct si_context *sctx, 
unsigned offset,
+  unsigned * value, unsigned * 
saved_val,
+  unsigned num)
+{
+   struct radeon_cmdbuf *cs = sctx->gfx_cs;
+   int i, j;
+
+   for (i = 0; i < num; i++) {
+   if (saved_val[i] != value[i]) {
+   radeon_set_context_reg_seq(cs, offset, num);
+   for (j = 0; j < num; j++) {
+   radeon_emit(cs, value[j]);
+   }
+
+   memcpy(saved_val, value, sizeof(uint32_t) * num);
+
+   break;
+   }
+   }
+}
+
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c 
b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index 628b6c5..16aa4f9 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -353,4 +353,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
/* Set all saved registers state to unknown. */
ctx->tracked_regs.reg_saved = 0;
}
+
+   /* 0xffffffff is an impossible value for register SPI_PS_INPUT_CNTL_n */
+   memset(ctx->tracked_regs.reg_val_seq, 0xff, sizeof(uint32_t) * 32);
 }
diff --git a/src/gallium/drivers/radeonsi/si_state.h 
b/src/gallium/drivers/radeonsi/si_state.h
index 71056c7..c2d0287 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -287,6 +287,7 @@ enum si_tracked_reg {
 struct si_tracked_regs {
uint32_treg_saved;
uint32_treg_value[SI_NUM_TRACKED_REGS];
+   uint32_treg_val_seq[32];
 };
 
 /* Private read-write buffer slots. */
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index ffc8821..a903df8 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2634,27 +2634,25 @@ static unsigned si_get_ps_input_cntl(struct si_context 
*sctx,
 
 static void si_emit_spi_map(struct si_context *sctx)
 {
-   struct radeon_cmdbuf *cs = sctx->gfx_cs;
struct si_shader *ps = sctx->ps_shader.current;
struct si_shader *vs = si_get_vs_state(sctx);
struct tgsi_shader_info *psinfo = ps ? &ps->selector->info : NULL;
unsigned i, num_interp, num_written = 0, bcol_interp[2];
+   unsigned spi_ps_input_cntl[32];
 
if (!ps || !ps->selector->info.num_inputs)
return;
 
num_interp = si_get_ps_num_interp(ps);
assert(num_interp > 0);
-   radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, 
num_interp);
 
for (i = 0; i < psinfo->num_inputs; i++) {
unsigned name = psinfo->input_semantic_name[i];
unsigned index = psinfo->input_semantic_index[i];
unsigned interpolate = psinfo->input_interpolate[i];
 
-   radeon_emit(cs, si_get_ps_input_cntl(sctx, vs, name, index,
-interpolate));
-   num_written++;
+   spi_ps_input_cntl[num_written++] = si_get_ps_input_cntl(sctx, 
vs, name,
+   index, interpolate);
 
if (name == TGSI_SEMANTIC_COLOR) {
assert(index < ARRAY_SIZE(bcol_interp));
@@ -2669,12 +2667,19 @@ static void si_emit_spi_map(struct si_context *sctx)
if (!(psinfo->colors_read & (0xf << (i * 4
continue;
 
-   radeon_emit(cs, si_get_ps_input_cntl(sctx, vs, bcol,
-i, 
bcol_interp[i]));
-   num_written++;
+   spi_ps_input_cntl[num_written++] =
+ si_get_ps_input_cntl(sctx, vs, bcol, i, 
bcol_interp[i]);
+
}
}
assert(num_interp == num_written);
+
+   /* R_028644_SPI_PS_INPUT_CNTL_0 */
+   /* Dota 2: Only ~16% of SPI map updates set differ

Re: [Mesa-dev] [PATCH] glsl: Allow ES2 function parameters to be hidden by variable declarations.

2018-07-18 Thread Eric Anholt
Ian Romanick  writes:

> On 07/16/2018 02:46 PM, Eric Anholt wrote:
>> This fixes dEQP case:
>> 
>> dEQP-GLES2.functional.shaders.scoping.valid.local_variable_hides_function_parameter_fragment
>
> Are we sure that test is correct?  I'm sure I already know the answer,
> but does the test contain any justification or spec references?  I just
> re-read section 4.2 (Scoping) of the ESSL 1.00 spec, and I don't see
> anything to support this.  Did I miss something?
>
> In fact, the grammar says:
>
> function_definition:
> function_prototype compound_statement_no_new_scope
>
> So... I think this test is just wrong.

OK, so I'm confused why this test still exists, if people have managed
to get conformance on Mesa.  I'm on master of VK-GL-CTS, and it's still
in the mustpass file:

external/openglcts/data/mustpass/gles/aosp_mustpass/master/gles2-master.txt:dEQP-GLES2.functional.shaders.scoping.valid.local_variable_hides_function_parameter_fragment

I don't see anything that would exclude the test -- there's
gles2-driver-issues.txt, but that appears to only be used to exclude
tests from AOSP DEQP usage.

Could whoever on the Intel side submitted a conformance package for Mesa
send me a copy?  I haven't been able to find it on the Khronos site, and
I suspect it would help me understand how to achieve conformance with
Mesa.
dEQP-GLES3.functional.shaders.preprocessor.predefined_macros.line_2_vertex
is another one that fails on Mesa with i965, and seems to have been in
the testsuite forever.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] st: Sweep NIR after linking phase to free held memory

2018-07-18 Thread Eric Anholt
Danylo Piliaiev  writes:

> After optimization passes and many trasfromations most of memory

"transformations"

> NIR holds is a garbage which was being freed only after shader deletion.

"is garbage"

> Freeing it at the end of linking will save memory which would be useful
> in case there are a lot of complex shaders being compiled.
> The common case for this issue is 32bit game running under Wine.
>
> The cost of the optimization is around ~3-5% of compilation speed
> with complex shaders.
>
> Signed-off-by: Danylo Piliaiev 

This seems good, and I'm running it through the CTS now.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] libGLw: Use newly introduced GLAPIVAR for variables

2018-07-18 Thread Timothy Arceri

On 19/07/18 03:07, Brian Paul wrote:


On 07/17/2018 06:47 PM, Stefan Dirsch wrote:

On Tue, Jul 17, 2018 at 04:57:26PM -0600, Brian Paul wrote:

Reviewed-by: Brian Paul 

Do you need me to push this for you?


I'm afraid the answer is yes. Tried it but push hangs forever after this

# git push --verbose
Pushing to ssh://git.freedesktop.org/git/mesa/glw.git
Counting objects: 4, done.
Delta compression using up to 8 threads.
Compressing objects: 100% (4/4), done.
Writing objects: 100% (4/4), 700 bytes | 350.00 KiB/s, done.
Total 4 (delta 3), reused 0 (delta 0)


Worked for me.

My first guess was glw was migrated to gitlab.freedesktop.org, but it 
looks like it has not.


Hmm. It should have been migrated though right? CCing Jason for comment.



-Brian
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107283] Dispatch sanity tests broken

2018-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107283

Bug ID: 107283
   Summary: Dispatch sanity tests broken
   Product: Mesa
   Version: git
  Hardware: Other
OS: All
Status: NEW
  Keywords: bisected, regression
  Severity: normal
  Priority: medium
 Component: Mesa core
  Assignee: e...@anholt.net
  Reporter: mark.a.ja...@intel.com
QA Contact: mesa-dev@lists.freedesktop.org

../src/mesa/main/tests/dispatch_sanity.cpp:174
  Expected: nop_table[i]
  Which is: 0x56348eaef6e0
To be equal to: table[i]
  Which is: 0x56348eb347b0
i = 1295 (BeginPerfMonitorAMD)


Bisected to:
70534dbe29052e2bd1c669750db3608c9eb9
Author: Eric Anholt 
AuthorDate: Wed Jul 18 11:01:17 2018 -0400
Commit: Eric Anholt 
CommitDate: Wed Jul 18 10:39:21 2018 -0700

Parent: 4ba478d7cdf egl: Use the canonical drm-uapi fourcc header to avoid
local defines.
Merged: master
Follows:18.1-branchpoint (1823)

Allow AMD_perfmon on GLES contexts

v2: whitespace alignment fix

Reviewed-by: Rob Clark 

-- 
You are receiving this mail because:
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] intel/compiler: Delete dead VS intrinsic handling.

2018-07-18 Thread Kenneth Graunke
These are lowered by brw_nir_lower_vs_inputs().  If they weren't, we
would have already hit the unreachable() in emit_system_values_block().
---
 src/intel/compiler/brw_fs_nir.cpp | 16 
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index e983110027f..ce73bd29d83 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -2498,18 +2498,6 @@ fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
case nir_intrinsic_load_base_vertex:
   unreachable("should be lowered by nir_lower_system_values()");
 
-   case nir_intrinsic_load_vertex_id_zero_base:
-   case nir_intrinsic_load_instance_id:
-   case nir_intrinsic_load_base_instance:
-   case nir_intrinsic_load_draw_id: {
-  gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
-  fs_reg val = nir_system_values[sv];
-  assert(val.file != BAD_FILE);
-  dest.type = val.type;
-  bld.MOV(dest, val);
-  break;
-   }
-
case nir_intrinsic_load_input: {
   fs_reg src = fs_reg(ATTR, nir_intrinsic_base(instr) * 4, dest.type);
   unsigned first_component = nir_intrinsic_component(instr);
@@ -2530,6 +2518,10 @@ fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
   break;
}
 
+   case nir_intrinsic_load_vertex_id_zero_base:
+   case nir_intrinsic_load_instance_id:
+   case nir_intrinsic_load_base_instance:
+   case nir_intrinsic_load_draw_id:
case nir_intrinsic_load_first_vertex:
case nir_intrinsic_load_is_indexed_draw:
   unreachable("lowered by brw_nir_lower_vs_inputs");
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107224] Incorrect Rendering in Deus Ex: Mankind Divided in-game menu

2018-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107224

Timothy Arceri  changed:

   What|Removed |Added

 QA Contact|dri-devel@lists.freedesktop |mesa-dev@lists.freedesktop.
   |.org|org
   Assignee|dri-devel@lists.freedesktop |mesa-dev@lists.freedesktop.
   |.org|org
  Component|Drivers/Gallium/radeonsi|Mesa core

--- Comment #1 from Timothy Arceri  ---
(In reply to network723 from comment #0)
> Created attachment 140628 [details]
> A screenshot of in-game menu
> 
> With recent Mesa versions some parts of the game Deus Ex: Mankind Divided
> are rendered not the way they are supposed to. 
> To reproduce the bug, max out all graphics settings (didn't test with other
> settings) and while in-game, go to "database" menu. It will show bizarre
> artifacts instead of readable text. The game used to work correctly with
> older Mesa/LLVM.

Are you sure it used to work? I've tested all the way back to Mesa
17.0-branchpoint (d1efa09d342bff) on Intel's i965 driver and I still see the
issue. 

For now moving this to Mesa core as it seems to be a general Mesa issue (or
game bug ?) rather than a radeonsi problem.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH V2 01/10] nir: evaluate if condition uses inside the if branches

2018-07-18 Thread Timothy Arceri
Since we know what side of the branch we ended up on we can just
replace the use with a constant.

All helped shaders are from Unreal Engine 4 besides one shader from
Dirt Showdown.

V2: make sure we do evaluation when condition is used in else with
a single block (we were checking for blocks < the last else
block rather than <=)

shader-db results SKL:

total instructions in shared programs: 13219725 -> 13219643 (<.01%)
instructions in affected programs: 28917 -> 28835 (-0.28%)
helped: 45
HURT: 0

total cycles in shared programs: 529335971 -> 529334604 (<.01%)
cycles in affected programs: 216209 -> 214842 (-0.63%)
helped: 45
HURT: 4

Cc: Ian Romanick 

fix if condition eval for else with a single block
---
 src/compiler/nir/nir_opt_if.c | 121 ++
 1 file changed, 121 insertions(+)

diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index a52de120ad6..4ed919887ce 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -348,6 +348,86 @@ opt_if_loop_terminator(nir_if *nif)
return true;
 }
 
+static void
+replace_if_condition_use_with_const(nir_src *use, unsigned nir_boolean,
+void *mem_ctx, bool if_condition)
+{
+   /* Create const */
+   nir_load_const_instr *load = nir_load_const_instr_create(mem_ctx, 1, 32);
+   load->value.u32[0] = nir_boolean;
+
+   if (if_condition) {
+  nir_instr_insert_before_cf(&use->parent_if->cf_node,  &load->instr);
+   } else if (use->parent_instr->type == nir_instr_type_phi) {
+  nir_phi_instr *cond_phi = nir_instr_as_phi(use->parent_instr);
+
+  bool UNUSED found = false;
+  nir_foreach_phi_src(phi_src, cond_phi) {
+ if (phi_src->src.ssa == use->ssa) {
+nir_instr_insert_before_block(phi_src->pred, &load->instr);
+found = true;
+break;
+ }
+  }
+  assert(found);
+   } else {
+  nir_instr_insert_before(use->parent_instr,  &load->instr);
+   }
+
+   /* Rewrite use to use const */
+   nir_src new_src = nir_src_for_ssa(&load->def);
+
+   if (if_condition)
+  nir_if_rewrite_condition(use->parent_if, new_src);
+   else
+  nir_instr_rewrite_src(use->parent_instr, use, new_src);
+}
+
+static bool
+evaluate_condition_use(nir_if *nif, nir_src *use_src, void *mem_ctx,
+   bool if_condition)
+{
+   bool progress = false;
+
+   nir_block *first_then = nir_if_first_then_block(nif);
+   if (use_src->parent_instr->block->index > first_then->index) {
+  nir_block *first_else = nir_if_first_else_block(nif);
+  if (use_src->parent_instr->block->index < first_else->index) {
+ replace_if_condition_use_with_const(use_src, NIR_TRUE, mem_ctx,
+ if_condition);
+
+ progress = true;
+  } else if (use_src->parent_instr->block->index <=
+ nir_if_last_else_block(nif)->index) {
+ replace_if_condition_use_with_const(use_src, NIR_FALSE, mem_ctx,
+ if_condition);
+
+ progress = true;
+  }
+   }
+
+   return progress;
+}
+
+static bool
+opt_if_evaluate_condition_use(nir_if *nif, void *mem_ctx)
+{
+   bool progress = false;
+
+   /* Evaluate any uses of the if condition inside the if branches */
+   assert(nif->condition.is_ssa);
+   nir_foreach_use_safe(use_src, nif->condition.ssa) {
+  progress |= evaluate_condition_use(nif, use_src, mem_ctx, false);
+   }
+
+   nir_foreach_if_use_safe(use_src, nif->condition.ssa) {
+  if (use_src->parent_if != nif)
+ progress |= evaluate_condition_use(nif, use_src, mem_ctx, true);
+   }
+
+   return progress;
+}
+
 static bool
 opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
 {
@@ -381,6 +461,41 @@ opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
return progress;
 }
 
+/**
+ * These optimisations depend on nir_metadata_block_index and therefore must
+ * not do anything to cause the metadata to become invalid.
+ */
+static bool
+opt_if_safe_cf_list(nir_builder *b, struct exec_list *cf_list, void *mem_ctx)
+{
+   bool progress = false;
+   foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
+  switch (cf_node->type) {
+  case nir_cf_node_block:
+ break;
+
+  case nir_cf_node_if: {
+ nir_if *nif = nir_cf_node_as_if(cf_node);
+ progress |= opt_if_safe_cf_list(b, &nif->then_list, mem_ctx);
+ progress |= opt_if_safe_cf_list(b, &nif->else_list, mem_ctx);
+ progress |= opt_if_evaluate_condition_use(nif, mem_ctx);
+ break;
+  }
+
+  case nir_cf_node_loop: {
+ nir_loop *loop = nir_cf_node_as_loop(cf_node);
+ progress |= opt_if_safe_cf_list(b, &loop->body, mem_ctx);
+ break;
+  }
+
+  case nir_cf_node_function:
+ unreachable("Invalid cf type");
+  }
+   }
+
+   return progress;
+}
+
 bool
 nir_opt_if(nir_shader *shader)
 {
@@ -393,6 +508,12 @@ nir_opt_

Re: [Mesa-dev] [PATCH] intel/compiler: Delete dead VS intrinsic handling.

2018-07-18 Thread Jason Ekstrand

Rb

On July 18, 2018 16:49:07 Kenneth Graunke  wrote:


These are lowered by brw_nir_lower_vs_inputs().  If they weren't, we
would have already hit the unreachable() in emit_system_values_block().
---
src/intel/compiler/brw_fs_nir.cpp | 16 
1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp

index e983110027f..ce73bd29d83 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -2498,18 +2498,6 @@ fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
   case nir_intrinsic_load_base_vertex:
  unreachable("should be lowered by nir_lower_system_values()");

-   case nir_intrinsic_load_vertex_id_zero_base:
-   case nir_intrinsic_load_instance_id:
-   case nir_intrinsic_load_base_instance:
-   case nir_intrinsic_load_draw_id: {
-  gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
-  fs_reg val = nir_system_values[sv];
-  assert(val.file != BAD_FILE);
-  dest.type = val.type;
-  bld.MOV(dest, val);
-  break;
-   }
-
   case nir_intrinsic_load_input: {
  fs_reg src = fs_reg(ATTR, nir_intrinsic_base(instr) * 4, dest.type);
  unsigned first_component = nir_intrinsic_component(instr);
@@ -2530,6 +2518,10 @@ fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
  break;
   }

+   case nir_intrinsic_load_vertex_id_zero_base:
+   case nir_intrinsic_load_instance_id:
+   case nir_intrinsic_load_base_instance:
+   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_is_indexed_draw:
  unreachable("lowered by brw_nir_lower_vs_inputs");
--
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: Combine both gl_PatchVerticesIn lowering passes.

2018-07-18 Thread Kenneth Graunke
Until now, we had separate passes for lowering gl_PatchVerticesIn to
a statically known constant (for TES inputs when linked against a TCS),
and a uniform in the other cases.  Annoyingly, one had to be run before
nir_lower_system_values, and the other afterward.  This simplified the
passes, but made life painful for the callers.

This patch combines both into a single pass.  If you give it a non-zero
static count, it uses that.  If you give it Mesa state slots, it turns
it back into a built-in uniform.  Otherwise, it does nothing.

This also moves the i965 uniform lowering out to shared code.
---
 src/compiler/nir/nir.h|  3 +-
 src/compiler/nir/nir_lower_patch_vertices.c   | 68 +--
 src/intel/compiler/brw_nir.h  |  2 -
 src/intel/vulkan/anv_pipeline.c   |  4 +-
 .../drivers/dri/i965/brw_nir_uniforms.cpp | 28 
 src/mesa/drivers/dri/i965/brw_program.c   | 41 +--
 6 files changed, 84 insertions(+), 62 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 3bfe7d7f7bf..c4e551cbfb0 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2806,7 +2806,8 @@ void nir_lower_two_sided_color(nir_shader *shader);
 bool nir_lower_clamp_color_outputs(nir_shader *shader);
 
 void nir_lower_passthrough_edgeflags(nir_shader *shader);
-void nir_lower_tes_patch_vertices(nir_shader *tes, unsigned patch_vertices);
+bool nir_lower_patch_vertices(nir_shader *nir, unsigned static_count,
+  gl_state_index16 *uniform_state_tokens);
 
 typedef struct nir_lower_wpos_ytransform_options {
gl_state_index16 state_tokens[STATE_LENGTH];
diff --git a/src/compiler/nir/nir_lower_patch_vertices.c 
b/src/compiler/nir/nir_lower_patch_vertices.c
index d196576b993..75b1d8108d7 100644
--- a/src/compiler/nir/nir_lower_patch_vertices.c
+++ b/src/compiler/nir/nir_lower_patch_vertices.c
@@ -22,11 +22,52 @@
  */
 
 #include "nir_builder.h"
+#include "program/prog_instruction.h"
 
-void
-nir_lower_tes_patch_vertices(nir_shader *tes_nir, unsigned patch_vertices)
+static nir_variable *
+make_uniform(nir_shader *nir, gl_state_index16 *tokens)
 {
-   nir_foreach_function(function, tes_nir) {
+   /* Note: name must be prefixed with "gl_" to trigger slot based
+* special handling in uniform setup.
+*/
+   nir_variable *var =
+  nir_variable_create(nir, nir_var_uniform, glsl_int_type(),
+  "gl_PatchVerticesIn");
+   var->num_state_slots = 1;
+   var->state_slots = ralloc_array(var, nir_state_slot, var->num_state_slots);
+   memcpy(var->state_slots[0].tokens, tokens, sizeof(*tokens) * STATE_LENGTH);
+   var->state_slots[0].swizzle = SWIZZLE_XXXX;
+
+   return var;
+}
+
+/**
+ * This pass lowers the load_patch_vertices_in intrinsic.
+ *
+ * - If we statically know the value, we lower it to a constant.
+ *   (If a TES is linked against a TCS, the TCS tells us the TES input count.)
+ *
+ * - If not, and we're given Mesa state slots, we lower it to a uniform.
+ *
+ * - Otherwise, we leave it as a system value.
+ *
+ * This pass must be run after nir_lower_system_values().
+ */
+bool
+nir_lower_patch_vertices(nir_shader *nir,
+ unsigned static_count,
+ gl_state_index16 *uniform_state_tokens)
+{
+   bool progress = false;
+   nir_variable *var = NULL;
+
+   /* If there's no static count and we don't want uniforms, there's no
+* lowering to do...just bail early.
+*/
+   if (static_count == 0 && !uniform_state_tokens)
+  return false;
+
+   nir_foreach_function(function, nir) {
   if (function->impl) {
  nir_foreach_block(block, function->impl) {
 nir_builder b;
@@ -38,7 +79,18 @@ nir_lower_tes_patch_vertices(nir_shader *tes_nir, unsigned 
patch_vertices)
  continue;
 
   b.cursor = nir_before_instr(&intr->instr);
-  nir_ssa_def *val = nir_imm_int(&b, patch_vertices);
+
+  nir_ssa_def *val = NULL;
+  if (static_count) {
+ val = nir_imm_int(&b, static_count);
+  } else {
+ if (!var)
+var = make_uniform(nir, uniform_state_tokens);
+
+ val = nir_load_var(&b, var);
+  }
+
+  progress = true;
   nir_ssa_def_rewrite_uses(&intr->dest.ssa,
nir_src_for_ssa(val));
   nir_instr_remove(instr);
@@ -46,8 +98,12 @@ nir_lower_tes_patch_vertices(nir_shader *tes_nir, unsigned 
patch_vertices)
 }
  }
 
- nir_metadata_preserve(function->impl, nir_metadata_block_index |
-   nir_metadata_dominance);
+ if (progress) {
+nir_metadata_preserve(function->impl, nir_metadata_block_index |
+  nir_me

Re: [Mesa-dev] [PATCH V2 01/10] nir: evaluate if condition uses inside the if branches

2018-07-18 Thread Connor Abbott
Why not do the more general thing, and evaluate the condition in every
block dominated by the then and else blocks? That should handle the
loop and non-loop cases.

On Thu, Jul 19, 2018 at 8:06 AM, Timothy Arceri  wrote:
> Since we know what side of the branch we ended up on we can just
> replace the use with a constant.
>
> All helped shaders are from Unreal Engine 4 besides one shader from
> Dirt Showdown.
>
> V2: make sure we do evaluation when condition is used in else with
> a single block (we were checking for blocks < the last else
> block rather than <=)
>
> shader-db results SKL:
>
> total instructions in shared programs: 13219725 -> 13219643 (<.01%)
> instructions in affected programs: 28917 -> 28835 (-0.28%)
> helped: 45
> HURT: 0
>
> total cycles in shared programs: 529335971 -> 529334604 (<.01%)
> cycles in affected programs: 216209 -> 214842 (-0.63%)
> helped: 45
> HURT: 4
>
> Cc: Ian Romanick 
>
> fix if condition eval for else with a single block
> ---
>  src/compiler/nir/nir_opt_if.c | 121 ++
>  1 file changed, 121 insertions(+)
>
> diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
> index a52de120ad6..4ed919887ce 100644
> --- a/src/compiler/nir/nir_opt_if.c
> +++ b/src/compiler/nir/nir_opt_if.c
> @@ -348,6 +348,86 @@ opt_if_loop_terminator(nir_if *nif)
> return true;
>  }
>
> +static void
> +replace_if_condition_use_with_const(nir_src *use, unsigned nir_boolean,
> +void *mem_ctx, bool if_condition)
> +{
> +   /* Create const */
> +   nir_load_const_instr *load = nir_load_const_instr_create(mem_ctx, 1, 32);
> +   load->value.u32[0] = nir_boolean;
> +
> +   if (if_condition) {
> +  nir_instr_insert_before_cf(&use->parent_if->cf_node,  &load->instr);
> +   } else if (use->parent_instr->type == nir_instr_type_phi) {
> +  nir_phi_instr *cond_phi = nir_instr_as_phi(use->parent_instr);
> +
> +  bool UNUSED found = false;
> +  nir_foreach_phi_src(phi_src, cond_phi) {
> + if (phi_src->src.ssa == use->ssa) {
> +nir_instr_insert_before_block(phi_src->pred, &load->instr);
> +found = true;
> +break;
> + }
> +  }
> +  assert(found);
> +   } else {
> +  nir_instr_insert_before(use->parent_instr,  &load->instr);
> +   }
> +
> +   /* Rewrite use to use const */
> +   nir_src new_src = nir_src_for_ssa(&load->def);
> +
> +   if (if_condition)
> +  nir_if_rewrite_condition(use->parent_if, new_src);
> +   else
> +  nir_instr_rewrite_src(use->parent_instr, use, new_src);
> +}
> +
> +static bool
> +evaluate_condition_use(nir_if *nif, nir_src *use_src, void *mem_ctx,
> +   bool if_condition)
> +{
> +   bool progress = false;
> +
> +   nir_block *first_then = nir_if_first_then_block(nif);
> +   if (use_src->parent_instr->block->index > first_then->index) {
> +  nir_block *first_else = nir_if_first_else_block(nif);
> +  if (use_src->parent_instr->block->index < first_else->index) {
> + replace_if_condition_use_with_const(use_src, NIR_TRUE, mem_ctx,
> + if_condition);
> +
> + progress = true;
> +  } else if (use_src->parent_instr->block->index <=
> + nir_if_last_else_block(nif)->index) {
> + replace_if_condition_use_with_const(use_src, NIR_FALSE, mem_ctx,
> + if_condition);
> +
> + progress = true;
> +  }
> +   }
> +
> +   return progress;
> +}
> +
> +static bool
> +opt_if_evaluate_condition_use(nir_if *nif, void *mem_ctx)
> +{
> +   bool progress = false;
> +
> +   /* Evaluate any uses of the if condition inside the if branches */
> +   assert(nif->condition.is_ssa);
> +   nir_foreach_use_safe(use_src, nif->condition.ssa) {
> +  progress |= evaluate_condition_use(nif, use_src, mem_ctx, false);
> +   }
> +
> +   nir_foreach_if_use_safe(use_src, nif->condition.ssa) {
> +  if (use_src->parent_if != nif)
> + progress |= evaluate_condition_use(nif, use_src, mem_ctx, true);
> +   }
> +
> +   return progress;
> +}
> +
>  static bool
>  opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
>  {
> @@ -381,6 +461,41 @@ opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
> return progress;
>  }
>
> +/**
> + * These optimisations depend on nir_metadata_block_index and therefore must
> + * not do anything to cause the metadata to become invalid.
> + */
> +static bool
> +opt_if_safe_cf_list(nir_builder *b, struct exec_list *cf_list, void *mem_ctx)
> +{
> +   bool progress = false;
> +   foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
> +  switch (cf_node->type) {
> +  case nir_cf_node_block:
> + break;
> +
> +  case nir_cf_node_if: {
> + nir_if *nif = nir_cf_node_as_if(cf_node);
> + progress |= opt_if_safe_cf_list(b, &nif->then_list, mem_ctx);
> + progress |= opt_if_safe_cf_list(b, &nif-

[Mesa-dev] [PATCH v2] i965: Combine both gl_PatchVerticesIn lowering passes.

2018-07-18 Thread Kenneth Graunke
Until now, we had separate passes for lowering gl_PatchVerticesIn to
a statically known constant (for TES inputs when linked against a TCS),
and a uniform in the other cases.  Annoyingly, one had to be run before
nir_lower_system_values, and the other afterward.  This simplified the
passes, but made life painful for the callers.

This patch combines both into a single pass.  If you give it a non-zero
static count, it uses that.  If you give it Mesa state slots, it turns
it back into a built-in uniform.  Otherwise, it does nothing.

This also moves the i965 uniform lowering out to shared code.

v2: Make token arrays const.
---
 src/compiler/nir/nir.h|  3 +-
 src/compiler/nir/nir_lower_patch_vertices.c   | 68 +--
 src/intel/compiler/brw_nir.h  |  2 -
 src/intel/vulkan/anv_pipeline.c   |  4 +-
 .../drivers/dri/i965/brw_nir_uniforms.cpp | 28 
 src/mesa/drivers/dri/i965/brw_program.c   | 41 +--
 6 files changed, 84 insertions(+), 62 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 3bfe7d7f7bf..fe3b4a9c9cc 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2806,7 +2806,8 @@ void nir_lower_two_sided_color(nir_shader *shader);
 bool nir_lower_clamp_color_outputs(nir_shader *shader);
 
 void nir_lower_passthrough_edgeflags(nir_shader *shader);
-void nir_lower_tes_patch_vertices(nir_shader *tes, unsigned patch_vertices);
+bool nir_lower_patch_vertices(nir_shader *nir, unsigned static_count,
+  const gl_state_index16 *uniform_state_tokens);
 
 typedef struct nir_lower_wpos_ytransform_options {
gl_state_index16 state_tokens[STATE_LENGTH];
diff --git a/src/compiler/nir/nir_lower_patch_vertices.c 
b/src/compiler/nir/nir_lower_patch_vertices.c
index d196576b993..8e89268ca41 100644
--- a/src/compiler/nir/nir_lower_patch_vertices.c
+++ b/src/compiler/nir/nir_lower_patch_vertices.c
@@ -22,11 +22,52 @@
  */
 
 #include "nir_builder.h"
+#include "program/prog_instruction.h"
 
-void
-nir_lower_tes_patch_vertices(nir_shader *tes_nir, unsigned patch_vertices)
+static nir_variable *
+make_uniform(nir_shader *nir, const gl_state_index16 *tokens)
 {
-   nir_foreach_function(function, tes_nir) {
+   /* Note: name must be prefixed with "gl_" to trigger slot based
+* special handling in uniform setup.
+*/
+   nir_variable *var =
+  nir_variable_create(nir, nir_var_uniform, glsl_int_type(),
+  "gl_PatchVerticesIn");
+   var->num_state_slots = 1;
+   var->state_slots = ralloc_array(var, nir_state_slot, var->num_state_slots);
+   memcpy(var->state_slots[0].tokens, tokens, sizeof(*tokens) * STATE_LENGTH);
+   var->state_slots[0].swizzle = SWIZZLE_XXXX;
+
+   return var;
+}
+
+/**
+ * This pass lowers the load_patch_vertices_in intrinsic.
+ *
+ * - If we statically know the value, we lower it to a constant.
+ *   (If a TES is linked against a TCS, the TCS tells us the TES input count.)
+ *
+ * - If not, and we're given Mesa state slots, we lower it to a uniform.
+ *
+ * - Otherwise, we leave it as a system value.
+ *
+ * This pass must be run after nir_lower_system_values().
+ */
+bool
+nir_lower_patch_vertices(nir_shader *nir,
+ unsigned static_count,
+ const gl_state_index16 *uniform_state_tokens)
+{
+   bool progress = false;
+   nir_variable *var = NULL;
+
+   /* If there's no static count and we don't want uniforms, there's no
+* lowering to do...just bail early.
+*/
+   if (static_count == 0 && !uniform_state_tokens)
+  return false;
+
+   nir_foreach_function(function, nir) {
   if (function->impl) {
  nir_foreach_block(block, function->impl) {
 nir_builder b;
@@ -38,7 +79,18 @@ nir_lower_tes_patch_vertices(nir_shader *tes_nir, unsigned 
patch_vertices)
  continue;
 
   b.cursor = nir_before_instr(&intr->instr);
-  nir_ssa_def *val = nir_imm_int(&b, patch_vertices);
+
+  nir_ssa_def *val = NULL;
+  if (static_count) {
+ val = nir_imm_int(&b, static_count);
+  } else {
+ if (!var)
+var = make_uniform(nir, uniform_state_tokens);
+
+ val = nir_load_var(&b, var);
+  }
+
+  progress = true;
   nir_ssa_def_rewrite_uses(&intr->dest.ssa,
nir_src_for_ssa(val));
   nir_instr_remove(instr);
@@ -46,8 +98,12 @@ nir_lower_tes_patch_vertices(nir_shader *tes_nir, unsigned 
patch_vertices)
 }
  }
 
- nir_metadata_preserve(function->impl, nir_metadata_block_index |
-   nir_metadata_dominance);
+ if (progress) {
+nir_metadata_preserve(function->impl, nir_metadata_block_index |
+

Re: [Mesa-dev] [PATCH V2 01/10] nir: evaluate if condition uses inside the if branches

2018-07-18 Thread Timothy Arceri

On 19/07/18 12:02, Connor Abbott wrote:

Why not do the more general thing, and evaluate the condition in every
block dominated by the then and else blocks? That should handle the
loop and non-loop cases.


Can you explain what the advantage would be in doing that? Is it just 
likely to reduce the code required?




On Thu, Jul 19, 2018 at 8:06 AM, Timothy Arceri  wrote:

Since we know what side of the branch we ended up on we can just
replace the use with a constant.

All helped shaders are from Unreal Engine 4 besides one shader from
Dirt Showdown.

V2: make sure we do evaluation when condition is used in else with
 a single block (we were checking for blocks < the last else
 block rather than <=)

shader-db results SKL:

total instructions in shared programs: 13219725 -> 13219643 (<.01%)
instructions in affected programs: 28917 -> 28835 (-0.28%)
helped: 45
HURT: 0

total cycles in shared programs: 529335971 -> 529334604 (<.01%)
cycles in affected programs: 216209 -> 214842 (-0.63%)
helped: 45
HURT: 4

Cc: Ian Romanick 

fix if condition eval for else with a single block
---
  src/compiler/nir/nir_opt_if.c | 121 ++
  1 file changed, 121 insertions(+)

diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index a52de120ad6..4ed919887ce 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -348,6 +348,86 @@ opt_if_loop_terminator(nir_if *nif)
 return true;
  }

+static void
+replace_if_condition_use_with_const(nir_src *use, unsigned nir_boolean,
+void *mem_ctx, bool if_condition)
+{
+   /* Create const */
+   nir_load_const_instr *load = nir_load_const_instr_create(mem_ctx, 1, 32);
+   load->value.u32[0] = nir_boolean;
+
+   if (if_condition) {
+  nir_instr_insert_before_cf(&use->parent_if->cf_node,  &load->instr);
+   } else if (use->parent_instr->type == nir_instr_type_phi) {
+  nir_phi_instr *cond_phi = nir_instr_as_phi(use->parent_instr);
+
+  bool UNUSED found = false;
+  nir_foreach_phi_src(phi_src, cond_phi) {
+ if (phi_src->src.ssa == use->ssa) {
+nir_instr_insert_before_block(phi_src->pred, &load->instr);
+found = true;
+break;
+ }
+  }
+  assert(found);
+   } else {
+  nir_instr_insert_before(use->parent_instr,  &load->instr);
+   }
+
+   /* Rewrite use to use const */
+   nir_src new_src = nir_src_for_ssa(&load->def);
+
+   if (if_condition)
+  nir_if_rewrite_condition(use->parent_if, new_src);
+   else
+  nir_instr_rewrite_src(use->parent_instr, use, new_src);
+}
+
+static bool
+evaluate_condition_use(nir_if *nif, nir_src *use_src, void *mem_ctx,
+   bool if_condition)
+{
+   bool progress = false;
+
+   nir_block *first_then = nir_if_first_then_block(nif);
+   if (use_src->parent_instr->block->index > first_then->index) {
+  nir_block *first_else = nir_if_first_else_block(nif);
+  if (use_src->parent_instr->block->index < first_else->index) {
+ replace_if_condition_use_with_const(use_src, NIR_TRUE, mem_ctx,
+ if_condition);
+
+ progress = true;
+  } else if (use_src->parent_instr->block->index <=
+ nir_if_last_else_block(nif)->index) {
+ replace_if_condition_use_with_const(use_src, NIR_FALSE, mem_ctx,
+ if_condition);
+
+ progress = true;
+  }
+   }
+
+   return progress;
+}
+
+static bool
+opt_if_evaluate_condition_use(nir_if *nif, void *mem_ctx)
+{
+   bool progress = false;
+
+   /* Evaluate any uses of the if condition inside the if branches */
+   assert(nif->condition.is_ssa);
+   nir_foreach_use_safe(use_src, nif->condition.ssa) {
+  progress |= evaluate_condition_use(nif, use_src, mem_ctx, false);
+   }
+
+   nir_foreach_if_use_safe(use_src, nif->condition.ssa) {
+  if (use_src->parent_if != nif)
+ progress |= evaluate_condition_use(nif, use_src, mem_ctx, true);
+   }
+
+   return progress;
+}
+
  static bool
  opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
  {
@@ -381,6 +461,41 @@ opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
 return progress;
  }

+/**
+ * These optimisations depend on nir_metadata_block_index and therefore must
+ * not do anything to cause the metadata to become invalid.
+ */
+static bool
+opt_if_safe_cf_list(nir_builder *b, struct exec_list *cf_list, void *mem_ctx)
+{
+   bool progress = false;
+   foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
+  switch (cf_node->type) {
+  case nir_cf_node_block:
+ break;
+
+  case nir_cf_node_if: {
+ nir_if *nif = nir_cf_node_as_if(cf_node);
+ progress |= opt_if_safe_cf_list(b, &nif->then_list, mem_ctx);
+ progress |= opt_if_safe_cf_list(b, &nif->else_list, mem_ctx);
+ progress |= opt_if_evaluate_condition_use(nif, mem_ctx);
+ break;

[Mesa-dev] [PATCH 1/3] nir: allow nir search type check to see through bcsel

2018-07-18 Thread Timothy Arceri
---
 src/compiler/nir/nir_search.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c
index 28b36b2b863..743ffdf232c 100644
--- a/src/compiler/nir/nir_search.c
+++ b/src/compiler/nir/nir_search.c
@@ -73,6 +73,9 @@ src_is_type(nir_src src, nir_alu_type type)
src_is_type(src_alu->src[1].src, nir_type_bool);
  case nir_op_inot:
 return src_is_type(src_alu->src[0].src, nir_type_bool);
+ case nir_op_bcsel:
+return src_is_type(src_alu->src[1].src, nir_type_bool) &&
+   src_is_type(src_alu->src[2].src, nir_type_bool);
  default:
 break;
  }
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] nir: match constant bools with @bool type

2018-07-18 Thread Timothy Arceri
For simplicity we only allow this to work for scalar types.
---
 src/compiler/nir/nir_search.c | 29 ++---
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c
index 743ffdf232c..c0224ca4360 100644
--- a/src/compiler/nir/nir_search.c
+++ b/src/compiler/nir/nir_search.c
@@ -49,10 +49,11 @@ static const uint8_t identity_swizzle[] = { 0, 1, 2, 3 };
  * Used for satisfying 'a@type' constraints.
  */
 static bool
-src_is_type(nir_src src, nir_alu_type type)
+src_is_type(nir_alu_src alu_src, nir_alu_type type, unsigned num_components)
 {
assert(type != nir_type_invalid);
 
+   nir_src src = alu_src.src;
if (!src.is_ssa)
   return false;
 
@@ -69,13 +70,18 @@ src_is_type(nir_src src, nir_alu_type type)
  case nir_op_iand:
  case nir_op_ior:
  case nir_op_ixor:
-return src_is_type(src_alu->src[0].src, nir_type_bool) &&
-   src_is_type(src_alu->src[1].src, nir_type_bool);
+return src_is_type(src_alu->src[0], nir_type_bool,
+   nir_ssa_alu_instr_src_components(src_alu, 0)) &&
+   src_is_type(src_alu->src[1], nir_type_bool,
+   nir_ssa_alu_instr_src_components(src_alu, 1));
  case nir_op_inot:
-return src_is_type(src_alu->src[0].src, nir_type_bool);
+return src_is_type(src_alu->src[0], nir_type_bool,
+   nir_ssa_alu_instr_src_components(src_alu, 0));
  case nir_op_bcsel:
-return src_is_type(src_alu->src[1].src, nir_type_bool) &&
-   src_is_type(src_alu->src[2].src, nir_type_bool);
+return src_is_type(src_alu->src[1], nir_type_bool,
+   nir_ssa_alu_instr_src_components(src_alu, 1)) &&
+   src_is_type(src_alu->src[2], nir_type_bool,
+   nir_ssa_alu_instr_src_components(src_alu, 2));
  default:
 break;
  }
@@ -89,6 +95,14 @@ src_is_type(nir_src src, nir_alu_type type)
  return intr->intrinsic == nir_intrinsic_load_front_face ||
 intr->intrinsic == nir_intrinsic_load_helper_invocation;
   }
+   } else if (src.ssa->parent_instr->type == nir_instr_type_load_const) {
+  nir_load_const_instr *load =
+ nir_instr_as_load_const(src.ssa->parent_instr);
+
+  if (type == nir_type_bool && num_components == 1) {
+ return load->value.u32[alu_src.swizzle[0]] == NIR_TRUE ||
+load->value.u32[alu_src.swizzle[0]] == NIR_FALSE;
+  }
}
 
/* don't know */
@@ -162,7 +176,8 @@ match_value(const nir_search_value *value, nir_alu_instr 
*instr, unsigned src,
 return false;
 
  if (var->type != nir_type_invalid &&
- !src_is_type(instr->src[src].src, var->type))
+ !src_is_type(instr->src[src], var->type,
+  nir_ssa_alu_instr_src_components(instr, src)))
 return false;
 
  state->variables_seen |= (1 << var->variable);
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] nir: opt_algebraic add some opts to remove spilling in dirt showdown

2018-07-18 Thread Timothy Arceri
These patterns appear in a single Dirt Showdown compute shader. With this
change, VGPR spilling in the radeonsi NIR backend is reduced.
---
 src/compiler/nir/nir_opt_algebraic.py | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/src/compiler/nir/nir_opt_algebraic.py 
b/src/compiler/nir/nir_opt_algebraic.py
index 808679d0bbb..97ffc75d1c3 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -221,8 +221,19 @@ optimizations = [
(('~bcsel', ('fge', a, b), b, a), ('fmin', a, b)),
(('~bcsel', ('fge', b, a), b, a), ('fmax', a, b)),
(('bcsel', ('inot', a), b, c), ('bcsel', a, c, b)),
+   (('bcsel', a, b, ('inot', 'a@bool')), ('bcsel', a, b, True)),
+   (('bcsel', a, ('inot', 'a@bool'), b), ('bcsel', a, False, b)),
(('bcsel', a, ('bcsel', a, b, c), d), ('bcsel', a, b, d)),
+   (('bcsel', a, 'a@bool', 'b@bool'), ('ior', a, b)),
(('bcsel', a, True, 'b@bool'), ('ior', a, b)),
+   (('bcsel', a, ('iand', 'a@bool', b), c), ('bcsel', a, b, c)),
+   (('bcsel', a, b, ('iand', ('inot', 'a@bool'), c)), ('bcsel', a, b, c)),
+   (('bcsel', a, ('ior', ('inot', 'a@bool'), b), c), ('bcsel', a, b, c)),
+   (('bcsel', ('fge', a, b), c, ('flt', a, b)), ('bcsel', ('fge', a, b), c, 
True)),
+   (('bcsel', ('fge', a, b), ('ior', ('flt', a, b), c), d), ('bcsel', ('fge', 
a, b), c, d)),
+   (('bcsel', ('iand', 'a@bool', 'b@bool'), ('bcsel', a, b, True), c), 
('bcsel', ('iand', a, b), True, c)),
+   (('ior', ('bcsel', 'a@bool', 'b@bool', 'c@bool'), ('bcsel', a, True, c)), 
('ior', a, c)),
+   (('ior', ('bcsel', 'a@bool', 'b@bool', 'c@bool'), ('ior', a, c)), ('ior', 
a, c)),
(('fmin', a, a), a),
(('fmax', a, a), a),
(('imin', a, a), a),
@@ -287,7 +298,14 @@ optimizations = [
 
(('ior', 'a@bool', ('ieq', a, False)), True),
(('ior', 'a@bool', ('inot', a)), True),
+   (('ior', a, ('iand', ('inot', 'a@bool'), 'b@bool')), ('ior', a, b)),
 
+   (('ior', ('ior', a, b), a), ('ior', a, b)),
+   (('iand', ('iand', a, b), a), ('iand', a, b)),
+
+   (('iand', ('fge', a, b), ('ior', ('flt', a, b), 'c@bool')), ('iand', 
('fge', a, b), c)),
+   (('iand', ('iand', 'a@bool', 'b@bool'), a), ('iand', a, b)),
+   (('iand', ('ior', 'a@bool', 'b@bool'), ('inot', 'a@bool')), ('iand', 
('inot', 'a@bool'), b)),
(('iand', ('ieq', 'a@32', 0), ('ieq', 'b@32', 0)), ('ieq', ('ior', 'a@32', 
'b@32'), 0)),
 
# These patterns can result when (a < b || a < c) => (a < min(b, c))
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 94747] Convert phi nodes to logical operations

2018-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=94747

--- Comment #4 from Timothy Arceri  ---
(In reply to Ian Romanick from comment #3)
> (In reply to Timothy Arceri from comment #2)
> > As (In reply to Jason Ekstrand from comment #1)
> > > The code for this already exists.  It's called nir_opt_peephole_select.  
> > > The
> > > only problem is that it only triggers if both sides of the if are empty. 
> > > I've been wanting to add some sort of heuristic to it for some time now. 
> > > The only problem is that it's really back-end specific.
> > 
> > Although i965 doesn't use it I believe this is otherwise fixed by the
> > following commit. We should probably close this bug:
> 
> Thanks for reminding me about these "don't forget" bugs that I filed. :)  I
> clearly forgot about them anyway.
> 
> I have a patch for this that has been on the list for almost 3 weeks.
> 
> https://patchwork.freedesktop.org/patch/233182/
> 
> That will just eliminate the flow control (but it might not since there are
> 2 instructions... I wish I could remember which shader this was).  Even then
> we'll get something like
> 
> ssa_177 = ssa_153 ? true : abs(ssa_171 - ssa_135) >= ssa_145
> 
> when we really want (ssa_153 || (abs(ssa_171 - ssa_135) >= ssa_145)).  I
> have another patch series in progress that will take care of that.

I suspect we have been working on similar changes. I've just sent a series that
should cover this also.

https://patchwork.freedesktop.org/series/46827/

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] nir: Do not use continue block after removing it.

2018-07-18 Thread Jason Ekstrand
On Sat, Jul 14, 2018 at 4:26 PM Bas Nieuwenhuizen 
wrote:

> Reinserting code directly before a jump means the block gets split
> and merged, removing the original block and replacing it in the
> process.
>
> Hence keeping a pointer to the continue block over a reinsert
> causes issues.
>
> This code changes nir_opt_if to simply look for the new continue
> block.
>
> CC: 18.1 
> ---
>  src/compiler/nir/nir_opt_if.c | 33 +++--
>  1 file changed, 27 insertions(+), 6 deletions(-)
>
> diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
> index a52de120ad6..658ff654169 100644
> --- a/src/compiler/nir/nir_opt_if.c
> +++ b/src/compiler/nir/nir_opt_if.c
> @@ -26,6 +26,28 @@
>  #include "nir_control_flow.h"
>  #include "nir_loop_analyze.h"
>
> +/**
> + * Gets the single block that jumps back to the loop header. Already
> assumes
> + * there is exactly one such block.
> + */
> +static nir_block* find_continue_block(nir_loop *loop)
>

The return type goes on its own line.


> +{
> +   nir_block *header_block = nir_loop_first_block(loop);
> +   nir_block *prev_block =
> +  nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));
> +
> +   assert(header_block->predecessors->entries == 2);
> +
> +   nir_block *continue_block = NULL;
> +   struct set_entry *pred_entry;
> +   set_foreach(header_block->predecessors, pred_entry) {
> +  if (pred_entry->key != prev_block)
> + continue_block = (void *)pred_entry->key;
>

Just return right here.  No need to keep looping or store continue_block
off in a variable.


> +   }
> +
> +   return continue_block;
>

Then this becomes unreachable.

With those nits fixed,

Reviewed-by: Jason Ekstrand 


> +}
> +
>  /**
>   * This optimization detects if statements at the tops of loops where the
>   * condition is a phi node of two constants and moves half of the if to
> above
> @@ -97,12 +119,7 @@ opt_peel_loop_initial_if(nir_loop *loop)
> if (header_block->predecessors->entries != 2)
>return false;
>
> -   nir_block *continue_block = NULL;
> -   struct set_entry *pred_entry;
> -   set_foreach(header_block->predecessors, pred_entry) {
> -  if (pred_entry->key != prev_block)
> - continue_block = (void *)pred_entry->key;
> -   }
> +   nir_block *continue_block = find_continue_block(loop);
>
> nir_cf_node *if_node = nir_cf_node_next(&header_block->cf_node);
> if (!if_node || if_node->type != nir_cf_node_if)
> @@ -193,6 +210,10 @@ opt_peel_loop_initial_if(nir_loop *loop)
> nir_cf_reinsert(&tmp, nir_before_cf_node(&loop->cf_node));
>
> nir_cf_reinsert(&header, nir_after_block_before_jump(continue_block));
> +
> +   /* Get continue block again as the previous reinsert might have
> removed the block. */
> +   continue_block = find_continue_block(loop);
> +
> nir_cf_extract(&tmp, nir_before_cf_list(continue_list),
>  nir_after_cf_list(continue_list));
> nir_cf_reinsert(&tmp, nir_after_block_before_jump(continue_block));
> --
> 2.18.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/5] radeonsi: report supported EQAA combinations from is_format_supported

2018-07-18 Thread Marek Olšák
From: Marek Olšák 

Framebuffers without attachments now support 16 samples.
---
 src/gallium/drivers/radeonsi/si_state.c | 36 ++---
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index a1b00cb979a..fc1ec83931f 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2143,6 +2143,9 @@ static boolean si_is_format_supported(struct pipe_screen 
*screen,
return false;
}
 
+   if (MAX2(1, sample_count) < MAX2(1, storage_sample_count))
+   return false;
+
if (sample_count > 1) {
if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE))
return false;
@@ -2150,25 +2153,26 @@ static boolean si_is_format_supported(struct 
pipe_screen *screen,
if (usage & PIPE_BIND_SHADER_IMAGE)
return false;
 
-   if (sample_count != storage_sample_count)
+   /* Only power-of-two sample counts are supported. */
+   if (!util_is_power_of_two_or_zero(sample_count) ||
+   !util_is_power_of_two_or_zero(storage_sample_count))
return false;
 
-   switch (sample_count) {
-   case 2:
-   case 4:
-   case 8:
-   break;
-   case 16:
-   /* Allow resource_copy_region with nr_samples == 16. */
-   if (sscreen->eqaa_force_coverage_samples == 16 &&
-   !util_format_is_depth_or_stencil(format))
-   return true;
-   if (format == PIPE_FORMAT_NONE)
-   return true;
-   else
+   /* MSAA support without framebuffer attachments. */
+   if (format == PIPE_FORMAT_NONE && sample_count <= 16)
+   return true;
+
+   if (!sscreen->info.has_eqaa_surface_allocator ||
+   util_format_is_depth_or_stencil(format)) {
+   /* Color without EQAA or depth/stencil. */
+   if (sample_count > 8 ||
+   sample_count != storage_sample_count)
+   return false;
+   } else {
+   /* Color with EQAA. */
+   if (sample_count > 16 ||
+   storage_sample_count > 8)
return false;
-   default:
-   return false;
}
}
 
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/5] gallium: add storage_sample_count parameter into is_format_supported

2018-07-18 Thread Marek Olšák
From: Marek Olšák 

---
 .../auxiliary/driver_ddebug/dd_screen.c   |  3 ++-
 src/gallium/auxiliary/driver_noop/noop_pipe.c |  4 ++-
 .../auxiliary/driver_rbug/rbug_screen.c   |  2 ++
 .../auxiliary/driver_trace/tr_screen.c|  3 ++-
 src/gallium/auxiliary/hud/font.c  |  2 +-
 src/gallium/auxiliary/hud/hud_context.c   |  2 +-
 src/gallium/auxiliary/postprocess/pp_init.c   |  6 ++---
 src/gallium/auxiliary/postprocess/pp_mlaa.c   |  2 +-
 .../auxiliary/postprocess/pp_program.c|  2 +-
 src/gallium/auxiliary/util/u_blit.c   |  1 +
 src/gallium/auxiliary/util/u_blitter.c|  9 ---
 src/gallium/auxiliary/util/u_gen_mipmap.c |  2 +-
 .../auxiliary/util/u_threaded_context.c   |  3 ++-
 src/gallium/auxiliary/util/u_vbuf.c   |  2 +-
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c  |  8 +++---
 src/gallium/auxiliary/vl/vl_video_buffer.c|  4 +--
 src/gallium/docs/source/screen.rst|  3 +++
 src/gallium/drivers/etnaviv/etnaviv_screen.c  |  7 +-
 .../drivers/freedreno/a2xx/fd2_screen.c   |  4 +++
 .../drivers/freedreno/a3xx/fd3_screen.c   |  4 +++
 .../drivers/freedreno/a4xx/fd4_screen.c   |  4 +++
 .../drivers/freedreno/a5xx/fd5_screen.c   |  4 +++
 .../drivers/freedreno/freedreno_resource.c|  1 +
 src/gallium/drivers/i915/i915_screen.c|  4 +++
 src/gallium/drivers/i915/i915_screen.h|  1 +
 src/gallium/drivers/llvmpipe/lp_screen.c  |  4 +++
 .../drivers/nouveau/nv30/nv30_screen.c|  4 +++
 .../drivers/nouveau/nv50/nv50_screen.c|  4 +++
 .../drivers/nouveau/nv50/nv50_shader_state.c  |  1 +
 .../drivers/nouveau/nvc0/nvc0_screen.c|  4 +++
 src/gallium/drivers/r300/r300_blit.c  |  6 +++--
 src/gallium/drivers/r300/r300_screen.c|  4 +++
 src/gallium/drivers/r600/evergreen_state.c|  4 +++
 src/gallium/drivers/r600/r600_pipe.h  |  2 ++
 src/gallium/drivers/r600/r600_state.c |  4 +++
 src/gallium/drivers/r600/r600_texture.c   |  2 +-
 src/gallium/drivers/radeonsi/si_clear.c   |  2 +-
 src/gallium/drivers/radeonsi/si_state.c   |  4 +++
 src/gallium/drivers/softpipe/sp_screen.c  |  4 +++
 src/gallium/drivers/svga/svga_format.c|  4 +++
 src/gallium/drivers/svga/svga_format.h|  1 +
 src/gallium/drivers/svga/svga_pipe_clear.c|  1 +
 .../drivers/svga/svga_resource_texture.c  |  3 +++
 src/gallium/drivers/swr/swr_screen.cpp|  4 +++
 src/gallium/drivers/tegra/tegra_screen.c  |  4 ++-
 src/gallium/drivers/v3d/v3d_screen.c  |  4 +++
 src/gallium/drivers/vc4/vc4_screen.c  |  4 +++
 src/gallium/drivers/virgl/virgl_screen.c  |  4 +++
 src/gallium/include/pipe/p_screen.h   |  1 +
 .../state_trackers/clover/core/format.cpp |  2 +-
 src/gallium/state_trackers/dri/dri2.c |  5 ++--
 .../state_trackers/dri/dri_query_renderer.c   |  2 +-
 src/gallium/state_trackers/dri/dri_screen.c   | 16 ++--
 src/gallium/state_trackers/glx/xlib/xm_api.c  |  3 ++-
 .../state_trackers/nine/basetexture9.c|  2 +-
 src/gallium/state_trackers/nine/device9.c |  2 ++
 src/gallium/state_trackers/nine/nine_pipe.h   |  2 +-
 src/gallium/state_trackers/nine/surface9.c|  2 +-
 src/gallium/state_trackers/va/subpicture.c|  2 +-
 src/gallium/state_trackers/vdpau/query.c  | 14 +--
 .../state_trackers/vdpau/vdpau_private.h  |  5 ++--
 .../state_trackers/wgl/stw_pixelformat.c  |  3 ++-
 src/gallium/state_trackers/xa/xa_context.c|  2 +-
 src/gallium/state_trackers/xa/xa_renderer.c   |  2 +-
 src/gallium/state_trackers/xa/xa_tracker.c| 10 
 src/gallium/state_trackers/xvmc/subpicture.c  |  8 +++---
 src/gallium/tests/trivial/compute.c   |  4 +--
 .../winsys/sw/wrapper/wrapper_sw_winsys.c |  2 +-
 src/mesa/state_tracker/st_atom_framebuffer.c  |  2 +-
 src/mesa/state_tracker/st_cb_bitmap.c |  6 ++---
 src/mesa/state_tracker/st_cb_drawpixels.c |  4 ++-
 src/mesa/state_tracker/st_cb_eglimage.c   | 15 ++-
 src/mesa/state_tracker/st_cb_fbo.c|  6 +++--
 src/mesa/state_tracker/st_cb_readpixels.c |  4 +--
 src/mesa/state_tracker/st_cb_texture.c| 25 +++
 src/mesa/state_tracker/st_context.c   |  4 +--
 src/mesa/state_tracker/st_extensions.c| 16 ++--
 src/mesa/state_tracker/st_format.c| 12 -
 src/mesa/state_tracker/st_manager.c   |  1 +
 src/mesa/state_tracker/st_texture.c   |  2 +-
 80 files changed, 239 insertions(+), 112 deletions(-)

diff --git a/src/gallium/auxiliary/driver_ddebug/dd_screen.c 
b/src/gallium/auxiliary/driver_ddebug/dd_screen.c
index 5f922d884fe..a89af8a8d62 100644
--- a/src/gallium/auxiliary/driver_ddebug/dd_screen.c
+++ b/src/gallium/auxiliary/driver_ddebug/dd_screen.c
@@ -147,12 +147,13 @@ dd_screen_is_format_supported(struct pipe_screen *_screen,
   enum pipe_format format

[Mesa-dev] [PATCH 4/5] radeonsi: use storage_samples instead of color_samples in most places

2018-07-18 Thread Marek Olšák
From: Marek Olšák 

Also use pipe_resource::nr_storage_samples instead of
r600_texture::num_color_samples.
---
 src/amd/common/ac_surface.c   |  6 +--
 src/amd/common/ac_surface.h   |  2 +-
 src/amd/vulkan/radv_image.c   |  2 +-
 src/gallium/drivers/r600/r600_texture.c   |  4 +-
 src/gallium/drivers/radeon/radeon_winsys.h|  1 -
 src/gallium/drivers/radeonsi/si_clear.c   |  4 +-
 src/gallium/drivers/radeonsi/si_pipe.h|  1 -
 src/gallium/drivers/radeonsi/si_state.c   | 19 ++
 src/gallium/drivers/radeonsi/si_texture.c | 38 ++-
 .../winsys/amdgpu/drm/amdgpu_surface.c|  3 +-
 .../winsys/radeon/drm/radeon_drm_surface.c|  6 +--
 11 files changed, 34 insertions(+), 52 deletions(-)

diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 959e7b3d735..afdae1971e9 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -262,7 +262,7 @@ static int surf_config_sanity(const struct ac_surf_config 
*config,
}
 
if (!(flags & RADEON_SURF_Z_OR_SBUFFER)) {
-   switch (config->info.color_samples) {
+   switch (config->info.storage_samples) {
case 0:
case 1:
case 2:
@@ -694,7 +694,7 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
 
if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) {
AddrDccIn.numSamples = AddrSurfInfoIn.numFrags =
-   MAX2(1, config->info.color_samples);
+   MAX2(1, config->info.storage_samples);
}
 
/* Set the micro tile type. */
@@ -1433,7 +1433,7 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
AddrSurfInfoIn.numFrags = AddrSurfInfoIn.numSamples;
 
if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER))
-   AddrSurfInfoIn.numFrags = MAX2(1, config->info.color_samples);
+   AddrSurfInfoIn.numFrags = MAX2(1, config->info.storage_samples);
 
/* GFX9 doesn't support 1D depth textures, so allocate all 1D textures
 * as 2D to avoid having shader variants for 1D vs 2D, so all shaders
diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h
index 6d95e610a59..8ba964e64ec 100644
--- a/src/amd/common/ac_surface.h
+++ b/src/amd/common/ac_surface.h
@@ -226,7 +226,7 @@ struct ac_surf_info {
uint32_t height;
uint32_t depth;
uint8_t samples; /* For Z/S: samples; For color: FMASK coverage samples 
*/
-   uint8_t color_samples; /* For color: color samples */
+   uint8_t storage_samples; /* For color: allocated samples */
uint8_t levels;
uint8_t num_channels; /* heuristic for displayability */
uint16_t array_size;
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index 826f898d281..f1c78e8115d 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -937,7 +937,7 @@ radv_image_create(VkDevice _device,
image->info.height = pCreateInfo->extent.height;
image->info.depth = pCreateInfo->extent.depth;
image->info.samples = pCreateInfo->samples;
-   image->info.color_samples = pCreateInfo->samples;
+   image->info.storage_samples = pCreateInfo->samples;
image->info.array_size = pCreateInfo->arrayLayers;
image->info.levels = pCreateInfo->mipLevels;
image->info.num_channels = 
vk_format_get_nr_components(pCreateInfo->format);
diff --git a/src/gallium/drivers/r600/r600_texture.c 
b/src/gallium/drivers/r600/r600_texture.c
index 9e2d5c7fc9e..08db6bab04c 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -245,7 +245,7 @@ static int r600_init_surface(struct r600_common_screen 
*rscreen,
if (!(ptex->flags & R600_RESOURCE_FLAG_FORCE_TILING))
flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;
 
-   r = rscreen->ws->surface_init(rscreen->ws, ptex, ptex->nr_samples,
+   r = rscreen->ws->surface_init(rscreen->ws, ptex,
  flags, bpe, array_mode, surface);
if (r) {
return r;
@@ -616,7 +616,7 @@ void r600_texture_get_fmask_info(struct r600_common_screen 
*rscreen,
bpe *= 2;
}
 
-   if (rscreen->ws->surface_init(rscreen->ws, &templ, templ.nr_samples,
+   if (rscreen->ws->surface_init(rscreen->ws, &templ,
  flags, bpe, RADEON_SURF_MODE_2D, &fmask)) 
{
R600_ERR("Got error in surface_init while allocating FMASK.\n");
return;
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h 
b/src/gallium/drivers/radeon/radeon_winsys.h
index 406cfe46da3..99a793f9028 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -643,7 +643,6 @@ struct radeon_winsys {
  */
 int (*surface_init)(struct radeon_winsys *ws,
 

[Mesa-dev] [PATCH 1/5] gallium: add PIPE_CAP_FRAMEBUFFER_MSAA_CONSTRAINTS

2018-07-18 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/docs/source/screen.rst   | 11 +++
 src/gallium/drivers/etnaviv/etnaviv_screen.c |  1 +
 src/gallium/drivers/freedreno/freedreno_screen.c |  1 +
 src/gallium/drivers/i915/i915_screen.c   |  1 +
 src/gallium/drivers/llvmpipe/lp_screen.c |  1 +
 src/gallium/drivers/nouveau/nv30/nv30_screen.c   |  1 +
 src/gallium/drivers/nouveau/nv50/nv50_screen.c   |  1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   |  1 +
 src/gallium/drivers/r300/r300_screen.c   |  1 +
 src/gallium/drivers/r600/r600_pipe.c |  1 +
 src/gallium/drivers/radeonsi/si_get.c|  2 ++
 src/gallium/drivers/softpipe/sp_screen.c |  1 +
 src/gallium/drivers/svga/svga_screen.c   |  1 +
 src/gallium/drivers/swr/swr_screen.cpp   |  1 +
 src/gallium/drivers/v3d/v3d_screen.c |  1 +
 src/gallium/drivers/vc4/vc4_screen.c |  1 +
 src/gallium/drivers/virgl/virgl_screen.c |  1 +
 src/gallium/include/pipe/p_defines.h |  1 +
 18 files changed, 29 insertions(+)

diff --git a/src/gallium/docs/source/screen.rst 
b/src/gallium/docs/source/screen.rst
index d7ce521c2c3..5fc49e24403 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -411,6 +411,17 @@ The integer capabilities:
 * ``PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES``: Limit on combined shader
   output resources (images + buffers + fragment outputs). If 0 the state
   tracker works it out.
+* ``PIPE_CAP_FRAMEBUFFER_MSAA_CONSTRAINTS``: This determines limitations
+  on the number of samples that framebuffer attachments can have.
+  Possible values:
+0: color.nr_samples == zs.nr_samples == color.nr_storage_samples
+   (standard MSAA quality)
+1: color.nr_samples >= zs.nr_samples == color.nr_storage_samples
+   (enhanced MSAA quality)
+2: color.nr_samples >= zs.nr_samples >= color.nr_storage_samples
+   (full flexibility in tuning MSAA quality and performance)
+  All color attachments must have the same number of samples and the same
+  number of storage samples.
 * ``PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET``:
   Whether pipe_vertex_buffer::buffer_offset is treated as signed. The u_vbuf
   module needs this for optimal performance in workstation applications.
diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c 
b/src/gallium/drivers/etnaviv/etnaviv_screen.c
index 04c7a873de6..d4abf99947e 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
@@ -270,6 +270,7 @@ etna_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
case PIPE_CAP_TILE_RASTER_ORDER:
case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:
+   case PIPE_CAP_FRAMEBUFFER_MSAA_CONSTRAINTS:
case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
case PIPE_CAP_CONTEXT_PRIORITY_MASK:
case PIPE_CAP_FENCE_SIGNAL:
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c 
b/src/gallium/drivers/freedreno/freedreno_screen.c
index 65ad64c9efa..40b7e0786f2 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -354,6 +354,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
case PIPE_CAP_TILE_RASTER_ORDER:
case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:
+   case PIPE_CAP_FRAMEBUFFER_MSAA_CONSTRAINTS:
case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
case PIPE_CAP_FENCE_SIGNAL:
case PIPE_CAP_CONSTBUF0_FLAGS:
diff --git a/src/gallium/drivers/i915/i915_screen.c 
b/src/gallium/drivers/i915/i915_screen.c
index a03840a3aa4..132a9fce034 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -328,6 +328,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap 
cap)
case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
case PIPE_CAP_TILE_RASTER_ORDER:
case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:
+   case PIPE_CAP_FRAMEBUFFER_MSAA_CONSTRAINTS:
case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
case PIPE_CAP_CONTEXT_PRIORITY_MASK:
case PIPE_CAP_FENCE_SIGNAL:
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c 
b/src/gallium/drivers/llvmpipe/lp_screen.c
index 9921a941d58..0d0a3454375 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -359,6 +359,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum 
pipe_cap param)
case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
case PIPE_CAP_TILE_RASTER_ORDER:
case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:
+   case PIPE_CAP_FRAMEBUFFER_MSAA_CONSTRAINTS:
case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
case PIPE_CAP_CONTEXT_PRIORITY_MASK:
case PIPE_CAP_FENCE_SIGNAL:
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c 
b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
ind

[Mesa-dev] [PATCH 2/5] gallium: add pipe_resource::nr_storage_samples, and set it same as nr_samples

2018-07-18 Thread Marek Olšák
From: Marek Olšák 

---
 .../auxiliary/driver_trace/tr_dump_state.c|  1 +
 src/gallium/auxiliary/postprocess/pp_mlaa.c   |  2 +-
 src/gallium/auxiliary/util/u_dump_state.c |  1 +
 src/gallium/auxiliary/util/u_tests.c  |  1 +
 src/gallium/docs/source/screen.rst| 23 +--
 src/gallium/include/pipe/p_state.h| 18 ---
 src/gallium/state_trackers/dri/dri2.c |  3 +++
 src/gallium/state_trackers/glx/xlib/xm_st.c   |  1 +
 src/gallium/state_trackers/nine/buffer9.c |  1 +
 .../state_trackers/nine/cubetexture9.c|  1 +
 src/gallium/state_trackers/nine/device9.c |  2 +-
 src/gallium/state_trackers/nine/surface9.c|  2 ++
 src/gallium/state_trackers/nine/swapchain9.c  |  3 +++
 src/gallium/state_trackers/nine/texture9.c|  1 +
 src/gallium/state_trackers/nine/volume9.c |  1 +
 .../state_trackers/nine/volumetexture9.c  |  1 +
 src/gallium/state_trackers/wgl/stw_st.c   |  1 +
 src/gallium/tests/graw/clear.c|  1 -
 src/gallium/tests/graw/fs-test.c  |  2 --
 src/gallium/tests/graw/graw_util.h|  3 ---
 src/gallium/tests/graw/gs-test.c  |  3 ---
 src/gallium/tests/graw/quad-sample.c  |  2 --
 src/gallium/tests/graw/shader-leak.c  |  1 -
 src/gallium/tests/graw/tri-gs.c   |  1 -
 src/gallium/tests/graw/tri-instanced.c|  1 -
 src/gallium/tests/graw/vs-test.c  |  3 ---
 src/mesa/state_tracker/st_cb_copyimage.c  |  7 +++---
 src/mesa/state_tracker/st_cb_fbo.c|  2 ++
 src/mesa/state_tracker/st_cb_texture.c|  2 ++
 src/mesa/state_tracker/st_texture.c   |  1 +
 30 files changed, 65 insertions(+), 27 deletions(-)

diff --git a/src/gallium/auxiliary/driver_trace/tr_dump_state.c 
b/src/gallium/auxiliary/driver_trace/tr_dump_state.c
index 2d12720ddd9..46fa5747460 100644
--- a/src/gallium/auxiliary/driver_trace/tr_dump_state.c
+++ b/src/gallium/auxiliary/driver_trace/tr_dump_state.c
@@ -69,6 +69,7 @@ void trace_dump_resource_template(const struct pipe_resource 
*templat)
 
trace_dump_member(uint, templat, last_level);
trace_dump_member(uint, templat, nr_samples);
+   trace_dump_member(uint, templat, nr_storage_samples);
trace_dump_member(uint, templat, usage);
trace_dump_member(uint, templat, bind);
trace_dump_member(uint, templat, flags);
diff --git a/src/gallium/auxiliary/postprocess/pp_mlaa.c 
b/src/gallium/auxiliary/postprocess/pp_mlaa.c
index 610cedbd1b3..f003ee75fd5 100644
--- a/src/gallium/auxiliary/postprocess/pp_mlaa.c
+++ b/src/gallium/auxiliary/postprocess/pp_mlaa.c
@@ -240,7 +240,7 @@ pp_jimenezmlaa_init_run(struct pp_queue_t *ppq, unsigned 
int n,
res.width0 = res.height0 = 165;
res.bind = PIPE_BIND_SAMPLER_VIEW;
res.usage = PIPE_USAGE_DEFAULT;
-   res.depth0 = res.array_size = res.nr_samples = 1;
+   res.depth0 = res.array_size = res.nr_samples = res.nr_storage_samples = 1;
 
if (!ppq->p->screen->is_format_supported(ppq->p->screen, res.format,
 res.target, 1, res.bind))
diff --git a/src/gallium/auxiliary/util/u_dump_state.c 
b/src/gallium/auxiliary/util/u_dump_state.c
index b68de134275..286d5fad75b 100644
--- a/src/gallium/auxiliary/util/u_dump_state.c
+++ b/src/gallium/auxiliary/util/u_dump_state.c
@@ -319,6 +319,7 @@ util_dump_resource(FILE *stream, const struct pipe_resource 
*state)
 
util_dump_member(stream, uint, state, last_level);
util_dump_member(stream, uint, state, nr_samples);
+   util_dump_member(stream, uint, state, nr_storage_samples);
util_dump_member(stream, uint, state, usage);
util_dump_member(stream, uint, state, bind);
util_dump_member(stream, uint, state, flags);
diff --git a/src/gallium/auxiliary/util/u_tests.c 
b/src/gallium/auxiliary/util/u_tests.c
index bae3fa111a4..f8c001813c1 100644
--- a/src/gallium/auxiliary/util/u_tests.c
+++ b/src/gallium/auxiliary/util/u_tests.c
@@ -55,6 +55,7 @@ util_create_texture2d(struct pipe_screen *screen, unsigned 
width,
templ.depth0 = 1;
templ.array_size = 1;
templ.nr_samples = num_samples;
+   templ.nr_storage_samples = num_samples;
templ.format = format;
templ.usage = PIPE_USAGE_DEFAULT;
templ.bind = PIPE_BIND_SAMPLER_VIEW |
diff --git a/src/gallium/docs/source/screen.rst 
b/src/gallium/docs/source/screen.rst
index 5fc49e24403..ecff0800bf8 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -789,8 +789,27 @@ For cube maps this must be 6, for other textures 1.
 
 **last_level** the last mip map level present.
 
-**nr_samples** the nr of msaa samples. 0 (or 1) specifies a resource
-which isn't multisampled.
+**nr_samples**: Number of samples determining quality, driving the rasterizer,
+shading, and framebuffer. It is the number of samples seen by the whole
+graphics pipeline. 0 and 1 specify a resource which isn't multisampled.
+
+**nr_storage_samples**: Only color buf

[Mesa-dev] [PATCH 0/5] Gallium & RadeonSI: EQAA (mixed samples & storage sample counts)

2018-07-18 Thread Marek Olšák
Hi,

This implements the Gallium part of EQAA. I'd like to get this merged
sooner rather than later because the series makes a large number of
trivial changes across Gallium.

Please review.

Thanks,
Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600: enable tess_input_info for TES

2018-07-18 Thread Dave Airlie
From: Dave Airlie 

There might be a nicer way to do this, but this is at least correct.

This fixes:
KHR-GL44.tessellation_shader.single.max_patch_vertices
KHR-GL44.tessellation_shader.tessellation_control_to_tessellation_evaluation.gl_PatchVerticesIn
---
 src/gallium/drivers/r600/r600_shader.c | 20 ++--
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 4b91da3..6effa31be7b 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1673,19 +1673,11 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
} else if 
(ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == 
TGSI_SEMANTIC_TESSOUTER) {
r600_src->sel = 2;
} else if 
(ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == 
TGSI_SEMANTIC_VERTICESIN) {
-   if (ctx->type == PIPE_SHADER_TESS_CTRL) {
-   r600_src->sel = ctx->tess_input_info;
-   r600_src->swizzle[0] = 2;
-   r600_src->swizzle[1] = 2;
-   r600_src->swizzle[2] = 2;
-   r600_src->swizzle[3] = 2;
-   } else {
-   r600_src->sel = ctx->tess_input_info;
-   r600_src->swizzle[0] = 3;
-   r600_src->swizzle[1] = 3;
-   r600_src->swizzle[2] = 3;
-   r600_src->swizzle[3] = 3;
-   }
+   r600_src->sel = ctx->tess_input_info;
+   r600_src->swizzle[0] = 2;
+   r600_src->swizzle[1] = 2;
+   r600_src->swizzle[2] = 2;
+   r600_src->swizzle[3] = 2;
} else if (ctx->type == PIPE_SHADER_TESS_CTRL && 
ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == 
TGSI_SEMANTIC_PRIMID) {
r600_src->sel = 0;
r600_src->swizzle[0] = 0;
@@ -3559,7 +3551,7 @@ static int r600_shader_from_tgsi(struct r600_context 
*rctx,
ctx.tess_input_info = ++regno;
ctx.tess_output_info = ++regno;
} else if (ctx.type == PIPE_SHADER_TESS_EVAL) {
-   ctx.tess_input_info = 0;
+   ctx.tess_input_info = ++regno;
ctx.tess_output_info = ++regno;
} else if (ctx.type == PIPE_SHADER_GEOMETRY) {
ctx.gs_export_gpr_tregs[0] = ++regno;
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >