Re: [Mesa-dev] [PATCH 13/16] meta: Track temporary textures using gl_texture_object instead of GL API object handle

2017-12-21 Thread Tapani Pälli



On 19.12.2017 02:14, Ian Romanick wrote:

From: Ian Romanick 

Signed-off-by: Ian Romanick 
---
  src/mesa/drivers/common/meta.c  | 43 ++---
  src/mesa/drivers/common/meta.h  |  2 +-
  src/mesa/drivers/common/meta_blit.c |  8 ---
  3 files changed, 36 insertions(+), 17 deletions(-)

diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index 52d959a..be490d5 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -1228,6 +1228,8 @@ invert_z(GLfloat normZ)
  static void
  init_temp_texture(struct gl_context *ctx, struct temp_texture *tex)
  {
+   GLuint texObj;
+


this declaration could've been moved closer to the usage but since it's 
removed later ... it does not really matter :)



 /* prefer texture rectangle */
 if (_mesa_is_desktop_gl(ctx) && ctx->Extensions.NV_texture_rectangle) {
tex->Target = GL_TEXTURE_RECTANGLE;
@@ -1243,16 +1245,22 @@ init_temp_texture(struct gl_context *ctx, struct 
temp_texture *tex)
 tex->MinSize = 16;  /* 16 x 16 at least */
 assert(tex->MaxSize > 0);
  
-   _mesa_GenTextures(1, &tex->TexObj);

+   _mesa_GenTextures(1, &texObj);
+   tex->tex_obj = NULL;
+
+   if (texObj == 0)
+  return;
+
+   tex->tex_obj = _mesa_lookup_texture(ctx, texObj);
  }
  
  static void

  cleanup_temp_texture(struct temp_texture *tex)
  {
-   if (!tex->TexObj)
+   if (tex->tex_obj == NULL)
   return;
-   _mesa_DeleteTextures(1, &tex->TexObj);
-   tex->TexObj = 0;
+   _mesa_DeleteTextures(1, &tex->tex_obj->Name);
+   tex->tex_obj = NULL;
  }
  
  
@@ -1265,7 +1273,7 @@ _mesa_meta_get_temp_texture(struct gl_context *ctx)

  {
 struct temp_texture *tex = &ctx->Meta->TempTex;
  
-   if (!tex->TexObj) {

+   if (tex->tex_obj == NULL) {
init_temp_texture(ctx, tex);
 }
  
@@ -1283,7 +1291,7 @@ get_bitmap_temp_texture(struct gl_context *ctx)

  {
 struct temp_texture *tex = &ctx->Meta->Bitmap.Tex;
  
-   if (!tex->TexObj) {

+   if (tex->tex_obj == NULL) {
init_temp_texture(ctx, tex);
 }
  
@@ -1299,7 +1307,7 @@ _mesa_meta_get_temp_depth_texture(struct gl_context *ctx)

  {
 struct temp_texture *tex = &ctx->Meta->Blit.depthTex;
  
-   if (!tex->TexObj) {

+   if (tex->tex_obj == NULL) {
init_temp_texture(ctx, tex);
 }
  
@@ -1378,9 +1386,11 @@ _mesa_meta_setup_copypix_texture(struct gl_context *ctx,

  {
 bool newTex;
  
-   _mesa_BindTexture(tex->Target, tex->TexObj);

-   _mesa_TexParameteri(tex->Target, GL_TEXTURE_MIN_FILTER, filter);
-   _mesa_TexParameteri(tex->Target, GL_TEXTURE_MAG_FILTER, filter);
+   _mesa_BindTexture(tex->Target, tex->tex_obj->Name);
+   _mesa_texture_parameteriv(ctx, tex->tex_obj, GL_TEXTURE_MIN_FILTER,
+ (GLint *) &filter, false);
+   _mesa_texture_parameteriv(ctx, tex->tex_obj, GL_TEXTURE_MAG_FILTER,
+ (GLint *) &filter, false);
 _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
  
 newTex = _mesa_meta_alloc_texture(tex, width, height, intFormat);

@@ -1422,9 +1432,16 @@ _mesa_meta_setup_drawpix_texture(struct gl_context *ctx,
   GLenum format, GLenum type,
   const GLvoid *pixels)
  {
-   _mesa_BindTexture(tex->Target, tex->TexObj);
-   _mesa_TexParameteri(tex->Target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
-   _mesa_TexParameteri(tex->Target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+   /* GLint so the compiler won't complain about type signedness mismatch in
+* the call to _mesa_texture_parameteriv below.
+*/
+   static const GLint filter = GL_NEAREST;
+
+   _mesa_BindTexture(tex->Target, tex->tex_obj->Name);
+   _mesa_texture_parameteriv(ctx, tex->tex_obj, GL_TEXTURE_MIN_FILTER, &filter,
+ false);
+   _mesa_texture_parameteriv(ctx, tex->tex_obj, GL_TEXTURE_MAG_FILTER, &filter,
+ false);
 _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
  
 /* copy pixel data to texture */

diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h
index 252b236..6d51854 100644
--- a/src/mesa/drivers/common/meta.h
+++ b/src/mesa/drivers/common/meta.h
@@ -200,7 +200,7 @@ struct save_state
   */
  struct temp_texture
  {
-   GLuint TexObj;
+   struct gl_texture_object *tex_obj;
 GLenum Target; /**< GL_TEXTURE_2D or GL_TEXTURE_RECTANGLE */
 GLsizei MinSize;   /**< Min texture size to allocate */
 GLsizei MaxSize;   /**< Max possible texture size */
diff --git a/src/mesa/drivers/common/meta_blit.c 
b/src/mesa/drivers/common/meta_blit.c
index 0c08109..95dfa64 100644
--- a/src/mesa/drivers/common/meta_blit.c
+++ b/src/mesa/drivers/common/meta_blit.c
@@ -674,7 +674,7 @@ blitframebuffer_texture(struct gl_context *ctx,
}
  
srcLevel = 0;

-  texObj = _mesa_lookup_texture(ctx, meta_temp_texture->TexObj);
+  texObj = meta_temp_texture->

Re: [Mesa-dev] [PATCH 16/16] meta: Don't pollute the texture namespace

2017-12-21 Thread Tapani Pälli

series is
Reviewed-by: Tapani Pälli 

On 19.12.2017 02:14, Ian Romanick wrote:

From: Ian Romanick 

tl;dr: For many types of GL object, we can *NEVER* use the Gen function.

In OpenGL ES (all versions!) and OpenGL compatibility profile,
applications don't have to call Gen functions.  The GL spec is very
clear about how you can mix-and-match generated names and non-generated
names: you can use any name you want for a particular object type until
you call the Gen function for that object type.

Here's the problem scenario:

  - Application calls a meta function that generates a name.  The first
Gen will probably return 1.

  - Application decides to use the same name for an object of the same
type without calling Gen.  Many demo programs use names 1, 2, 3,
etc. without calling Gen.

  - Application calls the meta function again, and the meta function
replaces the data.  The application's data is lost, and the app
fails.  Have fun debugging that.

Signed-off-by: Ian Romanick 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92363
---
  src/mesa/drivers/common/meta.c  | 21 ++---
  src/mesa/drivers/common/meta_blit.c | 18 --
  2 files changed, 10 insertions(+), 29 deletions(-)

diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index 55d803f..f4830ec 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -98,7 +98,8 @@ meta_clear(struct gl_context *ctx, GLbitfield buffers, bool 
glsl);
  static struct blit_shader *
  choose_blit_shader(GLenum target, struct blit_shader_table *table);
  
-static void cleanup_temp_texture(struct temp_texture *tex);

+static void cleanup_temp_texture(struct gl_context *ctx,
+ struct temp_texture *tex);
  static void meta_glsl_clear_cleanup(struct gl_context *ctx,
  struct clear_state *clear);
  static void meta_decompress_cleanup(struct gl_context *ctx,
@@ -418,7 +419,7 @@ _mesa_meta_free(struct gl_context *ctx)
 _mesa_meta_glsl_blit_cleanup(ctx, &ctx->Meta->Blit);
 meta_glsl_clear_cleanup(ctx, &ctx->Meta->Clear);
 _mesa_meta_glsl_generate_mipmap_cleanup(ctx, &ctx->Meta->Mipmap);
-   cleanup_temp_texture(&ctx->Meta->TempTex);
+   cleanup_temp_texture(ctx, &ctx->Meta->TempTex);
 meta_decompress_cleanup(ctx, &ctx->Meta->Decompress);
 meta_drawpix_cleanup(ctx, &ctx->Meta->DrawPix);
 if (old_context)
@@ -1228,8 +1229,6 @@ invert_z(GLfloat normZ)
  static void
  init_temp_texture(struct gl_context *ctx, struct temp_texture *tex)
  {
-   GLuint texObj;
-
 /* prefer texture rectangle */
 if (_mesa_is_desktop_gl(ctx) && ctx->Extensions.NV_texture_rectangle) {
tex->Target = GL_TEXTURE_RECTANGLE;
@@ -1245,21 +1244,13 @@ init_temp_texture(struct gl_context *ctx, struct 
temp_texture *tex)
 tex->MinSize = 16;  /* 16 x 16 at least */
 assert(tex->MaxSize > 0);
  
-   _mesa_CreateTextures(tex->Target, 1, &texObj);

-   tex->tex_obj = NULL;
-
-   if (texObj == 0)
-  return;
-
-   tex->tex_obj = _mesa_lookup_texture(ctx, texObj);
+   tex->tex_obj = ctx->Driver.NewTextureObject(ctx, 0xDEADBEEF, tex->Target);
  }
  
  static void

-cleanup_temp_texture(struct temp_texture *tex)
+cleanup_temp_texture(struct gl_context *ctx, struct temp_texture *tex)
  {
-   if (tex->tex_obj == NULL)
- return;
-   _mesa_DeleteTextures(1, &tex->tex_obj->Name);
+   _mesa_delete_nameless_texture(ctx, tex->tex_obj);
 tex->tex_obj = NULL;
  }
  
diff --git a/src/mesa/drivers/common/meta_blit.c b/src/mesa/drivers/common/meta_blit.c

index 95dfa64..496ef28 100644
--- a/src/mesa/drivers/common/meta_blit.c
+++ b/src/mesa/drivers/common/meta_blit.c
@@ -879,9 +879,7 @@ _mesa_meta_fb_tex_blit_end(struct gl_context *ctx, GLenum 
target,
 _mesa_bind_sampler(ctx, ctx->Texture.CurrentUnit, blit->samp_obj_save);
 _mesa_reference_sampler_object(ctx, &blit->samp_obj_save, NULL);
 _mesa_reference_sampler_object(ctx, &blit->samp_obj, NULL);
-
-   if (blit->temp_tex_obj)
-  _mesa_DeleteTextures(1, &blit->temp_tex_obj->Name);
+   _mesa_delete_nameless_texture(ctx, blit->temp_tex_obj);
  }
  
  struct gl_texture_object *

@@ -890,20 +888,14 @@ _mesa_meta_texture_object_from_renderbuffer(struct 
gl_context *ctx,
  {
 struct gl_texture_image *texImage;
 struct gl_texture_object *texObj;
-   GLuint tempTex;
 const GLenum target = rb->NumSamples > 1
? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
  
-   tempTex = 0;

-   _mesa_CreateTextures(target, 1, &tempTex);
-   if (tempTex == 0)
-  return NULL;
-
-   texObj = _mesa_lookup_texture(ctx, tempTex);
+   texObj = ctx->Driver.NewTextureObject(ctx, 0xDEADBEEF, target);
 texImage = _mesa_get_tex_image(ctx, texObj, target, 0);
  
 if (!ctx->Driver.BindRenderbufferTexImage(ctx, rb, texImage)) {

-  _mesa_DeleteTextures(1, &tempTex);
+  _mesa_delete_nameless_texture(ctx, texObj);
return NULL;
 

[Mesa-dev] [PATCH] freedreno: set missing internal_format when importing texture

2017-12-21 Thread Ilia Mirkin
Fixes running piglits without -fbo. Probably lots of other stuff too.

Signed-off-by: Ilia Mirkin 
---
 src/gallium/drivers/freedreno/freedreno_resource.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c 
b/src/gallium/drivers/freedreno/freedreno_resource.c
index df00b514396..920e8736a81 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -840,6 +840,7 @@ fd_resource_from_handle(struct pipe_screen *pscreen,
if (!rsc->bo)
goto fail;
 
+   rsc->internal_format = tmpl->format;
rsc->cpp = util_format_get_blocksize(tmpl->format);
slice->pitch = handle->stride / rsc->cpp;
slice->offset = handle->offset;
-- 
2.13.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH mesa 1/3] radv: Add VK_KHR_display, VK_EXT_direct_mode_display and VK_EXT_acquire_xlib_display [v3]

2017-12-21 Thread Keith Packard
Implements VK_KHR_display, VK_EXT_direct_mode_display and
VK_EXT_acquire_xlib_display using DRM/KMS.

This uses X leases to provide the same API as nVidia, allowing an
application to discover available display resources and acquire one
from the X server using RandR leases.

Because Linux DRM does not allow access to KMS resources through a
render node, applications must use vkGetRandROutputDisplayEXT to
access a suitable VkDisplayKHR object as
vkGetPhysicalDeviceDisplayPropertiesKHR will not be able to see the
required kernel objects.

Contrariwise, nVidia masks off some RandR outputs from X (such as
head-mounted displays); those are *only* visible through
vkGetPhysicalDeviceDisplayPropertiesKHR. Hence, an application wanting
to work with both systems may need to use both mechanisms to locate
the desired output device.

v2:
Rework VK_KEITHP_kms_display extension to just use a data
structure.

v3:
Restructure to provide KMS access using the existing
VK_EXT_acquire_xlib_display instead of creating a new
extension.

Rebase on more recent mesa master

Signed-off-by: Keith Packard 
---
 configure.ac|   12 +
 src/amd/vulkan/Makefile.am  |   17 +
 src/amd/vulkan/Makefile.sources |3 +
 src/amd/vulkan/radv_extensions.py   |3 +
 src/amd/vulkan/radv_private.h   |1 +
 src/amd/vulkan/radv_wsi.c   |3 +-
 src/amd/vulkan/radv_wsi_display.c   |  185 
 src/intel/Makefile.vulkan.am|   17 +
 src/intel/vulkan/anv_wsi.c  |3 +-
 src/vulkan/Makefile.am  |   15 +
 src/vulkan/Makefile.sources |4 +
 src/vulkan/wsi/wsi_common.c |   19 +-
 src/vulkan/wsi/wsi_common.h |5 +-
 src/vulkan/wsi/wsi_common_display.c | 1782 +++
 src/vulkan/wsi/wsi_common_display.h |   92 ++
 src/vulkan/wsi/wsi_common_private.h |   10 +
 16 files changed, 2166 insertions(+), 5 deletions(-)
 create mode 100644 src/amd/vulkan/radv_wsi_display.c
 create mode 100644 src/vulkan/wsi/wsi_common_display.c
 create mode 100644 src/vulkan/wsi/wsi_common_display.h

diff --git a/configure.ac b/configure.ac
index 79f275d3914..0666b9d2021 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1494,6 +1494,10 @@ fi
 AC_SUBST([GL_LIB])
 AC_SUBST([OSMESA_LIB])
 
+# Check for RandR leases
+PKG_CHECK_MODULES([RANDRPROTO], [randrproto >= 1.6.0],
+  [have_xlease=yes], [have_xlease=no])
+
 # Check for libdrm
 PKG_CHECK_MODULES([LIBDRM], [libdrm >= $LIBDRM_REQUIRED],
   [have_libdrm=yes], [have_libdrm=no])
@@ -1533,6 +1537,7 @@ AM_CONDITIONAL(HAVE_APPLEDRI, test "x$enable_dri" = xyes 
-a "x$dri_platform" = x
 AM_CONDITIONAL(HAVE_LMSENSORS, test "x$enable_lmsensors" = xyes )
 AM_CONDITIONAL(HAVE_GALLIUM_EXTRA_HUD, test "x$enable_gallium_extra_hud" = 
xyes )
 AM_CONDITIONAL(HAVE_WINDOWSDRI, test "x$enable_dri" = xyes -a "x$dri_platform" 
= xwindows )
+AM_CONDITIONAL(HAVE_XLEASE, test "x$have_xlease" = xyes )
 
 AC_ARG_ENABLE([shared-glapi],
 [AS_HELP_STRING([--enable-shared-glapi],
@@ -1832,9 +1837,16 @@ if test x"$enable_dri3" = xyes; then
 PKG_CHECK_MODULES([XCB_DRI3], [$dri3_modules])
 fi
 
+if test x"$have_xlease" = xyes; then
+DEFINES="$DEFINES -DHAVE_XLEASE"
+randr_modules="x11-xcb xcb-randr"
+PKG_CHECK_MODULES([XCB_RANDR], [$randr_modules])
+fi
+
 AM_CONDITIONAL(HAVE_PLATFORM_X11, echo "$platforms" | grep -q 'x11')
 AM_CONDITIONAL(HAVE_PLATFORM_WAYLAND, echo "$platforms" | grep -q 'wayland')
 AM_CONDITIONAL(HAVE_PLATFORM_DRM, echo "$platforms" | grep -q 'drm')
+AM_CONDITIONAL(HAVE_PLATFORM_DISPLAY, echo "$platforms" | grep -q 'drm')
 AM_CONDITIONAL(HAVE_PLATFORM_SURFACELESS, echo "$platforms" | grep -q 
'surfaceless')
 AM_CONDITIONAL(HAVE_PLATFORM_ANDROID, echo "$platforms" | grep -q 'android')
 
diff --git a/src/amd/vulkan/Makefile.am b/src/amd/vulkan/Makefile.am
index 6b352aebf98..f80e041fbe7 100644
--- a/src/amd/vulkan/Makefile.am
+++ b/src/amd/vulkan/Makefile.am
@@ -76,6 +76,23 @@ VULKAN_LIB_DEPS = \
$(DLOPEN_LIBS) \
-lm
 
+if HAVE_PLATFORM_DISPLAY
+AM_CPPFLAGS += \
+   -DVK_USE_PLATFORM_DISPLAY_KHR \
+   -DVK_USE_PLATFORM_XLIB_XRANDR_EXT
+
+VULKAN_SOURCES += $(VULKAN_WSI_DISPLAY_FILES)
+
+if HAVE_PLATFORM_X11
+if HAVE_XLEASE
+AM_CPPFLAGS += \
+   $(XCB_RANDR_CFLAGS)
+VULKAN_LIB_DEPS += $(XCB_RANDR_LIBS)
+endif
+endif
+
+endif
+
 if HAVE_PLATFORM_X11
 AM_CPPFLAGS += \
$(XCB_DRI3_CFLAGS) \
diff --git a/src/amd/vulkan/Makefile.sources b/src/amd/vulkan/Makefile.sources
index c9d172c3b1b..4321f7ddea3 100644
--- a/src/amd/vulkan/Makefile.sources
+++ b/src/amd/vulkan/Makefile.sources
@@ -75,6 +75,9 @@ VULKAN_WSI_WAYLAND_FILES := \
 VULKAN_WSI_X11_FILES := \
radv_wsi_x11.c
 
+VULKAN_WSI_DISPLAY_FILES := \
+   radv_wsi_display.c
+
 VULKAN_GENERATED_FILES := \
radv_entrypoints.c \
radv_entrypoints.h \
diff --git a/src/amd/vulkan/radv_extensions.py 
b/src/amd/vulkan/ra

[Mesa-dev] [PATCH mesa 0/3] Add VK_KHR_display and friends for radv

2017-12-21 Thread Keith Packard
Here's an implementation of a suite of extensions necessary to
directly drive displays from the mesa Vulkan implementation:

VK_KHR_display

Provides basic enumeration, control and display to directly
connected devices.

VK_EXT_direct_mode_display
VK_EXT_acquire_xlib_display

These allow the application to borrow an output from X and use
it with Vulkan. The Mesa/DRM implementation is slightly
different from the existing nVidia code as it requires that
the application use RandR to discover the desired output and
convert that to a Vulkan Display using
vkGetRandROutputDisplayEXT. It cannot use the display
enumeration API provided in VK_KHR_display because DRM doesn't
allow normal applications to access any of the display
resources from the kernel.

An application wanting to support both sets of devices will
need to use both enumeration methods.

VK_EXT_display_surface_counter

This lets you tell if the vblank counter from
VK_EXT_display_control is supported.

VK_EXT_display_control

This provides vblank fences for direct displays along with
DPMS control and hotplug events. The implementation provided
here does not wire up the hotplug events.

The previous series included a new extension for passing device
file descriptors directly to the driver; that may still be a nice
thing to have, but isn't required to support HMD displays in a fairly
straightforward fashion.

-keith

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH mesa 3/3] radv: add VK_EXT_display_control to radv [v2]

2017-12-21 Thread Keith Packard
This extension provides fences and frame count information to direct
display contexts. It uses new kernel ioctls to provide 64 bits of
vblank sequence and nanosecond resolution.

v2: Remove DRM_CRTC_SEQUENCE_FIRST_PIXEL_OUT flag. This has
been removed from the proposed kernel API.

Add NULL parameter to drmCrtcQueueSequence ioctl as we
don't care what sequence the event was actually queued to.

Signed-off-by: Keith Packard 
---
 src/amd/vulkan/radv_extensions.py   |   1 +
 src/amd/vulkan/radv_private.h   |  11 +-
 src/amd/vulkan/radv_wsi_display.c   | 108 +
 src/vulkan/wsi/wsi_common.h |   9 ++
 src/vulkan/wsi/wsi_common_display.c | 295 +++-
 src/vulkan/wsi/wsi_common_display.h |  28 
 6 files changed, 445 insertions(+), 7 deletions(-)

diff --git a/src/amd/vulkan/radv_extensions.py 
b/src/amd/vulkan/radv_extensions.py
index 8afba9d9057..08aa694e3c1 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -84,6 +84,7 @@ EXTENSIONS = [
 Extension('VK_EXT_direct_mode_display',   1, 
'VK_USE_PLATFORM_DISPLAY_KHR'),
 Extension('VK_EXT_acquire_xlib_display',  1, 
'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
 Extension('VK_EXT_display_surface_counter',   1, 
'VK_USE_PLATFORM_DISPLAY_KHR'),
+Extension('VK_EXT_display_control',   1, 
'VK_USE_PLATFORM_DISPLAY_KHR'),
 Extension('VK_KHX_multiview', 1, True),
 Extension('VK_EXT_external_memory_dma_buf',   1, True),
 Extension('VK_EXT_global_priority',   1, 
'device->rad_info.has_ctx_priority'),
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index bf0643a6a5e..7fac98def11 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1597,8 +1597,17 @@ void radv_initialise_cmask(struct radv_cmd_buffer 
*cmd_buffer,
 void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer,
 struct radv_image *image, uint32_t value);
 
+enum radv_fence_type {
+RADV_FENCE_TYPE_WINSYS = 0,
+RADV_FENCE_TYPE_WSI = 1
+};
+
 struct radv_fence {
-   struct radeon_winsys_fence *fence;
+enum radv_fence_type type;
+union {
+struct radeon_winsys_fence  *fence;
+struct wsi_fence*fence_wsi;
+};
bool submitted;
bool signalled;
 
diff --git a/src/amd/vulkan/radv_wsi_display.c 
b/src/amd/vulkan/radv_wsi_display.c
index 6bfce5f37ed..7dd06db6bef 100644
--- a/src/amd/vulkan/radv_wsi_display.c
+++ b/src/amd/vulkan/radv_wsi_display.c
@@ -183,3 +183,111 @@ radv_CreateDisplayPlaneSurfaceKHR(VkInstance  
  _instanc
 
return wsi_create_display_surface(_instance, alloc, create_info, surface);
 }
+
+/* VK_EXT_display_control */
+
+VkResult
+radv_DisplayPowerControlEXT(VkDevice_device,
+VkDisplayKHRdisplay,
+const VkDisplayPowerInfoEXT *display_power_info)
+{
+   RADV_FROM_HANDLE(radv_device, device, _device);
+
+   return wsi_display_power_control(_device,
+&device->physical_device->wsi_device,
+display,
+display_power_info);
+}
+
+VkResult
+radv_RegisterDeviceEventEXT(VkDevice_device,
+const VkDeviceEventInfoEXT  *device_event_info,
+const VkAllocationCallbacks *allocator,
+VkFence *_fence)
+{
+   RADV_FROM_HANDLE(radv_device, device, _device);
+   const VkAllocationCallbacks  *alloc;
+   struct radv_fence*fence;
+   VkResult ret;
+
+   if (allocator)
+ alloc = allocator;
+   else
+ alloc = &device->instance->alloc;
+
+   fence = vk_alloc(alloc, sizeof (*fence), 8,
+VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!fence)
+  return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+   fence->type = RADV_FENCE_TYPE_WSI;
+   fence->submitted = true;
+   fence->signalled = false;
+
+   ret = wsi_register_device_event(_device,
+   &device->physical_device->wsi_device,
+   device_event_info,
+   alloc,
+   &fence->fence_wsi);
+   if (ret == VK_SUCCESS)
+  *_fence = radv_fence_to_handle(fence);
+   else
+  vk_free(alloc, fence);
+   return ret;
+}
+
+VkResult
+radv_RegisterDisplayEventEXT(VkDevice   _device,
+ VkDisplayKHR   display,
+ const VkDisplayEventInfoEXT
*display_event_info,
+ const VkAllocationCallbacks*allocator,
+ VkFenc

[Mesa-dev] [PATCH mesa 2/3] radv: Add VK_EXT_display_surface_counter [v2]

2017-12-21 Thread Keith Packard
This extension is required to support EXT_display_control as it offers
a way to query whether the vblank counter is supported.

v2: Thanks to kisak

Fix spelling of VkSurfaceCapabilities2EXT in wsi_common_wayland.c,
it was using ext instead of EXT.

Fix spelling of VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_EXT
---
 src/amd/vulkan/radv_extensions.py   |  1 +
 src/amd/vulkan/radv_wsi.c   | 12 
 src/vulkan/wsi/wsi_common.c | 11 +++
 src/vulkan/wsi/wsi_common.h |  5 +
 src/vulkan/wsi/wsi_common_display.c | 27 +++
 src/vulkan/wsi/wsi_common_private.h |  2 ++
 src/vulkan/wsi/wsi_common_wayland.c | 27 +++
 src/vulkan/wsi/wsi_common_x11.c | 27 +++
 8 files changed, 112 insertions(+)

diff --git a/src/amd/vulkan/radv_extensions.py 
b/src/amd/vulkan/radv_extensions.py
index fc27ff8734c..8afba9d9057 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -83,6 +83,7 @@ EXTENSIONS = [
 Extension('VK_KHR_display',   1, 
'VK_USE_PLATFORM_DISPLAY_KHR'),
 Extension('VK_EXT_direct_mode_display',   1, 
'VK_USE_PLATFORM_DISPLAY_KHR'),
 Extension('VK_EXT_acquire_xlib_display',  1, 
'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
+Extension('VK_EXT_display_surface_counter',   1, 
'VK_USE_PLATFORM_DISPLAY_KHR'),
 Extension('VK_KHX_multiview', 1, True),
 Extension('VK_EXT_external_memory_dma_buf',   1, True),
 Extension('VK_EXT_global_priority',   1, 
'device->rad_info.has_ctx_priority'),
diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c
index 5ec872a63d0..2d2a30ebbb1 100644
--- a/src/amd/vulkan/radv_wsi.c
+++ b/src/amd/vulkan/radv_wsi.c
@@ -103,6 +103,18 @@ VkResult radv_GetPhysicalDeviceSurfaceCapabilities2KHR(
pSurfaceCapabilities);
 }
 
+VkResult radv_GetPhysicalDeviceSurfaceCapabilities2EXT(
+   VkPhysicalDevicephysicalDevice,
+   VkSurfaceKHRsurface,
+   VkSurfaceCapabilities2EXT*  pSurfaceCapabilities)
+{
+   RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
+
+   return wsi_common_get_surface_capabilities2ext(&device->wsi_device,
+   surface,
+   pSurfaceCapabilities);
+}
+
 VkResult radv_GetPhysicalDeviceSurfaceFormatsKHR(
VkPhysicalDevicephysicalDevice,
VkSurfaceKHRsurface,
diff --git a/src/vulkan/wsi/wsi_common.c b/src/vulkan/wsi/wsi_common.c
index 99f4c607666..672b0c50ec0 100644
--- a/src/vulkan/wsi/wsi_common.c
+++ b/src/vulkan/wsi/wsi_common.c
@@ -576,6 +576,17 @@ wsi_common_get_surface_capabilities2(struct wsi_device 
*wsi_device,
pSurfaceCapabilities);
 }
 
+VkResult
+wsi_common_get_surface_capabilities2ext(struct wsi_device *wsi_device,
+VkSurfaceKHR _surface,
+VkSurfaceCapabilities2EXT 
*pSurfaceCapabilities)
+{
+   ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
+   struct wsi_interface *iface = wsi_device->wsi[surface->platform];
+
+   return iface->get_capabilities2ext(surface, pSurfaceCapabilities);
+}
+
 VkResult
 wsi_common_get_surface_formats(struct wsi_device *wsi_device,
VkSurfaceKHR _surface,
diff --git a/src/vulkan/wsi/wsi_common.h b/src/vulkan/wsi/wsi_common.h
index 1cb6aaebca0..124d096170a 100644
--- a/src/vulkan/wsi/wsi_common.h
+++ b/src/vulkan/wsi/wsi_common.h
@@ -155,6 +155,11 @@ wsi_common_get_surface_present_modes(struct wsi_device 
*wsi_device,
  uint32_t *pPresentModeCount,
  VkPresentModeKHR *pPresentModes);
 
+VkResult
+wsi_common_get_surface_capabilities2ext(struct wsi_device *wsi_device,
+VkSurfaceKHR surface,
+VkSurfaceCapabilities2EXT 
*pSurfaceCapabilities);
+
 VkResult
 wsi_common_get_images(VkSwapchainKHR _swapchain,
   uint32_t *pSwapchainImageCount,
diff --git a/src/vulkan/wsi/wsi_common_display.c 
b/src/vulkan/wsi/wsi_common_display.c
index 05b019cf957..bf13a359ac7 100644
--- a/src/vulkan/wsi/wsi_common_display.c
+++ b/src/vulkan/wsi/wsi_common_display.c
@@ -1145,6 +1145,32 @@ wsi_display_surface_get_capabilities2(VkIcdSurfaceBase 
*icd_surface,
return wsi_display_surface_get_capabilities(icd_surface, 
&caps->surfaceCapabilities);
 }
 
+static VkResult
+wsi_display_surface_get_capabilities_2ext(VkIcdSurfaceBase *icd_surface,
+  VkSurfaceCapabilities2EXT *caps)
+{
+   VkSurfaceCapa

Re: [Mesa-dev] [PATCH] [rfc] radv: load depth clear regs for tc compatible depth images.

2017-12-21 Thread Dave Airlie
On 22 December 2017 at 12:14, Dave Airlie  wrote:
> From: Dave Airlie 
>
> If we are using tc compatible depth image as textures, it seems
> that we would need to make sure the depth/stencil clear registers
> are reloaded with the correct values before accessing the image.

Ignore me, I'm slowly working out how tc compat works :-)

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] [rfc] radv: load depth clear regs for tc compatible depth images.

2017-12-21 Thread Dave Airlie
From: Dave Airlie 

If we are using tc compatible depth images as textures, it seems
that we would need to make sure the depth/stencil clear registers
are reloaded with the correct values before accessing the image.

This was a failed attempt to fix
dEQP-VK.renderpass.suballocation.formats.d32_sfloat_s8_uint.load.clear
---
 src/amd/vulkan/radv_cmd_buffer.c | 2 +-
 src/amd/vulkan/radv_descriptor_set.c | 4 
 src/amd/vulkan/radv_private.h| 2 ++
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 6a89d4e..c24fef0 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1330,7 +1330,7 @@ radv_set_depth_clear_regs(struct radv_cmd_buffer 
*cmd_buffer,
radeon_emit(cmd_buffer->cs, fui(ds_clear_value.depth)); /* 
R_02802C_DB_DEPTH_CLEAR */
 }
 
-static void
+void
 radv_load_depth_clear_regs(struct radv_cmd_buffer *cmd_buffer,
   struct radv_image *image)
 {
diff --git a/src/amd/vulkan/radv_descriptor_set.c 
b/src/amd/vulkan/radv_descriptor_set.c
index e815939..3bede6d 100644
--- a/src/amd/vulkan/radv_descriptor_set.c
+++ b/src/amd/vulkan/radv_descriptor_set.c
@@ -642,6 +642,10 @@ write_image_descriptor(struct radv_device *device,
 
memcpy(dst, descriptor, 16 * 4);
 
+   if (radv_htile_enabled(iview->image, iview->base_mip)) {
+   radv_load_depth_clear_regs(cmd_buffer,iview->image);
+   }
+
if (cmd_buffer)
radv_cs_add_buffer(device->ws, cmd_buffer->cs, iview->bo, 7);
else
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index acc4ed4..596bfc9 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1608,6 +1608,8 @@ void radv_meta_push_descriptor_set(struct radv_cmd_buffer 
*cmd_buffer,
uint32_t descriptorWriteCount,
const VkWriteDescriptorSet 
*pDescriptorWrites);
 
+void radv_load_depth_clear_regs(struct radv_cmd_buffer *cmd_buffer,
+   struct radv_image *image);
 void radv_initialise_cmask(struct radv_cmd_buffer *cmd_buffer,
   struct radv_image *image, uint32_t value);
 void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer,
-- 
2.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC libdrm 0/5] Move alloc_handle_t from gralloc impls.

2017-12-21 Thread Gurchetan Singh
So the plan is for alloc_handle_t to not be sub-classed by the
implementations, but have all necessary information that an implementation
would need?

If so, how do we reconcile the implementation specific information that is
often in the handle:

https://github.com/intel/minigbm/blob/master/cros_gralloc/cros_gralloc_handle.h
[consumer_usage, producer_usage, yuv_color_range, is_updated etc.]

https://chromium.googlesource.com/chromiumos/platform/minigbm/+/master/cros_gralloc/cros_gralloc_handle.h
[use_flags, pixel_stride]

In our case, removing our minigbm specific use flags from the handle would
add complexity to our (*registerBuffer) path.

On Thu, Dec 21, 2017 at 10:14 AM, Rob Herring  wrote:

> On Wed, Dec 13, 2017 at 5:02 PM, Gurchetan Singh
>  wrote:
> > Hi Robert,
> >
> > Thanks for looking into this!  We need to decide if we want:
> >
> > (1) A common struct that implementations can subclass, i.e:
> >
> > struct blah_gralloc_handle {
> > alloc_handle_t alloc_handle;
> > int x, y, z;
> > 
> > }
> >
> > (2) An accessor library that vendors can implement, i.e:
> >
> > struct drmAndroidHandleInfo {
> >uint32_t (*get_fourcc)(buffer_handle_t handle);
> >uint32_t (*get_stride)(buffer_handle_t handle, uint32_t plane);
> >uint32_t (*get_offsets)(buffer_handle_t handle, uint32_t plane);
> >uint64_t (*get_modifier)(buffer_handle_t handle);
> > };
> >
> > From my perspective as someone who has to maintain the minigbm gralloc
> > implementation, (2) is preferable since:
>
> Yeah, I'd prefer not to encourage 1 as the default.
>
> > a) We really don't have a need for fields like data_owner, void *data,
> etc.
>
> We should be able to get rid of this. It's just for tracking imports.
>
> > Also, minigbm puts per plane fds, strides, offsets into the handle.
> > Separating the information for the first plane (for the alloc_handle_t)
> and
> > then rest of the planes would be annoying.
>
> The plan is to add those to alloc_handle_t.
>
> > b) we can avoid the struct within a struct that happens when we subclass,
> > since alignment/padding issues often pop up during
> > serialization/de-serialization.  Using __attribute__((aligned(xx))) is
> less
> > portable than maintaining a POD struct.
>
> Yes. Even just between 32 and 64 bit it's problematic.


> > c) IMO creating the handle should be left to the gralloc implementation.
> > Having accessor functions clearly defines what we need from libdrm -- to
> > make up for shortcomings of the gralloc API for DRM/KMS use cases.
> >
> >
> > On Wed, Dec 13, 2017 at 9:30 AM, Robert Foss 
> > wrote:
> >>
> >> This series moves {gbm,drm,cros}_gralloc_handle_t struct to libdrm,
> >> since at least 4 implementations exist, and share a lot of contents.
> >> The idea is to keep the common stuff defined in one place, and libdrm
> >> is the common codebase to all of these platforms.
> >>
> >> Additionally, having this struct defined in libdrm will make it
> >> easier for mesa and grallocs to communicate.
> >>
> >> Currently missing is:
> >>  - Planar formats
> >>  - Get/Set functions
> >>
> >>
> >> Planar formats
> >> --
> >> Support for planar formats is needed, but has not been added
> >> yet, mostly since this was not already implemented in {gbm,drm}_gralloc
> >> and the fact that having at least initial backwards compatibility would
> >> be nice. Anonymous unions can of course be used later on to provide
> >> backwards compatibility if so desired.
> >>
> >>
> >> Get/Set functions
> >> -
> >> During the previous discussion[1] one suggestion was to add accessor
> >> functions. In this RFC I've only provided an alloc_handle_create()
> >> function.
> >>
> >> The Get/Set functions have not been added yet, I was hoping for some
> >> conclusive arguments for them being added.
> >>
> >> Lastly it was suggested by Rob Herring that having a fourcc<->android
> >> pixel format conversion function would be useful.
> >>
> >>
> >> [1]
> >> https://lists.freedesktop.org/archives/mesa-dev/2017-November/178199.html
> >>
> >> Robert Foss (5):
> >>   android: Move gralloc handle struct to libdrm
> >>   android: Add version variable to alloc_handle_t
> >>   android: Mark alloc_handle_t magic variable as const
> >>   android: Remove member name from alloc_handle_t
> >>   android: Change alloc_handle_t format from Android format to fourcc
> >>
> >>  Android.mk   |  8 +++-
> >>  Makefile.sources |  3 ++
> >>  android/alloc_handle.h   | 87
> >> 
> >>  android/gralloc_drm_handle.h |  1 +
> >>  4 files changed, 97 insertions(+), 2 deletions(-)
> >>  create mode 100644 android/alloc_handle.h
> >>  create mode 12 android/gralloc_drm_handle.h
> >>
> >> --
> >> 2.14.1
> >>
> >> ___
> >> mesa-dev mailing list
> >> mesa-dev@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >
> >
>
_

Re: [Mesa-dev] [PATCH 00/22] Add support for GL_EXT_semaphore v2

2017-12-21 Thread Andres Rodriguez

Patch #2 is a little large so it is stuck in the moderation queue. The
patch doesn't introduce any functionality changes, just renames fences
to semaphores.

[snip]

[PATCH 02/22] gallium: rename pipe fences to semaphores

Is being held until the list moderator can review it for approval.

The reason it is being held:

Message body is too big: 271096 bytes with a limit of 128 KB

[snip]

On 2017-12-21 07:41 PM, Andres Rodriguez wrote:

Sorry for the long time to get this v2 out, past month has been
a bit hectic.

This incorporates the feedback I received from mareko and nha on
the previous iteration.

Some of the notable changes:
   - Merged fences and semaphores at the gallium level
   - Fixed latency due to delayed flushing
   - Moved the flush from gallium into the radeonsi pipe
   - Added tests (on piglit mailing list)
   - Some fixups for bugs found with the tests are at the end of the series

Thanks for taking the time to review.

Kind Regards,
Andres

Andres Rodriguez (22):
   gallium: add type parameter to create_fence_fd
   gallium: rename pipe fences to semaphores
   gallium: documentation updates for fence->semaphore rename
   gallium: introduce PIPE_CAP_SEMAPHORE_SIGNAL
   gallium: introduce PIPE_FD_TYPE_SYNCOBJ
   gallium: add semaphore_server_signal()
   u_threaded_context: add support for semaphore_server_signal
   mesa/st: introduce EXT_semaphore and EXT_semaphore_fd v2
   mesa: add support for semaphore object creation/import/delete v2
   mesa/st: add support for semaphore object create/import/delete
   mesa: add semaphore parameter stub v2
   mesa: add support for semaphore object signal/wait v2
   mesa/st: add support for semaphore object signal/wait v2
   mesa: implement buffer/texture barriers for semaphore signal/wait v2
   winsys/amdgpu: add support for syncobj signaling v2
   radeonsi: add support for importing PIPE_FD_TYPE_SYNCOBJ semaphores
   radeonsi: implement semaphore_server_signal
   radeonsi: fix semaphore_server_sync() holding up extra work
   radeonsi: advertise support for GL_EXT_semaphore
   mesa: fix error codes for importing memory/semaphore FDs
   mesa: fix glGet for ext_external_objects parameters
   mesa: check for invalid index on UUID glGet queries

  src/gallium/auxiliary/pipebuffer/pb_buffer.h   |   6 +-
  .../auxiliary/pipebuffer/pb_buffer_fenced.c|  20 +-
  .../auxiliary/pipebuffer/pb_buffer_fenced.h|  14 +-
  .../auxiliary/pipebuffer/pb_buffer_malloc.c|   2 +-
  src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c |   2 +-
  src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c |   2 +-
  src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c|   2 +-
  .../auxiliary/pipebuffer/pb_bufmgr_ondemand.c  |   2 +-
  src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c  |   2 +-
  src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c  |   2 +-
  src/gallium/auxiliary/pipebuffer/pb_validate.c |   2 +-
  src/gallium/auxiliary/pipebuffer/pb_validate.h |   4 +-
  src/gallium/auxiliary/util/u_tests.c   |  55 ++---
  src/gallium/auxiliary/util/u_threaded_context.c|  57 --
  src/gallium/auxiliary/util/u_threaded_context.h|  10 +-
  .../auxiliary/util/u_threaded_context_calls.h  |   3 +-
  src/gallium/docs/source/context.rst|  65 --
  src/gallium/docs/source/screen.rst |   2 +
  src/gallium/drivers/ddebug/dd_draw.c   |  24 +--
  src/gallium/drivers/ddebug/dd_pipe.h   |   6 +-
  src/gallium/drivers/ddebug/dd_screen.c |  18 +-
  src/gallium/drivers/etnaviv/etnaviv_context.c  |   8 +-
  src/gallium/drivers/etnaviv/etnaviv_fence.c|  40 ++--
  src/gallium/drivers/etnaviv/etnaviv_fence.h|  15 +-
  src/gallium/drivers/etnaviv/etnaviv_screen.c   |   3 +-
  src/gallium/drivers/freedreno/freedreno_batch.c|   4 +-
  src/gallium/drivers/freedreno/freedreno_batch.h|   2 +-
  src/gallium/drivers/freedreno/freedreno_context.c  |  16 +-
  src/gallium/drivers/freedreno/freedreno_fence.c|  42 ++--
  src/gallium/drivers/freedreno/freedreno_fence.h|  28 +--
  src/gallium/drivers/freedreno/freedreno_gmem.c |   2 +-
  src/gallium/drivers/freedreno/freedreno_screen.c   |   9 +-
  src/gallium/drivers/i915/i915_batch.h  |   2 +-
  src/gallium/drivers/i915/i915_flush.c  |   4 +-
  src/gallium/drivers/i915/i915_screen.c |  21 +-
  src/gallium/drivers/i915/i915_winsys.h |  16 +-
  src/gallium/drivers/llvmpipe/lp_context.c  |   2 +-
  src/gallium/drivers/llvmpipe/lp_fence.h|   2 +-
  src/gallium/drivers/llvmpipe/lp_flush.c|   8 +-
  src/gallium/drivers/llvmpipe/lp_flush.h|   4 +-
  src/gallium/drivers/llvmpipe/lp_query.c|   2 +-
  src/gallium/drivers/llvmpipe/lp_scene.c|   4 +-
  src/gallium/drivers/llvmpipe/lp_screen.c   |  19 +-
  src/gallium/drivers/llvmpipe/lp_setup.c|  12 +-
  src/gallium/

[Mesa-dev] [PATCH 14/22] mesa: implement buffer/texture barriers for semaphore signal/wait v2

2017-12-21 Thread Andres Rodriguez
Make sure memory is accessible to the external client, for the specified
memory object, before the signal/after the wait.

v2: fixed flush order with respect to wait/signal emission

Signed-off-by: Andres Rodriguez 
---
 src/mesa/main/dd.h  | 14 ++-
 src/mesa/main/externalobjects.c | 38 +++---
 src/mesa/state_tracker/st_cb_semaphoreobjects.c | 53 +++--
 3 files changed, 95 insertions(+), 10 deletions(-)

diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index e43cf1c..6879587 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -1166,14 +1166,24 @@ struct dd_function_table {
 * server's command stream
 */
void (*ServerWaitSemaphoreObject)(struct gl_context *ctx,
- struct gl_semaphore_object *semObj);
+ struct gl_semaphore_object *semObj,
+ GLuint numBufferBarriers,
+ struct gl_buffer_object **bufObjs,
+ GLuint numTextureBarriers,
+ struct gl_texture_object **texObjs,
+ const GLenum *srcLayouts);
 
/**
 * Introduce an operation to signal the semaphore object in the GL
 * server's command stream
 */
void (*ServerSignalSemaphoreObject)(struct gl_context *ctx,
-   struct gl_semaphore_object *semObj);
+   struct gl_semaphore_object *semObj,
+   GLuint numBufferBarriers,
+   struct gl_buffer_object **bufObjs,
+   GLuint numTextureBarriers,
+   struct gl_texture_object **texObjs,
+   const GLenum *dstLayouts);
/*@}*/
 
/**
diff --git a/src/mesa/main/externalobjects.c b/src/mesa/main/externalobjects.c
index 26c1f27..e1e6df2 100644
--- a/src/mesa/main/externalobjects.c
+++ b/src/mesa/main/externalobjects.c
@@ -23,6 +23,7 @@
 
 #include "macros.h"
 #include "mtypes.h"
+#include "bufferobj.h"
 #include "context.h"
 #include "externalobjects.h"
 #include "teximage.h"
@@ -715,7 +716,8 @@ _mesa_WaitSemaphoreEXT(GLuint semaphore,
 {
GET_CURRENT_CONTEXT(ctx);
struct gl_semaphore_object *semObj;
-
+   struct gl_buffer_object **bufObjs;
+   struct gl_texture_object **texObjs;
 
if (!ctx->Extensions.EXT_semaphore) {
   _mesa_error(ctx, GL_INVALID_OPERATION, 
"glWaitSemaphoreEXT(unsupported)");
@@ -731,8 +733,20 @@ _mesa_WaitSemaphoreEXT(GLuint semaphore,
FLUSH_VERTICES( ctx, 0 );
FLUSH_CURRENT( ctx, 0 );
 
-   /* TODO: memory barriers and layout transitions */
-   ctx->Driver.ServerWaitSemaphoreObject(ctx, semObj);
+   bufObjs = alloca(sizeof(struct gl_buffer_object **) * numBufferBarriers);
+   for (unsigned i = 0; i < numBufferBarriers; i++) {
+  bufObjs[i] = _mesa_lookup_bufferobj(ctx, buffers[i]);
+   }
+
+   texObjs = alloca(sizeof(struct gl_texture_object **) * numTextureBarriers);
+   for (unsigned i = 0; i < numTextureBarriers; i++) {
+  texObjs[i] = _mesa_lookup_texture(ctx, textures[i]);
+   }
+
+   ctx->Driver.ServerWaitSemaphoreObject(ctx, semObj,
+ numBufferBarriers, bufObjs,
+ numTextureBarriers, texObjs,
+ srcLayouts);
 }
 
 void GLAPIENTRY
@@ -745,6 +759,8 @@ _mesa_SignalSemaphoreEXT(GLuint semaphore,
 {
GET_CURRENT_CONTEXT(ctx);
struct gl_semaphore_object *semObj;
+   struct gl_buffer_object **bufObjs;
+   struct gl_texture_object **texObjs;
 
if (!ctx->Extensions.EXT_semaphore) {
   _mesa_error(ctx, GL_INVALID_OPERATION, 
"glSignalSemaphoreEXT(unsupported)");
@@ -760,8 +776,20 @@ _mesa_SignalSemaphoreEXT(GLuint semaphore,
FLUSH_VERTICES( ctx, 0 );
FLUSH_CURRENT( ctx, 0 );
 
-   /* TODO: memory barriers and layout transitions */
-   ctx->Driver.ServerSignalSemaphoreObject(ctx, semObj);
+   bufObjs = alloca(sizeof(struct gl_buffer_object **) * numBufferBarriers);
+   for (unsigned i = 0; i < numBufferBarriers; i++) {
+  bufObjs[i] = _mesa_lookup_bufferobj(ctx, buffers[i]);
+   }
+
+   texObjs = alloca(sizeof(struct gl_texture_object **) * numTextureBarriers);
+   for (unsigned i = 0; i < numTextureBarriers; i++) {
+  texObjs[i] = _mesa_lookup_texture(ctx, textures[i]);
+   }
+
+   ctx->Driver.ServerSignalSemaphoreObject(ctx, semObj,
+   numBufferBarriers, bufObjs,
+   numTextureBarriers, texObjs,
+   dstLayouts);
 }
 
 void GLAPIENTRY
diff --git a/src/mesa/state_tracker/st_cb_semaphoreobjects.c 
b/src/mesa/state_tracker/st_cb_semaphoreobjects.c
index b1aec14..f53fe4d 100644
--- a/src/mesa/

[Mesa-dev] [PATCH 13/22] mesa/st: add support for semaphore object signal/wait v2

2017-12-21 Thread Andres Rodriguez
Bits to implement ServerWaitSemaphoreObject/ServerSignalSemaphoreObject

v2:
  - corresponding changes for gallium fence->semaphore rename
  - flushing moved to mesa/main

Signed-off-by: Andres Rodriguez 
---
 src/mesa/state_tracker/st_cb_semaphoreobjects.c | 27 +
 1 file changed, 27 insertions(+)

diff --git a/src/mesa/state_tracker/st_cb_semaphoreobjects.c 
b/src/mesa/state_tracker/st_cb_semaphoreobjects.c
index d7ea2ef..b1aec14 100644
--- a/src/mesa/state_tracker/st_cb_semaphoreobjects.c
+++ b/src/mesa/state_tracker/st_cb_semaphoreobjects.c
@@ -1,5 +1,7 @@
+
 #include "main/imports.h"
 #include "main/mtypes.h"
+#include "main/context.h"
 
 #include "main/externalobjects.h"
 
@@ -46,10 +48,35 @@ st_import_semaphoreobj_fd(struct gl_context *ctx,
 #endif
 }
 
+static void
+st_server_wait_semaphore(struct gl_context *ctx,
+ struct gl_semaphore_object *semObj)
+{
+   struct st_semaphore_object *st_obj = st_semaphore_object(semObj);
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+
+   pipe->semaphore_server_sync(pipe, st_obj->fence);
+}
+
+static void
+st_server_signal_semaphore(struct gl_context *ctx,
+   struct gl_semaphore_object *semObj)
+{
+   struct st_semaphore_object *st_obj = st_semaphore_object(semObj);
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+
+   /**TODO FIXME */
+   pipe->semaphore_server_signal(pipe, st_obj->fence);
+}
+
 void
 st_init_semaphoreobject_functions(struct dd_function_table *functions)
 {
functions->NewSemaphoreObject = st_semaphoreobj_alloc;
functions->DeleteSemaphoreObject = st_semaphoreobj_free;
functions->ImportSemaphoreFd = st_import_semaphoreobj_fd;
+   functions->ServerWaitSemaphoreObject = st_server_wait_semaphore;
+   functions->ServerSignalSemaphoreObject = st_server_signal_semaphore;
 }
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/22] mesa/st: introduce EXT_semaphore and EXT_semaphore_fd v2

2017-12-21 Thread Andres Rodriguez
Guarded by PIPE_CAP_SEMAPHORE_SIGNAL

v2: corresponding changes for PIPE_CAP_SEMAPHORE_SIGNAL rename

Signed-off-by: Andres Rodriguez 
---
 src/mesa/main/extensions_table.h   | 2 ++
 src/mesa/main/mtypes.h | 2 ++
 src/mesa/state_tracker/st_extensions.c | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index 3dec6ea..4f3e040 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -247,6 +247,8 @@ EXT(EXT_read_format_bgra, dummy_true
 EXT(EXT_rescale_normal  , dummy_true   
  , GLL,  x ,  x ,  x , 1997)
 EXT(EXT_robustness  , KHR_robustness   
  ,  x,   x,   x , ES2, 2011)
 EXT(EXT_secondary_color , dummy_true   
  , GLL,  x ,  x ,  x , 1999)
+EXT(EXT_semaphore   , EXT_semaphore
  , GLL, GLC,  x , ES2, 2017)
+EXT(EXT_semaphore_fd, EXT_semaphore_fd 
  , GLL, GLC,  x , ES2, 2017)
 EXT(EXT_separate_shader_objects , dummy_true   
  ,  x ,  x ,  x , ES2, 2013)
 EXT(EXT_separate_specular_color , dummy_true   
  , GLL,  x ,  x ,  x , 1997)
 EXT(EXT_shader_framebuffer_fetch, MESA_shader_framebuffer_fetch
  ,  x ,  x ,  x , ES2, 2013)
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index a29d78b..1908591 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -4181,6 +4181,8 @@ struct gl_extensions
GLboolean EXT_pixel_buffer_object;
GLboolean EXT_point_parameters;
GLboolean EXT_provoking_vertex;
+   GLboolean EXT_semaphore;
+   GLboolean EXT_semaphore_fd;
GLboolean EXT_shader_integer_mix;
GLboolean EXT_shader_samples_identical;
GLboolean EXT_stencil_two_side;
diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index 4dfc3bb..42d348d 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -670,6 +670,8 @@ void st_init_extensions(struct pipe_screen *screen,
   { o(EXT_draw_buffers2),PIPE_CAP_INDEP_BLEND_ENABLE   
},
   { o(EXT_memory_object),PIPE_CAP_MEMOBJ   
},
   { o(EXT_memory_object_fd), PIPE_CAP_MEMOBJ   
},
+  { o(EXT_semaphore),PIPE_CAP_SEMAPHORE_SIGNAL 
},
+  { o(EXT_semaphore_fd), PIPE_CAP_SEMAPHORE_SIGNAL 
},
   { o(EXT_stencil_two_side), PIPE_CAP_TWO_SIDED_STENCIL
},
   { o(EXT_texture_array),PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS 
},
   { o(EXT_texture_filter_anisotropic),   PIPE_CAP_ANISOTROPIC_FILTER   
},
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 22/22] mesa: check for invalid index on UUID glGet queries

2017-12-21 Thread Andres Rodriguez
This fixes the piglit test:
spec/ext_semaphore/api-errors/usigned-byte-i-v-bad-value

Signed-off-by: Andres Rodriguez 
---
 src/mesa/main/get.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index 34970b8..302cece 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -2547,9 +2547,13 @@ find_value_indexed(const char *func, GLenum pname, 
GLuint index, union value *v)
   v->value_int = 1;
   return TYPE_INT;
case GL_DRIVER_UUID_EXT:
+  if (index >= 1)
+ goto invalid_value;
   _mesa_get_driver_uuid(ctx, v->value_int_4);
   return TYPE_INT_4;
case GL_DEVICE_UUID_EXT:
+  if (index >= 1)
+ goto invalid_value;
   _mesa_get_device_uuid(ctx, v->value_int_4);
   return TYPE_INT_4;
}
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 16/22] radeonsi: add support for importing PIPE_FD_TYPE_SYNCOBJ semaphores

2017-12-21 Thread Andres Rodriguez
Hook up importing semaphores of type PIPE_FD_TYPE_SYNCOBJ

Signed-off-by: Andres Rodriguez 
---
 src/gallium/drivers/radeonsi/si_fence.c | 26 --
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_fence.c 
b/src/gallium/drivers/radeonsi/si_fence.c
index 3b30eb0..086d45c 100644
--- a/src/gallium/drivers/radeonsi/si_fence.c
+++ b/src/gallium/drivers/radeonsi/si_fence.c
@@ -305,18 +305,32 @@ static void si_create_semaphore_fd(struct pipe_context 
*ctx,
struct radeon_winsys *ws = sscreen->ws;
struct si_multi_fence *rfence;
 
-   assert(type == PIPE_FD_TYPE_NATIVE_SYNC);
-
*pfence = NULL;
 
-   if (!sscreen->info.has_sync_file)
-   return;
-
rfence = si_create_multi_fence();
if (!rfence)
return;
 
-   rfence->gfx = ws->fence_import_sync_file(ws, fd);
+   switch (type) {
+   case PIPE_FD_TYPE_NATIVE_SYNC:
+   if (!sscreen->info.has_sync_file)
+   goto finish;
+
+   rfence->gfx = ws->fence_import_sync_file(ws, fd);
+   break;
+
+   case PIPE_FD_TYPE_SYNCOBJ:
+   if (!sscreen->info.has_syncobj)
+   goto finish;
+
+   rfence->gfx = ws->fence_import_syncobj(ws, fd);
+   break;
+
+   default:
+   unreachable("bad semaphore type when importing");
+   }
+
+finish:
if (!rfence->gfx) {
FREE(rfence);
return;
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 21/22] mesa: fix glGet for ext_external_objects parameters

2017-12-21 Thread Andres Rodriguez
This allows the client to actually query the enums specified in the
ext_external_objects spec.

Signed-off-by: Andres Rodriguez 
---
 src/mesa/main/get.c  | 3 +++
 src/mesa/main/get_hash_params.py | 5 +
 2 files changed, 8 insertions(+)

diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index 7f2d72a..34970b8 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -2543,6 +2543,9 @@ find_value_indexed(const char *func, GLenum pname, GLuint 
index, union value *v)
   return TYPE_INT;
 
/* GL_EXT_external_objects */
+   case GL_NUM_DEVICE_UUIDS_EXT:
+  v->value_int = 1;
+  return TYPE_INT;
case GL_DRIVER_UUID_EXT:
   _mesa_get_driver_uuid(ctx, v->value_int_4);
   return TYPE_INT_4;
diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index bc71574..6852714 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -138,6 +138,11 @@ descriptor=[
 
 # GL_ARB_polygon_offset_clamp / GL_EXT_polygon_offset_clamp
   [ "POLYGON_OFFSET_CLAMP_EXT", "CONTEXT_FLOAT(Polygon.OffsetClamp), 
extra_ARB_polygon_offset_clamp" ],
+
+# GL_EXT_external_objects
+  [ "NUM_DEVICE_UUIDS_EXT", "LOC_CUSTOM, TYPE_INT, NO_OFFSET, NO_EXTRA" ],
+  [ "DRIVER_UUID_EXT", "LOC_CUSTOM, TYPE_INT_4, NO_OFFSET, NO_EXTRA" ],
+  [ "DEVICE_UUID_EXT", "LOC_CUSTOM, TYPE_INT_4, NO_OFFSET, NO_EXTRA" ],
 ]},
 
 # Enums in OpenGL and GLES1
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 20/22] mesa: fix error codes for importing memory/semaphore FDs

2017-12-21 Thread Andres Rodriguez
This fixes the following piglit tests:
spec/ext_semaphore_fd/api-errors/import-semaphore-fd-bad-enum
spec/ext_memory_object_fd/api-errors/import-memory-fd-bad-enum

Signed-off-by: Andres Rodriguez 
---
 src/mesa/main/externalobjects.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/main/externalobjects.c b/src/mesa/main/externalobjects.c
index e1e6df2..553e592 100644
--- a/src/mesa/main/externalobjects.c
+++ b/src/mesa/main/externalobjects.c
@@ -808,7 +808,7 @@ _mesa_ImportMemoryFdEXT(GLuint memory,
}
 
if (handleType != GL_HANDLE_TYPE_OPAQUE_FD_EXT) {
-  _mesa_error(ctx, GL_INVALID_VALUE, "%s(handleType=%u)", func, 
handleType);
+  _mesa_error(ctx, GL_INVALID_ENUM, "%s(handleType=%u)", func, handleType);
   return;
}
 
@@ -835,7 +835,7 @@ _mesa_ImportSemaphoreFdEXT(GLuint semaphore,
}
 
if (handleType != GL_HANDLE_TYPE_OPAQUE_FD_EXT) {
-  _mesa_error(ctx, GL_INVALID_VALUE, "%s(handleType=%u)", func, 
handleType);
+  _mesa_error(ctx, GL_INVALID_ENUM, "%s(handleType=%u)", func, handleType);
   return;
}
 
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/22] gallium: introduce PIPE_FD_TYPE_SYNCOBJ

2017-12-21 Thread Andres Rodriguez
Denotes that an fd is backed by a syncobj. For example, radv shared
semaphores.

Signed-off-by: Andres Rodriguez 
---
 src/gallium/include/pipe/p_defines.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index e2e07ac..9fbd332 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -1070,6 +1070,7 @@ struct pipe_driver_query_group_info
 enum pipe_fd_type
 {
PIPE_FD_TYPE_NATIVE_SYNC,
+   PIPE_FD_TYPE_SYNCOBJ,
 };
 
 enum pipe_debug_type
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 19/22] radeonsi: advertise support for GL_EXT_semaphore

2017-12-21 Thread Andres Rodriguez
Expose the feature only when we have syncobj support available

Signed-off-by: Andres Rodriguez 
---
 src/gallium/drivers/radeonsi/si_get.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_get.c 
b/src/gallium/drivers/radeonsi/si_get.c
index ed819bf..9e08b52 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -274,9 +274,11 @@ static int si_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_TILE_RASTER_ORDER:
case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:
-   case PIPE_CAP_SEMAPHORE_SIGNAL:
return 0;
 
+   case PIPE_CAP_SEMAPHORE_SIGNAL:
+   return sscreen->info.has_syncobj;
+
case PIPE_CAP_NATIVE_SEMAPHORE_FD:
return sscreen->info.has_sync_file;
 
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/22] mesa: add support for semaphore object signal/wait v2

2017-12-21 Thread Andres Rodriguez
Memory synchronization is left for a future patch.

v2: flush vertices/bitmaps moved to mesa/main

Signed-off-by: Andres Rodriguez 
---
 src/mesa/main/dd.h  | 14 ++
 src/mesa/main/externalobjects.c | 38 ++
 2 files changed, 52 insertions(+)

diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 66a8089..e43cf1c 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -1160,6 +1160,20 @@ struct dd_function_table {
 */
void (*DeleteSemaphoreObject)(struct gl_context *ctx,
  struct gl_semaphore_object *semObj);
+
+   /**
+* Introduce an operation to wait for the semaphore object in the GL
+* server's command stream
+*/
+   void (*ServerWaitSemaphoreObject)(struct gl_context *ctx,
+ struct gl_semaphore_object *semObj);
+
+   /**
+* Introduce an operation to signal the semaphore object in the GL
+* server's command stream
+*/
+   void (*ServerSignalSemaphoreObject)(struct gl_context *ctx,
+   struct gl_semaphore_object *semObj);
/*@}*/
 
/**
diff --git a/src/mesa/main/externalobjects.c b/src/mesa/main/externalobjects.c
index d667e44..26c1f27 100644
--- a/src/mesa/main/externalobjects.c
+++ b/src/mesa/main/externalobjects.c
@@ -23,6 +23,7 @@
 
 #include "macros.h"
 #include "mtypes.h"
+#include "context.h"
 #include "externalobjects.h"
 #include "teximage.h"
 #include "texobj.h"
@@ -712,7 +713,26 @@ _mesa_WaitSemaphoreEXT(GLuint semaphore,
const GLuint *textures,
const GLenum *srcLayouts)
 {
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_semaphore_object *semObj;
+
+
+   if (!ctx->Extensions.EXT_semaphore) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, 
"glWaitSemaphoreEXT(unsupported)");
+  return;
+   }
+
+   ASSERT_OUTSIDE_BEGIN_END(ctx);
+
+   semObj = _mesa_lookup_semaphore_object(ctx, semaphore);
+   if (!semObj)
+  return;
+
+   FLUSH_VERTICES( ctx, 0 );
+   FLUSH_CURRENT( ctx, 0 );
 
+   /* TODO: memory barriers and layout transitions */
+   ctx->Driver.ServerWaitSemaphoreObject(ctx, semObj);
 }
 
 void GLAPIENTRY
@@ -723,7 +743,25 @@ _mesa_SignalSemaphoreEXT(GLuint semaphore,
  const GLuint *textures,
  const GLenum *dstLayouts)
 {
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_semaphore_object *semObj;
+
+   if (!ctx->Extensions.EXT_semaphore) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, 
"glSignalSemaphoreEXT(unsupported)");
+  return;
+   }
+
+   ASSERT_OUTSIDE_BEGIN_END(ctx);
+
+   semObj = _mesa_lookup_semaphore_object(ctx, semaphore);
+   if (!semObj)
+  return;
+
+   FLUSH_VERTICES( ctx, 0 );
+   FLUSH_CURRENT( ctx, 0 );
 
+   /* TODO: memory barriers and layout transitions */
+   ctx->Driver.ServerSignalSemaphoreObject(ctx, semObj);
 }
 
 void GLAPIENTRY
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 18/22] radeonsi: fix semaphore_server_sync() holding up extra work

2017-12-21 Thread Andres Rodriguez
When calling si_semaphore_server_sync(), the wait operation is associated
with the next kernel submission. Therefore, any unflushed work
submitted prior to semaphore_server_sync() will also be affected by
the wait.

To avoid adding the dependency to the unflushed work, we flush before
emitting the fence dependency.

Signed-off-by: Andres Rodriguez 
---
 src/gallium/drivers/radeonsi/si_fence.c | 49 +
 1 file changed, 25 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_fence.c 
b/src/gallium/drivers/radeonsi/si_fence.c
index 7d898d1..2fd1063 100644
--- a/src/gallium/drivers/radeonsi/si_fence.c
+++ b/src/gallium/drivers/radeonsi/si_fence.c
@@ -114,30 +114,6 @@ struct pipe_semaphore_handle *si_create_fence(struct 
pipe_context *ctx,
return (struct pipe_semaphore_handle *)fence;
 }
 
-static void si_semaphore_server_sync(struct pipe_context *ctx,
-struct pipe_semaphore_handle *fence)
-{
-   struct r600_common_context *rctx = (struct r600_common_context *)ctx;
-   struct si_multi_fence *rfence = (struct si_multi_fence *)fence;
-
-   util_queue_fence_wait(&rfence->ready);
-
-   /* Unflushed fences from the same context are no-ops. */
-   if (rfence->gfx_unflushed.ctx &&
-   rfence->gfx_unflushed.ctx == rctx)
-   return;
-
-   /* All unflushed commands will not start execution before
-* this fence dependency is signalled.
-*
-* Should we flush the context to allow more GPU parallelism?
-*/
-   if (rfence->sdma)
-   si_add_fence_dependency(rctx, rfence->sdma);
-   if (rfence->gfx)
-   si_add_fence_dependency(rctx, rfence->gfx);
-}
-
 static bool si_fine_fence_signaled(struct radeon_winsys *rws,
   const struct si_fine_fence *fine)
 {
@@ -513,6 +489,31 @@ static void si_semaphore_server_signal(struct pipe_context 
*ctx,
si_flush_from_st(ctx, NULL, PIPE_FLUSH_ASYNC);
 }
 
+static void si_semaphore_server_sync(struct pipe_context *ctx,
+struct pipe_semaphore_handle *fence)
+{
+   struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+   struct si_multi_fence *rfence = (struct si_multi_fence *)fence;
+
+   util_queue_fence_wait(&rfence->ready);
+
+   /* Unflushed fences from the same context are no-ops. */
+   if (rfence->gfx_unflushed.ctx &&
+   rfence->gfx_unflushed.ctx == rctx)
+   return;
+
+   /* All unflushed commands will not start execution before
+* this fence dependency is signalled.
+*
+* Therefore we must flush before inserting the dependency
+*/
+   si_flush_from_st(ctx, NULL, PIPE_FLUSH_ASYNC);
+
+   if (rfence->sdma)
+   si_add_fence_dependency(rctx, rfence->sdma);
+   if (rfence->gfx)
+   si_add_fence_dependency(rctx, rfence->gfx);
+}
 
 void si_init_fence_functions(struct si_context *ctx)
 {
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 17/22] radeonsi: implement semaphore_server_signal

2017-12-21 Thread Andres Rodriguez
Syncobj based waits or signals only happen at submission boundaries. In
order to guarantee that the requested signal event will occur when the
state tracker requested it, we must issue a flush.

Signed-off-by: Andres Rodriguez 
---
 src/gallium/drivers/radeonsi/si_fence.c | 37 +
 1 file changed, 37 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_fence.c 
b/src/gallium/drivers/radeonsi/si_fence.c
index 086d45c..7d898d1 100644
--- a/src/gallium/drivers/radeonsi/si_fence.c
+++ b/src/gallium/drivers/radeonsi/si_fence.c
@@ -63,6 +63,14 @@ static void si_add_fence_dependency(struct 
r600_common_context *rctx,
ws->cs_add_fence_dependency(rctx->gfx.cs, fence);
 }
 
+static void si_add_syncobj_signal(struct r600_common_context *rctx,
+ struct pipe_semaphore_handle *semaphore)
+{
+   struct radeon_winsys *ws = rctx->ws;
+
+   ws->cs_add_syncobj_signal(rctx->gfx.cs, semaphore);
+}
+
 static void si_semaphore_reference(struct pipe_screen *screen,
   struct pipe_semaphore_handle **dst,
   struct pipe_semaphore_handle *src)
@@ -478,11 +486,40 @@ finish:
}
 }
 
+static void si_semaphore_server_signal(struct pipe_context *ctx,
+  struct pipe_semaphore_handle *semaphore)
+{
+   struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+   struct si_multi_fence *rfence = (struct si_multi_fence *)semaphore;
+
+   /* We should have at least one syncobj to signal */
+   assert(rfence->sdma || rfence->gfx);
+
+   if (rfence->sdma)
+   si_add_syncobj_signal(rctx, rfence->sdma);
+   if (rfence->gfx)
+   si_add_syncobj_signal(rctx, rfence->gfx);
+
+   /**
+* The spec does not require a flush here. We insert a flush
+* because syncobj based signals are not directly placed into
+* the command stream. Instead the signal happens when the
+* submission associated with the syncobj finishes execution.
+*
+* Therefore, we must make sure that we flush the pipe to avoid
+* new work being emitted and getting executed before the signal
+* operation.
+*/
+   si_flush_from_st(ctx, NULL, PIPE_FLUSH_ASYNC);
+}
+
+
 void si_init_fence_functions(struct si_context *ctx)
 {
ctx->b.b.flush = si_flush_from_st;
ctx->b.b.create_semaphore_fd = si_create_semaphore_fd;
ctx->b.b.semaphore_server_sync = si_semaphore_server_sync;
+   ctx->b.b.semaphore_server_signal = si_semaphore_server_signal;
 }
 
 void si_init_screen_fence_functions(struct si_screen *screen)
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 15/22] winsys/amdgpu: add support for syncobj signaling v2

2017-12-21 Thread Andres Rodriguez
Add the ability to signal a syncobj when a cs completes execution.

v2: corresponding changes for gallium fence->semaphore rename

Signed-off-by: Andres Rodriguez 
---
 src/gallium/drivers/radeon/radeon_winsys.h | 12 +
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c  | 83 +-
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.h  |  4 ++
 3 files changed, 98 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/radeon_winsys.h 
b/src/gallium/drivers/radeon/radeon_winsys.h
index 916a0b4..888ecf2 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -583,6 +583,12 @@ struct radeon_winsys {
 struct pipe_semaphore_handle *fence);
 
 /**
+ * Signal a syncobj when the CS finishes execution.
+ */
+void (*cs_add_syncobj_signal)(struct radeon_winsys_cs *cs,
+ struct pipe_semaphore_handle *semaphore);
+
+/**
  * Wait for the fence and return true if the fence has been signalled.
  * The timeout of 0 will only return the status.
  * The timeout of PIPE_TIMEOUT_INFINITE will always wait until the fence
@@ -599,6 +605,12 @@ struct radeon_winsys {
 struct pipe_semaphore_handle *src);
 
 /**
+ * Create a new fence object corresponding to the given syncobj fd.
+ */
+struct pipe_semaphore_handle *(*fence_import_syncobj)(struct radeon_winsys 
*ws,
+ int fd);
+
+/**
  * Create a new fence object corresponding to the given sync_file.
  */
 struct pipe_semaphore_handle *(*fence_import_sync_file)(struct 
radeon_winsys *ws,
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index ae71708..42656dc 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -57,6 +57,31 @@ amdgpu_fence_create(struct amdgpu_ctx *ctx, unsigned ip_type,
 }
 
 static struct pipe_semaphore_handle *
+amdgpu_fence_import_syncobj(struct radeon_winsys *rws, int fd)
+{
+   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
+   struct amdgpu_fence *fence = CALLOC_STRUCT(amdgpu_fence);
+   int r;
+
+   if (!fence)
+  return NULL;
+
+   pipe_reference_init(&fence->reference, 1);
+   fence->ws = ws;
+
+   r = amdgpu_cs_import_syncobj(ws->dev, fd, &fence->syncobj);
+   if (r) {
+  FREE(fence);
+  return NULL;
+   }
+
+   util_queue_fence_init(&fence->submitted);
+
+   assert(amdgpu_fence_is_syncobj(fence));
+   return (struct pipe_semaphore_handle*)fence;
+}
+
+static struct pipe_semaphore_handle *
 amdgpu_fence_import_sync_file(struct radeon_winsys *rws, int fd)
 {
struct amdgpu_winsys *ws = amdgpu_winsys(rws);
@@ -818,11 +843,14 @@ static void amdgpu_cs_context_cleanup(struct 
amdgpu_cs_context *cs)
}
for (i = 0; i < cs->num_fence_dependencies; i++)
   amdgpu_semaphore_reference(&cs->fence_dependencies[i], NULL);
+   for (i = 0; i < cs->num_syncobj_to_signal; i++)
+  amdgpu_semaphore_reference(&cs->syncobj_to_signal[i], NULL);
 
cs->num_real_buffers = 0;
cs->num_slab_buffers = 0;
cs->num_sparse_buffers = 0;
cs->num_fence_dependencies = 0;
+   cs->num_syncobj_to_signal = 0;
amdgpu_semaphore_reference(&cs->fence, NULL);
 
memset(cs->buffer_indices_hashlist, -1, 
sizeof(cs->buffer_indices_hashlist));
@@ -838,6 +866,7 @@ static void amdgpu_destroy_cs_context(struct 
amdgpu_cs_context *cs)
FREE(cs->slab_buffers);
FREE(cs->sparse_buffers);
FREE(cs->fence_dependencies);
+   FREE(cs->syncobj_to_signal);
 }
 
 
@@ -1147,6 +1176,36 @@ static void 
amdgpu_add_fence_dependencies_bo_lists(struct amdgpu_cs *acs)
amdgpu_add_fence_dependencies_bo_list(acs, cs->fence, 
cs->num_sparse_buffers, cs->sparse_buffers);
 }
 
+static unsigned add_syncobj_to_signal_entry(struct amdgpu_cs_context *cs)
+{
+   unsigned idx = cs->num_syncobj_to_signal++;
+
+   if (idx >= cs->max_syncobj_to_signal) {
+  unsigned size;
+  const unsigned increment = 8;
+
+  cs->max_syncobj_to_signal = idx + increment;
+  size = cs->max_syncobj_to_signal * sizeof(cs->syncobj_to_signal[0]);
+  cs->syncobj_to_signal = realloc(cs->syncobj_to_signal, size);
+  /* Clear the newly-allocated elements. */
+  memset(cs->syncobj_to_signal + idx, 0,
+ increment * sizeof(cs->syncobj_to_signal[0]));
+   }
+   return idx;
+}
+
+static void amdgpu_cs_add_syncobj_signal(struct radeon_winsys_cs *rws,
+ struct pipe_semaphore_handle 
*semaphore)
+{
+   struct amdgpu_cs *acs = amdgpu_cs(rws);
+   struct amdgpu_cs_context *cs = acs->csc;
+
+   assert(amdgpu_fence_is_syncobj((struct amdgpu_fence *)semaphore));
+
+   unsigned idx = add_syncobj_to_signal_entry(cs);
+   amdgpu_semaphore_reference(&cs->syncobj_to_signal[idx], semaphore);
+}
+
 /* Add backing of sparse buffers to the buffer list.
  *
  * Th

[Mesa-dev] [PATCH 11/22] mesa: add semaphore parameter stub v2

2017-12-21 Thread Andres Rodriguez
EXT_semaphore and EXT_semaphore_fd define no pnames. Therefore there
isn't much to do besides determining the correct error code.

v2: removed useless return

Signed-off-by: Andres Rodriguez 
---
 src/mesa/main/externalobjects.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/src/mesa/main/externalobjects.c b/src/mesa/main/externalobjects.c
index 5aeffd1..d667e44 100644
--- a/src/mesa/main/externalobjects.c
+++ b/src/mesa/main/externalobjects.c
@@ -666,12 +666,32 @@ _mesa_IsSemaphoreEXT(GLuint semaphore)
return obj ? GL_TRUE : GL_FALSE;
 }
 
+/**
+ * Helper that outputs the correct error status for parameter
+ * calls where no pnames are defined
+ */
+static void
+semaphore_parameter_stub(const char* func, GLenum pname)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   if (!ctx->Extensions.EXT_semaphore) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, "%s(unsupported)", func);
+  return;
+   }
+
+   /* EXT_semaphore and EXT_semaphore_fd define no parameters */
+   _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=0x%x)", func, pname);
+}
+
 void GLAPIENTRY
 _mesa_SemaphoreParameterui64vEXT(GLuint semaphore,
  GLenum pname,
  const GLuint64 *params)
 {
+   const char *func = "glSemaphoreParameterui64vEXT";
 
+   semaphore_parameter_stub(func, pname);
 }
 
 void GLAPIENTRY
@@ -679,7 +699,9 @@ _mesa_GetSemaphoreParameterui64vEXT(GLuint semaphore,
 GLenum pname,
 GLuint64 *params)
 {
+   const char *func = "glGetSemaphoreParameterui64vEXT";
 
+   semaphore_parameter_stub(func, pname);
 }
 
 void GLAPIENTRY
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/22] mesa/st: add support for semaphore object create/import/delete

2017-12-21 Thread Andres Rodriguez
Add basic semaphore object operations.

Signed-off-by: Andres Rodriguez 
---
 src/mesa/Makefile.sources   |  2 +
 src/mesa/meson.build|  2 +
 src/mesa/state_tracker/st_cb_semaphoreobjects.c | 55 +
 src/mesa/state_tracker/st_cb_semaphoreobjects.h | 25 +++
 src/mesa/state_tracker/st_context.c |  2 +
 5 files changed, 86 insertions(+)
 create mode 100644 src/mesa/state_tracker/st_cb_semaphoreobjects.c
 create mode 100644 src/mesa/state_tracker/st_cb_semaphoreobjects.h

diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 53fa486..2e89da4 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -485,6 +485,8 @@ STATETRACKER_FILES = \
state_tracker/st_cb_rasterpos.h \
state_tracker/st_cb_readpixels.c \
state_tracker/st_cb_readpixels.h \
+   state_tracker/st_cb_semaphoreobjects.c \
+   state_tracker/st_cb_semaphoreobjects.h \
state_tracker/st_cb_strings.c \
state_tracker/st_cb_strings.h \
state_tracker/st_cb_syncobj.c \
diff --git a/src/mesa/meson.build b/src/mesa/meson.build
index ab6bc27..a047396 100644
--- a/src/mesa/meson.build
+++ b/src/mesa/meson.build
@@ -536,6 +536,8 @@ files_libmesa_gallium = files(
   'state_tracker/st_cb_readpixels.h',
   'state_tracker/st_cb_strings.c',
   'state_tracker/st_cb_strings.h',
+  'state_tracker/st_cb_semaphoreobjects.c',
+  'state_tracker/st_cb_semaphoreobjects.h',
   'state_tracker/st_cb_syncobj.c',
   'state_tracker/st_cb_syncobj.h',
   'state_tracker/st_cb_texturebarrier.c',
diff --git a/src/mesa/state_tracker/st_cb_semaphoreobjects.c 
b/src/mesa/state_tracker/st_cb_semaphoreobjects.c
new file mode 100644
index 000..d7ea2ef
--- /dev/null
+++ b/src/mesa/state_tracker/st_cb_semaphoreobjects.c
@@ -0,0 +1,55 @@
+#include "main/imports.h"
+#include "main/mtypes.h"
+
+#include "main/externalobjects.h"
+
+#include "st_context.h"
+#include "st_cb_semaphoreobjects.h"
+
+#include "state_tracker/drm_driver.h"
+#include "pipe/p_context.h"
+#include "pipe/p_screen.h"
+
+static struct gl_semaphore_object *
+st_semaphoreobj_alloc(struct gl_context *ctx, GLuint name)
+{
+   struct st_semaphore_object *st_obj = ST_CALLOC_STRUCT(st_semaphore_object);
+   if (!st_obj)
+  return NULL;
+
+   _mesa_initialize_semaphore_object(ctx, &st_obj->Base, name);
+   return &st_obj->Base;
+}
+
+static void
+st_semaphoreobj_free(struct gl_context *ctx,
+ struct gl_semaphore_object *semObj)
+{
+   _mesa_delete_semaphore_object(ctx, semObj);
+}
+
+
+static void
+st_import_semaphoreobj_fd(struct gl_context *ctx,
+   struct gl_semaphore_object *semObj,
+   int fd)
+{
+   struct st_semaphore_object *st_obj = st_semaphore_object(semObj);
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+
+   pipe->create_semaphore_fd(pipe, &st_obj->fence, fd, PIPE_FD_TYPE_SYNCOBJ);
+
+#if !defined(_WIN32)
+   /* We own fd, but we no longer need it. So get rid of it */
+   close(fd);
+#endif
+}
+
+void
+st_init_semaphoreobject_functions(struct dd_function_table *functions)
+{
+   functions->NewSemaphoreObject = st_semaphoreobj_alloc;
+   functions->DeleteSemaphoreObject = st_semaphoreobj_free;
+   functions->ImportSemaphoreFd = st_import_semaphoreobj_fd;
+}
diff --git a/src/mesa/state_tracker/st_cb_semaphoreobjects.h 
b/src/mesa/state_tracker/st_cb_semaphoreobjects.h
new file mode 100644
index 000..43f774b
--- /dev/null
+++ b/src/mesa/state_tracker/st_cb_semaphoreobjects.h
@@ -0,0 +1,25 @@
+#ifndef ST_CB_SEMAPHOREOBJECTS_H
+#define ST_CB_SEMAPHOREOBJECTS_H
+
+#include "main/compiler.h"
+#include "main/mtypes.h"
+
+struct dd_function_table;
+struct pipe_screen;
+
+struct st_semaphore_object
+{
+   struct gl_semaphore_object Base;
+   struct pipe_semaphore_handle *fence;
+};
+
+static inline struct st_semaphore_object *
+st_semaphore_object(struct gl_semaphore_object *obj)
+{
+   return (struct st_semaphore_object *)obj;
+}
+
+extern void
+st_init_semaphoreobject_functions(struct dd_function_table *functions);
+
+#endif
diff --git a/src/mesa/state_tracker/st_context.c 
b/src/mesa/state_tracker/st_context.c
index 7564a53..75f5815 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -60,6 +60,7 @@
 #include "st_cb_program.h"
 #include "st_cb_queryobj.h"
 #include "st_cb_readpixels.h"
+#include "st_cb_semaphoreobjects.h"
 #include "st_cb_texture.h"
 #include "st_cb_xformfb.h"
 #include "st_cb_flush.h"
@@ -739,6 +740,7 @@ st_init_driver_functions(struct pipe_screen *screen,
st_init_query_functions(functions);
st_init_cond_render_functions(functions);
st_init_readpixels_functions(functions);
+   st_init_semaphoreobject_functions(functions);
st_init_texture_functions(functions);
st_init_texture_barrier_functions(functions);
st_init_flush_functions(screen, functions);
-- 
2.9.3

__

[Mesa-dev] [PATCH 01/22] gallium: add type parameter to create_fence_fd

2017-12-21 Thread Andres Rodriguez
An fd can potentially have different types of objects backing it.
Specifying the type helps us make sure we treat the FD correctly.

This is in preparation to allow importing syncobj fence FDs in addition
to native sync FDs.

Signed-off-by: Andres Rodriguez 
---
 src/gallium/auxiliary/util/u_tests.c| 7 ---
 src/gallium/auxiliary/util/u_threaded_context.c | 5 +++--
 src/gallium/drivers/etnaviv/etnaviv_fence.c | 4 +++-
 src/gallium/drivers/etnaviv/etnaviv_fence.h | 3 ++-
 src/gallium/drivers/freedreno/freedreno_fence.c | 4 +++-
 src/gallium/drivers/freedreno/freedreno_fence.h | 3 ++-
 src/gallium/drivers/radeonsi/si_fence.c | 5 -
 src/gallium/drivers/svga/svga_pipe_flush.c  | 4 +++-
 src/gallium/include/pipe/p_context.h| 8 +---
 src/gallium/include/pipe/p_defines.h| 5 +
 src/gallium/state_trackers/dri/dri_helpers.c| 2 +-
 11 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_tests.c 
b/src/gallium/auxiliary/util/u_tests.c
index 2548b46..6e0d84f 100644
--- a/src/gallium/auxiliary/util/u_tests.c
+++ b/src/gallium/auxiliary/util/u_tests.c
@@ -502,6 +502,7 @@ test_sync_file_fences(struct pipe_context *ctx)
 {
struct pipe_screen *screen = ctx->screen;
bool pass = true;
+   enum pipe_fd_type fd_type = PIPE_FD_TYPE_NATIVE_SYNC;
 
if (!screen->get_param(screen, PIPE_CAP_NATIVE_FENCE_FD))
   return;
@@ -536,9 +537,9 @@ test_sync_file_fences(struct pipe_context *ctx)
/* (Re)import all fences. */
struct pipe_fence_handle *re_buf_fence = NULL, *re_tex_fence = NULL;
struct pipe_fence_handle *merged_fence = NULL;
-   ctx->create_fence_fd(ctx, &re_buf_fence, buf_fd);
-   ctx->create_fence_fd(ctx, &re_tex_fence, tex_fd);
-   ctx->create_fence_fd(ctx, &merged_fence, merged_fd);
+   ctx->create_fence_fd(ctx, &re_buf_fence, buf_fd, fd_type);
+   ctx->create_fence_fd(ctx, &re_tex_fence, tex_fd, fd_type);
+   ctx->create_fence_fd(ctx, &merged_fence, merged_fd, fd_type);
pass = pass && re_buf_fence && re_tex_fence && merged_fence;
 
/* Run another clear after waiting for everything. */
diff --git a/src/gallium/auxiliary/util/u_threaded_context.c 
b/src/gallium/auxiliary/util/u_threaded_context.c
index ffa8247..3ea1797 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.c
+++ b/src/gallium/auxiliary/util/u_threaded_context.c
@@ -1835,13 +1835,14 @@ tc_set_log_context(struct pipe_context *_pipe, struct 
u_log_context *log)
 
 static void
 tc_create_fence_fd(struct pipe_context *_pipe,
-   struct pipe_fence_handle **fence, int fd)
+   struct pipe_fence_handle **fence, int fd,
+   enum pipe_fd_type type)
 {
struct threaded_context *tc = threaded_context(_pipe);
struct pipe_context *pipe = tc->pipe;
 
tc_sync(tc);
-   pipe->create_fence_fd(pipe, fence, fd);
+   pipe->create_fence_fd(pipe, fence, fd, type);
 }
 
 static void
diff --git a/src/gallium/drivers/etnaviv/etnaviv_fence.c 
b/src/gallium/drivers/etnaviv/etnaviv_fence.c
index d82708e..22a964a 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_fence.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_fence.c
@@ -76,8 +76,10 @@ etna_screen_fence_finish(struct pipe_screen *pscreen, struct 
pipe_context *ctx,
 
 void
 etna_create_fence_fd(struct pipe_context *pctx,
- struct pipe_fence_handle **pfence, int fd)
+ struct pipe_fence_handle **pfence, int fd,
+ enum pipe_fd_type type)
 {
+   assert(type == PIPE_FD_TYPE_NATIVE_SYNC);
*pfence = etna_fence_create(pctx, dup(fd));
 }
 
diff --git a/src/gallium/drivers/etnaviv/etnaviv_fence.h 
b/src/gallium/drivers/etnaviv/etnaviv_fence.h
index cd68a42..8b8bb63 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_fence.h
+++ b/src/gallium/drivers/etnaviv/etnaviv_fence.h
@@ -32,7 +32,8 @@
 
 void
 etna_create_fence_fd(struct pipe_context *pctx,
- struct pipe_fence_handle **pfence, int fd);
+ struct pipe_fence_handle **pfence, int fd,
+ enum pipe_fd_type type);
 
 void
 etna_fence_server_sync(struct pipe_context *pctx,
diff --git a/src/gallium/drivers/freedreno/freedreno_fence.c 
b/src/gallium/drivers/freedreno/freedreno_fence.c
index 9289720..1925f72 100644
--- a/src/gallium/drivers/freedreno/freedreno_fence.c
+++ b/src/gallium/drivers/freedreno/freedreno_fence.c
@@ -120,8 +120,10 @@ static struct pipe_fence_handle * fence_create(struct 
fd_context *ctx,
 }
 
 void fd_create_fence_fd(struct pipe_context *pctx,
-   struct pipe_fence_handle **pfence, int fd)
+   struct pipe_fence_handle **pfence, int fd,
+   enum pipe_fd_type type)
 {
+   assert(type == PIPE_FD_TYPE_NATIVE_SYNC);
*pfence = fence_create(fd_context(pctx), NULL, 0, dup(fd));
 }
 
diff --git a/src/gallium/drivers/freedreno/freedreno_fence.h 
b/src/gallium/drivers/freedreno/freedreno_fence.h
index c1a9fd3..0842a1d 

[Mesa-dev] [PATCH 07/22] u_threaded_context: add support for semaphore_server_signal

2017-12-21 Thread Andres Rodriguez
Signed-off-by: Andres Rodriguez 
---
 src/gallium/auxiliary/util/u_threaded_context.c  | 20 
 .../auxiliary/util/u_threaded_context_calls.h|  1 +
 2 files changed, 21 insertions(+)

diff --git a/src/gallium/auxiliary/util/u_threaded_context.c 
b/src/gallium/auxiliary/util/u_threaded_context.c
index ab37055..b34c93a 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.c
+++ b/src/gallium/auxiliary/util/u_threaded_context.c
@@ -1864,6 +1864,25 @@ tc_semaphore_server_sync(struct pipe_context *_pipe,
screen->semaphore_reference(screen, &payload->fence, fence);
 }
 
+static void
+tc_call_semaphore_server_signal(struct pipe_context *pipe, union tc_payload 
*payload)
+{
+   pipe->semaphore_server_signal(pipe, payload->fence);
+   pipe->screen->semaphore_reference(pipe->screen, &payload->fence, NULL);
+}
+
+static void
+tc_semaphore_server_signal(struct pipe_context *_pipe,
+   struct pipe_semaphore_handle *fence)
+{
+   struct threaded_context *tc = threaded_context(_pipe);
+   struct pipe_screen *screen = tc->pipe->screen;
+   union tc_payload *payload = tc_add_small_call(tc, 
TC_CALL_semaphore_server_signal);
+
+   payload->fence = NULL;
+   screen->semaphore_reference(screen, &payload->fence, fence);
+}
+
 static struct pipe_video_codec *
 tc_create_video_codec(UNUSED struct pipe_context *_pipe,
   UNUSED const struct pipe_video_codec *templ)
@@ -2662,6 +2681,7 @@ threaded_context_create(struct pipe_context *pipe,
CTX_INIT(set_debug_callback);
CTX_INIT(create_semaphore_fd);
CTX_INIT(semaphore_server_sync);
+   CTX_INIT(semaphore_server_signal);
CTX_INIT(get_timestamp);
CTX_INIT(create_texture_handle);
CTX_INIT(delete_texture_handle);
diff --git a/src/gallium/auxiliary/util/u_threaded_context_calls.h 
b/src/gallium/auxiliary/util/u_threaded_context_calls.h
index f5e90fd..9187485 100644
--- a/src/gallium/auxiliary/util/u_threaded_context_calls.h
+++ b/src/gallium/auxiliary/util/u_threaded_context_calls.h
@@ -1,6 +1,7 @@
 CALL(flush)
 CALL(callback)
 CALL(semaphore_server_sync)
+CALL(semaphore_server_signal)
 CALL(destroy_query)
 CALL(begin_query)
 CALL(end_query)
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/22] mesa: add support for semaphore object creation/import/delete v2

2017-12-21 Thread Andres Rodriguez
Used by EXT_semaphore and EXT_semaphore_fd

v2: Removed unnecessary dummy callback initialization

Signed-off-by: Andres Rodriguez 
---
 src/mesa/main/dd.h  |  34 ++
 src/mesa/main/externalobjects.c | 133 +++-
 src/mesa/main/externalobjects.h |  31 +-
 src/mesa/main/mtypes.h  |   8 +++
 src/mesa/main/shared.c  |  17 +
 5 files changed, 221 insertions(+), 2 deletions(-)

diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 4e4d2a6..66a8089 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -1143,6 +1143,40 @@ struct dd_function_table {
   struct gl_shader_program *shProg,
   struct gl_program *prog);
/*@}*/
+
+   /**
+* \name GL_EXT_semaphore interface
+*/
+   /*@{*/
+  /**
+* Called to allocate a new semaphore object. Drivers will usually
+* allocate/return a subclass of gl_semaphore_object.
+*/
+   struct gl_semaphore_object * (*NewSemaphoreObject)(struct gl_context *ctx,
+  GLuint name);
+   /**
+* Called to delete/free a semaphore object. Drivers should free the
+* object and any associated resources.
+*/
+   void (*DeleteSemaphoreObject)(struct gl_context *ctx,
+ struct gl_semaphore_object *semObj);
+   /*@}*/
+
+   /**
+* \name GL_EXT_semaphore_fd interface
+*/
+   /*@{*/
+   /**
+* Called to import a semaphore object. The caller relinquishes ownership
+* of fd after the call returns.
+*
+* Accessing fd after ImportSemaphoreFd returns results in undefined
+* behaviour. This is consistent with EXT_semaphore_fd.
+*/
+   void (*ImportSemaphoreFd)(struct gl_context *ctx,
+struct gl_semaphore_object *semObj,
+int fd);
+   /*@}*/
 };
 
 
diff --git a/src/mesa/main/externalobjects.c b/src/mesa/main/externalobjects.c
index 4e9f8f8..5aeffd1 100644
--- a/src/mesa/main/externalobjects.c
+++ b/src/mesa/main/externalobjects.c
@@ -545,22 +545,125 @@ _mesa_TextureStorageMem1DEXT(GLuint texture,
  memory, offset, "glTextureStorageMem1DEXT");
 }
 
+/**
+ * Used as a placeholder for semaphore objects between glGenSemaphoresEXT()
+ * and glImportSemaphoreFdEXT(), so that glIsSemaphoreEXT() can work correctly.
+ */
+static struct gl_semaphore_object DummySemaphoreObject;
+
+/**
+ * Delete a semaphore object.  Called via ctx->Driver.DeleteSemaphoreObject().
+ * Not removed from hash table here.
+ */
+void
+_mesa_delete_semaphore_object(struct gl_context *ctx,
+  struct gl_semaphore_object *semObj)
+{
+   free(semObj);
+}
+
+/**
+ * Initialize a semaphore object to default values.
+ */
+void
+_mesa_initialize_semaphore_object(struct gl_context *ctx,
+  struct gl_semaphore_object *obj,
+  GLuint name)
+{
+   memset(obj, 0, sizeof(struct gl_semaphore_object));
+   obj->Name = name;
+}
+
 void GLAPIENTRY
 _mesa_GenSemaphoresEXT(GLsizei n, GLuint *semaphores)
 {
+   GET_CURRENT_CONTEXT(ctx);
+
+   const char *func = "glGenSemaphoresEXT";
+
+   if (MESA_VERBOSE & (VERBOSE_API))
+  _mesa_debug(ctx, "%s(%d, %p)", func, n, semaphores);
 
+   if (!ctx->Extensions.EXT_semaphore) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, "%s(unsupported)", func);
+  return;
+   }
+
+   if (n < 0) {
+  _mesa_error(ctx, GL_INVALID_VALUE, "%s(n < 0)", func);
+  return;
+   }
+
+   if (!semaphores)
+  return;
+
+   _mesa_HashLockMutex(ctx->Shared->SemaphoreObjects);
+   GLuint first = _mesa_HashFindFreeKeyBlock(ctx->Shared->SemaphoreObjects, n);
+   if (first) {
+  for (GLsizei i = 0; i < n; i++) {
+ semaphores[i] = first + i;
+ _mesa_HashInsertLocked(ctx->Shared->SemaphoreObjects,
+semaphores[i], &DummySemaphoreObject);
+  }
+   }
+
+   _mesa_HashUnlockMutex(ctx->Shared->SemaphoreObjects);
 }
 
 void GLAPIENTRY
 _mesa_DeleteSemaphoresEXT(GLsizei n, const GLuint *semaphores)
 {
+   GET_CURRENT_CONTEXT(ctx);
+
+   const char *func = "glDeleteSemaphoresEXT";
+
+   if (MESA_VERBOSE & (VERBOSE_API)) {
+  _mesa_debug(ctx, "%s(%d, %p)\n", func, n, semaphores);
+   }
+
+   if (!ctx->Extensions.EXT_semaphore) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, "%s(unsupported)", func);
+  return;
+   }
 
+   if (n < 0) {
+  _mesa_error(ctx, GL_INVALID_VALUE, "%s(n < 0)", func);
+  return;
+   }
+
+   if (!semaphores)
+  return;
+
+   _mesa_HashLockMutex(ctx->Shared->SemaphoreObjects);
+   for (GLint i = 0; i < n; i++) {
+  if (semaphores[i] > 0) {
+ struct gl_semaphore_object *delObj
+= _mesa_lookup_semaphore_object_locked(ctx, semaphores[i]);
+
+ if (delObj) {
+_mesa_HashRemoveLocked(ctx->Shared->SemaphoreObjects,
+  

[Mesa-dev] [PATCH 06/22] gallium: add semaphore_server_signal()

2017-12-21 Thread Andres Rodriguez
Calling this function will emit a semaphore signal operation into the
GPU's command stream.

Signed-off-by: Andres Rodriguez 
---
 src/gallium/docs/source/context.rst  | 31 +++
 src/gallium/include/pipe/p_context.h |  6 ++
 2 files changed, 37 insertions(+)

diff --git a/src/gallium/docs/source/context.rst 
b/src/gallium/docs/source/context.rst
index a8ff3dc..bab9613 100644
--- a/src/gallium/docs/source/context.rst
+++ b/src/gallium/docs/source/context.rst
@@ -567,7 +567,38 @@ by a single pipe_screen and is not shared with another 
process.
 (i.e. you shouldn't use it to flush caches explicitly if you want to e.g.
 use the resource for texturing)
 
+Semaphores
+^^^^^^^^^^
 
+``pipe_semaphore_handle``, and related methods, are used to synchronize
+execution between multiple parties. Examples include CPU <-> GPU 
synchronization,
+renderer <-> windowing system, multiple external APIs, etc.
+
+A ``pipe_semaphore_handle`` can either be 'one time use' or 're-usable'. A 
'one time use'
+semaphore behaves like a traditional GPU fence. Once it reaches the signaled 
state it
+is forever considered to be signaled.
+
+Once a re-usable ``pipe_semaphore_handle`` becomes signaled, it can be reset
+back into an unsignaled state. The ``pipe_semaphore_handle`` will be reset to
+the unsignaled state by performing a wait operation on said object, i.e.
+``semaphore_server_sync``. As a corollary to this behaviour, a re-usable
+``pipe_semaphore_handle`` can only have one waiter.
+
+This behaviour is useful in producer <-> consumer chains. It helps avoid
+unnecessarily sharing a new ``pipe_semaphore_handle`` each time a new frame is
+ready. Instead, the semaphores are exchanged once ahead of time, and access is 
synchronized
+through GPU signaling instead of direct producer <-> consumer communication.
+
+``semaphore_server_sync`` inserts a wait command into the GPU's command stream.
+
+``semaphore_server_signal`` inserts a signal command into the GPU's command 
stream.
+
+There are no guarantees that the wait/signal commands will be flushed when
+calling ``semaphore_server_sync`` or ``semaphore_server_signal``. An explicit
+call to ``flush`` is required to make sure the commands are emitted to the GPU.
+
+The Gallium implementation may implicitly ``flush`` the command stream during a
+``semaphore_server_sync`` or ``semaphore_server_signal`` call if necessary.
 
 Resource Busy Queries
 ^
diff --git a/src/gallium/include/pipe/p_context.h 
b/src/gallium/include/pipe/p_context.h
index 72e4b9d..5827491 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -527,6 +527,12 @@ struct pipe_context {
  struct pipe_semaphore_handle *semaphore);
 
/**
+* Insert commands to have the GPU signal a semaphore.
+*/
+   void (*semaphore_server_signal)(struct pipe_context *pipe,
+   struct pipe_semaphore_handle *semaphore);
+
+   /**
 * Create a view on a texture to be used by a shader stage.
 */
struct pipe_sampler_view * (*create_sampler_view)(struct pipe_context *ctx,
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/22] gallium: documentation updates for fence->semaphore rename

2017-12-21 Thread Andres Rodriguez
Kept separate from the symbol renames for review purposes.

Signed-off-by: Andres Rodriguez 
---
 src/gallium/docs/source/context.rst| 34 +++---
 src/gallium/include/pipe/p_context.h   | 33 +++--
 src/gallium/include/pipe/p_screen.h| 24 ++---
 src/gallium/include/state_tracker/st_api.h |  2 +-
 4 files changed, 47 insertions(+), 46 deletions(-)

diff --git a/src/gallium/docs/source/context.rst 
b/src/gallium/docs/source/context.rst
index 399b390..a8ff3dc 100644
--- a/src/gallium/docs/source/context.rst
+++ b/src/gallium/docs/source/context.rst
@@ -527,33 +527,33 @@ Flushing
 PIPE_FLUSH_END_OF_FRAME: Whether the flush marks the end of frame.
 
 PIPE_FLUSH_DEFERRED: It is not required to flush right away, but it is required
-to return a valid fence. If semaphore_finish is called with the returned fence
-and the context is still unflushed, and the ctx parameter of semaphore_finish 
is
-equal to the context where the fence was created, semaphore_finish will flush
-the context.
+to return a valid semaphore. If semaphore_finish is called with the returned
+semaphore and the context is still unflushed, and the ctx parameter of
+semaphore_finish is equal to the context where the semaphore was created,
+semaphore_finish will flush the context.
 
 PIPE_FLUSH_ASYNC: The flush is allowed to be asynchronous. Unlike
-``PIPE_FLUSH_DEFERRED``, the driver must still ensure that the returned fence
-will finish in finite time. However, subsequent operations in other contexts of
-the same screen are no longer guaranteed to happen after the flush. Drivers
-which use this flag must implement pipe_context::semaphore_server_sync.
+``PIPE_FLUSH_DEFERRED``, the driver must still ensure that the returned
+semaphore will finish in finite time. However, subsequent operations in other
+contexts of the same screen are no longer guaranteed to happen after the flush.
+Drivers which use this flag must implement pipe_context::semaphore_server_sync.
 
 PIPE_FLUSH_HINT_FINISH: Hints to the driver that the caller will immediately
-wait for the returned fence.
+wait for the returned semaphore.
 
 Additional flags may be set together with ``PIPE_FLUSH_DEFERRED`` for even
-finer-grained fences. Note that as a general rule, GPU caches may not have been
-flushed yet when these fences are signaled. Drivers are free to ignore these
-flags and create normal fences instead. At most one of the following flags can
-be specified:
+finer-grained semaphores. Note that as a general rule, GPU caches may not have
+been flushed yet when these semaphores are signaled. Drivers are free to ignore
+these flags and create normal semaphores instead. At most one of the following
+flags can be specified:
 
-PIPE_FLUSH_TOP_OF_PIPE: The fence should be signaled as soon as the next
+PIPE_FLUSH_TOP_OF_PIPE: The semaphore should be signaled as soon as the next
 command is ready to start executing at the top of the pipeline, before any of
 its data is actually read (including indirect draw parameters).
 
-PIPE_FLUSH_BOTTOM_OF_PIPE: The fence should be signaled as soon as the previous
-command has finished executing on the GPU entirely (but data written by the
-command may still be in caches and inaccessible to the CPU).
+PIPE_FLUSH_BOTTOM_OF_PIPE: The semaphore should be signaled as soon as the
+previous command has finished executing on the GPU entirely (but data written
+by the command may still be in caches and inaccessible to the CPU).
 
 
 ``flush_resource``
diff --git a/src/gallium/include/pipe/p_context.h 
b/src/gallium/include/pipe/p_context.h
index 750a69a..72e4b9d 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -487,43 +487,44 @@ struct pipe_context {
/**
 * Flush draw commands.
 *
-* This guarantees that the new fence (if any) will finish in finite time,
-* unless PIPE_FLUSH_DEFERRED is used.
+* This guarantees that the new semaphore (if any) will finish in finite
+* time, unless PIPE_FLUSH_DEFERRED is used.
 *
 * Subsequent operations on other contexts of the same screen are guaranteed
 * to execute after the flushed commands, unless PIPE_FLUSH_ASYNC is used.
 *
 * NOTE: use screen->semaphore_reference() (or equivalent) to transfer
-* new fence ref to **fence, to ensure that previous fence is unref'd
+* new semaphore ref to **semaphore, to ensure that previous semaphore is
+* unref'd.
 *
-* \param fence  if not NULL, an old fence to unref and transfer a
-*new fence reference to
-* \param flags  bitfield of enum pipe_flush_flags values.
+* \param semaphore  if not NULL, an old semaphore to unref and transfer a
+*   new semaphore reference to
+* \param flags  bitfield of enum pipe_flush_flags values.
 */
void (*flush)(struct pipe_context *pipe,
- struct pipe_semaphore_handle **fence,
+ str

[Mesa-dev] [PATCH 04/22] gallium: introduce PIPE_CAP_SEMAPHORE_SIGNAL

2017-12-21 Thread Andres Rodriguez
Protects semaphore signaling functionality required by GL_EXT_semaphore.

Signed-off-by: Andres Rodriguez 
---
 src/gallium/docs/source/screen.rst   | 2 ++
 src/gallium/drivers/etnaviv/etnaviv_screen.c | 1 +
 src/gallium/drivers/freedreno/freedreno_screen.c | 1 +
 src/gallium/drivers/i915/i915_screen.c   | 1 +
 src/gallium/drivers/llvmpipe/lp_screen.c | 1 +
 src/gallium/drivers/nouveau/nv30/nv30_screen.c   | 1 +
 src/gallium/drivers/nouveau/nv50/nv50_screen.c   | 1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   | 1 +
 src/gallium/drivers/r300/r300_screen.c   | 1 +
 src/gallium/drivers/r600/r600_pipe.c | 1 +
 src/gallium/drivers/radeonsi/si_get.c| 1 +
 src/gallium/drivers/radeonsi/si_pipe.c   | 1 -
 src/gallium/drivers/softpipe/sp_screen.c | 1 +
 src/gallium/drivers/svga/svga_screen.c   | 1 +
 src/gallium/drivers/swr/swr_screen.cpp   | 1 +
 src/gallium/drivers/vc4/vc4_screen.c | 1 +
 src/gallium/drivers/virgl/virgl_screen.c | 1 +
 src/gallium/include/pipe/p_defines.h | 1 +
 18 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/src/gallium/docs/source/screen.rst 
b/src/gallium/docs/source/screen.rst
index 519728f..fca7edf 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -417,6 +417,8 @@ The integer capabilities:
 * ``PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET``:
   Whether pipe_vertex_buffer::buffer_offset is treated as signed. The u_vbuf
   module needs this for optimal performance in workstation applications.
+* ``PIPE_CAP_SEMAPHORE_SIGNAL``: True if the driver supports signaling 
semaphores
+  using semaphore_server_signal().
 
 
 .. _pipe_capf:
diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c 
b/src/gallium/drivers/etnaviv/etnaviv_screen.c
index 105e0ee..f4eb387 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
@@ -268,6 +268,7 @@ etna_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_TILE_RASTER_ORDER:
case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:
case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
+   case PIPE_CAP_SEMAPHORE_SIGNAL:
   return 0;
 
/* Stream output. */
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c 
b/src/gallium/drivers/freedreno/freedreno_screen.c
index 7db13b1..3cc0486 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -339,6 +339,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_TILE_RASTER_ORDER:
case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:
case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
+   case PIPE_CAP_SEMAPHORE_SIGNAL:
return 0;
 
case PIPE_CAP_DRAW_INDIRECT:
diff --git a/src/gallium/drivers/i915/i915_screen.c 
b/src/gallium/drivers/i915/i915_screen.c
index 96ee78e..7aa5447 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -326,6 +326,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap 
cap)
case PIPE_CAP_TILE_RASTER_ORDER:
case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:
case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
+   case PIPE_CAP_SEMAPHORE_SIGNAL:
   return 0;
 
case PIPE_CAP_MAX_VIEWPORTS:
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c 
b/src/gallium/drivers/llvmpipe/lp_screen.c
index 973d82a..539d762 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -364,6 +364,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum 
pipe_cap param)
case PIPE_CAP_TILE_RASTER_ORDER:
case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:
case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
+   case PIPE_CAP_SEMAPHORE_SIGNAL:
   return 0;
}
/* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c 
b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 9f3e574..2b926cc 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -228,6 +228,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_TILE_RASTER_ORDER:
case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:
case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
+   case PIPE_CAP_SEMAPHORE_SIGNAL:
   return 0;
 
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 6dce340..5f9b3f3 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -280,6 +280,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_TILE_RASTER_ORDER:
case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:
case PIPE

[Mesa-dev] [PATCH 00/22] Add support for GL_EXT_semaphore v2

2017-12-21 Thread Andres Rodriguez
Sorry for the long time to get this v2 out, past month has been
a bit hectic.

This incorporates the feedback I received from mareko and nha on
the previous iteration.

Some of the notable changes:
  - Merged fences and semaphores at the gallium level
  - Fixed latency due to delayed flushing
  - Moved the flush from gallium into the radeonsi pipe
  - Added tests (on piglit mailing list)
  - Some fixups for bugs found with the tests are at the end of the series

Thanks for taking the time to review.

Kind Regards,
Andres

Andres Rodriguez (22):
  gallium: add type parameter to create_fence_fd
  gallium: rename pipe fences to semaphores
  gallium: documentation updates for fence->semaphore rename
  gallium: introduce PIPE_CAP_SEMAPHORE_SIGNAL
  gallium: introduce PIPE_FD_TYPE_SYNCOBJ
  gallium: add semaphore_server_signal()
  u_threaded_context: add support for semaphore_server_signal
  mesa/st: introduce EXT_semaphore and EXT_semaphore_fd v2
  mesa: add support for semaphore object creation/import/delete v2
  mesa/st: add support for semaphore object create/import/delete
  mesa: add semaphore parameter stub v2
  mesa: add support for semaphore object signal/wait v2
  mesa/st: add support for semaphore object signal/wait v2
  mesa: implement buffer/texture barriers for semaphore signal/wait v2
  winsys/amdgpu: add support for syncobj signaling v2
  radeonsi: add support for importing PIPE_FD_TYPE_SYNCOBJ semaphores
  radeonsi: implement semaphore_server_signal
  radeonsi: fix semaphore_server_sync() holding up extra work
  radeonsi: advertise support for GL_EXT_semaphore
  mesa: fix error codes for importing memory/semaphore FDs
  mesa: fix glGet for ext_external_objects parameters
  mesa: check for invalid index on UUID glGet queries

 src/gallium/auxiliary/pipebuffer/pb_buffer.h   |   6 +-
 .../auxiliary/pipebuffer/pb_buffer_fenced.c|  20 +-
 .../auxiliary/pipebuffer/pb_buffer_fenced.h|  14 +-
 .../auxiliary/pipebuffer/pb_buffer_malloc.c|   2 +-
 src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c |   2 +-
 src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c |   2 +-
 src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c|   2 +-
 .../auxiliary/pipebuffer/pb_bufmgr_ondemand.c  |   2 +-
 src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c  |   2 +-
 src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c  |   2 +-
 src/gallium/auxiliary/pipebuffer/pb_validate.c |   2 +-
 src/gallium/auxiliary/pipebuffer/pb_validate.h |   4 +-
 src/gallium/auxiliary/util/u_tests.c   |  55 ++---
 src/gallium/auxiliary/util/u_threaded_context.c|  57 --
 src/gallium/auxiliary/util/u_threaded_context.h|  10 +-
 .../auxiliary/util/u_threaded_context_calls.h  |   3 +-
 src/gallium/docs/source/context.rst|  65 --
 src/gallium/docs/source/screen.rst |   2 +
 src/gallium/drivers/ddebug/dd_draw.c   |  24 +--
 src/gallium/drivers/ddebug/dd_pipe.h   |   6 +-
 src/gallium/drivers/ddebug/dd_screen.c |  18 +-
 src/gallium/drivers/etnaviv/etnaviv_context.c  |   8 +-
 src/gallium/drivers/etnaviv/etnaviv_fence.c|  40 ++--
 src/gallium/drivers/etnaviv/etnaviv_fence.h|  15 +-
 src/gallium/drivers/etnaviv/etnaviv_screen.c   |   3 +-
 src/gallium/drivers/freedreno/freedreno_batch.c|   4 +-
 src/gallium/drivers/freedreno/freedreno_batch.h|   2 +-
 src/gallium/drivers/freedreno/freedreno_context.c  |  16 +-
 src/gallium/drivers/freedreno/freedreno_fence.c|  42 ++--
 src/gallium/drivers/freedreno/freedreno_fence.h|  28 +--
 src/gallium/drivers/freedreno/freedreno_gmem.c |   2 +-
 src/gallium/drivers/freedreno/freedreno_screen.c   |   9 +-
 src/gallium/drivers/i915/i915_batch.h  |   2 +-
 src/gallium/drivers/i915/i915_flush.c  |   4 +-
 src/gallium/drivers/i915/i915_screen.c |  21 +-
 src/gallium/drivers/i915/i915_winsys.h |  16 +-
 src/gallium/drivers/llvmpipe/lp_context.c  |   2 +-
 src/gallium/drivers/llvmpipe/lp_fence.h|   2 +-
 src/gallium/drivers/llvmpipe/lp_flush.c|   8 +-
 src/gallium/drivers/llvmpipe/lp_flush.h|   4 +-
 src/gallium/drivers/llvmpipe/lp_query.c|   2 +-
 src/gallium/drivers/llvmpipe/lp_scene.c|   4 +-
 src/gallium/drivers/llvmpipe/lp_screen.c   |  19 +-
 src/gallium/drivers/llvmpipe/lp_setup.c|  12 +-
 src/gallium/drivers/llvmpipe/lp_setup.h|   4 +-
 src/gallium/drivers/noop/noop_pipe.c   |  16 +-
 src/gallium/drivers/nouveau/nouveau_fence.h|   2 +-
 src/gallium/drivers/nouveau/nouveau_screen.c   |  12 +-
 src/gallium/drivers/nouveau/nv30/nv30_context.c|   2 +-
 src/gallium/drivers/nouveau/nv30/nv30_screen.c |   3 +-
 src/gallium/drivers/nouveau/nv50/nv50_context.c|   2 +-
 src/gallium/drivers/nouveau/nv50/nv50_screen.c |   3 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_context

Re: [Mesa-dev] Allocator Nouveau driver, Mesa EXT_external_objects, and DRM metadata import interfaces

2017-12-21 Thread Rob Clark
On Thu, Dec 21, 2017 at 3:36 AM, Daniel Vetter  wrote:
> On Thu, Dec 21, 2017 at 9:06 AM, James Jones  wrote:
>> However, making some assumptions, I suspect it's probably going to come down
>> to yes we can fit what we need in some number of bits marginally less than
>> 56 now, with the current use cases and hardware, but we're very concerned
>> about extensibility given the number has only ever grown in our HW, is
>> uncomfortably close to the limit if it isn't over it already, and it's been
>> demonstrated it takes a monumental effort to change the mechanism if it
>> isn't extensible.  While it's hard to change the mechanism one more time
>> now, better to change it to something truly extensible now because it will
>> be much, much harder to make such a change ~5 years from now in a world
>> where it's baked in to pervasively deployed Wayland and X protocol, the EGL
>> and Vulkan extensions have been defined for a few years and in use by apps
>> besides Wayland, and the allocator stuff is deployed on ~5 operating systems
>> that have some derivative version of DRM modifiers to support it and a bunch
>> of funky embedded apps using it.  Further, we're volunteering to handle the
>> bulk of the effort needed to make the change now, so I hope architectural
>> correctness and maintainability can be the primary points of debate.
>
> I think that's already happened. So no matter what we do, we're going
> to live with an ecosystem that uses modifiers all over the place in 5
> years. Even if it's not fully pervasive we will have to keep the
> support around for 10 years (at least on the kernel side).
>
> So the option is between revving the entire ecosystem now, or revving it
> in a few years when the current scheme has run out of steam for good.
> And I much prefer the 2nd option for the simple reason that by then
> the magic 8ball has gained another 5 years of clarity for looking into
> the future.

Drive-by comment (and disclaimer, haven't had a chance to read the rest of
the thread yet), but I think there is a reasonable path to increase the
modifier space to something like 2^^568 (minus the cases where
modifiers[0]==modifiers[1]==modifiers[2]==modifiers[3]).. (Yeah, yeah,
 I'm sure there is a 640k should be enough joke here somewhere)

Fortunately the modifiers array is currently at the end of 'struct
drm_mode_fb_cmd2', so there may be some other options to extend it as
well.  Possibly reserving the modifier value ~0 now might be a good
idea.

It does seem like, if possible, starting out with modifiers for now at
the kernel interface would make life easier, vs trying to reinvent
both kernel and userspace APIs at the same time.  Userspace APIs are
easier to change or throw away.  Presumably by the time we get to the
point of changing kernel uabi, we are already using, and pretty happy
with, serialized liballoc data over the wire in userspace so it is
only a matter of changing the kernel interface.

The downside of this is needing a per-driver userspace bit to map
liballoc to modifiers.  We kinda have this already in mesa, even for
the modesetting-only drivers that can be paired with a render-only
driver.

BR,
-R

> I think in the interim figuring out how to expose kms capabilities
> better (and necessarily standardizing at least some of them which
> matter at the compositor level, like size limits of framebuffers)
> feels like the place to push the ecosystem forward. In some way
> Miguel's proposal looks a bit backwards, since it adds the pitch
> capabilities to addfb, but at addfb time you've allocated everything
> already, so way too late to fix things up. With modifiers we've added
> a very simple per-plane property to list which modifiers can be
> combined with which pixel formats. Tiny start, but obviously very far
> from all that we'll need.
> -Daniel
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] radv: handle depth/stencil image copy with layouts better. (v3)

2017-12-21 Thread Bas Nieuwenhuizen
Just a stylistic nit.

On Thu, Dec 21, 2017 at 8:25 AM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> If we are doing a general->general transfer with HIZ enabled,
> we want to hit the tile surface disable bits in radv_emit_fb_ds_state,
> however we never get the current layout to know we are in general
> and meta hardcoded the transfer layout which is always tile enabled.
>
> This fixes:
> dEQP-VK.api.copy_and_blit.core.image_to_image.all_formats.depth_stencil.d32_sfloat_s8_uint_d32_sfloat_s8_uint.optimal_general
> dEQP-VK.api.copy_and_blit.core.image_to_image.all_formats.depth_stencil.d32_sfloat_s8_uint_d32_sfloat_s8_uint.general_general
>
> v2: refactor some shared helpers for blit patches
> v3: we only need multiple render passes as they should be compatible.
>
> Signed-off-by: Dave Airlie 
> ---
>  src/amd/vulkan/radv_meta.h|   1 +
>  src/amd/vulkan/radv_meta_blit2d.c | 130 
> --
>  src/amd/vulkan/radv_meta_copy.c   |  20 +-
>  src/amd/vulkan/radv_private.h |  18 +-
>  4 files changed, 104 insertions(+), 65 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_meta.h b/src/amd/vulkan/radv_meta.h
> index d10ec99413..3edf5fa646 100644
> --- a/src/amd/vulkan/radv_meta.h
> +++ b/src/amd/vulkan/radv_meta.h
> @@ -109,6 +109,7 @@ struct radv_meta_blit2d_surf {
> unsigned level;
> unsigned layer;
> VkImageAspectFlags aspect_mask;
> +   VkImageLayout current_layout;
>  };
>
>  struct radv_meta_blit2d_buffer {
> diff --git a/src/amd/vulkan/radv_meta_blit2d.c 
> b/src/amd/vulkan/radv_meta_blit2d.c
> index 08a1bae7c6..31570656ef 100644
> --- a/src/amd/vulkan/radv_meta_blit2d.c
> +++ b/src/amd/vulkan/radv_meta_blit2d.c
> @@ -278,10 +278,11 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer 
> *cmd_buffer,
>
> bind_pipeline(cmd_buffer, src_type, fs_key);
> } else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
> +   int idx = 
> radv_meta_blit_ds_to_type(dst->current_layout);
> 
> radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
> 
> &(VkRenderPassBeginInfo) {
> .sType = 
> VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
> -   
> .renderPass = device->meta_state.blit2d.depth_only_rp,
> +   
> .renderPass = device->meta_state.blit2d.depth_only_rp[idx],
> 
> .framebuffer = dst_temps.fb,
> 
> .renderArea = {
> 
> .offset = { rects[r].dst_x, rects[r].dst_y, },
> @@ -295,10 +296,11 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer 
> *cmd_buffer,
> bind_depth_pipeline(cmd_buffer, src_type);
>
> } else if (aspect_mask == 
> VK_IMAGE_ASPECT_STENCIL_BIT) {
> +   int idx = 
> radv_meta_blit_ds_to_type(dst->current_layout);
> 
> radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
> 
> &(VkRenderPassBeginInfo) {
> .sType = 
> VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
> -   
> .renderPass = device->meta_state.blit2d.stencil_only_rp,
> +   
> .renderPass = device->meta_state.blit2d.stencil_only_rp[idx],
> 
> .framebuffer = dst_temps.fb,
> 
> .renderArea = {
> 
> .offset = { rects[r].dst_x, rects[r].dst_y, },
> @@ -614,10 +616,12 @@ radv_device_finish_meta_blit2d_state(struct radv_device 
> *device)
>&state->alloc);
> }
>
> -   radv_DestroyRenderPass(radv_device_to_handle(device),
> -  state->blit2d.depth_only_rp, &state->alloc);
> -   radv_DestroyRenderPass(radv_device_to_handle(device),
> -  state->blit2d.stencil_only_rp, &state->alloc);
> +   for (unsigned j = 0; j < RADV_BLIT_DS_NUM_LAYOUTS; j++) {
> +   radv_DestroyRenderPass(radv_device_to_handle(device),
> +  state->blit2d.depth_only_rp[j], 
> &state->alloc);
> +   radv_DestroyRenderPass(radv_device_to_handle(device),
> +  

Re: [Mesa-dev] [PATCH 1/4] radv: fix surface max layer count

2017-12-21 Thread Bas Nieuwenhuizen
On Thu, Dec 21, 2017 at 2:50 AM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> looking at traces I noticed we'd set slice_max too large sometimes.

Too small? Otherwise patch 1,3,4 are also

Reviewed-by: Bas Nieuwenhuizen 

>
> This should fix it.
>
> Signed-off-by: Dave Airlie 
> ---
>  src/amd/vulkan/radv_device.c | 14 +++---
>  1 file changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index 5148828..9ea2b92 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -3001,9 +3001,9 @@ si_tile_mode_index(const struct radv_image *image, 
> unsigned level, bool stencil)
> return image->surface.u.legacy.tiling_index[level];
>  }
>
> -static uint32_t radv_surface_layer_count(struct radv_image_view *iview)
> +static uint32_t radv_surface_max_layer_count(struct radv_image_view *iview)
>  {
> -   return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : 
> iview->layer_count;
> +   return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : 
> (iview->base_layer + iview->layer_count);
>  }
>
>  static void
> @@ -3084,9 +3084,9 @@ radv_initialise_color_surface(struct radv_device 
> *device,
> cb->cb_dcc_base = va >> 8;
> cb->cb_dcc_base |= iview->image->surface.tile_swizzle;
>
> -   uint32_t max_slice = radv_surface_layer_count(iview);
> +   uint32_t max_slice = radv_surface_max_layer_count(iview);
> cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
> -   S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
> +   S_028C6C_SLICE_MAX(max_slice);
>
> if (iview->image->info.samples > 1) {
> unsigned log_samples = 
> util_logbase2(iview->image->info.samples);
> @@ -3231,9 +3231,9 @@ radv_initialise_ds_surface(struct radv_device *device,
> stencil_format = iview->image->surface.has_stencil ?
> V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
>
> -   uint32_t max_slice = radv_surface_layer_count(iview);
> +   uint32_t max_slice = radv_surface_max_layer_count(iview);
> ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
> -   S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
> +   S_028008_SLICE_MAX(max_slice);
>
> ds->db_htile_data_base = 0;
> ds->db_htile_surface = 0;
> @@ -3397,7 +3397,7 @@ VkResult radv_CreateFramebuffer(
> }
> framebuffer->width = MIN2(framebuffer->width, 
> iview->extent.width);
> framebuffer->height = MIN2(framebuffer->height, 
> iview->extent.height);
> -   framebuffer->layers = MIN2(framebuffer->layers, 
> radv_surface_layer_count(iview));
> +   framebuffer->layers = MIN2(framebuffer->layers, 
> radv_surface_max_layer_count(iview));
> }
>
> *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
> --
> 2.9.5
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/4] radv/gfx9: add 3d sampler image->buffer copy shader. (v3)

2017-12-21 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Thu, Dec 21, 2017 at 2:50 AM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> On GFX9 we must access 3D textures with 3D samplers AFAICS.
>
> This fixes:
> dEQP-VK.api.image_clearing.core.clear_color_image.3d.single_layer
>
> on GFX9 for me.
>
> v1.1: fix tex->sampler_dim to dim
> v2: send layer in from outside
> v3: don't regress on pre-gfx9
>
> Fixes: e38685cc62e 'Revert "radv: disable support for VEGA for now."'
> Signed-off-by: Dave Airlie 
> ---
>  src/amd/vulkan/radv_meta_bufimage.c | 76 
> -
>  src/amd/vulkan/radv_private.h   |  1 +
>  2 files changed, 59 insertions(+), 18 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_meta_bufimage.c 
> b/src/amd/vulkan/radv_meta_bufimage.c
> index dfd99aa..a1e67b6 100644
> --- a/src/amd/vulkan/radv_meta_bufimage.c
> +++ b/src/amd/vulkan/radv_meta_bufimage.c
> @@ -29,11 +29,15 @@
>   * Compute queue: implementation also of buffer->image, image->image, and 
> image clear.
>   */
>
> +/* GFX9 needs to use a 3D sampler to access 3D resources, so the shader has 
> the options
> + * for that.
> + */
>  static nir_shader *
> -build_nir_itob_compute_shader(struct radv_device *dev)
> +build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)
>  {
> nir_builder b;
> -   const struct glsl_type *sampler_type = 
> glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
> +   enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : 
> GLSL_SAMPLER_DIM_2D;
> +   const struct glsl_type *sampler_type = glsl_sampler_type(dim,
>  false,
>  false,
>  
> GLSL_TYPE_FLOAT);
> @@ -42,7 +46,7 @@ build_nir_itob_compute_shader(struct radv_device *dev)
>  false,
>  GLSL_TYPE_FLOAT);
> nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
> -   b.shader->info.name = ralloc_strdup(b.shader, "meta_itob_cs");
> +   b.shader->info.name = ralloc_strdup(b.shader, is_3d ? 
> "meta_itob_cs_3d" : "meta_itob_cs");
> b.shader->info.cs.local_size[0] = 16;
> b.shader->info.cs.local_size[1] = 16;
> b.shader->info.cs.local_size[2] = 1;
> @@ -69,32 +73,31 @@ build_nir_itob_compute_shader(struct radv_device *dev)
>
> nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, 
> nir_intrinsic_load_push_constant);
> nir_intrinsic_set_base(offset, 0);
> -   nir_intrinsic_set_range(offset, 12);
> +   nir_intrinsic_set_range(offset, 16);
> offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
> -   offset->num_components = 2;
> -   nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
> +   offset->num_components = is_3d ? 3 : 2;
> +   nir_ssa_dest_init(&offset->instr, &offset->dest, is_3d ? 3 : 2, 32, 
> "offset");
> nir_builder_instr_insert(&b, &offset->instr);
>
> nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, 
> nir_intrinsic_load_push_constant);
> nir_intrinsic_set_base(stride, 0);
> -   nir_intrinsic_set_range(stride, 12);
> -   stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
> +   nir_intrinsic_set_range(stride, 16);
> +   stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
> stride->num_components = 1;
> nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
> nir_builder_instr_insert(&b, &stride->instr);
>
> nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
> -
> nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
> -   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
> +   tex->sampler_dim = dim;
> tex->op = nir_texop_txf;
> tex->src[0].src_type = nir_tex_src_coord;
> -   tex->src[0].src = nir_src_for_ssa(nir_channels(&b, img_coord, 0x3));
> +   tex->src[0].src = nir_src_for_ssa(nir_channels(&b, img_coord, is_3d ? 
> 0x7 : 0x3));
> tex->src[1].src_type = nir_tex_src_lod;
> tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
> tex->dest_type = nir_type_float;
> tex->is_array = false;
> -   tex->coord_components = 2;
> +   tex->coord_components = is_3d ? 3 : 2;
> tex->texture = nir_deref_var_create(tex, input_img);
> tex->sampler = NULL;
>
> @@ -126,8 +129,11 @@ radv_device_init_meta_itob_state(struct radv_device 
> *device)
>  {
> VkResult result;
> struct radv_shader_module cs = { .nir = NULL };
> +   struct radv_shader_module cs_3d = { .nir = NULL };
>
> -   cs.nir = build_nir_itob_compute_shader(device);
> +   cs.nir = build_nir_itob_compute_shader(device, false);
> +   if (device->physical_device->rad_info.chip_class >= GFX9)
> +

Re: [Mesa-dev] [PATCH 2/2] amd/common: add ac_export_mrt_z() helper

2017-12-21 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

for the series.

On Thu, Dec 21, 2017 at 5:53 PM, Samuel Pitoiset
 wrote:
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_nir_to_llvm.c  | 64 +++-
>  src/amd/common/ac_shader_util.c  | 72 
> 
>  src/amd/common/ac_shader_util.h  |  6 +++
>  src/gallium/drivers/radeonsi/si_shader.c | 61 +--
>  4 files changed, 84 insertions(+), 119 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 93624ee419..a90dd9f0a5 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -6309,67 +6309,13 @@ si_export_mrt_color(struct nir_to_llvm_context *ctx,
>  }
>
>  static void
> -si_export_mrt_z(struct nir_to_llvm_context *ctx,
> -   LLVMValueRef depth, LLVMValueRef stencil,
> -   LLVMValueRef samplemask)
> +radv_export_mrt_z(struct nir_to_llvm_context *ctx,
> + LLVMValueRef depth, LLVMValueRef stencil,
> + LLVMValueRef samplemask)
>  {
> struct ac_export_args args;
>
> -   args.enabled_channels = 0;
> -   args.valid_mask = 1;
> -   args.done = 1;
> -   args.target = V_008DFC_SQ_EXP_MRTZ;
> -   args.compr = false;
> -
> -   args.out[0] = LLVMGetUndef(ctx->ac.f32); /* R, depth */
> -   args.out[1] = LLVMGetUndef(ctx->ac.f32); /* G, stencil test val[0:7], 
> stencil op val[8:15] */
> -   args.out[2] = LLVMGetUndef(ctx->ac.f32); /* B, sample mask */
> -   args.out[3] = LLVMGetUndef(ctx->ac.f32); /* A, alpha to mask */
> -
> -   unsigned format = ac_get_spi_shader_z_format(depth != NULL,
> -stencil != NULL,
> -samplemask != NULL);
> -
> -   if (format == V_028710_SPI_SHADER_UINT16_ABGR) {
> -   assert(!depth);
> -   args.compr = 1; /* COMPR flag */
> -
> -   if (stencil) {
> -   /* Stencil should be in X[23:16]. */
> -   stencil = ac_to_integer(&ctx->ac, stencil);
> -   stencil = LLVMBuildShl(ctx->builder, stencil,
> -  LLVMConstInt(ctx->ac.i32, 16, 
> 0), "");
> -   args.out[0] = ac_to_float(&ctx->ac, stencil);
> -   args.enabled_channels |= 0x3;
> -   }
> -   if (samplemask) {
> -   /* SampleMask should be in Y[15:0]. */
> -   args.out[1] = samplemask;
> -   args.enabled_channels |= 0xc;
> -   }
> -   } else {
> -   if (depth) {
> -   args.out[0] = depth;
> -   args.enabled_channels |= 0x1;
> -   }
> -
> -   if (stencil) {
> -   args.out[1] = stencil;
> -   args.enabled_channels |= 0x2;
> -   }
> -
> -   if (samplemask) {
> -   args.out[2] = samplemask;
> -   args.enabled_channels |= 0x4;
> -   }
> -   }
> -
> -   /* SI (except OLAND and HAINAN) has a bug that it only looks
> -* at the X writemask component. */
> -   if (ctx->options->chip_class == SI &&
> -   ctx->options->family != CHIP_OLAND &&
> -   ctx->options->family != CHIP_HAINAN)
> -   args.enabled_channels |= 0x1;
> +   ac_export_mrt_z(&ctx->ac, depth, stencil, samplemask, &args);
>
> ac_build_export(&ctx->ac, &args);
>  }
> @@ -6417,7 +6363,7 @@ handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
> for (unsigned i = 0; i < index; i++)
> ac_build_export(&ctx->ac, &color_args[i]);
> if (depth || stencil || samplemask)
> -   si_export_mrt_z(ctx, depth, stencil, samplemask);
> +   radv_export_mrt_z(ctx, depth, stencil, samplemask);
> else if (!index) {
> si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true, 
> &color_args[0]);
> ac_build_export(&ctx->ac, &color_args[0]);
> diff --git a/src/amd/common/ac_shader_util.c b/src/amd/common/ac_shader_util.c
> index 12f86dc677..531395f4f6 100644
> --- a/src/amd/common/ac_shader_util.c
> +++ b/src/amd/common/ac_shader_util.c
> @@ -22,7 +22,10 @@
>   */
>
>  #include 
> +#include 
> +#include 
>
> +#include "ac_nir_to_llvm.h"
>  #include "ac_shader_util.h"
>  #include "sid.h"
>
> @@ -105,3 +108,72 @@ ac_vgt_gs_mode(unsigned gs_max_vert_out, enum chip_class 
> chip_class)
>S_028A40_GS_WRITE_OPTIMIZE(1) |
>S_028A40_ONCHIP(chip_class >= GFX9 ? 1 : 0);
>  }
> +
> +void
> +ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth,
> +   LLVMValueRef stencil, LLVMValueRef samplemask,
> +   struct ac_export_args *a

Re: [Mesa-dev] [PATCH] radv: reduce the number of small surfaces that need CMASK or DCC

2017-12-21 Thread Bas Nieuwenhuizen
Assuming you tested with vega,

Reviewed-by: Bas Nieuwenhuizen 

On Thu, Dec 21, 2017 at 5:45 PM, Samuel Pitoiset
 wrote:
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_image.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
> index efd17e4889..15410f140e 100644
> --- a/src/amd/vulkan/radv_image.c
> +++ b/src/amd/vulkan/radv_image.c
> @@ -813,7 +813,7 @@ static inline bool
>  radv_image_can_enable_dcc_or_cmask(struct radv_image *image)
>  {
> if (image->info.samples <= 1 &&
> -   image->info.width <= 512 && image->info.height <= 512) {
> +   image->info.width * image->info.height <= 512 * 512) {
> /* Do not enable CMASK or DCC for small surfaces where the 
> cost
>  * of the eliminate pass can be higher than the benefit of 
> fast
>  * clear. RadeonSI does this, but the image threshold is
> --
> 2.15.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] spirv: avoid infinite loop / freeze in vtn_cfg_walk_blocks()

2017-12-21 Thread Mark Janes
This patch doesn't apply to master as formatted.

I've reverted the bisected commit, since it disables testing on master.

Eero Tamminen  writes:

> Fixes: 9702fac68e (spirv: consider bitsize when handling OpSwitch cases)
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104359
> ---
>   src/compiler/spirv/vtn_cfg.c | 14 +++---
>   1 file changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/src/compiler/spirv/vtn_cfg.c b/src/compiler/spirv/vtn_cfg.c
> index 9c4cbe2..3d5de37 100644
> --- a/src/compiler/spirv/vtn_cfg.c
> +++ b/src/compiler/spirv/vtn_cfg.c
> @@ -549,19 +549,19 @@ vtn_cfg_walk_blocks(struct vtn_builder *b, struct 
> list_head *cf_list,
>   struct vtn_block *case_block =
>  vtn_value(b, *w, vtn_value_type_block)->block;
>
> -if (case_block == break_block)
> -   continue;
> -
> -vtn_assert(case_block->switch_case);
> -
> -vtn_order_case(swtch, case_block->switch_case);
> -
>   if (bitsize <= 32) {
>  w += 2;
>   } else {
>  assert(bitsize == 64);
>  w += 3;
>   }
> +
> +if (case_block == break_block)
> +   continue;
> +
> +vtn_assert(case_block->switch_case);
> +
> +vtn_order_case(swtch, case_block->switch_case);
>}
>
>enum vtn_branch_type branch_type =
> --
> 2.7.4
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] intel/fs/bank_conflicts: Use posix_memalign() instead of overaligned new to obtain vector storage.

2017-12-21 Thread Ian Romanick
Series is

Reviewed-by: Ian Romanick 

On 12/18/2017 03:26 PM, Francisco Jerez wrote:
> The weight_vector_type constructor was inadvertently assuming C++17
> semantics of the new operator applied on a type with alignment
> requirement greater than the largest fundamental alignment.
> Unfortunately on earlier C++ dialects the implementation was allowed
> to raise an allocation failure when the alignment requirement of the
> allocated type was unsupported, in an implementation-defined fashion.
> It's expected that a C++ implementation recent enough to implement
> P0035R4 would have honored allocation requests for such over-aligned
> types even if the C++17 dialect wasn't active, which is likely the
> reason why this problem wasn't caught by our CI system.
> 
> A more elegant fix would involve wrapping the __SSE2__ block in a
> '__cpp_aligned_new >= 201606' preprocessor conditional and continue
> taking advantage of the language feature, but that would yield lower
> compile-time performance on old compilers not implementing it
> (e.g. GCC versions older than 7.0).
> 
> Fixes: af2c320190f3c731 "intel/fs: Implement GRF bank conflict mitigation 
> pass."
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104226
> Reported-by: Józef Kucia 
> ---
>  src/intel/compiler/brw_fs_bank_conflicts.cpp | 22 --
>  1 file changed, 16 insertions(+), 6 deletions(-)
> 
> diff --git a/src/intel/compiler/brw_fs_bank_conflicts.cpp 
> b/src/intel/compiler/brw_fs_bank_conflicts.cpp
> index 0cd880d44f2..e87fcbfc5eb 100644
> --- a/src/intel/compiler/brw_fs_bank_conflicts.cpp
> +++ b/src/intel/compiler/brw_fs_bank_conflicts.cpp
> @@ -277,13 +277,10 @@ namespace {
> struct weight_vector_type {
>weight_vector_type() : v(NULL), size(0) {}
>  
> -  weight_vector_type(unsigned n) :
> - v(new vector_type[DIV_ROUND_UP(n, vector_width)]()),
> - size(n) {}
> +  weight_vector_type(unsigned n) : v(alloc(n)), size(n) {}
>  
>weight_vector_type(const weight_vector_type &u) :
> - v(new vector_type[DIV_ROUND_UP(u.size, vector_width)]()),
> - size(u.size)
> + v(alloc(u.size)), size(u.size)
>{
>   memcpy(v, u.v,
>  DIV_ROUND_UP(u.size, vector_width) * sizeof(vector_type));
> @@ -291,7 +288,7 @@ namespace {
>  
>~weight_vector_type()
>{
> - delete[] v;
> + free(v);
>}
>  
>weight_vector_type &
> @@ -304,6 +301,19 @@ namespace {
>  
>vector_type *v;
>unsigned size;
> +
> +   private:
> +  static vector_type *
> +  alloc(unsigned n)
> +  {
> + const unsigned align = MAX2(sizeof(void *), 
> __alignof__(vector_type));
> + const unsigned size = DIV_ROUND_UP(n, vector_width) * 
> sizeof(vector_type);
> + void *p;
> + if (posix_memalign(&p, align, size))
> +return NULL;
> + memset(p, 0, size);
> + return reinterpret_cast<vector_type *>(p);
> +  }
> };
>  
> /**
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] radv: gfx9 3d image fixes

2017-12-21 Thread Dave Airlie
On 21 December 2017 at 19:52, Alex Smith  wrote:
> Nice - this does fix the issue I was seeing, thanks.
>
> Can at least patches 2 and 3 go to stable?

Yes once I get some review I'll tag those two for stable.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] spirv: avoid infinite loop / freeze in vtn_cfg_walk_blocks()

2017-12-21 Thread Jason Ekstrand

Rb


On December 21, 2017 08:05:24 Lionel Landwerlin 
 wrote:



Reviewed-by: Lionel Landwerlin 

On 21/12/17 13:53, Eero Tamminen wrote:

Fixes: 9702fac68e (spirv: consider bitsize when handling OpSwitch cases)
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104359
---
 src/compiler/spirv/vtn_cfg.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/compiler/spirv/vtn_cfg.c b/src/compiler/spirv/vtn_cfg.c
index 9c4cbe2..3d5de37 100644
--- a/src/compiler/spirv/vtn_cfg.c
+++ b/src/compiler/spirv/vtn_cfg.c
@@ -549,19 +549,19 @@ vtn_cfg_walk_blocks(struct vtn_builder *b,
struct list_head *cf_list,
 struct vtn_block *case_block =
    vtn_value(b, *w, vtn_value_type_block)->block;

-    if (case_block == break_block)
-   continue;
-
-    vtn_assert(case_block->switch_case);
-
-    vtn_order_case(swtch, case_block->switch_case);
-
 if (bitsize <= 32) {
    w += 2;
 } else {
    assert(bitsize == 64);
    w += 3;
 }
+
+    if (case_block == break_block)
+   continue;
+
+    vtn_assert(case_block->switch_case);
+
+    vtn_order_case(swtch, case_block->switch_case);
  }

  enum vtn_branch_type branch_type =
--
2.7.4
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 104351] X Error of failed request: BadAlloc (insufficient resources for operation)

2017-12-21 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104351

--- Comment #2 from Breno Souza  ---
Same error on fresh install. I have installed the following packages:

base, base-devel, gnome, gnome-extra, freeglut, qt5-base, cower, pacaur,
google-chrome (from aur), mesa-demos

I have also lost access to mesa 17.2.6-1 since I didn't save it from my
previous installation.

As for $ LD_DEBUG=libs glxinfo, the binaries are located under /usr/lib. It halted
on this line:

 10721: calling init: /usr/lib/libnss_files.so.2
 10721:

I didn't "make install" anything; maybe pacaur did. I don't know what to check
in the disowned list.

I forgot to mention that I use a Radeon HD 7850. Could it still be something in
my setup? Is this distro-related?

Thanks for your time,
Breno Souza

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCHv2] intel/fs: Optimize and simplify the copy propagation dataflow logic.

2017-12-21 Thread Francisco Jerez
Eero Tamminen  writes:

> Hi,
>
> I tested this on HSW GT2, BXT & SKL GT3e, and didn't see any significant 
> regressions this time.  I'll try it also on a machine with smaller 
> variance than those (now that it became free), and send a note if that 
> does show something.
>

You're not expected to be able to see any performance changes with this
revision except in test-cases bound to the shader compilation
performance (e.g. SynMark OglDrvShComp), and only if they do complex
enough CFGs for the quadratic factor described below to be significant.

>   - Eero
>
> On 20.12.2017 21:27, Francisco Jerez wrote:
>> Previously the dataflow propagation algorithm would calculate the ACP
>> live-in and -out sets in a two-pass fixed-point algorithm.  The first
>> pass would update the live-out sets of all basic blocks of the program
>> based on their live-in sets, while the second pass would update the
>> live-in sets based on the live-out sets.  This is incredibly
>> inefficient in the typical case where the CFG of the program is
>> approximately acyclic, because it can take up to 2*n passes for an ACP
>> entry introduced at the top of the program to reach the bottom (where
>> n is the number of basic blocks in the program), until which point the
>> algorithm won't be able to reach a fixed point.
>> 
>> The same effect can be achieved in a single pass by computing the
>> live-in and -out sets in lock-step, because that makes sure that
>> processing of any basic block will pick up the updated live-out sets
>> of the lexically preceding blocks.  This gives the dataflow
>> propagation algorithm effectively O(n) run-time instead of O(n^2) in
>> the acyclic case.
>> 
>> The time spent in dataflow propagation is reduced by 30x in the
>> GLES31.functional.ssbo.layout.random.all_shared_buffer.5 dEQP
>> test-case on my CHV system (the improvement is likely to be of the
>> same order of magnitude on other platforms).  This more than reverses
>> an apparent run-time regression in this test-case from my previous
>> copy-propagation undefined-value handling patch, which was ultimately
>> caused by the additional work introduced in that commit to account for
>> undefined values being multiplied by a huge quadratic factor.
>> 
>> According to Chad this test was failing on CHV due to a 30s time-out
>> imposed by the Android CTS (this was the case regardless of my
>> undefined-value handling patch, even though my patch substantially
>> exacerbated the issue).  On my CHV system this patch reduces the
>> overall run-time of the test by approximately 12x, getting us to
>> around 13s, well below the time-out.
>> 
>> v2: Initialize live-out set to the universal set to avoid rather
>>  pessimistic dataflow estimation in shaders with cycles (Addresses
>>  performance regression reported by Eero in GpuTest Piano).
>>  Performance numbers given above still apply.  No shader-db changes
>>  with respect to master.
>> 
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104271
>> Reported-by: Chad Versace 
>> ---
>>   src/intel/compiler/brw_fs_copy_propagation.cpp | 35 
>> --
>>   1 file changed, 11 insertions(+), 24 deletions(-)
>> 
>> diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp 
>> b/src/intel/compiler/brw_fs_copy_propagation.cpp
>> index af5635eacef..92cc0a8de58 100644
>> --- a/src/intel/compiler/brw_fs_copy_propagation.cpp
>> +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
>> @@ -186,8 +186,7 @@ fs_copy_prop_dataflow::setup_initial_values()
>>   
>>  /* Populate the initial values for the livein and liveout sets.  For the
>>   * block at the start of the program, livein = 0 and liveout = copy.
>> -* For the others, set liveout to 0 (the empty set) and livein to ~0
>> -* (the universal set).
>> +* For the others, set liveout and livein to ~0 (the universal set).
>>   */
>>  foreach_block (block, cfg) {
>> if (block->parents.is_empty()) {
>> @@ -197,7 +196,7 @@ fs_copy_prop_dataflow::setup_initial_values()
>>}
>> } else {
>>for (int i = 0; i < bitset_words; i++) {
>> -bd[block->num].liveout[i] = 0u;
>> +bd[block->num].liveout[i] = ~0u;
>>   bd[block->num].livein[i] = ~0u;
>>}
>> }
>> @@ -228,34 +227,17 @@ fs_copy_prop_dataflow::run()
>>  do {
>> progress = false;
>>   
>> -  /* Update liveout for all blocks. */
>> foreach_block (block, cfg) {
>>if (block->parents.is_empty())
>>   continue;
>>   
>>for (int i = 0; i < bitset_words; i++) {
>>   const BITSET_WORD old_liveout = bd[block->num].liveout[i];
>> -
>> -bd[block->num].liveout[i] =
>> -   bd[block->num].copy[i] | (bd[block->num].livein[i] &
>> - ~bd[block->num].kill[i]);
>> -
>> -if (old_liveout != bd[block->num].liveout[i])
>> -   progress

Re: [Mesa-dev] [RFC libdrm 0/5] Move alloc_handle_t from gralloc impls.

2017-12-21 Thread Rob Herring
On Wed, Dec 13, 2017 at 5:02 PM, Gurchetan Singh
 wrote:
> Hi Robert,
>
> Thanks for looking into this!  We need to decide if we want:
>
> (1) A common struct that implementations can subclass, i.e:
>
> struct blah_gralloc_handle {
> alloc_handle_t alloc_handle;
> int x, y, z;
> 
> }
>
> (2) An accessor library that vendors can implement, i.e:
>
> struct drmAndroidHandleInfo {
>uint32_t (*get_fourcc)(buffer_handle_t handle);
>uint32_t (*get_stride)(buffer_handle_t handle, uint32_t plane);
>uint32_t (*get_offsets)(buffer_handle_t handle, uint32_t plane);
>uint64_t (*get_modifier)(buffer_handle_t handle);
> };
>
> From my perspective as someone who has to maintain the minigbm gralloc
> implementation, (2) is preferable since:

Yeah, I'd prefer not to encourage 1 as the default.

> a) We really don't have a need for fields like data_owner, void *data, etc.

We should be able to get rid of this. It's just for tracking imports.

> Also, minigbm puts per plane fds, strides, offsets into the handle.
> Separating the information for the first plane (for the alloc_handle_t) and
> then rest of the planes would be annoying.

The plan is to add those to alloc_handle_t.

> b) we can avoid the struct within a struct that happens when we subclass,
> since alignment/padding issues often pop up during
> serialization/de-serialization.  Using __attribute__((aligned(xx))) is less
> portable than maintaining a POD struct.

Yes. Even just between 32 and 64 bit it's problematic.

> c) IMO creating the handle should be left to the gralloc implementation.
> Having accessor functions clearly defines what we need from libdrm -- to
> make up for shortcomings of the gralloc API for DRM/KMS use cases.
>
>
> On Wed, Dec 13, 2017 at 9:30 AM, Robert Foss 
> wrote:
>>
>> This series moves {gbm,drm,cros}_gralloc_handle_t struct to libdrm,
>> since at least 4 implementations exist, and share a lot of contents.
>> The idea is to keep the common stuff defined in one place, and libdrm
>> is the common codebase to all of these platforms.
>>
>> Additionally, having this struct defined in libdrm will make it
>> easier for mesa and grallocs to communicate.
>>
>> Currently missing is:
>>  - Planar formats
>>  - Get/Set functions
>>
>>
>> Planar formats
>> --
>> Support for planar formats is needed, but has not been added
>> yet, mostly since this was not already implemented in {gbm,drm}_gralloc
>> and the fact that having at least initial backwards compatibility would
>> be nice. Anonymous unions can of course be used later on to provide
>> backwards compatibility if so desired.
>>
>>
>> Get/Set functions
>> -
>> During the previous discussion[1] one suggestion was to add accessor
>> functions. In this RFC I've only provided an alloc_handle_create()
>> function.
>>
>> The Get/Set functions have not been added yet, I was hoping for some
>> conclusive arguments for them being added.
>>
>> Lastly it was suggested by Rob Herring that having a fourcc<->android
>> pixel format conversion function would be useful.
>>
>>
>> [1]
>> https://lists.freedesktop.org/archives/mesa-dev/2017-November/178199.html
>>
>> Robert Foss (5):
>>   android: Move gralloc handle struct to libdrm
>>   android: Add version variable to alloc_handle_t
>>   android: Mark alloc_handle_t magic variable as const
>>   android: Remove member name from alloc_handle_t
>>   android: Change alloc_handle_t format from Android format to fourcc
>>
>>  Android.mk   |  8 +++-
>>  Makefile.sources |  3 ++
>>  android/alloc_handle.h   | 87
>> 
>>  android/gralloc_drm_handle.h |  1 +
>>  4 files changed, 97 insertions(+), 2 deletions(-)
>>  create mode 100644 android/alloc_handle.h
>>  create mode 12 android/gralloc_drm_handle.h
>>
>> --
>> 2.14.1
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [ANNOUNCE] mesa 17.3.1

2017-12-21 Thread Emil Velikov
Mesa 17.3.1 is now available.

In this release we have:

Multiple fixes and improvements of the GLSL shader cache. The RADV driver
no longer advertises VK_EXT_debug_report - there is no support for it.

The i965, radeonsi, nvc0 and freedreno drivers have received a few small
fixes each.

Last but not least - a number of big endian fixes have been merged.


Alex Smith (1):
  radv: Add LLVM version to the device name string

Bas Nieuwenhuizen (3):
  spirv: Fix loading an entire block at once.
  radv: Don't advertise VK_EXT_debug_report.
  radv: Fix multi-layer blits.

Ben Crocker (1):
  docs/llvmpipe: document ppc64le as alternative architecture to x86.

Brian Paul (2):
  xlib: call _mesa_warning() instead of fprintf()
  gallium/aux: include nr_samples in util_resource_size() computation

Bruce Cherniak (1):
  swr: Fix KNOB_MAX_WORKER_THREADS thread creation override.

Dave Airlie (1):
  radv: port merge tess info from anv

Emil Velikov (6):
  docs: add sha256 checksums for 17.3.0
  util: scons: wire up the sha1 test
  cherry-ignore: meson: fix strtof locale support check
  cherry-ignore: util: add mesa-sha1 test to meson
  Update version to 17.3.1
  docs: add release notes for 17.3.1

Eric Anholt (1):
  broadcom/vc4: Fix handling of GFXH-515 workaround with a start
vertex count.

Eric Engestrom (1):
  compiler: use NDEBUG to guard asserts

Fabian Bieler (2):
  glsl: Match order of gl_LightSourceParameters elements.
  glsl: Fix gl_NormalScale.

Gert Wollny (1):
  r600/sb: do not convert if-blocks that contain indirect array access

James Legg (1):
  nir/opcodes: Fix constant-folding of bitfield_insert

Jason Ekstrand (1):
  i965: Switch over to fully external-or-not MOCS scheme

Juan A. Suarez Romero (1):
  travis: disable Meson build

Kenneth Graunke (2):
  meta: Initialize depth/clear values on declaration.
  meta: Fix ClearTexture with GL_DEPTH_COMPONENT.

Leo Liu (1):
  radeon/vce: move destroy command before feedback command

Marek Olšák (4):
  radeonsi: flush the context after resource_copy_region for buffer exports
  radeonsi: allow DMABUF exports for local buffers
  winsys/amdgpu: disable local BOs again due to worse performance
  radeonsi: don't call force_dcc_off for buffers

Matt Turner (2):
  util: Assume little endian in the absence of platform-specific handling
  util: Add a SHA1 unit test program

Nicolai Hähnle (1):
  radeonsi: fix the R600_RESOURCE_FLAG_UNMAPPABLE check

Pierre Moreau (1):
  nvc0/ir: Properly lower 64-bit shifts when the shift value is >32

Timothy Arceri (1):
  glsl: get correct member type when processing xfb ifc arrays

Vadym Shovkoplias (2):
  glx/dri3: Remove unused deviceName variable
  util/disk_cache: Remove unneeded free() on always null string

git tag: mesa-17.3.1

https://mesa.freedesktop.org/archive/mesa-17.3.1.tar.gz
MD5:  9e0a9337ff81031cdb77482074568a0e  mesa-17.3.1.tar.gz
SHA1: e348e35cb39c1d87150107e976278b5f4c077e70  mesa-17.3.1.tar.gz
SHA256: b0bb0419dbe3043ed4682a28eaf95721f427ca3f23a3c2a7dc77dbe8a3b6384d
 mesa-17.3.1.tar.gz
SHA512: 
2340d1699f4e09bc94caee153c3f80197340639d2ade8eda7ff33492d1d7d83c7ae93b2495d9748f09b343c96512ccb904dfd86fea3866c8e073ff88f0422e5d
 mesa-17.3.1.tar.gz
PGP:  https://mesa.freedesktop.org/archive/mesa-17.3.1.tar.gz.sig

https://mesa.freedesktop.org/archive/mesa-17.3.1.tar.xz
MD5:  b4b021279ea21e1eb8a1369afa6f19a0  mesa-17.3.1.tar.xz
SHA1: 1bda76199addb01946e5a6cd35e00a05540b7eb0  mesa-17.3.1.tar.xz
SHA256: 9ae607e0998a586fb2c866cfc8e45e6f52d1c56cb1b41288253ea83eada824c1
 mesa-17.3.1.tar.xz
SHA512: 
a2bd0123bf2df0db03197001a9e001b25f2609f3c6bf1bae66f3bc50c51d2558b5b77e232e81c1658c3a4dec88eda32a8b5a85c1fedc937a7441476228ef8238
 mesa-17.3.1.tar.xz
PGP:  https://mesa.freedesktop.org/archive/mesa-17.3.1.tar.xz.sig
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] amd/common: pass the family to ac_llvm_context_init()

2017-12-21 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c  | 3 ++-
 src/amd/common/ac_llvm_build.h  | 3 ++-
 src/amd/common/ac_nir_to_llvm.c | 6 --
 src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c | 3 ++-
 4 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index b407678c3b..c74a47a799 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -47,11 +47,12 @@
  */
 void
 ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
-enum chip_class chip_class)
+enum chip_class chip_class, enum radeon_family family)
 {
LLVMValueRef args[1];
 
ctx->chip_class = chip_class;
+   ctx->family = family;
 
ctx->context = context;
ctx->module = NULL;
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 4a570c41bc..6427d5315a 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -74,13 +74,14 @@ struct ac_llvm_context {
LLVMValueRef empty_md;
 
enum chip_class chip_class;
+   enum radeon_family family;
 
LLVMValueRef lds;
 };
 
 void
 ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
-enum chip_class chip_class);
+enum chip_class chip_class, enum radeon_family family);
 
 unsigned ac_get_type_size(LLVMTypeRef type);
 
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 554d07b635..93624ee419 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -6670,7 +6670,8 @@ LLVMModuleRef 
ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
ctx.context = LLVMContextCreate();
ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);
 
-   ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class);
+   ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class,
+options->family);
ctx.ac.module = ctx.module;
LLVMSetTarget(ctx.module, options->supports_spill ? 
"amdgcn-mesa-mesa3d" : "amdgcn--");
 
@@ -7052,7 +7053,8 @@ void ac_create_gs_copy_shader(LLVMTargetMachineRef tm,
ctx.options = options;
ctx.shader_info = shader_info;
 
-   ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class);
+   ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class,
+options->family);
ctx.ac.module = ctx.module;
 
ctx.is_gs_copy_shader = true;
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index 0843b3c63c..2ca036e67d 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -1167,7 +1167,8 @@ void si_llvm_context_init(struct si_shader_context *ctx,
ctx->gallivm.builder = lp_create_builder(ctx->gallivm.context,
 float_mode);
 
-   ac_llvm_context_init(&ctx->ac, ctx->gallivm.context, 
sscreen->info.chip_class);
+   ac_llvm_context_init(&ctx->ac, ctx->gallivm.context,
+sscreen->info.chip_class, sscreen->info.family);
ctx->ac.module = ctx->gallivm.module;
ctx->ac.builder = ctx->gallivm.builder;
 
-- 
2.15.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] amd/common: add ac_export_mrt_z() helper

2017-12-21 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_nir_to_llvm.c  | 64 +++-
 src/amd/common/ac_shader_util.c  | 72 
 src/amd/common/ac_shader_util.h  |  6 +++
 src/gallium/drivers/radeonsi/si_shader.c | 61 +--
 4 files changed, 84 insertions(+), 119 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 93624ee419..a90dd9f0a5 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -6309,67 +6309,13 @@ si_export_mrt_color(struct nir_to_llvm_context *ctx,
 }
 
 static void
-si_export_mrt_z(struct nir_to_llvm_context *ctx,
-   LLVMValueRef depth, LLVMValueRef stencil,
-   LLVMValueRef samplemask)
+radv_export_mrt_z(struct nir_to_llvm_context *ctx,
+ LLVMValueRef depth, LLVMValueRef stencil,
+ LLVMValueRef samplemask)
 {
struct ac_export_args args;
 
-   args.enabled_channels = 0;
-   args.valid_mask = 1;
-   args.done = 1;
-   args.target = V_008DFC_SQ_EXP_MRTZ;
-   args.compr = false;
-
-   args.out[0] = LLVMGetUndef(ctx->ac.f32); /* R, depth */
-   args.out[1] = LLVMGetUndef(ctx->ac.f32); /* G, stencil test val[0:7], 
stencil op val[8:15] */
-   args.out[2] = LLVMGetUndef(ctx->ac.f32); /* B, sample mask */
-   args.out[3] = LLVMGetUndef(ctx->ac.f32); /* A, alpha to mask */
-
-   unsigned format = ac_get_spi_shader_z_format(depth != NULL,
-stencil != NULL,
-samplemask != NULL);
-
-   if (format == V_028710_SPI_SHADER_UINT16_ABGR) {
-   assert(!depth);
-   args.compr = 1; /* COMPR flag */
-
-   if (stencil) {
-   /* Stencil should be in X[23:16]. */
-   stencil = ac_to_integer(&ctx->ac, stencil);
-   stencil = LLVMBuildShl(ctx->builder, stencil,
-  LLVMConstInt(ctx->ac.i32, 16, 
0), "");
-   args.out[0] = ac_to_float(&ctx->ac, stencil);
-   args.enabled_channels |= 0x3;
-   }
-   if (samplemask) {
-   /* SampleMask should be in Y[15:0]. */
-   args.out[1] = samplemask;
-   args.enabled_channels |= 0xc;
-   }
-   } else {
-   if (depth) {
-   args.out[0] = depth;
-   args.enabled_channels |= 0x1;
-   }
-
-   if (stencil) {
-   args.out[1] = stencil;
-   args.enabled_channels |= 0x2;
-   }
-
-   if (samplemask) {
-   args.out[2] = samplemask;
-   args.enabled_channels |= 0x4;
-   }
-   }
-
-   /* SI (except OLAND and HAINAN) has a bug that it only looks
-* at the X writemask component. */
-   if (ctx->options->chip_class == SI &&
-   ctx->options->family != CHIP_OLAND &&
-   ctx->options->family != CHIP_HAINAN)
-   args.enabled_channels |= 0x1;
+   ac_export_mrt_z(&ctx->ac, depth, stencil, samplemask, &args);
 
ac_build_export(&ctx->ac, &args);
 }
@@ -6417,7 +6363,7 @@ handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
for (unsigned i = 0; i < index; i++)
ac_build_export(&ctx->ac, &color_args[i]);
if (depth || stencil || samplemask)
-   si_export_mrt_z(ctx, depth, stencil, samplemask);
+   radv_export_mrt_z(ctx, depth, stencil, samplemask);
else if (!index) {
si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true, 
&color_args[0]);
ac_build_export(&ctx->ac, &color_args[0]);
diff --git a/src/amd/common/ac_shader_util.c b/src/amd/common/ac_shader_util.c
index 12f86dc677..531395f4f6 100644
--- a/src/amd/common/ac_shader_util.c
+++ b/src/amd/common/ac_shader_util.c
@@ -22,7 +22,10 @@
  */
 
 #include 
+#include 
+#include 
 
+#include "ac_nir_to_llvm.h"
 #include "ac_shader_util.h"
 #include "sid.h"
 
@@ -105,3 +108,72 @@ ac_vgt_gs_mode(unsigned gs_max_vert_out, enum chip_class 
chip_class)
   S_028A40_GS_WRITE_OPTIMIZE(1) |
   S_028A40_ONCHIP(chip_class >= GFX9 ? 1 : 0);
 }
+
+void
+ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth,
+   LLVMValueRef stencil, LLVMValueRef samplemask,
+   struct ac_export_args *args)
+{
+   unsigned mask = 0;
+   unsigned format = ac_get_spi_shader_z_format(depth != NULL,
+stencil != NULL,
+samplemask != NULL);
+
+   assert(depth || stencil || samplemask);
+
+   memset(args, 0, sizeof(*args));

[Mesa-dev] [PATCH] radv: reduce the number of small surfaces that need CMASK or DCC

2017-12-21 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_image.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index efd17e4889..15410f140e 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -813,7 +813,7 @@ static inline bool
 radv_image_can_enable_dcc_or_cmask(struct radv_image *image)
 {
if (image->info.samples <= 1 &&
-   image->info.width <= 512 && image->info.height <= 512) {
+   image->info.width * image->info.height <= 512 * 512) {
/* Do not enable CMASK or DCC for small surfaces where the cost
 * of the eliminate pass can be higher than the benefit of fast
 * clear. RadeonSI does this, but the image threshold is
-- 
2.15.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 02/22] nvir: move common converter code in base class

2017-12-21 Thread Karol Herbst
On Thu, Dec 21, 2017 at 5:32 PM, Ilia Mirkin  wrote:
> Tgsi values should stay inside of from_tgsi. If there are leaks into the
> rest of codegen, let's plug them. I assume this is for assignslots?
>

the translateSysVal thing not directly, but I can translate the glsl
values to the tgsi ones with the code I wrote for assignSlots and
because the conversion from TGSI to NVIR already existed, I just used
that for now.

> On Dec 21, 2017 11:26 AM, "Karol Herbst"  wrote:
>>
>> On Thu, Dec 21, 2017 at 5:21 PM, Ilia Mirkin  wrote:
>> > On Thu, Dec 21, 2017 at 10:51 AM, Karol Herbst 
>> > wrote:
>> >> this is more or less a todo list of things I should move elsewhere. Not
>> >> all of
>> >> it should be actually moved, but...
>> >>
>> >> Signed-off-by: Karol Herbst 
>> >> ---
>> >>  src/gallium/drivers/nouveau/Makefile.sources   |   2 +
>> >>  .../nouveau/codegen/nv50_ir_from_common.cpp| 145
>> >> 
>> >>  .../drivers/nouveau/codegen/nv50_ir_from_common.h  |  59 +
>> >>  .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 146
>> >> +
>> >>  src/gallium/drivers/nouveau/meson.build|   2 +
>> >>  5 files changed, 212 insertions(+), 142 deletions(-)
>> >>  create mode 100644
>> >> src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
>> >>  create mode 100644
>> >> src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.h
>> >>
>> >> diff --git a/src/gallium/drivers/nouveau/Makefile.sources
>> >> b/src/gallium/drivers/nouveau/Makefile.sources
>> >> index 65f08c7d8d..fee5e59522 100644
>> >> --- a/src/gallium/drivers/nouveau/Makefile.sources
>> >> +++ b/src/gallium/drivers/nouveau/Makefile.sources
>> >> @@ -115,6 +115,8 @@ NV50_CODEGEN_SOURCES := \
>> >> codegen/nv50_ir_build_util.h \
>> >> codegen/nv50_ir_driver.h \
>> >> codegen/nv50_ir_emit_nv50.cpp \
>> >> +   codegen/nv50_ir_from_common.cpp \
>> >> +   codegen/nv50_ir_from_common.h \
>> >> codegen/nv50_ir_from_tgsi.cpp \
>> >> codegen/nv50_ir_graph.cpp \
>> >> codegen/nv50_ir_graph.h \
>> >> diff --git
>> >> a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
>> >> b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
>> >> new file mode 100644
>> >> index 00..aa5f52fe81
>> >> --- /dev/null
>> >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
>> >> @@ -0,0 +1,145 @@
>> >> +/*
>> >> + * Copyright 2011 Christoph Bumiller
>> >> + *
>> >> + * Permission is hereby granted, free of charge, to any person
>> >> obtaining a
>> >> + * copy of this software and associated documentation files (the
>> >> "Software"),
>> >> + * to deal in the Software without restriction, including without
>> >> limitation
>> >> + * the rights to use, copy, modify, merge, publish, distribute,
>> >> sublicense,
>> >> + * and/or sell copies of the Software, and to permit persons to whom
>> >> the
>> >> + * Software is furnished to do so, subject to the following
>> >> conditions:
>> >> + *
>> >> + * The above copyright notice and this permission notice shall be
>> >> included in
>> >> + * all copies or substantial portions of the Software.
>> >> + *
>> >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>> >> EXPRESS OR
>> >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>> >> MERCHANTABILITY,
>> >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
>> >> SHALL
>> >> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
>> >> OR
>> >> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>> >> OTHERWISE,
>> >> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
>> >> OR
>> >> + * OTHER DEALINGS IN THE SOFTWARE.
>> >> + */
>> >> +
>> >> +#include "codegen/nv50_ir_from_common.h"
>> >> +
>> >> +namespace nv50_ir {
>> >> +
>> >> +ConverterCommon::ConverterCommon(Program *prog, nv50_ir_prog_info
>> >> *info)
>> >> +   :  BuildUtil(prog),
>> >> +  info(info) {}
>> >> +
>> >> +ConverterCommon::Subroutine *
>> >> +ConverterCommon::getSubroutine(unsigned ip)
>> >> +{
>> >> +   std::map::iterator it = sub.map.find(ip);
>> >> +
>> >> +   if (it == sub.map.end())
>> >> +  it = sub.map.insert(std::make_pair(
>> >> +  ip, Subroutine(new Function(prog, "SUB", ip.first;
>> >> +
>> >> +   return &it->second;
>> >> +}
>> >> +
>> >> +ConverterCommon::Subroutine *
>> >> +ConverterCommon::getSubroutine(Function *f)
>> >> +{
>> >> +   unsigned ip = f->getLabel();
>> >> +   std::map::iterator it = sub.map.find(ip);
>> >> +
>> >> +   if (it == sub.map.end())
>> >> +  it = sub.map.insert(std::make_pair(ip, Subroutine(f))).first;
>> >> +
>> >> +   return &it->second;
>> >> +}
>> >> +
>> >> +uint8_t
>> >> +ConverterCommon::translateInterpMode(const nv50_ir_varying *var,
>> >> operation& op)
>> >> +{
>> >> +   uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
>> >> +
>> >> +   if (var->flat)
>> >> +  mode = NV50_IR_INTERP_FLAT;

Re: [Mesa-dev] [PATCH 02/22] nvir: move common converter code in base class

2017-12-21 Thread Ilia Mirkin
Tgsi values should stay inside of from_tgsi. If there are leaks into the
rest of codegen, let's plug them. I assume this is for assignslots?

On Dec 21, 2017 11:26 AM, "Karol Herbst"  wrote:

> On Thu, Dec 21, 2017 at 5:21 PM, Ilia Mirkin  wrote:
> > On Thu, Dec 21, 2017 at 10:51 AM, Karol Herbst 
> wrote:
> >> this is more or less a todo list of things I should move elsewhere. Not
> all of
> >> it should be actually moved, but...
> >>
> >> Signed-off-by: Karol Herbst 
> >> ---
> >>  src/gallium/drivers/nouveau/Makefile.sources   |   2 +
> >>  .../nouveau/codegen/nv50_ir_from_common.cpp| 145
> 
> >>  .../drivers/nouveau/codegen/nv50_ir_from_common.h  |  59 +
> >>  .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 146
> +
> >>  src/gallium/drivers/nouveau/meson.build|   2 +
> >>  5 files changed, 212 insertions(+), 142 deletions(-)
> >>  create mode 100644 src/gallium/drivers/nouveau/
> codegen/nv50_ir_from_common.cpp
> >>  create mode 100644 src/gallium/drivers/nouveau/
> codegen/nv50_ir_from_common.h
> >>
> >> diff --git a/src/gallium/drivers/nouveau/Makefile.sources
> b/src/gallium/drivers/nouveau/Makefile.sources
> >> index 65f08c7d8d..fee5e59522 100644
> >> --- a/src/gallium/drivers/nouveau/Makefile.sources
> >> +++ b/src/gallium/drivers/nouveau/Makefile.sources
> >> @@ -115,6 +115,8 @@ NV50_CODEGEN_SOURCES := \
> >> codegen/nv50_ir_build_util.h \
> >> codegen/nv50_ir_driver.h \
> >> codegen/nv50_ir_emit_nv50.cpp \
> >> +   codegen/nv50_ir_from_common.cpp \
> >> +   codegen/nv50_ir_from_common.h \
> >> codegen/nv50_ir_from_tgsi.cpp \
> >> codegen/nv50_ir_graph.cpp \
> >> codegen/nv50_ir_graph.h \
> >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
> >> new file mode 100644
> >> index 00..aa5f52fe81
> >> --- /dev/null
> >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
> >> @@ -0,0 +1,145 @@
> >> +/*
> >> + * Copyright 2011 Christoph Bumiller
> >> + *
> >> + * Permission is hereby granted, free of charge, to any person
> obtaining a
> >> + * copy of this software and associated documentation files (the
> "Software"),
> >> + * to deal in the Software without restriction, including without
> limitation
> >> + * the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
> >> + * and/or sell copies of the Software, and to permit persons to whom
> the
> >> + * Software is furnished to do so, subject to the following conditions:
> >> + *
> >> + * The above copyright notice and this permission notice shall be
> included in
> >> + * all copies or substantial portions of the Software.
> >> + *
> >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
> SHALL
> >> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> >> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> OTHERWISE,
> >> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
> OR
> >> + * OTHER DEALINGS IN THE SOFTWARE.
> >> + */
> >> +
> >> +#include "codegen/nv50_ir_from_common.h"
> >> +
> >> +namespace nv50_ir {
> >> +
> >> +ConverterCommon::ConverterCommon(Program *prog, nv50_ir_prog_info
> *info)
> >> +   :  BuildUtil(prog),
> >> +  info(info) {}
> >> +
> >> +ConverterCommon::Subroutine *
> >> +ConverterCommon::getSubroutine(unsigned ip)
> >> +{
> >> +   std::map::iterator it = sub.map.find(ip);
> >> +
> >> +   if (it == sub.map.end())
> >> +  it = sub.map.insert(std::make_pair(
> >> +  ip, Subroutine(new Function(prog, "SUB", ip.first;
> >> +
> >> +   return &it->second;
> >> +}
> >> +
> >> +ConverterCommon::Subroutine *
> >> +ConverterCommon::getSubroutine(Function *f)
> >> +{
> >> +   unsigned ip = f->getLabel();
> >> +   std::map::iterator it = sub.map.find(ip);
> >> +
> >> +   if (it == sub.map.end())
> >> +  it = sub.map.insert(std::make_pair(ip, Subroutine(f))).first;
> >> +
> >> +   return &it->second;
> >> +}
> >> +
> >> +uint8_t
> >> +ConverterCommon::translateInterpMode(const nv50_ir_varying *var,
> operation& op)
> >> +{
> >> +   uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
> >> +
> >> +   if (var->flat)
> >> +  mode = NV50_IR_INTERP_FLAT;
> >> +   else
> >> +   if (var->linear)
> >> +  mode = NV50_IR_INTERP_LINEAR;
> >> +   else
> >> +   if (var->sc)
> >> +  mode = NV50_IR_INTERP_SC;
> >> +
> >> +   op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode ==
> NV50_IR_INTERP_SC)
> >> +  ? OP_PINTERP : OP_LINTERP;
> >> +
> >> +   if (var->centroid)
> >> +  mode |= NV50_IR_INTERP_CENTROID;
> >> +
> >> +   return mode;
> >> +}
> >> +
> >> +void
> >> +ConverterCommon::handleUserClipPlanes()
> >> +{
> >> 

Re: [Mesa-dev] [PATCH 02/22] nvir: move common converter code in base class

2017-12-21 Thread Karol Herbst
On Thu, Dec 21, 2017 at 5:21 PM, Ilia Mirkin  wrote:
> On Thu, Dec 21, 2017 at 10:51 AM, Karol Herbst  wrote:
>> this is more or less a todo list of things I should move elsewhere. Not all 
>> of
>> it should be actually moved, but...
>>
>> Signed-off-by: Karol Herbst 
>> ---
>>  src/gallium/drivers/nouveau/Makefile.sources   |   2 +
>>  .../nouveau/codegen/nv50_ir_from_common.cpp| 145 
>> 
>>  .../drivers/nouveau/codegen/nv50_ir_from_common.h  |  59 +
>>  .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 146 
>> +
>>  src/gallium/drivers/nouveau/meson.build|   2 +
>>  5 files changed, 212 insertions(+), 142 deletions(-)
>>  create mode 100644 
>> src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
>>  create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.h
>>
>> diff --git a/src/gallium/drivers/nouveau/Makefile.sources 
>> b/src/gallium/drivers/nouveau/Makefile.sources
>> index 65f08c7d8d..fee5e59522 100644
>> --- a/src/gallium/drivers/nouveau/Makefile.sources
>> +++ b/src/gallium/drivers/nouveau/Makefile.sources
>> @@ -115,6 +115,8 @@ NV50_CODEGEN_SOURCES := \
>> codegen/nv50_ir_build_util.h \
>> codegen/nv50_ir_driver.h \
>> codegen/nv50_ir_emit_nv50.cpp \
>> +   codegen/nv50_ir_from_common.cpp \
>> +   codegen/nv50_ir_from_common.h \
>> codegen/nv50_ir_from_tgsi.cpp \
>> codegen/nv50_ir_graph.cpp \
>> codegen/nv50_ir_graph.h \
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp 
>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
>> new file mode 100644
>> index 00..aa5f52fe81
>> --- /dev/null
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
>> @@ -0,0 +1,145 @@
>> +/*
>> + * Copyright 2011 Christoph Bumiller
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the 
>> "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included 
>> in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 
>> OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
>> + */
>> +
>> +#include "codegen/nv50_ir_from_common.h"
>> +
>> +namespace nv50_ir {
>> +
>> +ConverterCommon::ConverterCommon(Program *prog, nv50_ir_prog_info *info)
>> +   :  BuildUtil(prog),
>> +  info(info) {}
>> +
>> +ConverterCommon::Subroutine *
>> +ConverterCommon::getSubroutine(unsigned ip)
>> +{
>> +   std::map::iterator it = sub.map.find(ip);
>> +
>> +   if (it == sub.map.end())
>> +  it = sub.map.insert(std::make_pair(
>> +  ip, Subroutine(new Function(prog, "SUB", ip.first;
>> +
>> +   return &it->second;
>> +}
>> +
>> +ConverterCommon::Subroutine *
>> +ConverterCommon::getSubroutine(Function *f)
>> +{
>> +   unsigned ip = f->getLabel();
>> +   std::map::iterator it = sub.map.find(ip);
>> +
>> +   if (it == sub.map.end())
>> +  it = sub.map.insert(std::make_pair(ip, Subroutine(f))).first;
>> +
>> +   return &it->second;
>> +}
>> +
>> +uint8_t
>> +ConverterCommon::translateInterpMode(const nv50_ir_varying *var, operation& 
>> op)
>> +{
>> +   uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
>> +
>> +   if (var->flat)
>> +  mode = NV50_IR_INTERP_FLAT;
>> +   else
>> +   if (var->linear)
>> +  mode = NV50_IR_INTERP_LINEAR;
>> +   else
>> +   if (var->sc)
>> +  mode = NV50_IR_INTERP_SC;
>> +
>> +   op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC)
>> +  ? OP_PINTERP : OP_LINTERP;
>> +
>> +   if (var->centroid)
>> +  mode |= NV50_IR_INTERP_CENTROID;
>> +
>> +   return mode;
>> +}
>> +
>> +void
>> +ConverterCommon::handleUserClipPlanes()
>> +{
>> +   Value *res[8];
>> +   int n, i, c;
>> +
>> +   for (c = 0; c < 4; ++c) {
>> +  for (i = 0; i < info->io.genUserClip; ++i) {
>> + Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot,
>> +TYPE_F32, info->io.ucpBase + i * 16 + c * 
>> 4);
>> + Value *ucp = mkLoadv(TYPE_F32, sym, NULL);
>> + if (c == 0)
>> +res[i] = mkOp2v(OP_MUL, TYPE_F32, getScr

Re: [Mesa-dev] [PATCH 02/22] nvir: move common converter code in base class

2017-12-21 Thread Ilia Mirkin
On Thu, Dec 21, 2017 at 10:51 AM, Karol Herbst  wrote:
> this is more or less a todo list of things I should move elsewhere. Not all of
> it should be actually moved, but...
>
> Signed-off-by: Karol Herbst 
> ---
>  src/gallium/drivers/nouveau/Makefile.sources   |   2 +
>  .../nouveau/codegen/nv50_ir_from_common.cpp| 145 
>  .../drivers/nouveau/codegen/nv50_ir_from_common.h  |  59 +
>  .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 146 
> +
>  src/gallium/drivers/nouveau/meson.build|   2 +
>  5 files changed, 212 insertions(+), 142 deletions(-)
>  create mode 100644 
> src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
>  create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.h
>
> diff --git a/src/gallium/drivers/nouveau/Makefile.sources 
> b/src/gallium/drivers/nouveau/Makefile.sources
> index 65f08c7d8d..fee5e59522 100644
> --- a/src/gallium/drivers/nouveau/Makefile.sources
> +++ b/src/gallium/drivers/nouveau/Makefile.sources
> @@ -115,6 +115,8 @@ NV50_CODEGEN_SOURCES := \
> codegen/nv50_ir_build_util.h \
> codegen/nv50_ir_driver.h \
> codegen/nv50_ir_emit_nv50.cpp \
> +   codegen/nv50_ir_from_common.cpp \
> +   codegen/nv50_ir_from_common.h \
> codegen/nv50_ir_from_tgsi.cpp \
> codegen/nv50_ir_graph.cpp \
> codegen/nv50_ir_graph.h \
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
> new file mode 100644
> index 00..aa5f52fe81
> --- /dev/null
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
> @@ -0,0 +1,145 @@
> +/*
> + * Copyright 2011 Christoph Bumiller
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + */
> +
> +#include "codegen/nv50_ir_from_common.h"
> +
> +namespace nv50_ir {
> +
> +ConverterCommon::ConverterCommon(Program *prog, nv50_ir_prog_info *info)
> +   :  BuildUtil(prog),
> +  info(info) {}
> +
> +ConverterCommon::Subroutine *
> +ConverterCommon::getSubroutine(unsigned ip)
> +{
> +   std::map::iterator it = sub.map.find(ip);
> +
> +   if (it == sub.map.end())
> +  it = sub.map.insert(std::make_pair(
> +  ip, Subroutine(new Function(prog, "SUB", ip.first;
> +
> +   return &it->second;
> +}
> +
> +ConverterCommon::Subroutine *
> +ConverterCommon::getSubroutine(Function *f)
> +{
> +   unsigned ip = f->getLabel();
> +   std::map::iterator it = sub.map.find(ip);
> +
> +   if (it == sub.map.end())
> +  it = sub.map.insert(std::make_pair(ip, Subroutine(f))).first;
> +
> +   return &it->second;
> +}
> +
> +uint8_t
> +ConverterCommon::translateInterpMode(const nv50_ir_varying *var, operation& 
> op)
> +{
> +   uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
> +
> +   if (var->flat)
> +  mode = NV50_IR_INTERP_FLAT;
> +   else
> +   if (var->linear)
> +  mode = NV50_IR_INTERP_LINEAR;
> +   else
> +   if (var->sc)
> +  mode = NV50_IR_INTERP_SC;
> +
> +   op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC)
> +  ? OP_PINTERP : OP_LINTERP;
> +
> +   if (var->centroid)
> +  mode |= NV50_IR_INTERP_CENTROID;
> +
> +   return mode;
> +}
> +
> +void
> +ConverterCommon::handleUserClipPlanes()
> +{
> +   Value *res[8];
> +   int n, i, c;
> +
> +   for (c = 0; c < 4; ++c) {
> +  for (i = 0; i < info->io.genUserClip; ++i) {
> + Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot,
> +TYPE_F32, info->io.ucpBase + i * 16 + c * 4);
> + Value *ucp = mkLoadv(TYPE_F32, sym, NULL);
> + if (c == 0)
> +res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp);
> + else
> +mkOp3(OP_MAD, TYPE_F32, res[i], clipVtx[c], ucp, res[i]);
> +  }
> +   }
> +
> +   const int first = info->numOutputs - (info->io.genUserClip + 3

Re: [Mesa-dev] [PATCH 00/22] Nir support for Nouveau

2017-12-21 Thread Ilia Mirkin
On Thu, Dec 21, 2017 at 10:51 AM, Karol Herbst  wrote:
> Plans are to get SPIR-V support for Nouveau with a cheap way. Before that I

I'm not on board with this plan, as I'm sure you know. Cheap way is to
just support SPIR-V directly.

That said, as long as impact to surrounding code is minimal, happy to
support nir as one of the inputs.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/22] Nir support for Nouveau

2017-12-21 Thread Karol Herbst
uhm, actually that should have been marked as a RFC, sorry for that.

On Thu, Dec 21, 2017 at 4:51 PM, Karol Herbst  wrote:
> Plans are to get SPIR-V support for Nouveau with a cheap way. Before that I
> was looking into Pierre's work on his direct SPIR-V to nvir pass, and it seemed
> much more complicated than doing NIR to nvir.
>
> In the end we still plan to get Compute support through SPIR-V and my hope is
> to get that piped through NIR, so that a lot of other drivers could benefit
> from this as well.
>
> Stuff up to GLSL 1.30 should work (just 2 piglit fails up to that), some games
> run as well (truth is, I only checked one, but I expect that more will run).
> Usually I took the add-stuff-until-piglit-test-passes approach so a lot of
> details are still missing.
>
> But nevertheless I would like to get some feedback on the work and suggestions
> for improving this work.
>
> To use NIR just set NV50_PROG_USE_NIR=1 for a mesa DEBUG build. I plan to move
> it to a non debug variable for the next series.
>
> Features I want to work next on:
> * Geometry shaders
> * UBOs
>
> ./piglit run -x glx -x egl -x streaming-texture-leak -x max-texture-size 
> tests/gpu.py:
> [26073/26073] skip: 1574, pass: 13451, warn: 9, fail: 5287, crash: 5752
>
> Note: a lot of crashes are geometry/tessellation stuff hitting asserts.
>
> Karol Herbst (22):
>   nvir: print the shader type when dumping headers
>   nvir: move common converter code in base class
>   nvc0: add support for NIR
>   nvc0/debug: add env var to make nir default
>   nvir/nir: run some passes to make the conversion easier
>   nvir/nir: track defs and provide easy access functions
>   nvir/nir: add nir type helper functions
>   nvir/nir: implement CFG handling
>   nvir/nir: implement nir_load_const_instr
>   nvir/nir: add skeleton for nir_intrinsic_instr
>   nvir/nir: implement nir_alu_instr handling
>   nvir/nir: implement nir_intrinsic_load_uniform
>   nvir/nir: implement nir_intrinsic_store_output
>   nvir/nir: implement nir_intrinsic_load_input
>   nvir/nir: run assignSlots
>   nvir/nir: parse NIR shader info
>   nvir/nir: implement intrinsic_discard(_if)
>   nvir/nir: implement loading system values
>   nvir/nir: implement nir_ssa_undef_instr
>   nvir/nir: implement nir_instr_type_tex
>   nvir/nir: implement vote
>   nvir/nir: implement variable indexing
>
>  src/gallium/drivers/nouveau/Makefile.sources   |3 +
>  src/gallium/drivers/nouveau/codegen/nv50_ir.cpp|3 +
>  src/gallium/drivers/nouveau/codegen/nv50_ir.h  |1 +
>  .../nouveau/codegen/nv50_ir_from_common.cpp|  145 ++
>  .../drivers/nouveau/codegen/nv50_ir_from_common.h  |   59 +
>  .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 2009 
> 
>  .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  |  146 +-
>  src/gallium/drivers/nouveau/meson.build|   12 +-
>  src/gallium/drivers/nouveau/nvc0/nvc0_program.c|   17 +-
>  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c |   53 +-
>  src/gallium/drivers/nouveau/nvc0/nvc0_state.c  |   24 +-
>  11 files changed, 2321 insertions(+), 151 deletions(-)
>  create mode 100644 
> src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
>  create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.h
>  create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
>
> --
> 2.14.3
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 21/22] nvir/nir: implement vote

2017-12-21 Thread Karol Herbst
TODO: there is more
Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 16 
 1 file changed, 16 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 384127aff5..dda7507d76 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -1338,6 +1338,22 @@ Converter::visit(nir_intrinsic_instr *insn)
   mkOp1(OP_RDSV, TYPE_U32, newDefs[0], sym);
   break;
}
+   case nir_intrinsic_vote_all:
+   case nir_intrinsic_vote_any: {
+  unsigned subop = 0;
+
+  if (insn->intrinsic == nir_intrinsic_vote_all)
+ subop = NV50_IR_SUBOP_VOTE_ALL;
+  else if (insn->intrinsic == nir_intrinsic_vote_any)
+ subop = NV50_IR_SUBOP_VOTE_ANY;
+
+  LValues &newDefs = convert(&insn->dest);
+  Value *pred = new_LValue(func, FILE_PREDICATE);
+  mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), 
zero);
+  mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = subop;
+  mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", 
nir_intrinsic_infos[insn->intrinsic].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 22/22] nvir/nir: implement variable indexing

2017-12-21 Thread Karol Herbst
We store those arrays in local memory and reserve some space for each of the
arrays. The arrays are stored in a packed format, because we know quite easily
the context of each index. We don't do that in TGSI so far.

This causes various issues to come up in the MemoryOpt pass, because ld/st with
indirects aren't guaranteed to be aligned to 0x10 anymore.

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 58 ++
 1 file changed, 58 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index dda7507d76..3052c57d3c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -48,6 +48,7 @@ public:
typedef decltype(nir_ssa_def().index) NirSSADefIdx;
typedef decltype(nir_ssa_def().bit_size) NirSSADefBitSize;
typedef std::unordered_map NirDefMap;
+   typedef std::unordered_map NirArrayLMemOffsets;
typedef std::unordered_map 
NirBlockMap;
 
Converter(Program *, nir_shader *, nv50_ir_prog_info *);
@@ -110,6 +111,7 @@ private:
 
NirDefMap ssaDefs;
NirDefMap regDefs;
+   NirArrayLMemOffsets regToLmemOffset;
NirBlockMap blocks;
unsigned int curLoopDepth;
 
@@ -935,6 +937,7 @@ bool Converter::assignSlots() {
 bool
 Converter::parseNIR()
 {
+   info->bin.tlsSpace = 0;
info->io.clipDistances = nir->info.clip_distance_array_size;
info->io.cullDistances = nir->info.cull_distance_array_size;
 
@@ -990,6 +993,15 @@ Converter::visit(nir_function *function)
   break;
}
 
+   nir_foreach_register(reg, &function->impl->registers) {
+  if (reg->num_array_elems) {
+ uint32_t size = reg->num_components * reg->num_array_elems * 
(reg->bit_size / 8);
+ // reserve some lmem
+ regToLmemOffset[reg->index] = info->bin.tlsSpace;
+ info->bin.tlsSpace += size;
+  }
+   }
+
nir_index_ssa_defs(function->impl);
foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
   if (!visit(node))
@@ -1530,6 +1542,51 @@ Converter::visit(nir_alu_instr *insn)
 *   2. they basically just merge multiple values into one data type
 */
CASE_OP(mov):
+  if (!insn->dest.dest.is_ssa && insn->dest.dest.reg.reg->num_array_elems) 
{
+ nir_reg_dest& reg = insn->dest.dest.reg;
+ auto goffset = regToLmemOffset[reg.reg->index];
+ auto comps = reg.reg->num_components;
+ auto size = reg.reg->bit_size / 8;
+ auto csize = comps * size;
+ auto aoffset = csize * reg.base_offset;
+ Value *indirect = nullptr;
+
+ if (reg.indirect)
+indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), 
getSrc(reg.indirect, 0), mkImm(csize));
+
+ for (auto i = 0u; i < comps; ++i) {
+if (!((1u << i) & insn->dest.write_mask))
+   continue;
+
+Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + 
aoffset + i * size);
+mkStore(OP_STORE, dType, sym, indirect, getSrc(&insn->src[0], i));
+ }
+ break;
+  } else if (!insn->src[0].src.is_ssa && 
insn->src[0].src.reg.reg->num_array_elems) {
+ LValues &newDefs = convert(&insn->dest);
+ nir_reg_src& reg = insn->src[0].src.reg;
+ auto goffset = regToLmemOffset[reg.reg->index];
+ auto comps = reg.reg->num_components;
+ auto size = reg.reg->bit_size / 8;
+ auto csize = comps * size;
+ auto aoffset = csize * reg.base_offset;
+ Value *indirect = nullptr;
+
+ if (reg.indirect)
+indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), 
getSrc(reg.indirect, 0), mkImm(csize));
+
+ for (auto i = 0u; i < newDefs.size(); ++i) {
+Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + 
aoffset + i * size);
+mkLoad(dType, newDefs[i], sym, indirect);
+ }
+ break;
+  } else {
+ LValues &newDefs = convert(&insn->dest);
+ for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
+mkMov(newDefs[c], getSrc(&insn->src[0], c), dType);
+ }
+  }
+  break;
case nir_op_vec2:
case nir_op_vec3:
case nir_op_vec4: {
@@ -1907,6 +1964,7 @@ Converter::run()
   NIR_PASS(progress, nir, nir_opt_dead_cf);
} while (progress);
 
+   NIR_PASS_V(nir, nir_lower_locals_to_regs);
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_local);
NIR_PASS_V(nir, nir_convert_from_ssa, true);
 
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 20/22] nvir/nir: implement nir_instr_type_tex

2017-12-21 Thread Karol Herbst
TODO: a lot of those fields are not valid for a lot of tex ops. Not quite sure
if it's worth the effort to check for those or just keep it like that. It seems
to kind of work.

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 192 +
 1 file changed, 192 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index bf213bca51..384127aff5 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -52,6 +52,7 @@ public:
 
Converter(Program *, nir_shader *, nv50_ir_prog_info *);
 
+   TexTarget convert(glsl_sampler_dim&, bool isArray, bool isShadow);
LValues& convert(nir_alu_dest *);
BasicBlock* convert(nir_block *);
LValues& convert(nir_dest *);
@@ -82,6 +83,7 @@ public:
bool visit(nir_load_const_instr*);
bool visit(nir_loop *);
bool visit(nir_ssa_undef_instr *);
+   bool visit(nir_tex_instr *);
 
bool run();
 
@@ -96,9 +98,13 @@ public:
DataType getSType(nir_src&, bool isFloat, bool isSigned);
 
operation getOperation(nir_op);
+   operation getOperation(nir_texop);
operation preOperationNeeded(nir_op);
int getSubOp(nir_op);
CondCode getCondCode(nir_op);
+
+   // tex stuff
+   Value* applyProjection(Value *src, Value *proj);
 private:
nir_shader *nir;
 
@@ -331,6 +337,36 @@ Converter::getOperation(nir_op op)
 #undef CASE_OP3
 #undef CASE_OPIU
 
+operation
+Converter::getOperation(nir_texop op)
+{
+   switch (op) {
+   case nir_texop_tex:
+  return OP_TEX;
+   case nir_texop_lod:
+  return OP_TXLQ;
+   case nir_texop_txb:
+  return OP_TXB;
+   case nir_texop_txd:
+  return OP_TXD;
+   case nir_texop_txf:
+   case nir_texop_txf_ms:
+  return OP_TXF;
+   case nir_texop_tg4:
+  return OP_TXG;
+   case nir_texop_txl:
+  return OP_TXL;
+   case nir_texop_query_levels:
+   case nir_texop_texture_samples:
+   case nir_texop_txs:
+  return OP_TXQ;
+   default:
+  ERROR("couldn't get operation for nir_texop %u\n", op);
+  assert(false);
+  return OP_NOP;
+   }
+}
+
 operation
 Converter::preOperationNeeded(nir_op op)
 {
@@ -1102,6 +1138,10 @@ Converter::visit(nir_instr *insn)
   if (!visit(nir_instr_as_alu(insn)))
  return false;
   break;
+   case nir_instr_type_tex:
+  if (!visit(nir_instr_as_tex(insn)))
+ return false;
+  break;
case nir_instr_type_intrinsic:
   if (!visit(nir_instr_as_intrinsic(insn)))
  return false;
@@ -1674,6 +1714,158 @@ Converter::visit(nir_ssa_undef_instr *insn)
return true;
 }
 
+#define CASE_SAMPLER(ty) \
+   case GLSL_SAMPLER_DIM_ ## ty : \
+  if (isArray && !isShadow) \
+ return TEX_TARGET_ ## ty ## _ARRAY; \
+  else if (!isArray && isShadow) \
+ return TEX_TARGET_## ty ## _SHADOW; \
+  else if (isArray && isShadow) \
+ return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
+  else \
+ return TEX_TARGET_ ## ty
+
+TexTarget
+Converter::convert(glsl_sampler_dim &dim, bool isArray, bool isShadow)
+{
+   switch (dim) {
+   CASE_SAMPLER(1D);
+   CASE_SAMPLER(2D);
+   CASE_SAMPLER(CUBE);
+   case GLSL_SAMPLER_DIM_3D:
+  return TEX_TARGET_3D;
+   case GLSL_SAMPLER_DIM_MS:
+  if (isArray)
+ return TEX_TARGET_2D_MS_ARRAY;
+  return TEX_TARGET_2D_MS;
+   case GLSL_SAMPLER_DIM_RECT:
+  if (isShadow)
+ return TEX_TARGET_RECT_SHADOW;
+  return TEX_TARGET_RECT;
+   case GLSL_SAMPLER_DIM_BUF:
+  return TEX_TARGET_BUFFER;
+   default:
+  ERROR("unknown glsl_sampler_dim %u\n", dim);
+  assert(false);
+  return TEX_TARGET_COUNT;
+   }
+}
+#undef CASE_SAMPLER
+
+
+Value*
+Converter::applyProjection(Value *src, Value *proj)
+{
+   if (!proj)
+  return src;
+   return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj);
+}
+
+bool
+Converter::visit(nir_tex_instr *insn)
+{
+   switch (insn->op) {
+   case nir_texop_lod:
+   case nir_texop_query_levels:
+   case nir_texop_tex:
+   case nir_texop_texture_samples:
+   case nir_texop_tg4:
+   case nir_texop_txb:
+   case nir_texop_txd:
+   case nir_texop_txf:
+   case nir_texop_txf_ms:
+   case nir_texop_txl:
+   case nir_texop_txs: {
+  LValues &newDefs = convert(&insn->dest);
+  std::vector srcs;
+  std::vector defs;
+  uint8_t mask = 0;
+  Value *proj = nullptr;
+  TexInstruction::Target target = convert(insn->sampler_dim, 
insn->is_array, insn->is_shadow);
+
+  int biasIdx = nir_tex_instr_src_index(insn, nir_tex_src_bias);
+  int compIdx = nir_tex_instr_src_index(insn, nir_tex_src_comparator);
+  int coordsIdx = nir_tex_instr_src_index(insn, nir_tex_src_coord);
+  int ddxIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddx);
+  int ddyIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddy);
+  int msIdx = nir_tex_instr_src_index(insn, nir_tex_src_ms_index);
+ 

[Mesa-dev] [PATCH 17/22] nvir/nir: implement intrinsic_discard(_if)

2017-12-21 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index cf324a3e8f..412675fcb1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -1252,6 +1252,21 @@ Converter::visit(nir_intrinsic_instr *insn)
   }
   break;
}
+   case nir_intrinsic_discard:
+  mkOp(OP_DISCARD, TYPE_NONE, NULL);
+  break;
+   case nir_intrinsic_discard_if: {
+  // we get a nir boolean value
+  Value *pred = new_LValue(func, FILE_PREDICATE);
+  if (insn->num_components > 1) {
+ ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
+ assert(false);
+ return false;
+  }
+  mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), 
zero);
+  mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", 
nir_intrinsic_infos[insn->intrinsic].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 19/22] nvir/nir: implement nir_ssa_undef_instr

2017-12-21 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 98cd73451e..bf213bca51 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -81,6 +81,7 @@ public:
bool visit(nir_jump_instr *);
bool visit(nir_load_const_instr*);
bool visit(nir_loop *);
+   bool visit(nir_ssa_undef_instr *);
 
bool run();
 
@@ -1113,6 +1114,10 @@ Converter::visit(nir_instr *insn)
   if (!visit(nir_instr_as_jump(insn)))
  return false;
   break;
+   case nir_instr_type_ssa_undef:
+  if (!visit(nir_instr_as_ssa_undef(insn)))
+ return false;
+  break;
default:
   ERROR("unknown nir_instr type %u\n", insn->type);
   return false;
@@ -1659,6 +1664,16 @@ Converter::visit(nir_alu_instr *insn)
 #undef CASE_OPIU
 #undef DEFAULT_CHECKS
 
+bool
+Converter::visit(nir_ssa_undef_instr *insn)
+{
+   LValues &newDefs = convert(&insn->def);
+   for (auto i = 0u; i < insn->def.num_components; ++i) {
+  mkOp1(OP_NOP, TYPE_NONE, newDefs[i], nullptr);
+   }
+   return true;
+}
+
 bool
 Converter::run()
 {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/22] nvir/nir: implement nir_intrinsic_load_input

2017-12-21 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 896a02af96..009f3df875 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -779,6 +779,36 @@ Converter::visit(nir_intrinsic_instr *insn)
   }
   break;
}
+   case nir_intrinsic_load_input: {
+  nir_const_value *const_offset = nir_src_as_const_value(insn->src[0]);
+  const DataType dType = getDType(insn);
+
+  if (!const_offset) {
+ ERROR("nir_intrinsic_load_input without const_value not 
supported!\n");
+ return false;
+  }
+
+  LValues &newDefs = convert(&insn->dest);
+  auto idx = nir_intrinsic_base(insn) + const_offset->u32[0];
+  uint8_t offset = insn->const_index[1];
+  for (auto i = 0u; i < insn->num_components; ++i) {
+ assert(i + offset < 4);
+ uint32_t address = info->in[idx].slot[i + offset];
+ Symbol *sym = mkSymbol(FILE_SHADER_INPUT, 0, TYPE_U32, address * 4);
+ switch(prog->getType()) {
+ case Program::TYPE_FRAGMENT: {
+operation op;
+auto mode = translateInterpMode(&info->in[idx], op);
+mkOp2(op, TYPE_F32, newDefs[i], sym, op == OP_PINTERP ? 
fp.position : nullptr)->setInterpolate(mode);
+break;
+ }
+ default:
+mkLoad(dType, newDefs[i], sym, nullptr);
+break;
+ }
+  }
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", 
nir_intrinsic_infos[insn->intrinsic].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 18/22] nvir/nir: implement loading system values

2017-12-21 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 412675fcb1..98cd73451e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -55,6 +55,7 @@ public:
LValues& convert(nir_alu_dest *);
BasicBlock* convert(nir_block *);
LValues& convert(nir_dest *);
+   SVSemantic convert(nir_intrinsic_op);
LValues& convert(nir_register *);
LValues& convert(nir_ssa_def *);
 
@@ -1161,6 +1162,14 @@ Converter::visit(nir_jump_instr *insn)
return true;
 }
 
+SVSemantic
+Converter::convert(nir_intrinsic_op type)
+{
+   unsigned name, index;
+   system_val_to_tgsi_semantic(nir_system_value_from_intrinsic(type), &name, 
&index);
+   return translateSysVal(name);
+}
+
 bool
 Converter::visit(nir_intrinsic_instr *insn)
 {
@@ -1267,6 +1276,23 @@ Converter::visit(nir_intrinsic_instr *insn)
   mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
   break;
}
+   case nir_intrinsic_load_base_vertex:
+   case nir_intrinsic_load_base_instance:
+   case nir_intrinsic_load_draw_id:
+   case nir_intrinsic_load_front_face:
+   case nir_intrinsic_load_instance_id:
+   case nir_intrinsic_load_sample_id:
+   case nir_intrinsic_load_vertex_id: {
+  if (insn->num_components > 1) {
+ ERROR("nir_intrinsic_front_face only with 1 component supported!\n");
+ assert(false);
+ return false;
+  }
+  Symbol *sym = mkSysVal(convert(insn->intrinsic), 0);
+  LValues &newDefs = convert(&insn->dest);
+  mkOp1(OP_RDSV, TYPE_U32, newDefs[0], sym);
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", 
nir_intrinsic_infos[insn->intrinsic].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 15/22] nvir/nir: run assignSlots

2017-12-21 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 427 +
 1 file changed, 427 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 009f3df875..d114fe28dd 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -64,6 +64,11 @@ public:
Value* getSrc(nir_src *, uint8_t);
Value* getSrc(nir_ssa_def *, uint8_t);
 
+   void setInterpolate(nv50_ir_varying *,
+   decltype(nir_variable().data.interpolation),
+   unsigned semantics);
+   bool assignSlots();
+
bool visit(nir_alu_instr *);
bool visit(nir_block *);
bool visit(nir_cf_node *);
@@ -471,6 +476,423 @@ Converter::getSrc(nir_ssa_def *src, uint8_t idx)
return (*it).second[idx];
 }
 
+static void
+vert_attrib_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
+{
+   if (slot >= VERT_ATTRIB_GENERIC0) {
+  *name = TGSI_SEMANTIC_GENERIC;
+  *index = slot - VERT_ATTRIB_GENERIC0;
+  return;
+   }
+
+   if (slot == VERT_ATTRIB_POINT_SIZE) {
+  ERROR("unknown vert attrib slot %u\n", slot);
+  assert(false);
+  return;
+   }
+
+   if (slot >= VERT_ATTRIB_TEX0) {
+  *name = TGSI_SEMANTIC_TEXCOORD;
+  *index = slot - VERT_ATTRIB_TEX0;
+  return;
+   }
+
+   switch (slot) {
+   case VERT_ATTRIB_COLOR0:
+  *name = TGSI_SEMANTIC_COLOR;
+  *index = 0;
+  break;
+   case VERT_ATTRIB_COLOR1:
+  *name = TGSI_SEMANTIC_COLOR;
+  *index = 1;
+  break;
+   case VERT_ATTRIB_EDGEFLAG:
+  *name = TGSI_SEMANTIC_EDGEFLAG;
+  *index = 0;
+  break;
+   case VERT_ATTRIB_FOG:
+  *name = TGSI_SEMANTIC_FOG;
+  *index = 0;
+  break;
+   case VERT_ATTRIB_NORMAL:
+  *name = TGSI_SEMANTIC_NORMAL;
+  *index = 0;
+  break;
+   case VERT_ATTRIB_POS:
+  *name = TGSI_SEMANTIC_POSITION;
+  *index = 0;
+  break;
+   default:
+  ERROR("unknown vert attrib slot %u\n", slot);
+  assert(false);
+   }
+}
+
+static void
+varying_slot_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
+{
+   if (slot >= VARYING_SLOT_VAR0) {
+  *name = TGSI_SEMANTIC_GENERIC;
+  *index = slot - VARYING_SLOT_VAR0;
+  return;
+   }
+
+   if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
+  *name = TGSI_SEMANTIC_TEXCOORD;
+  *index = slot - VARYING_SLOT_TEX0;
+  return;
+   }
+
+   switch (slot) {
+   case VARYING_SLOT_BFC0:
+  *name = TGSI_SEMANTIC_BCOLOR;
+  *index = 0;
+  break;
+   case VARYING_SLOT_BFC1:
+  *name = TGSI_SEMANTIC_BCOLOR;
+  *index = 1;
+  break;
+   case VARYING_SLOT_CLIP_DIST0:
+  *name = TGSI_SEMANTIC_CLIPDIST;
+  *index = 0;
+  break;
+   case VARYING_SLOT_CLIP_DIST1:
+  *name = TGSI_SEMANTIC_CLIPDIST;
+  *index = 1;
+  break;
+   case VARYING_SLOT_CLIP_VERTEX:
+  *name = TGSI_SEMANTIC_CLIPVERTEX;
+  *index = 0;
+  break;
+   case VARYING_SLOT_COL0:
+  *name = TGSI_SEMANTIC_COLOR;
+  *index = 0;
+  break;
+   case VARYING_SLOT_COL1:
+  *name = TGSI_SEMANTIC_COLOR;
+  *index = 1;
+  break;
+   case VARYING_SLOT_EDGE:
+  *name = TGSI_SEMANTIC_EDGEFLAG;
+  *index = 0;
+  break;
+   case VARYING_SLOT_FACE:
+  *name = TGSI_SEMANTIC_FACE;
+  *index = 0;
+  break;
+   case VARYING_SLOT_FOGC:
+  *name = TGSI_SEMANTIC_FOG;
+  *index = 0;
+  break;
+   case VARYING_SLOT_LAYER:
+  *name = TGSI_SEMANTIC_LAYER;
+  *index = 0;
+  break;
+   case VARYING_SLOT_PNTC:
+  *name = TGSI_SEMANTIC_PCOORD;
+  *index = 0;
+  break;
+   case VARYING_SLOT_POS:
+  *name = TGSI_SEMANTIC_POSITION;
+  *index = 0;
+  break;
+   case VARYING_SLOT_PRIMITIVE_ID:
+  *name = TGSI_SEMANTIC_PRIMID;
+  *index = 0;
+  break;
+   case VARYING_SLOT_PSIZ:
+  *name = TGSI_SEMANTIC_PSIZE;
+  *index = 0;
+  break;
+   case VARYING_SLOT_VIEWPORT:
+  *name = TGSI_SEMANTIC_VIEWPORT_INDEX;
+  *index = 0;
+  break;
+   default:
+  ERROR("unknown varying slot %u\n", slot);
+  assert(false);
+   }
+}
+
+static void
+frag_result_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
+{
+   if (slot >= FRAG_RESULT_DATA0) {
+  *name = TGSI_SEMANTIC_COLOR;
+  *index = slot - FRAG_RESULT_COLOR - 2; // intentional
+  return;
+   }
+
+   switch (slot) {
+   case FRAG_RESULT_COLOR:
+  *name = TGSI_SEMANTIC_COLOR;
+  *index = 0;
+  break;
+   case FRAG_RESULT_DEPTH:
+  *name = TGSI_SEMANTIC_POSITION;
+  *index = 0;
+  break;
+   default:
+  ERROR("unknown frag result slot %u\n", slot);
+  assert(false);
+   }
+}
+
+static void
+system_val_to_tgsi_semantic(unsigned val, unsigned *name, unsigned *index)
+{
+   *index = 0;
+   switch (val) {
+   /* Vertex 

[Mesa-dev] [PATCH 10/22] nvir/nir: add skeleton for nir_intrinsic_instr

2017-12-21 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp| 17 +
 1 file changed, 17 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index f13699a393..fe11280537 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -69,6 +69,7 @@ public:
bool visit(nir_function *);
bool visit(nir_if *);
bool visit(nir_instr *);
+   bool visit(nir_intrinsic_instr *);
bool visit(nir_jump_instr *);
bool visit(nir_load_const_instr*);
bool visit(nir_loop *);
@@ -485,6 +486,10 @@ bool
 Converter::visit(nir_instr *insn)
 {
switch (insn->type) {
+   case nir_instr_type_intrinsic:
+  if (!visit(nir_instr_as_intrinsic(insn)))
+ return false;
+  break;
case nir_instr_type_load_const:
   if (!visit(nir_instr_as_load_const(insn)))
  return false;
@@ -542,6 +547,18 @@ Converter::visit(nir_jump_instr *insn)
return true;
 }
 
+bool
+Converter::visit(nir_intrinsic_instr *insn)
+{
+   switch (insn->intrinsic) {
+   default:
+  ERROR("unknown nir_intrinsic_op %s\n", 
nir_intrinsic_infos[insn->intrinsic].name);
+  return false;
+   }
+
+   return true;
+}
+
 bool
 Converter::run()
 {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 16/22] nvir/nir: parse NIR shader info

2017-12-21 Thread Karol Herbst
TODO: this is far from complete, but at least it lets the basic things
work already.

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index d114fe28dd..cf324a3e8f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -68,6 +68,7 @@ public:
decltype(nir_variable().data.interpolation),
unsigned semantics);
bool assignSlots();
+   bool parseNIR();
 
bool visit(nir_alu_instr *);
bool visit(nir_block *);
@@ -893,6 +894,26 @@ bool Converter::assignSlots() {
return info->assignSlots(info) == 0;
 }
 
+bool
+Converter::parseNIR()
+{
+   info->io.clipDistances = nir->info.clip_distance_array_size;
+   info->io.cullDistances = nir->info.cull_distance_array_size;
+
+   switch(prog->getType()) {
+   case Program::TYPE_FRAGMENT:
+  info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
+  break;
+   case Program::TYPE_GEOMETRY:
+  info->prop.gp.instanceCount = nir->info.gs.invocations;
+  break;
+   default:
+  break;
+   }
+
+   return true;
+}
+
 bool
 Converter::visit(nir_function *function)
 {
@@ -1631,6 +1652,11 @@ Converter::run()
if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
   nir_print_shader(nir, stderr);
 
+   if (!parseNIR()) {
+  ERROR("Couldn't prase NIR!\n");
+  return false;
+   }
+
if (!assignSlots()) {
   ERROR("Couldn't assign slots!\n");
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/22] nvir/nir: implement nir_alu_instr handling

2017-12-21 Thread Karol Herbst
TODO: move lowering code somewhere else. We do the same thing as from_tgsi for
a few ops and we could move that down a bit so the input IR doesn't have to
deal with a few things, like slct and min/max with 64bit dest types.

TODO: move DEFAULT_HANDLER into its own function

TODO: check if some code duplication can be eliminated through templates
Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 524 -
 1 file changed, 523 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index fe11280537..d2b2236c17 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -64,6 +64,7 @@ public:
Value* getSrc(nir_src *, uint8_t);
Value* getSrc(nir_ssa_def *, uint8_t);
 
+   bool visit(nir_alu_instr *);
bool visit(nir_block *);
bool visit(nir_cf_node *);
bool visit(nir_function *);
@@ -86,6 +87,10 @@ public:
std::vector getSTypes(nir_alu_instr*);
DataType getSType(nir_src&, bool isFloat, bool isSigned);
 
+   operation getOperation(nir_op);
+   operation preOperationNeeded(nir_op);
+   int getSubOp(nir_op);
+   CondCode getCondCode(nir_op);
 private:
nir_shader *nir;
 
@@ -95,6 +100,7 @@ private:
unsigned int curLoopDepth;
 
BasicBlock *exit;
+   Value *zero;
 
union {
   struct {
@@ -106,7 +112,10 @@ private:
 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
: ConverterCommon(prog, info),
  nir(nir),
- curLoopDepth(0) {}
+ curLoopDepth(0)
+{
+   zero = mkImm((uint32_t)0);
+}
 
 BasicBlock *
 Converter::convert(nir_block *block)
@@ -224,6 +233,157 @@ Converter::getSType(nir_src &src, bool isFloat, bool 
isSigned)
return typeOfSize(bitSize / 8, isFloat, isSigned);
 }
 
+#define CASE_OP(ni, no) \
+   case nir_op_f ## ni : \
+   case nir_op_i ## ni : \
+  return OP_ ## no
+#define CASE_OP3(ni, no) \
+   case nir_op_f ## ni : \
+   case nir_op_i ## ni : \
+   case nir_op_u ## ni : \
+  return OP_ ## no
+#define CASE_OPIU(ni, no) \
+   case nir_op_i ## ni : \
+   case nir_op_u ## ni : \
+  return OP_ ## no
+
+operation
+Converter::getOperation(nir_op op)
+{
+   switch (op) {
+   // basic ops with float and int variants
+   CASE_OP(abs, ABS);
+   CASE_OP(add, ADD);
+   CASE_OP(and, AND);
+   CASE_OP3(div, DIV);
+   CASE_OPIU(find_msb, BFIND);
+   CASE_OP3(max, MAX);
+   CASE_OP3(min, MIN);
+   CASE_OP3(mod, MOD);
+   CASE_OP(mul, MUL);
+   CASE_OPIU(mul_high, MUL);
+   CASE_OP(neg, NEG);
+   CASE_OP(not, NOT);
+   CASE_OP(or, OR);
+   CASE_OP(eq, SET);
+   CASE_OP3(ge, SET);
+   CASE_OP3(lt, SET);
+   CASE_OP(ne, SET);
+   CASE_OPIU(shr, SHR);
+   CASE_OP(sub, SUB);
+   CASE_OP(xor, XOR);
+   case nir_op_fceil:
+  return OP_CEIL;
+   case nir_op_fcos:
+  return OP_COS;
+   case nir_op_f2f32:
+   case nir_op_f2f64:
+   case nir_op_f2i32:
+   case nir_op_f2i64:
+   case nir_op_f2u32:
+   case nir_op_f2u64:
+   case nir_op_i2f32:
+   case nir_op_i2f64:
+   case nir_op_u2f32:
+   case nir_op_u2f64:
+  return OP_CVT;
+   case nir_op_fddx:
+  return OP_DFDX;
+   case nir_op_fddy:
+  return OP_DFDY;
+   case nir_op_fexp2:
+  return OP_EX2;
+   case nir_op_ffloor:
+  return OP_FLOOR;
+   case nir_op_ffma:
+  return OP_FMA;
+   case nir_op_flog2:
+  return OP_LG2;
+   case nir_op_frcp:
+  return OP_RCP;
+   case nir_op_frsq:
+  return OP_RSQ;
+   case nir_op_fsat:
+  return OP_SAT;
+   case nir_op_ishl:
+  return OP_SHL;
+   case nir_op_fsin:
+  return OP_SIN;
+   case nir_op_fsqrt:
+  return OP_SQRT;
+   case nir_op_ftrunc:
+  return OP_TRUNC;
+   default:
+  ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
+  assert(false);
+  return OP_NOP;
+   }
+}
+#undef CASE_OP
+#undef CASE_OP3
+#undef CASE_OPIU
+
+operation
+Converter::preOperationNeeded(nir_op op)
+{
+   switch (op) {
+   case nir_op_fcos:
+   case nir_op_fsin:
+  return OP_PRESIN;
+   default:
+  return OP_NOP;
+   }
+}
+
+#define CASE_OPIU(ni, no) \
+   case nir_op_i ## ni : \
+   case nir_op_u ## ni : \
+  return NV50_IR_SUBOP_ ## no
+int
+Converter::getSubOp(nir_op op)
+{
+   switch (op) {
+   CASE_OPIU(mul_high, MUL_HIGH);
+   default:
+  return 0;
+   }
+}
+#undef CASE_OPIU
+
+#define CASE_OP(ni) \
+   case nir_op_f ## ni : \
+   case nir_op_i ## ni
+#define CASE_OP3(ni) \
+   case nir_op_f ## ni : \
+   case nir_op_i ## ni : \
+   case nir_op_u ## ni
+CondCode
+Converter::getCondCode(nir_op op)
+{
+   switch (op) {
+   CASE_OP(eq):
+  return CC_EQ;
+   CASE_OP3(ge):
+  return CC_GE;
+   CASE_OP3(lt):
+  return CC_LT;
+   CASE_OP(ne):
+  return CC_NEU;
+   default:
+  ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);
+  assert(false);
+  return CC_FL;
+   }
+}
+#undef CASE_O

[Mesa-dev] [PATCH 09/22] nvir/nir: implement nir_load_const_instr

2017-12-21 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 20 
 1 file changed, 20 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 5f0746055a..f13699a393 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -70,6 +70,7 @@ public:
bool visit(nir_if *);
bool visit(nir_instr *);
bool visit(nir_jump_instr *);
+   bool visit(nir_load_const_instr*);
bool visit(nir_loop *);
 
bool run();
@@ -484,6 +485,10 @@ bool
 Converter::visit(nir_instr *insn)
 {
switch (insn->type) {
+   case nir_instr_type_load_const:
+  if (!visit(nir_instr_as_load_const(insn)))
+ return false;
+  break;
case nir_instr_type_jump:
   if (!visit(nir_instr_as_jump(insn)))
  return false;
@@ -495,6 +500,21 @@ Converter::visit(nir_instr *insn)
return true;
 }
 
+bool
+Converter::visit(nir_load_const_instr *insn)
+{
+   assert(insn->def.bit_size <= 64);
+
+   LValues &newDefs = convert(&insn->def);
+   for (int i = 0; i < insn->def.num_components; i++) {
+  if (insn->def.bit_size > 32)
+ loadImm(newDefs[i], insn->value.u64[i]);
+  else
+ loadImm(newDefs[i], insn->value.u32[i]);
+   }
+   return true;
+}
+
 bool
 Converter::visit(nir_jump_instr *insn)
 {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/22] nvir/nir: implement nir_intrinsic_store_output

2017-12-21 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 43 ++
 1 file changed, 43 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 9ccd84bc20..896a02af96 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -487,6 +487,11 @@ Converter::visit(nir_function *function)
 
setPosition(entry, true);
 
+   if (info->io.genUserClip > 0) {
+  for (int c = 0; c < 4; ++c)
+ clipVtx[c] = getScratch();
+   }
+
switch (prog->getType()) {
case Program::TYPE_TESSELLATION_CONTROL:
   outBase = mkOp2v(
@@ -513,6 +518,8 @@ Converter::visit(nir_function *function)
bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
setPosition(exit, true);
 
+   if (info->io.genUserClip > 0)
+  handleUserClipPlanes();
// TODO: for non main function this needs to be a OP_RETURN
mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
return true;
@@ -736,6 +743,42 @@ Converter::visit(nir_intrinsic_instr *insn)
   }
   break;
}
+   case nir_intrinsic_store_output: {
+  nir_const_value *const_offset = nir_src_as_const_value(insn->src[1]);
+  auto idx = nir_intrinsic_base(insn) + const_offset->u32[0];
+  uint8_t offset = insn->const_index[2];
+  for (auto i = 0u; i < insn->num_components; ++i) {
+ Value *src = getSrc(&insn->src[0], i);
+ switch (prog->getType()) {
+ case Program::TYPE_FRAGMENT: {
+if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
+   // TGSI uses a different interface than NIR, TGSI stores that 
value in the z component, NIR in X
+   offset += 2;
+   src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
+}
+break;
+ }
+ case Program::TYPE_VERTEX: {
+if (info->io.genUserClip > 0) {
+   mkMov(clipVtx[i], src);
+   src = clipVtx[i];
+}
+break;
+ }
+ default:
+break;
+ }
+
+ assert(i + offset < 4);
+ uint32_t address = info->out[idx].slot[i + offset];
+ mkStore(OP_EXPORT,
+ TYPE_F32,
+ mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_U32, address * 4),
+ nullptr,
+ src);
+  }
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", 
nir_intrinsic_infos[insn->intrinsic].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/22] nvir/nir: implement nir_intrinsic_load_uniform

2017-12-21 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp| 21 +
 1 file changed, 21 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index d2b2236c17..9ccd84bc20 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -715,6 +715,27 @@ bool
 Converter::visit(nir_intrinsic_instr *insn)
 {
switch (insn->intrinsic) {
+   case nir_intrinsic_load_uniform: {
+  LValues &newDefs = convert(&insn->dest);
+  nir_const_value *const_offset = nir_src_as_const_value(insn->src[0]);
+  Value *indirect = nullptr;
+  auto offset = nir_intrinsic_base(insn);
+  const DataType dType = getDType(insn);
+  auto dTypeSize = std::max(4u, typeSizeof(dType));
+
+  if (const_offset)
+ offset += const_offset->u32[0];
+  else {
+ // because nir we have to multiply with 16
+ indirect = getSrc(&insn->src[0], 0);
+ mkOp2(OP_MUL, TYPE_U32, indirect, indirect, loadImm(getScratch(), 
16));
+  }
+
+  for (auto i = 0; i < insn->num_components; ++i) {
+ mkLoad(dType, newDefs[i], mkSymbol(FILE_MEMORY_CONST, 0, TYPE_U32, 
(offset * 4 + i * (dTypeSize / 4)) * 4), nullptr)->setIndirect(0, 0, indirect);
+  }
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", 
nir_intrinsic_infos[insn->intrinsic].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/22] nvir/nir: add nir type helper functions

2017-12-21 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 116 +
 1 file changed, 116 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 6516bd2d8f..c78cd7031d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -46,6 +46,7 @@ class Converter : public ConverterCommon
 public:
typedef std::vector LValues;
typedef decltype(nir_ssa_def().index) NirSSADefIdx;
+   typedef decltype(nir_ssa_def().bit_size) NirSSADefBitSize;
typedef std::unordered_map NirDefMap;
 
Converter(Program *, nir_shader *, nv50_ir_prog_info *);
@@ -62,6 +63,17 @@ public:
Value* getSrc(nir_ssa_def *, uint8_t);
 
bool run();
+
+   bool isFloatType(nir_alu_type);
+   bool isSignedType(nir_alu_type);
+   bool isResultFloat(nir_op);
+   bool isResultSigned(nir_op);
+   DataType getDType(nir_alu_instr*);
+   DataType getDType(nir_intrinsic_instr*);
+   DataType getDType(nir_op, NirSSADefBitSize);
+   std::vector getSTypes(nir_alu_instr*);
+   DataType getSType(nir_src&, bool isFloat, bool isSigned);
+
 private:
nir_shader *nir;
 
@@ -73,6 +85,110 @@ Converter::Converter(Program *prog, nir_shader *nir, 
nv50_ir_prog_info *info)
: ConverterCommon(prog, info),
  nir(nir) {}
 
+bool
+Converter::isFloatType(nir_alu_type type)
+{
+   return !!(nir_alu_type_get_base_type(type) == nir_type_float);
+}
+
+bool
+Converter::isSignedType(nir_alu_type type)
+{
+   return !!(nir_alu_type_get_base_type(type) == nir_type_int);
+}
+
+bool
+Converter::isResultFloat(nir_op op)
+{
+   const nir_op_info &info = nir_op_infos[op];
+   if (info.output_type != nir_type_invalid)
+  return isFloatType(info.output_type);
+
+   switch (op) {
+   default:
+  ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);
+  assert(false);
+  return true;
+   }
+}
+
+bool
+Converter::isResultSigned(nir_op op)
+{
+   const nir_op_info &info = nir_op_infos[op];
+   if (info.output_type != nir_type_invalid)
+  return isSignedType(info.output_type);
+
+   switch (op) {
+   default:
+  ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);
+  assert(false);
+  return true;
+   }
+}
+
+DataType
+Converter::getDType(nir_alu_instr *insn)
+{
+   if (insn->dest.dest.is_ssa)
+  return getDType(insn->op, insn->dest.dest.ssa.bit_size);
+   else
+  return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);
+}
+
+DataType
+Converter::getDType(nir_intrinsic_instr *insn)
+{
+   if (insn->dest.is_ssa)
+  return typeOfSize(insn->dest.ssa.bit_size / 8, false, false);
+   else
+  return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, false);
+}
+
+DataType
+Converter::getDType(nir_op op, Converter::NirSSADefBitSize bitSize)
+{
+   DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), 
isResultSigned(op));
+   if (ty == TYPE_NONE) {
+  ERROR("couldn't get Type for op %s with bitSize %u\n", 
nir_op_infos[op].name, bitSize);
+  assert(false);
+   }
+   return ty;
+}
+
+std::vector
+Converter::getSTypes(nir_alu_instr *insn)
+{
+   const nir_op_info &info = nir_op_infos[insn->op];
+   std::vector res(info.num_inputs);
+
+   for (auto i = 0u; i < info.num_inputs; ++i) {
+  if (info.input_types[i] != nir_type_invalid)
+ res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), 
isSignedType(info.input_types[i]));
+  else switch (insn->op) {
+ default:
+ERROR("getSType not implemented for %s idx %u\n", info.name, i);
+assert(false);
+res[i] = TYPE_NONE;
+break;
+  }
+   }
+
+   return res;
+}
+
+DataType
+Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
+{
+   NirSSADefBitSize bitSize;
+   if (src.is_ssa)
+  bitSize = src.ssa->bit_size;
+   else
+  bitSize = src.reg.reg->bit_size;
+
+   return typeOfSize(bitSize / 8, isFloat, isSigned);
+}
+
 Converter::LValues&
 Converter::convert(nir_dest *dest)
 {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/22] nvir/nir: track defs and provide easy access functions

2017-12-21 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 108 +
 1 file changed, 108 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 4833da5914..6516bd2d8f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -28,6 +28,9 @@
 #include "codegen/nv50_ir_from_common.h"
 #include "codegen/nv50_ir_util.h"
 
+#include 
+#include 
+
 static int
 type_size(const struct glsl_type *type)
 {
@@ -41,17 +44,122 @@ using namespace nv50_ir;
 class Converter : public ConverterCommon
 {
 public:
+   typedef std::vector LValues;
+   typedef decltype(nir_ssa_def().index) NirSSADefIdx;
+   typedef std::unordered_map NirDefMap;
+
Converter(Program *, nir_shader *, nv50_ir_prog_info *);
 
+   LValues& convert(nir_alu_dest *);
+   LValues& convert(nir_dest *);
+   LValues& convert(nir_register *);
+   LValues& convert(nir_ssa_def *);
+
+   // nir_alu_src needs special handling due to neg and abs modifiers
+   Value* getSrc(nir_alu_src *, uint8_t component = 0);
+   Value* getSrc(nir_register *, uint8_t);
+   Value* getSrc(nir_src *, uint8_t);
+   Value* getSrc(nir_ssa_def *, uint8_t);
+
bool run();
 private:
nir_shader *nir;
+
+   NirDefMap ssaDefs;
+   NirDefMap regDefs;
 };
 
 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
: ConverterCommon(prog, info),
  nir(nir) {}
 
+Converter::LValues&
+Converter::convert(nir_dest *dest)
+{
+   if (dest->is_ssa)
+  return convert(&dest->ssa);
+   if (dest->reg.indirect) {
+  ERROR("no support for indirects.");
+  assert(false);
+   }
+   return convert(dest->reg.reg);
+}
+
+Converter::LValues&
+Converter::convert(nir_register *reg)
+{
+   NirDefMap::iterator it = regDefs.find(reg->index);
+   if (it != regDefs.end())
+  return (*it).second;
+
+   LValues newDef(reg->num_components);
+   for (auto i = 0u; i < reg->num_components; i++)
+  newDef[i] = getScratch(reg->bit_size / 8);
+   return regDefs[reg->index] = newDef;
+}
+
+Converter::LValues&
+Converter::convert(nir_ssa_def *def)
+{
+   NirDefMap::iterator it = ssaDefs.find(def->index);
+   if (it != ssaDefs.end())
+  return (*it).second;
+
+   LValues newDef(def->num_components);
+   for (auto i = 0; i < def->num_components; i++)
+  newDef[i] = getScratch(def->bit_size / 8);
+   return ssaDefs[def->index] = newDef;
+}
+
+Value*
+Converter::getSrc(nir_alu_src *src, uint8_t component)
+{
+   if (src->abs || src->negate) {
+  ERROR("modifiers currently not supported on nir_alu_src\n");
+  assert(false);
+   }
+   return getSrc(&src->src, src->swizzle[component]);
+}
+
+Value*
+Converter::getSrc(nir_register *reg, uint8_t idx)
+{
+   NirDefMap::iterator it = regDefs.find(reg->index);
+   if (it == regDefs.end()) {
+  ERROR("Register %u not found\n", reg->index);
+  assert(false);
+  return nullptr;
+   }
+   return (*it).second[idx];
+}
+
+Value*
+Converter::getSrc(nir_src *src, uint8_t idx)
+{
+   if (src->is_ssa)
+  return getSrc(src->ssa, idx);
+
+   if (src->reg.indirect) {
+  ERROR("no support for indirects.");
+  assert(false);
+  return nullptr;
+   }
+
+   return getSrc(src->reg.reg, idx);
+}
+
+Value*
+Converter::getSrc(nir_ssa_def *src, uint8_t idx)
+{
+   NirDefMap::iterator it = ssaDefs.find(src->index);
+   if (it == ssaDefs.end()) {
+  ERROR("SSA value %u not found\n", src->index);
+  assert(false);
+  return nullptr;
+   }
+   return (*it).second[idx];
+}
+
 bool
 Converter::run()
 {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/22] nvir/nir: implement CFG handling

2017-12-21 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 255 -
 1 file changed, 253 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index c78cd7031d..5f0746055a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -48,10 +48,12 @@ public:
typedef decltype(nir_ssa_def().index) NirSSADefIdx;
typedef decltype(nir_ssa_def().bit_size) NirSSADefBitSize;
typedef std::unordered_map NirDefMap;
+   typedef std::unordered_map 
NirBlockMap;
 
Converter(Program *, nir_shader *, nv50_ir_prog_info *);
 
LValues& convert(nir_alu_dest *);
+   BasicBlock* convert(nir_block *);
LValues& convert(nir_dest *);
LValues& convert(nir_register *);
LValues& convert(nir_ssa_def *);
@@ -62,6 +64,14 @@ public:
Value* getSrc(nir_src *, uint8_t);
Value* getSrc(nir_ssa_def *, uint8_t);
 
+   bool visit(nir_block *);
+   bool visit(nir_cf_node *);
+   bool visit(nir_function *);
+   bool visit(nir_if *);
+   bool visit(nir_instr *);
+   bool visit(nir_jump_instr *);
+   bool visit(nir_loop *);
+
bool run();
 
bool isFloatType(nir_alu_type);
@@ -79,11 +89,34 @@ private:
 
NirDefMap ssaDefs;
NirDefMap regDefs;
+   NirBlockMap blocks;
+   unsigned int curLoopDepth;
+
+   BasicBlock *exit;
+
+   union {
+  struct {
+ Value *position;
+  } fp;
+   };
 };
 
 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
: ConverterCommon(prog, info),
- nir(nir) {}
+ nir(nir),
+ curLoopDepth(0) {}
+
+BasicBlock *
+Converter::convert(nir_block *block)
+{
+   NirBlockMap::iterator it = blocks.find(block->index);
+   if (it != blocks.end())
+  return (*it).second;
+
+   BasicBlock *bb = new BasicBlock(func);
+   blocks[block->index] = bb;
+   return bb;
+}
 
 bool
 Converter::isFloatType(nir_alu_type type)
@@ -276,6 +309,219 @@ Converter::getSrc(nir_ssa_def *src, uint8_t idx)
return (*it).second[idx];
 }
 
+bool
+Converter::visit(nir_function *function)
+{
+   // we only support emiting the main function for now
+   assert(!strcmp(function->name, "main"));
+   assert(function->impl);
+
+   // usually the blocks will set everything up, but main is special
+   BasicBlock *entry = new BasicBlock(prog->main);
+   exit = new BasicBlock(prog->main);
+   blocks[nir_start_block(function->impl)->index] = entry;
+   prog->main->setEntry(entry);
+   prog->main->setExit(exit);
+
+   setPosition(entry, true);
+
+   switch (prog->getType()) {
+   case Program::TYPE_TESSELLATION_CONTROL:
+  outBase = mkOp2v(
+ OP_SUB, TYPE_U32, getSSA(),
+ mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
+ mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
+  break;
+   case Program::TYPE_FRAGMENT: {
+  Symbol *sv = mkSysVal(SV_POSITION, 3);
+  fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
+  fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
+  break;
+   }
+   default:
+  break;
+   }
+
+   nir_index_ssa_defs(function->impl);
+   foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
+  if (!visit(node))
+ return false;
+   }
+
+   bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
+   setPosition(exit, true);
+
+   // TODO: for non main function this needs to be a OP_RETURN
+   mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
+   return true;
+}
+
+bool
+Converter::visit(nir_cf_node *node)
+{
+   switch (node->type) {
+   case nir_cf_node_block:
+  if (!visit(nir_cf_node_as_block(node)))
+ return false;
+  break;
+   case nir_cf_node_if:
+  if (!visit(nir_cf_node_as_if(node)))
+ return false;
+  break;
+   case nir_cf_node_loop:
+  if (!visit(nir_cf_node_as_loop(node)))
+ return false;
+  break;
+   default:
+  ERROR("unknown nir_cf_node type %u\n", node->type);
+  return false;
+   }
+   return true;
+}
+
+bool
+Converter::visit(nir_block *block)
+{
+   BasicBlock *bb = convert(block);
+
+   setPosition(bb, true);
+   nir_foreach_instr(insn, block) {
+  if (!visit(insn))
+ return false;
+   }
+   return true;
+}
+
+bool
+Converter::visit(nir_if *nif)
+{
+   DataType sType = getSType(nif->condition, false, false);
+   Value *src = getSrc(&nif->condition, 0);
+
+   nir_block *lastThen = nir_if_last_then_block(nif);
+   nir_block *lastElse = nir_if_last_else_block(nif);
+
+   assert(!lastThen->successors[1]);
+   assert(!lastElse->successors[1]);
+
+   BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
+   BasicBlock *elseBB = convert(nir_if_first_else_block(nif));
+
+   bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
+   bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
+
+   // we only insert joinats, if both nodes end up at the

[Mesa-dev] [PATCH 05/22] nvir/nir: run some passes to make the conversion easier

2017-12-21 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 37 ++
 1 file changed, 37 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index bfaeb1ffd5..4833da5914 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -28,6 +28,12 @@
 #include "codegen/nv50_ir_from_common.h"
 #include "codegen/nv50_ir_util.h"
 
+static int
+type_size(const struct glsl_type *type)
+{
+   return glsl_count_attribute_slots(type, false);
+}
+
 namespace {
 
 using namespace nv50_ir;
@@ -49,6 +55,37 @@ Converter::Converter(Program *prog, nir_shader *nir, 
nv50_ir_prog_info *info)
 bool
 Converter::run()
 {
+   bool progress;
+
+   if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
+  nir_print_shader(nir, stderr);
+
+   // converts intrinsic load_var to intrinsic load_uniform
+   NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, 
(nir_lower_io_options)0);
+
+   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
+   NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
+
+   do {
+  progress = false;
+  // we need this to_ssa otherwise the later opts are less effective
+  NIR_PASS_V(nir, nir_lower_vars_to_ssa);
+  NIR_PASS(progress, nir, nir_lower_alu_to_scalar);
+  NIR_PASS(progress, nir, nir_lower_phis_to_scalar);
+  NIR_PASS(progress, nir, nir_copy_prop);
+  NIR_PASS(progress, nir, nir_opt_dce);
+  NIR_PASS(progress, nir, nir_opt_dead_cf);
+   } while (progress);
+
+   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_local);
+   NIR_PASS_V(nir, nir_convert_from_ssa, true);
+
+   /* Garbage collect dead instructions */
+   nir_sweep(nir);
+
+   if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
+  nir_print_shader(nir, stderr);
+
return false;
 }
 
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/22] nvc0/debug: add env var to make nir default

2017-12-21 Thread Karol Herbst
can't be turned on for release builds for now.

Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 90fc01ecd0..83884cb099 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -353,9 +353,15 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen,
   return 0;
}
 
+#ifdef DEBUG
+   long prefer_ir = debug_get_num_option("NV50_PROG_USE_NIR", 0) ? 
PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI;
+#else
+   long prefer_ir = PIPE_SHADER_IR_TGSI;
+#endif
+
switch (param) {
case PIPE_SHADER_CAP_PREFERRED_IR:
-  return PIPE_SHADER_IR_TGSI;
+  return prefer_ir;
case PIPE_SHADER_CAP_SUPPORTED_IRS:
   return 1 << PIPE_SHADER_IR_TGSI |
  1 << PIPE_SHADER_IR_NIR;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/22] nvc0: add support for NIR

2017-12-21 Thread Karol Herbst
not all those nir options are actually required, it just made the work a
little easier.

TODO:
there is a little memory leak, because the nir shader is duplicated twice.
this has to be done though, we just need to clean it up properly

Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/Makefile.sources   |  1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir.cpp|  3 +
 src/gallium/drivers/nouveau/codegen/nv50_ir.h  |  1 +
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 69 ++
 src/gallium/drivers/nouveau/meson.build| 10 ++--
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c| 16 -
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 45 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_state.c  | 24 +++-
 8 files changed, 161 insertions(+), 8 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp

diff --git a/src/gallium/drivers/nouveau/Makefile.sources 
b/src/gallium/drivers/nouveau/Makefile.sources
index fee5e59522..e31413a2f3 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -117,6 +117,7 @@ NV50_CODEGEN_SOURCES := \
codegen/nv50_ir_emit_nv50.cpp \
codegen/nv50_ir_from_common.cpp \
codegen/nv50_ir_from_common.h \
+   codegen/nv50_ir_from_nir.cpp \
codegen/nv50_ir_from_tgsi.cpp \
codegen/nv50_ir_graph.cpp \
codegen/nv50_ir_graph.h \
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index 6f12df70a1..b95ba8e4e9 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -1231,6 +1231,9 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
prog->optLevel = info->optLevel;
 
switch (info->bin.sourceRep) {
+   case PIPE_SHADER_IR_NIR:
+  ret = prog->makeFromNIR(info) ? 0 : -2;
+  break;
case PIPE_SHADER_IR_TGSI:
   ret = prog->makeFromTGSI(info) ? 0 : -2;
   break;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index 10b872e0dd..ac7972f36e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -1268,6 +1268,7 @@ public:
inline void del(Function *fn, int& id) { allFuncs.remove(id); }
inline void add(Value *rval, int& id) { allRValues.insert(rval, id); }
 
+   bool makeFromNIR(struct nv50_ir_prog_info *);
bool makeFromTGSI(struct nv50_ir_prog_info *);
bool convertToSSA();
bool optimizeSSA(int level);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
new file mode 100644
index 00..bfaeb1ffd5
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2017 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Karol Herbst 
+ */
+
+#include "compiler/nir/nir.h"
+
+#include "codegen/nv50_ir.h"
+#include "codegen/nv50_ir_from_common.h"
+#include "codegen/nv50_ir_util.h"
+
+namespace {
+
+using namespace nv50_ir;
+
+class Converter : public ConverterCommon
+{
+public:
+   Converter(Program *, nir_shader *, nv50_ir_prog_info *);
+
+   bool run();
+private:
+   nir_shader *nir;
+};
+
+Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
+   : ConverterCommon(prog, info),
+ nir(nir) {}
+
+bool
+Converter::run()
+{
+   return false;
+}
+
+} // unnamed namespace
+
+namespace nv50_ir {
+
+bool
+Program::makeFromNIR(struct nv50_ir_prog_info *info)
+{
+   nir_shader *nir = (nir_shader*)info->bin.source;
+   Converter converter(this, nir, info);
+   bool result = converter.run();
+   tlsSize = info->bin.tlsSpace;
+   return result;
+}
+
+} // namespace nv50_ir
diff --git a/src/gallium/drivers/nouveau/me

[Mesa-dev] [PATCH 02/22] nvir: move common converter code in base class

2017-12-21 Thread Karol Herbst
this is more or less a todo list of things I should move elsewhere. Not all of
it should be actually moved, but...

Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/Makefile.sources   |   2 +
 .../nouveau/codegen/nv50_ir_from_common.cpp| 145 
 .../drivers/nouveau/codegen/nv50_ir_from_common.h  |  59 +
 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 146 +
 src/gallium/drivers/nouveau/meson.build|   2 +
 5 files changed, 212 insertions(+), 142 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.h

diff --git a/src/gallium/drivers/nouveau/Makefile.sources 
b/src/gallium/drivers/nouveau/Makefile.sources
index 65f08c7d8d..fee5e59522 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -115,6 +115,8 @@ NV50_CODEGEN_SOURCES := \
codegen/nv50_ir_build_util.h \
codegen/nv50_ir_driver.h \
codegen/nv50_ir_emit_nv50.cpp \
+   codegen/nv50_ir_from_common.cpp \
+   codegen/nv50_ir_from_common.h \
codegen/nv50_ir_from_tgsi.cpp \
codegen/nv50_ir_graph.cpp \
codegen/nv50_ir_graph.h \
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
new file mode 100644
index 00..aa5f52fe81
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2011 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "codegen/nv50_ir_from_common.h"
+
+namespace nv50_ir {
+
+ConverterCommon::ConverterCommon(Program *prog, nv50_ir_prog_info *info)
+   :  BuildUtil(prog),
+  info(info) {}
+
+ConverterCommon::Subroutine *
+ConverterCommon::getSubroutine(unsigned ip)
+{
+   std::map::iterator it = sub.map.find(ip);
+
+   if (it == sub.map.end())
+  it = sub.map.insert(std::make_pair(
+  ip, Subroutine(new Function(prog, "SUB", ip.first;
+
+   return &it->second;
+}
+
+ConverterCommon::Subroutine *
+ConverterCommon::getSubroutine(Function *f)
+{
+   unsigned ip = f->getLabel();
+   std::map::iterator it = sub.map.find(ip);
+
+   if (it == sub.map.end())
+  it = sub.map.insert(std::make_pair(ip, Subroutine(f))).first;
+
+   return &it->second;
+}
+
+uint8_t
+ConverterCommon::translateInterpMode(const nv50_ir_varying *var, operation& op)
+{
+   uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
+
+   if (var->flat)
+  mode = NV50_IR_INTERP_FLAT;
+   else
+   if (var->linear)
+  mode = NV50_IR_INTERP_LINEAR;
+   else
+   if (var->sc)
+  mode = NV50_IR_INTERP_SC;
+
+   op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC)
+  ? OP_PINTERP : OP_LINTERP;
+
+   if (var->centroid)
+  mode |= NV50_IR_INTERP_CENTROID;
+
+   return mode;
+}
+
+void
+ConverterCommon::handleUserClipPlanes()
+{
+   Value *res[8];
+   int n, i, c;
+
+   for (c = 0; c < 4; ++c) {
+  for (i = 0; i < info->io.genUserClip; ++i) {
+ Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot,
+TYPE_F32, info->io.ucpBase + i * 16 + c * 4);
+ Value *ucp = mkLoadv(TYPE_F32, sym, NULL);
+ if (c == 0)
+res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp);
+ else
+mkOp3(OP_MAD, TYPE_F32, res[i], clipVtx[c], ucp, res[i]);
+  }
+   }
+
+   const int first = info->numOutputs - (info->io.genUserClip + 3) / 4;
+
+   for (i = 0; i < info->io.genUserClip; ++i) {
+  n = i / 4 + first;
+  c = i % 4;
+  Symbol *sym =
+ mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, info->out[n].slot[c] * 4);
+  mkStore(OP_EXPORT, TYPE_F32, sym, NULL, res[i]);
+   }
+}
+
+SVSemantic
+ConverterCommon::translateSysVal(uint sysv

[Mesa-dev] [PATCH 00/22] Nir support for Nouveau

2017-12-21 Thread Karol Herbst
The plan is to get SPIR-V support for Nouveau in a cheap way. Before that I
was looking into Pierre's work on his direct SPIR-V to nvir pass, and it seemed
much more complicated than doing NIR to nvir.

In the end we still plan to get Compute support through SPIR-V and my hope is
to get that piped through NIR, so that a lot of other drivers could benefit
from this as well.

Stuff up to GLSL 1.30 should work (just 2 piglit fails up to that), some games
run as well (truth is, I only checked one, but I expect that more will run).
Usually I took the add-stuff-until-piglit-test-passes approach so a lot of
details are still missing.

But nevertheless I would like to get some feedback on the work and suggestions
for improving this work.

To use NIR just set NV50_PROG_USE_NIR=1 for a mesa DEBUG build. I plan to move
it to a non debug variable for the next series.

Features I want to work on next:
* Geometry shaders
* UBOs

./piglit run -x glx -x egl -x streaming-texture-leak -x max-texture-size 
tests/gpu.py:
[26073/26073] skip: 1574, pass: 13451, warn: 9, fail: 5287, crash: 5752

Note: a lot of crashes are geometry/tessellation stuff hitting asserts.

Karol Herbst (22):
  nvir: print the shader type when dumping headers
  nvir: move common converter code in base class
  nvc0: add support for NIR
  nvc0/debug: add env var to make nir default
  nvir/nir: run some passes to make the conversion easier
  nvir/nir: track defs and provide easy access functions
  nvir/nir: add nir type helper functions
  nvir/nir: implement CFG handling
  nvir/nir: implement nir_load_const_instr
  nvir/nir: add skeleton for nir_intrinsic_instr
  nvir/nir: implement nir_alu_instr handling
  nvir/nir: implement nir_intrinsic_load_uniform
  nvir/nir: implement nir_intrinsic_store_output
  nvir/nir: implement nir_intrinsic_load_input
  nvir/nir: run assignSlots
  nvir/nir: parse NIR shader info
  nvir/nir: implement intrinsic_discard(_if)
  nvir/nir: implement loading system values
  nvir/nir: implement nir_ssa_undef_instr
  nvir/nir: implement nir_instr_type_tex
  nvir/nir: implement vote
  nvir/nir: implement variable indexing

 src/gallium/drivers/nouveau/Makefile.sources   |3 +
 src/gallium/drivers/nouveau/codegen/nv50_ir.cpp|3 +
 src/gallium/drivers/nouveau/codegen/nv50_ir.h  |1 +
 .../nouveau/codegen/nv50_ir_from_common.cpp|  145 ++
 .../drivers/nouveau/codegen/nv50_ir_from_common.h  |   59 +
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 2009 
 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  |  146 +-
 src/gallium/drivers/nouveau/meson.build|   12 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c|   17 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c |   53 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_state.c  |   24 +-
 11 files changed, 2321 insertions(+), 151 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.cpp
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_common.h
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp

-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/22] nvir: print the shader type when dumping headers

2017-12-21 Thread Karol Herbst
this makes debugging a little easier

Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 46a15d76df..0b305af316 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -541,6 +541,7 @@ nvc0_program_dump(struct nvc0_program *prog)
unsigned pos;
 
if (prog->type != PIPE_SHADER_COMPUTE) {
+  debug_printf("dumping HDR for type %i\n", prog->type);
   for (pos = 0; pos < ARRAY_SIZE(prog->hdr); ++pos)
  debug_printf("HDR[%02"PRIxPTR"] = 0x%08x\n",
   pos * sizeof(prog->hdr[0]), prog->hdr[pos]);
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [ANNOUNCE] Wayland/Weston/Mesa HDR support (proof of concept)

2017-12-21 Thread Ville Syrjälä
Here's a quick proof of concept implementation of HDR support
for Wayland/Weston/Mesa.

I'm not posting this as patches right now because I'm not sure
that would do much good given how rough this is. But here are
all the repos/branches:
git://github.com/vsyrjala/wayland.git hdr_poc
git://github.com/vsyrjala/wayland-protocols.git hdr_poc
git://github.com/vsyrjala/weston.git hdr_poc
git://github.com/vsyrjala/mesa.git hdr_poc
git://github.com/vsyrjala/linux.git hdr_poc

The kernel HDR bits were partially done by Uma Shankar, the rest
I hacked together myself.

As far as Wayland protocol goes I'm adding three new
extensions (should probably just have one with several requests?):
- zwp_colorspace_v1 - Specify the primaries/whitepoint chromaticities 
  and transfer function for a surface
- zwp_ycbcr_encoding_v1 - Specify the encoding for YCbCr surfaces
- zwp_hdr_metadata_v1 - Allow the client to pass HDR metadata to 
  the compositor
Note that I've not given any thought to how the compositor might
advertize its capabilities.

I also hacked in a bunch of 10bpc+ YCbCr support to the protocol and
Weston so that I can actually get some HDR video onto the screen.

On the Mesa side I've crudely implemented the following egl/vk
extensions:
- EXT_gl_colorspace_*
- EXT_surface_SMPTE2086_metadata
- EXT_surface_CTA861_3_metadata
  (sidenote: these egl extensions don't seem to match CTA-861.3 nicely
   when it comes to the min/max luminance stuff)
- VK_EXT_hdr_metadata

VK_EXT_hdr_metadata I plugged in for anv only, but the implementation
is in the common wayland wsi code. Note that I haven't actually tested
the vulkan stuff at all because I don't talk Vulkan (at least not yet).

Also note that I've not connected up the HDR metadata pipeline 
properly. The client can provide the metadata, but the compositor
doesn't actually pass it on to the display. For the time being the
HDR metadata that gets passed to the display is partially specified
in weston.ini and partially just hardcoded (see
libweston/compositor-drm.c).

The Weston implementation involves a bunch of shaders and matrices to
do the ycbcr->rgb, "degamma", csc for each surface, blend it all as
linear RGB into an fp16 fbo, and finally blit that out to the final
framebuffer while applying the ST2084 PQ transfer function in the
process.

The reason for the fp16 fbo is that we store the full 10000 nits of
linear RGB. That needs plenty of precision in the low end so your
regular 10bpc fb doesn't seem to cut it. And also the display gamma LUT
doesn't have enough input precision for it either. Sadly there's no
fixed function hardware in the GPU to do the ST2084 PQ when blending.
When the output is not HDR I do skip the fp16 fbo step and use the
gamma LUT in the display engine instead. 

Another approach to the precision problem might be to not store the
entire 10000 nits of linear, and just cut off the super bright stuff
(your display can't show it anyway). But I've not really bothered to
figure out how low in nits we'd have to go here, probably too low.
Maybe blending as sRGB and then doing sRGB->PQ with the gamma LUT might
help a little bit?

Ideally we would bypass this all for a single fullscreen HDR surface
and just pass the PQ encoded data directly through. But I've not
implemented that. In fact I even disable the buffer_age damage stuff
when using the fp16 fbo, so we'll recompose the entire screen every
time. Yeah, I'm lazy.

Another thought that occurred to me was that it shouldn't be too hard
to make Weston do some tone mapping when there's a HDR client and no
HDR screen. To that end I included the ACES colorspaces in my
colorspace list, but I didn't actually look into plugging the ACES tone
mapping curve into the shaders. Might be a fun exercise, even though
the practical applications might be close to nil. Probably better to
not advertize HDR/wide gamuts when we can't actually output the stuff,
and instead let the client do its own tone mapping.

OK, so what can you do with this? I've included a few test clients:
- simple-colorspace
  Just a copy of simple-egl but it uses the egl extension to specify
  the colorspace, and produces ST2084 PQ encoded data when asked
- simple-hdr-video
  Uses ffmpeg to decode video into shm buffers, and sets the 
  colorspace/ycbcr encoding etc. appropriately. Ie. this one can
  actually output HDR video

Here's a weston.ini snippet that gets you outputting HDR:
[core]
gbm-format=xrgb2101010

[output]
name=HDMI-A-2
colorspace=BT.2020
gamma=ST2084
max_sdr_nits=100

Hardware wise you'll need a HDR capable display obviously, and 
you'll need an Intel Geminilake GPU. Older Intel platforms don't
support the HDR infoframe, so the display wouldn't know what to do
with the data you're feeding it.

As for the future, right now I don't really have any solid plans on
continuing to develop this. I might dabble with it a bit more out of
curiosity, but I'm more hoping we can find other people to move this
forward properly.

-- 
Ville Syrjälä
Int

Re: [Mesa-dev] [PATCHv2] intel/fs: Optimize and simplify the copy propagation dataflow logic.

2017-12-21 Thread Eero Tamminen

Hi,

I tested this on HSW GT2, BXT & SKL GT3e, and didn't see any significant 
regressions this time.  I'll try it also on a machine with smaller 
variance than those (now that it became free), and send a note if that 
does show something.


- Eero

On 20.12.2017 21:27, Francisco Jerez wrote:

Previously the dataflow propagation algorithm would calculate the ACP
live-in and -out sets in a two-pass fixed-point algorithm.  The first
pass would update the live-out sets of all basic blocks of the program
based on their live-in sets, while the second pass would update the
live-in sets based on the live-out sets.  This is incredibly
inefficient in the typical case where the CFG of the program is
approximately acyclic, because it can take up to 2*n passes for an ACP
entry introduced at the top of the program to reach the bottom (where
n is the number of basic blocks in the program), until which point the
algorithm won't be able to reach a fixed point.

The same effect can be achieved in a single pass by computing the
live-in and -out sets in lock-step, because that makes sure that
processing of any basic block will pick up the updated live-out sets
of the lexically preceding blocks.  This gives the dataflow
propagation algorithm effectively O(n) run-time instead of O(n^2) in
the acyclic case.

The time spent in dataflow propagation is reduced by 30x in the
GLES31.functional.ssbo.layout.random.all_shared_buffer.5 dEQP
test-case on my CHV system (the improvement is likely to be of the
same order of magnitude on other platforms).  This more than reverses
an apparent run-time regression in this test-case from my previous
copy-propagation undefined-value handling patch, which was ultimately
caused by the additional work introduced in that commit to account for
undefined values being multiplied by a huge quadratic factor.

According to Chad this test was failing on CHV due to a 30s time-out
imposed by the Android CTS (this was the case regardless of my
undefined-value handling patch, even though my patch substantially
exacerbated the issue).  On my CHV system this patch reduces the
overall run-time of the test by approximately 12x, getting us to
around 13s, well below the time-out.

v2: Initialize live-out set to the universal set to avoid rather
 pessimistic dataflow estimation in shaders with cycles (Addresses
 performance regression reported by Eero in GpuTest Piano).
 Performance numbers given above still apply.  No shader-db changes
 with respect to master.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104271
Reported-by: Chad Versace 
---
  src/intel/compiler/brw_fs_copy_propagation.cpp | 35 --
  1 file changed, 11 insertions(+), 24 deletions(-)

diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp 
b/src/intel/compiler/brw_fs_copy_propagation.cpp
index af5635eacef..92cc0a8de58 100644
--- a/src/intel/compiler/brw_fs_copy_propagation.cpp
+++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
@@ -186,8 +186,7 @@ fs_copy_prop_dataflow::setup_initial_values()
  
 /* Populate the initial values for the livein and liveout sets.  For the

  * block at the start of the program, livein = 0 and liveout = copy.
-* For the others, set liveout to 0 (the empty set) and livein to ~0
-* (the universal set).
+* For the others, set liveout and livein to ~0 (the universal set).
  */
 foreach_block (block, cfg) {
if (block->parents.is_empty()) {
@@ -197,7 +196,7 @@ fs_copy_prop_dataflow::setup_initial_values()
   }
} else {
   for (int i = 0; i < bitset_words; i++) {
-bd[block->num].liveout[i] = 0u;
+bd[block->num].liveout[i] = ~0u;
  bd[block->num].livein[i] = ~0u;
   }
}
@@ -228,34 +227,17 @@ fs_copy_prop_dataflow::run()
 do {
progress = false;
  
-  /* Update liveout for all blocks. */

foreach_block (block, cfg) {
   if (block->parents.is_empty())
  continue;
  
   for (int i = 0; i < bitset_words; i++) {

  const BITSET_WORD old_liveout = bd[block->num].liveout[i];
-
-bd[block->num].liveout[i] =
-   bd[block->num].copy[i] | (bd[block->num].livein[i] &
- ~bd[block->num].kill[i]);
-
-if (old_liveout != bd[block->num].liveout[i])
-   progress = true;
- }
-  }
-
-  /* Update livein for all blocks.  If a copy is live out of all parent
-   * blocks, it's live coming in to this block.
-   */
-  foreach_block (block, cfg) {
- if (block->parents.is_empty())
-continue;
-
- for (int i = 0; i < bitset_words; i++) {
-const BITSET_WORD old_livein = bd[block->num].livein[i];
  BITSET_WORD livein_from_any_block = 0;
  
+/* Update livein for this block.  If a copy is live out of all

+ * parent blocks, it's live coming i

Re: [Mesa-dev] [PATCH] spirv: avoid infinite loop / freeze in vtn_cfg_walk_blocks()

2017-12-21 Thread Lionel Landwerlin

Reviewed-by: Lionel Landwerlin 

On 21/12/17 13:53, Eero Tamminen wrote:

Fixes: 9702fac68e (spirv: consider bitsize when handling OpSwitch cases)
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104359
---
 src/compiler/spirv/vtn_cfg.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/compiler/spirv/vtn_cfg.c b/src/compiler/spirv/vtn_cfg.c
index 9c4cbe2..3d5de37 100644
--- a/src/compiler/spirv/vtn_cfg.c
+++ b/src/compiler/spirv/vtn_cfg.c
@@ -549,19 +549,19 @@ vtn_cfg_walk_blocks(struct vtn_builder *b, 
struct list_head *cf_list,

 struct vtn_block *case_block =
    vtn_value(b, *w, vtn_value_type_block)->block;

-    if (case_block == break_block)
-   continue;
-
-    vtn_assert(case_block->switch_case);
-
-    vtn_order_case(swtch, case_block->switch_case);
-
 if (bitsize <= 32) {
    w += 2;
 } else {
    assert(bitsize == 64);
    w += 3;
 }
+
+    if (case_block == break_block)
+   continue;
+
+    vtn_assert(case_block->switch_case);
+
+    vtn_order_case(swtch, case_block->switch_case);
  }

  enum vtn_branch_type branch_type =
--
2.7.4
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] spirv: avoid infinite loop / freeze in vtn_cfg_walk_blocks()

2017-12-21 Thread Eero Tamminen

Fixes: 9702fac68e (spirv: consider bitsize when handling OpSwitch cases)
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104359
---
 src/compiler/spirv/vtn_cfg.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/compiler/spirv/vtn_cfg.c b/src/compiler/spirv/vtn_cfg.c
index 9c4cbe2..3d5de37 100644
--- a/src/compiler/spirv/vtn_cfg.c
+++ b/src/compiler/spirv/vtn_cfg.c
@@ -549,19 +549,19 @@ vtn_cfg_walk_blocks(struct vtn_builder *b, struct 
list_head *cf_list,

 struct vtn_block *case_block =
vtn_value(b, *w, vtn_value_type_block)->block;

-if (case_block == break_block)
-   continue;
-
-vtn_assert(case_block->switch_case);
-
-vtn_order_case(swtch, case_block->switch_case);
-
 if (bitsize <= 32) {
w += 2;
 } else {
assert(bitsize == 64);
w += 3;
 }
+
+if (case_block == break_block)
+   continue;
+
+vtn_assert(case_block->switch_case);
+
+vtn_order_case(swtch, case_block->switch_case);
  }

  enum vtn_branch_type branch_type =
--
2.7.4
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] v3: ext_memory_object: Test sampling memory exported from Vulkan

2017-12-21 Thread Pohjolainen, Topi
On Thu, Dec 21, 2017 at 02:01:22PM +0200, Topi Pohjolainen wrote:
> Here is a revision taking into account feedback from Andres and Fredrik.
> Many thanks for both, I hope I didn't miss anything.

My apologies, sent to the wrong list. Only realized too late after hitting
enter...
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [v3 01/11] framework: Check for vulkan availability

2017-12-21 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen 
---
 CMakeLists.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4259ec832..c90109907 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -173,6 +173,8 @@ ELSEIF(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
endif()
 ENDIF()
 
+pkg_check_modules(LIBVULKAN QUIET vulkan)
+
 IF(PIGLIT_HAS_GLX)
option(PIGLIT_BUILD_GLX_TESTS "Build tests that require GLX" ON)
 ELSE()
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] v3: ext_memory_object: Test sampling memory exported from Vulkan

2017-12-21 Thread Topi Pohjolainen
Here is a revision taking into account feedback from Andres and Fredrik.
Many thanks to both; I hope I didn't miss anything.

CC: Andres Rodriguez 
CC: Fredrik Hoeglund 
CC: Jason Ekstrand 

Topi Pohjolainen (11):
  framework: Check for vulkan availability
  framework: HACK: Read glslc path from env
  ext_memory_object: Add script for turning glsl into spirv c-array
  ext_memory_object: Support for setting up vulkan device
  ext_memory_object: Support for drawing with vulkan
  ext_memory_object: Support for setting up vulkan framebuffer
  ext_memory_object: Add tex layout command line
  ext_memory_object: Support for importing vulkan memory
  ext_memory_object: Support for creating simple vulkan pipelines
  ext_memory_object: Add helper for image type support
  ext_memory_object: Test render with vulkan and sample with gl

 CMakeLists.txt |   3 +
 tests/spec/ext_memory_object/CMakeLists.gl.txt |  18 +
 tests/spec/ext_memory_object/common.c  | 167 +
 tests/spec/ext_memory_object/common.h  |  51 ++
 .../compile_and_dump_glsl_as_spirv.py  | 139 +
 tests/spec/ext_memory_object/vk_common.c   | 670 +
 tests/spec/ext_memory_object/vk_common.h   | 176 ++
 .../ext_memory_object/vk_export_image_as_tex.c | 219 +++
 tests/spec/ext_memory_object/vk_fb.c   | 346 +++
 tests/spec/ext_memory_object/vk_fragcoord.fs   |   7 +
 tests/spec/ext_memory_object/vk_fragcoord.vs   |   8 +
 11 files changed, 1804 insertions(+)
 create mode 100644 tests/spec/ext_memory_object/common.c
 create mode 100644 tests/spec/ext_memory_object/common.h
 create mode 100644 
tests/spec/ext_memory_object/compile_and_dump_glsl_as_spirv.py
 create mode 100644 tests/spec/ext_memory_object/vk_common.c
 create mode 100644 tests/spec/ext_memory_object/vk_common.h
 create mode 100644 tests/spec/ext_memory_object/vk_export_image_as_tex.c
 create mode 100644 tests/spec/ext_memory_object/vk_fb.c
 create mode 100644 tests/spec/ext_memory_object/vk_fragcoord.fs
 create mode 100644 tests/spec/ext_memory_object/vk_fragcoord.vs

-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [v3 03/11] ext_memory_object: Add script for turning glsl into spirv c-array

2017-12-21 Thread Topi Pohjolainen
This is a stripped-down version of glsl_scraper.py found in crucible.

Signed-off-by: Topi Pohjolainen 
---
 .../compile_and_dump_glsl_as_spirv.py  | 139 +
 1 file changed, 139 insertions(+)
 create mode 100644 
tests/spec/ext_memory_object/compile_and_dump_glsl_as_spirv.py

diff --git a/tests/spec/ext_memory_object/compile_and_dump_glsl_as_spirv.py 
b/tests/spec/ext_memory_object/compile_and_dump_glsl_as_spirv.py
new file mode 100644
index 0..b7fdeafe2
--- /dev/null
+++ b/tests/spec/ext_memory_object/compile_and_dump_glsl_as_spirv.py
@@ -0,0 +1,139 @@
+#! /usr/bin/env python3
+
+import argparse
+import io
+import os
+import re
+import shutil
+import struct
+import subprocess
+import sys
+import tempfile
+from textwrap import dedent
+
+class ShaderCompileError(RuntimeError):
+def __init__(self, *args):
+super(ShaderCompileError, self).__init__(*args)
+
+class Shader:
+def __init__(self, stage, infname):
+self.stage = stage
+self.infname = infname
+self.dwords = None
+self.var_prefix = os.path.basename(infname).replace('.', '_')
+
+def __run_glslc(self, extra_args=[]):
+stage_flag = '-fshader-stage=' + self.stage
+
+with subprocess.Popen([glslc] + extra_args +
+  [stage_flag, '-std=430core', '-o', '-',
+   self.infname],
+  stdout = subprocess.PIPE,
+  stderr = subprocess.PIPE,
+  stdin = subprocess.PIPE) as proc:
+
+out, err = proc.communicate(timeout=30)
+
+if proc.returncode != 0:
+# Unfortunately, glslang dumps errors to standard out.
+# However, since we don't really want to count on that,
+# we'll grab the output of both
+message = out.decode('utf-8') + '\n' + err.decode('utf-8')
+raise ShaderCompileError(message.strip())
+
+return out
+
+def compile(self):
+def dwords(f):
+while True:
+dword_str = f.read(4)
+if not dword_str:
+return
+assert len(dword_str) == 4
+yield struct.unpack('I', dword_str)[0]
+
+spirv = self.__run_glslc()
+self.dwords = list(dwords(io.BytesIO(spirv)))
+self.assembly = str(self.__run_glslc(['-S']), 'utf-8')
+
+def _dump_glsl_code(self, f, var_name):
+# First dump the GLSL source as strings
+f.write('static const char {0}[] ='.format(var_name))
+f.write('\n"#version 330\\n"')
+
+infile = open_file(self.infname, 'r')
+for line in infile:
+f.write('\n"{0}\\n"'.format(line.strip('\n')))
+f.write(';\n\n')
+
+def _dump_spirv_code(self, f, var_name):
+f.write('/* SPIR-V Assembly:\n')
+f.write(' *\n')
+for line in self.assembly.splitlines():
+f.write(' * ' + line + '\n')
+f.write(' */\n')
+
+f.write('static const uint32_t {0}[] = {{'.format(var_name))
+line_start = 0
+while line_start < len(self.dwords):
+f.write('\n')
+for i in range(line_start, min(line_start + 6, len(self.dwords))):
+f.write(' 0x{:08x},'.format(self.dwords[i]))
+line_start += 6
+f.write('\n};\n')
+
+def dump_c_code(self, f):
+self._dump_glsl_code(f, self.var_prefix + '_glsl_src')
+self._dump_spirv_code(f, self.var_prefix + '_spir_v_src')
+
+def parse_args():
+description = dedent("""\
+This program compiles the given glsl source file into SPIR-V and
+writes it to another C file as an array of 32-bit words.
+
+If '-' is passed as the input file or output file, stdin or stdout
+will be used instead of a file on disc.""")
+
+p = argparse.ArgumentParser(
+description=description,
+formatter_class=argparse.RawDescriptionHelpFormatter)
+p.add_argument('-o', '--outfile', default='-',
+help='Output to the given file (default: stdout).')
+p.add_argument('--with-glslc', metavar='PATH',
+default='glslc',
+dest='glslc',
+help='Full path to the glslc shader compiler.')
+p.add_argument('--stage', dest='stage')
+p.add_argument('infile', metavar='INFILE')
+
+return p.parse_args()
+
+def open_file(name, mode):
+if name == '-':
+if mode == 'w':
+return sys.stdout
+elif mode == 'r':
+return sys.stdin
+else:
+assert False
+else:
+return open(name, mode)
+
+args = parse_args()
+outfname = args.outfile
+glslc = args.glslc
+
+shader = Shader(args.stage, args.infile)
+shader.compile()
+
+with open_file(outfname, 'w') as outfile:
+outfile.write(dedent("""\
+/* ===

[Mesa-dev] [v3 02/11] framework: HACK: Read glslc path from env

2017-12-21 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen 
---
 CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c90109907..767b90add 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -174,6 +174,7 @@ ELSEIF(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
 ENDIF()
 
 pkg_check_modules(LIBVULKAN QUIET vulkan)
+set(GLSLC $ENV{GLSLC})
 
 IF(PIGLIT_HAS_GLX)
option(PIGLIT_BUILD_GLX_TESTS "Build tests that require GLX" ON)
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] shader compile

2017-12-21 Thread Alejandro Piñeiro
On 21/12/17 08:21, 吴畏 wrote:
> Hello,every
> I want to know why vertex shader and fragment shader are compiled to
> many shader binarys.

So many binaries? Do you mean the intermediate representations? AST, IR,
NIR, TGSI ...?

> When and where these bianrys are linked

flex/bison create an AST representation, which is a raw representation
of the shader. It is then converted to IR. Take a look at
src/compiler/glsl/ast_to_hir.cpp

For GLSL and for most backends, the linking is done using IR. Take a
look at src/compiler/linker.cpp

Although IR is a real intermediate representation, these days most of
the backends don't use it to generate the final assembly.

The i965 Intel backend converts it to NIR. Take a look at
src/compiler/glsl/glsl_to_nir.cpp for the conversion, and
src/compiler/nir/nir.h for NIR in general.

Some time ago I wrote a blog post about those intermediate
representations. If you are curious:
https://blogs.igalia.com/apinheiro/2016/06/02/introducing-mesa-intermediate-representations-on-intel-drivers-with-a-practical-example/


> and uploaded?

What do you mean by uploaded?

BR

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] radv: gfx9 3d image fixes

2017-12-21 Thread Alex Smith
Nice - this does fix the issue I was seeing, thanks.

Can at least patches 2 and 3 go to stable?

On 21 December 2017 at 01:50, Dave Airlie  wrote:

> This series fixes about 340 CTS tests on Vega that involve 3D images.
>
> The two main things are to use 3D samplers for copy paths sources that
> are 3D images.
>
> I've also found another bug, and refactors a bit of code at the end.
>
> I've also tested this on a Tonga and tests don't seem to break.
>
> Dave.
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv: fix issue with multisample positions and interp_var_at_sample.

2017-12-21 Thread Samuel Pitoiset

I think this needs to be backported to mesa-stable.

Thanks for fixing this, it makes sense.

Reviewed-by: Samuel Pitoiset 

On 12/21/2017 05:05 AM, Dave Airlie wrote:

From: Dave Airlie 

This fixes vmfaults seen on vega with:
dEQP-VK.pipeline.multisample_interpolation.sample_interpolate_at_single_sample_.128_128_1.samples_1

These were caused by the don't allocate cmask but it was just accidental.

The actual problem was the shader was trying to get the sample positions from
a buffer, but the buffer was never getting configured to contain them, as the
previous shader never needed them.

Signed-off-by: Dave Airlie 
---
  src/amd/vulkan/radv_cmd_buffer.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index a366facd63..6a89d4e568 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -593,7 +593,8 @@ radv_update_multisample_state(struct radv_cmd_buffer 
*cmd_buffer,
radeon_set_context_reg(cmd_buffer->cs, R_028804_DB_EQAA, ms->db_eqaa);
radeon_set_context_reg(cmd_buffer->cs, R_028A4C_PA_SC_MODE_CNTL_1, 
ms->pa_sc_mode_cntl_1);
  
-	if (old_pipeline && num_samples == old_pipeline->graphics.ms.num_samples)

+   if (old_pipeline && num_samples == old_pipeline->graphics.ms.num_samples 
&&
+   
old_pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.needs_sample_positions == 
pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.needs_sample_positions)
return;
  
  	radeon_set_context_reg_seq(cmd_buffer->cs, R_028BDC_PA_SC_LINE_CNTL, 2);



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv: fix issue with multisample positions and interp_var_at_sample.

2017-12-21 Thread Bas Nieuwenhuizen
Nice catch!

Reviewed-by: Bas Nieuwenhuizen 

On Thu, Dec 21, 2017 at 5:05 AM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> This fixes vmfaults seen on vega with:
> dEQP-VK.pipeline.multisample_interpolation.sample_interpolate_at_single_sample_.128_128_1.samples_1
>
> These were caused by the don't allocate cmask but it was just accidental.
>
> The actual problem was the shader was trying to get the sample positions from
> a buffer, but the buffer was never getting configured to contain them, as the
> previous shader never needed them.
>
> Signed-off-by: Dave Airlie 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index a366facd63..6a89d4e568 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -593,7 +593,8 @@ radv_update_multisample_state(struct radv_cmd_buffer 
> *cmd_buffer,
> radeon_set_context_reg(cmd_buffer->cs, R_028804_DB_EQAA, ms->db_eqaa);
> radeon_set_context_reg(cmd_buffer->cs, R_028A4C_PA_SC_MODE_CNTL_1, 
> ms->pa_sc_mode_cntl_1);
>
> -   if (old_pipeline && num_samples == 
> old_pipeline->graphics.ms.num_samples)
> +   if (old_pipeline && num_samples == 
> old_pipeline->graphics.ms.num_samples &&
> +   
> old_pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.needs_sample_positions
>  == 
> pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.needs_sample_positions)
> return;
>
> radeon_set_context_reg_seq(cmd_buffer->cs, R_028BDC_PA_SC_LINE_CNTL, 
> 2);
> --
> 2.14.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] shader compile

2017-12-21 Thread 吴畏
Hello, everyone
I want to know why vertex shader and fragment shader are compiled to many 
shader binaries. When and where are these binaries linked and uploaded?
thanks
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Nouveau] [PATCH] gm107/ir: use lane 0 for manual textureGrad handling

2017-12-21 Thread Karol Herbst
On Wed, Dec 20, 2017 at 3:44 PM, Ilia Mirkin  wrote:
> On Tue, Dec 19, 2017 at 11:41 PM, Ilia Mirkin  wrote:
>> This is parallel to the pre-SM50 change which does this. Adjusts the
>> shuffles / quadops to make the values correct relative to lane 0, and
>> then splat the results to all lanes for the final move into the target
>> register.
>>
>> Signed-off-by: Ilia Mirkin 
>> ---
>>
>> Entirely untested beyond compilation. Should check
>>
>> bin/tex-miplevel-selection textureGrad Cube
>> bin/tex-miplevel-selection textureGrad CubeShadow
>> bin/tex-miplevel-selection textureGrad CubeArray
>> KHR-GL45.texture_cube_map_array.sampling
>>
>> to see if they start passing with this change.
>>
>>  .../nouveau/codegen/nv50_ir_lowering_gm107.cpp | 56 
>> ++
>>  1 file changed, 35 insertions(+), 21 deletions(-)
>>
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp 
>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
>> index 6b9edd48645..a2427526a81 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
>> @@ -95,18 +95,15 @@ GM107LegalizeSSA::visit(Instruction *i)
>>  bool
>>  GM107LoweringPass::handleManualTXD(TexInstruction *i)
>>  {
>> -   static const uint8_t qOps[4][2] =
>> -   {
>> -  { QUADOP(MOV2, ADD,  MOV2, ADD),  QUADOP(MOV2, MOV2, ADD,  ADD) }, // 
>> l0
>> -  { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD,  ADD) }, // 
>> l1
>> -  { QUADOP(MOV2, ADD,  MOV2, ADD),  QUADOP(SUBR, SUBR, MOV2, MOV2) }, 
>> // l2
>> -  { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, 
>> // l3
>> -   };
>> +   // See NVC0LoweringPass::handleManualTXD for rationale. This function
>> +   // implements the same logic, but using SM50-friendly primitives.
>> +   static const uint8_t qOps[2] =
>> +  { QUADOP(MOV2, ADD,  MOV2, ADD),  QUADOP(MOV2, MOV2, ADD,  ADD) };
>> Value *def[4][4];
>> -   Value *crd[3];
>> +   Value *crd[3], *arr, *shadow;
>> Value *tmp;
>> Instruction *tex, *add;
>> -   Value *zero = bld.loadImm(bld.getSSA(), 0);
>> +   Value *quad = bld.mkImm(SHFL_BOUND_QUAD);
>> int l, c;
>> const int dim = i->tex.target.getDim() + i->tex.target.isCube();
>> const int array = i->tex.target.isArray();
>> @@ -115,35 +112,40 @@ GM107LoweringPass::handleManualTXD(TexInstruction *i)
>>
>> for (c = 0; c < dim; ++c)
>>crd[c] = bld.getScratch();
>> +   arr = bld.getScratch();
>> +   shadow = bld.getScratch();
>> tmp = bld.getScratch();
>>
>> for (l = 0; l < 4; ++l) {
>>Value *src[3], *val;
>> -  // mov coordinates from lane l to all lanes
>> +  Value *lane = bld.mkImm(l);
>>bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
>> +  // Make sure lane 0 has the appropriate array/depth compare values
>> +  if (l != 0) {
>> + if (array)
>> +bld.mkOp3(OP_SHFL, TYPE_F32, arr, i->getSrc(0), lane, quad);
>> + if (i->tex.target.isShadow())
>> +bld.mkOp3(OP_SHFL, TYPE_F32, shadow, i->getSrc(array + dim), 
>> lane, quad);
>
> In the great argument switcheroo between each SM version, the shadow
> compare is actually after the indirect handle (which in turn is after
> array + dim). So this should become array + dim + indirect (and
> similarly below).
>
>> +  }
>> +
>> +  // mov coordinates from lane l to all lanes
>>for (c = 0; c < dim; ++c) {
>> - bld.mkOp3(OP_SHFL, TYPE_F32, crd[c], i->getSrc(c + array),
>> -   bld.mkImm(l), bld.mkImm(SHFL_BOUND_QUAD));
>> - add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], crd[c], zero);
>> - add->subOp = 0x00;
>> - add->lanes = 1; /* abused for .ndv */
>> + bld.mkOp3(OP_SHFL, TYPE_F32, crd[c], i->getSrc(c + array), lane, 
>> quad);
>>}
>>
>>// add dPdx from lane l to lanes dx
>>for (c = 0; c < dim; ++c) {
>> - bld.mkOp3(OP_SHFL, TYPE_F32, tmp, i->dPdx[c].get(), bld.mkImm(l),
>> -   bld.mkImm(SHFL_BOUND_QUAD));
>> + bld.mkOp3(OP_SHFL, TYPE_F32, tmp, i->dPdx[c].get(), lane, quad);
>>   add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]);
>> - add->subOp = qOps[l][0];
>> + add->subOp = qOps[0];
>>   add->lanes = 1; /* abused for .ndv */
>>}
>>
>>// add dPdy from lane l to lanes dy
>>for (c = 0; c < dim; ++c) {
>> - bld.mkOp3(OP_SHFL, TYPE_F32, tmp, i->dPdy[c].get(), bld.mkImm(l),
>> -   bld.mkImm(SHFL_BOUND_QUAD));
>> + bld.mkOp3(OP_SHFL, TYPE_F32, tmp, i->dPdy[c].get(), lane, quad);
>>   add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]);
>> - add->subOp = qOps[l][1];
>> + add->subOp = qOps[1];
>>   add->lanes = 1; /* abused for .ndv */
>>}
>>
>> @@ -164,8 +166,20 @@ GM107LoweringPass::handleManualTXD(TexInstruction *i)
>>
>>// texture

Re: [Mesa-dev] Allocator Nouveau driver, Mesa EXT_external_objects, and DRM metadata import interfaces

2017-12-21 Thread Daniel Vetter
On Thu, Dec 21, 2017 at 9:06 AM, James Jones  wrote:
> However, making some assumptions, I suspect it's probably going to come down
> to yes we can fit what we need in some number of bits marginally less than
> 56 now, with the current use cases and hardware, but we're very concerned
> about extensibility given the number has only ever grown in our HW, is
> uncomfortably close to the limit if it isn't over it already, and it's been
> demonstrated it takes a monumental effort to change the mechanism if it
> isn't extensible.  While it's hard to change the mechanism one more time
> now, better to change it to something truly extensible now because it will
> be much, much harder to make such a change ~5 years from now in a world
> where it's baked in to pervasively deployed Wayland and X protocol, the EGL
> and Vulkan extensions have been defined for a few years and in use by apps
> besides Wayland, and the allocator stuff is deployed on ~5 operating systems
> that have some derivative version of DRM modifiers to support it and a bunch
> of funky embedded apps using it.  Further, we're volunteering to handle the
> bulk of the effort needed to make the change now, so I hope architectural
> correctness and maintainability can be the primary points of debate.

I think that's already happened. So no matter what we do, we're going
to live with an ecosystem that uses modifiers all over the place in 5
years. Even if it's not fully pervasive we will have to keep the
support around for 10 years (at least on the kernel side).

So the option is between revving the entire ecosystem now, or revving it
in a few years when the current scheme has run out of steam for good.
And I much prefer the 2nd option for the simple reason that by then
the magic 8ball has gained another 5 years of clarity for looking into
the future.

I think in the interim figuring out how to expose kms capabilities
better (and necessarily standardizing at least some of them which
matter at the compositor level, like size limits of framebuffers)
feels like the place to push the ecosystem forward. In some way
Miguel's proposal looks a bit backwards, since it adds the pitch
capabilities to addfb, but at addfb time you've allocated everything
already, so way too late to fix things up. With modifiers we've added
a very simple per-plane property to list which modifiers can be
combined with which pixel formats. Tiny start, but obviously very far
from all that we'll need.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Allocator Nouveau driver, Mesa EXT_external_objects, and DRM metadata import interfaces

2017-12-21 Thread James Jones

On 12/20/2017 01:58 PM, Daniel Stone wrote:

Hi Miguel,

On 20 December 2017 at 16:51, Miguel Angel Vico  wrote:

In the meantime, I've been working on putting together an open source
implementation of the allocator mechanisms using the Nouveau driver for
all to be able to play with.


Thanks for taking a look at this! I'm still winding out my to-do list
for the year, but hoping to get to this more seriously soon.

As a general comment, now that modifiers are a first-class concept in
many places (KMS FBs, KMS plane format advertisement, V4L2 buffers,
EGL/Vulkan image import/export, Wayland buffer import, etc), I'd like
to see them included as a first-class concept in the allocator. I
understand one of the primary reservations against using them was that
QNX didn't have such a concept, but just specifying them to be ignored
on non-Linux platforms would probably work fine.


The allocator mechanisms and format modifiers are orthogonal though. 
Either capability sets can be represented using format modifiers (the 
direction one part of this thread is suggesting, which I think is a bad 
idea), or format modifiers could easily be included as a vendor-agnostic 
capability, similar to pitch layout.  There are no "First class 
citizens" in the allocator mechanism itself.  That's the whole idea: 
Apps don't need to care about things like how the OS represents its 
surface metadata beyond some truly universal things like width and 
height (assertions).  The rest is abstracted away such that the apps are 
portable, even if the drivers/backends aren't.  Even if the solution 
within Linux is "just use format modifiers", there's still some benefit 
to making the kernel ABI use something slightly higher level that 
translates to DRM format modifiers inside the kernel, just to keep the 
apps OS-agnostic.



Another of the missing pieces before we can move this to production is
importing allocations to DRM FB objects. This is probably one of the
most sensitive parts of the project as it requires modification/addition
of kernel driver interfaces.

At XDC2017, James had several hallway conversations with several people
about this, all having different opinions. I'd like to take this
opportunity to also start a discussion about what's the best option to
create a path to get allocator allocations added as DRM FB objects.

These are the few options we've considered to start with:

   A) Have vendor-private ioctls to set properties on GEM objects that
  are inherited by the FB objects. This is how our (NVIDIA) desktop
  DRM driver currently works. This would require every vendor to add
  their own ioctl to process allocator metadata, but the metadata is
  actually a vendor-agnostic object more like DRM modifiers. We'd
  like to come up with a vendor-agnostic solution that can be
  integrated to core DRM.


This worries me. If the data is static for the lifetime of the buffer
- describing the tiling layout, for instance - then it would form
effective ABI for all the consumers/producers using that buffer type.
If it is dynamic, you also have a world of synchronisation problems
when multiple users race each other with different uses of that buffer
(and presumably you would need to reload the metadata on every use?).
Either way, anyone using this would need to have a very well-developed
compatibility story, given that you can mix and match kernel and
userspace versions.


I think the metadata is static.  The surface meta-state is not, but that 
would be a commit time thing if anything, not a GEM or FB object thing. 
Still, attaching metadata to GEM objects, which seem to be opaque blobs 
of memory in the general case, rather than attaching it to FBs mapped 
onto the GEM objects, always felt architecturally wrong to me.  You can 
have multiple FBs in one GEM object, for example.  There's no reason to 
assume they would share the same format let alone tiling layout.



   B) Add a new drmModeAddFBWithMetadata() command that takes allocator
  metadata blobs for each plane of the FB. Some people in the
  community have mentioned this is their preferred design. This,
  however, means we'd have to go through the exercise of adding
  another metadata mechanism to the whole graphics stack.


Similarly, this seems to be missing either a 'mandatory' flag so
userspace can inform the kernel it must fail if it does not understand
certain capabilities, or a way for the kernel to inform userspace
which capabilities it does/doesn't understand.


I think that will fall out of the discussion over exactly what 
capability sets look like.  Regardless, yes, the kernel must fail if it 
can't support a given capability set, just as it would fail if it 
couldn't support a given DRM Format modifier.  Like the format 
modifiers, the userspace allocator driver would have queried the DRM 
kernel driver when reporting supported capability sets for a usage that 
required creating FBs, so it would always be user error to r

Re: [Mesa-dev] Allocator Nouveau driver, Mesa EXT_external_objects, and DRM metadata import interfaces

2017-12-21 Thread Daniel Vetter
On Thu, Dec 21, 2017 at 12:22 AM, Kristian Kristensen
 wrote:
> On Wed, Dec 20, 2017 at 12:41 PM, Miguel Angel Vico 
> wrote:
>>
>> Inline.
>>
>> On Wed, 20 Dec 2017 11:54:10 -0800
>> Kristian Høgsberg  wrote:
>>
>> > On Wed, Dec 20, 2017 at 11:51 AM, Daniel Vetter  wrote:
>> > > Since this also involves the kernel let's add dri-devel ...
>>
>> Yeah, I forgot. Thanks Daniel!
>>
>> > >
>> > > On Wed, Dec 20, 2017 at 5:51 PM, Miguel Angel Vico
>> > >  wrote:
>> > >> Hi all,
>> > >>
>> > >> As many of you already know, I've been working with James Jones on
>> > >> the
>> > >> Generic Device Allocator project lately. He started a discussion
>> > >> thread
>> > >> some weeks ago seeking feedback on the current prototype of the
>> > >> library
>> > >> and advice on how to move all this forward, from a prototype stage to
>> > >> production. For further reference, see:
>> > >>
>> > >>
>> > >> https://lists.freedesktop.org/archives/mesa-dev/2017-November/177632.html
>> > >>
>> > >> From the thread above, we came up with very interesting high level
>> > >> design ideas for one of the currently missing parts in the library:
>> > >> Usage transitions. That's something I'll personally work on during
>> > >> the
>> > >> following weeks.
>> > >>
>> > >>
>> > >> In the meantime, I've been working on putting together an open source
>> > >> implementation of the allocator mechanisms using the Nouveau driver
>> > >> for
>> > >> all to be able to play with.
>> > >>
>> > >> Below I'm seeking feedback on a bunch of changes I had to make to
>> > >> different components of the graphics stack:
>> > >>
>> > >> ** Allocator **
>> > >>
>> > >>   An allocator driver implementation on top of Nouveau. The current
>> > >>   implementation only handles pitch linear layouts, but that's enough
>> > >>   to have the kmscube port working using the allocator and Nouveau
>> > >>   drivers.
>> > >>
>> > >>   You can pull these changes from
>> > >>
>> > >>
>> > >> https://github.com/mvicomoya/allocator/tree/wip/mvicomoya/nouveau-driver
>> > >>
>> > >> ** Mesa **
>> > >>
>> > >>   James's kmscube port to use the allocator relies on the
>> > >>   EXT_external_objects extension to import allocator allocations to
>> > >>   OpenGL as a texture object. However, the Nouveau implementation of
>> > >>   these mechanisms is missing in Mesa, so I went ahead and added
>> > >> them.
>> > >>
>> > >>   You can pull these changes from
>> > >>
>> > >>
>> > >> https://github.com/mvicomoya/mesa/tree/wip/mvicomoya/EXT_external_objects-nouveau
>> > >>
>> > >>   Also, James's kmscube port uses the NVX_unix_allocator_import
>> > >>   extension to attach allocator metadata to texture objects so the
>> > >>   driver knows how to deal with the imported memory.
>> > >>
>> > >>   Note that there isn't a formal spec for this extension yet. For
>> > >> now,
>> > >>   it just serves as an experimental mechanism to import allocator
>> > >>   memory in OpenGL, and attach metadata to texture objects.
>> > >>
>> > >>   You can pull these changes (written on top of the above) from:
>> > >>
>> > >>
>> > >> https://github.com/mvicomoya/mesa/tree/wip/mvicomoya/NVX_unix_allocator_import
>> > >>
>> > >> ** kmscube **
>> > >>
>> > >>   Mostly minor fixes and improvements on top of James's port to use
>> > >> the
>> > >>   allocator. Main thing is the allocator initialization path will use
>> > >>   EGL_MESA_platform_surfaceless if EGLDevice platform isn't supported
>> > >>   by the underlying EGL implementation.
>> > >>
>> > >>   You can pull these changes from:
>> > >>
>> > >>
>> > >> https://github.com/mvicomoya/kmscube/tree/wip/mvicomoya/allocator-nouveau
>> > >>
>> > >>
>> > >> With all the above you should be able to get kmscube working using
>> > >> the
>> > >> allocator on top of the Nouveau driver.
>> > >>
>> > >>
>> > >> Another of the missing pieces before we can move this to production
>> > >> is
>> > >> importing allocations to DRM FB objects. This is probably one of the
>> > >> most sensitive parts of the project as it requires
>> > >> modification/addition
>> > >> of kernel driver interfaces.
>> > >>
>> > >> At XDC2017, James had several hallway conversations with several
>> > >> people
>> > >> about this, all having different opinions. I'd like to take this
>> > >> opportunity to also start a discussion about what's the best option
>> > >> to
>> > >> create a path to get allocator allocations added as DRM FB objects.
>> > >>
>> > >> These are the few options we've considered to start with:
>> > >>
>> > >>   A) Have vendor-private ioctls to set properties on GEM objects that
>> > >>  are inherited by the FB objects. This is how our (NVIDIA)
>> > >> desktop
>> > >>  DRM driver currently works. This would require every vendor to
>> > >> add
>> > >>  their own ioctl to process allocator metadata, but the metadata
>> > >> is
>> > >>  actually a vendor-agnostic object more like DRM modifiers. We'd
>> > >>  like to come up with a vendor-agnostic solution that