Re: [Mesa-dev] [PATCH 1/4] gallium/vl: fix compute tgsi shaders to not process undefined components

2019-07-26 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Thu, Jul 25, 2019 at 11:30 PM Ilia Mirkin wrote:

> This caused nouveau's function handling logic to think that the MAIN
> function was due to receive external parameters, and cascaded some
> failures after that. Instead avoid having the undefined components in
> the first place.
>
> Fixes: f6ac0b5d71 (gallium/auxiliary/vl: Add compute shader to support video compositor render)
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111213
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111217
> Signed-off-by: Ilia Mirkin 
> ---
>  src/gallium/auxiliary/vl/vl_compositor_cs.c | 102 ++--
>  1 file changed, 51 insertions(+), 51 deletions(-)
>
> diff --git a/src/gallium/auxiliary/vl/vl_compositor_cs.c b/src/gallium/auxiliary/vl/vl_compositor_cs.c
> index 485b4174b8e..d84df7240da 100644
> --- a/src/gallium/auxiliary/vl/vl_compositor_cs.c
> +++ b/src/gallium/auxiliary/vl/vl_compositor_cs.c
> @@ -61,7 +61,7 @@ const char *compute_shader_video_buffer =
>"IMM[0] UINT32 { 8, 8, 1, 0}\n"
>"IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"
>
> -  "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"
> +  "UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n"
>
>/* Drawn area check */
>"USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
> @@ -70,20 +70,20 @@ const char *compute_shader_video_buffer =
>"AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
>"AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"
>
> -  "UIF TEMP[1]\n"
> +  "UIF TEMP[1].xxxx\n"
>   /* Translate */
>   "UADD TEMP[2].xy, TEMP[0], -CONST[5].xyxy\n"
> - "U2F TEMP[2], TEMP[2]\n"
> - "DIV TEMP[3], TEMP[2], IMM[1].yyyy\n"
> + "U2F TEMP[2].xy, TEMP[2].xyyy\n"
> + "DIV TEMP[3].xy, TEMP[2].xyyy, IMM[1].yyyy\n"
>
>   /* Scale */
> - "DIV TEMP[2], TEMP[2], CONST[3].zwzw\n"
> - "DIV TEMP[3], TEMP[3], CONST[3].zwzw\n"
> + "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwww\n"
> + "DIV TEMP[3].xy, TEMP[3].xyyy, CONST[3].zwww\n"
>
>   /* Fetch texels */
> - "TEX_LZ TEMP[4].x, TEMP[2], SAMP[0], RECT\n"
> - "TEX_LZ TEMP[4].y, TEMP[3], SAMP[1], RECT\n"
> - "TEX_LZ TEMP[4].z, TEMP[3], SAMP[2], RECT\n"
> + "TEX_LZ TEMP[4].x, TEMP[2].xyyy, SAMP[0], RECT\n"
> + "TEX_LZ TEMP[4].y, TEMP[3].xyyy, SAMP[1], RECT\n"
> + "TEX_LZ TEMP[4].z, TEMP[3].xyyy, SAMP[2], RECT\n"
>
>   "MOV TEMP[4].w, IMM[1].xxxx\n"
>
> @@ -93,12 +93,12 @@ const char *compute_shader_video_buffer =
>   "DP4 TEMP[7].z, CONST[2], TEMP[4]\n"
>
>   "MOV TEMP[5].w, TEMP[4].zzzz\n"
> - "SLE TEMP[6].w, TEMP[5], CONST[3].xxxx\n"
> - "SGT TEMP[5].w, TEMP[5], CONST[3].yyyy\n"
> + "SLE TEMP[6].w, TEMP[5].wwww, CONST[3].xxxx\n"
> + "SGT TEMP[5].w, TEMP[5].wwww, CONST[3].yyyy\n"
>
> - "MAX TEMP[7].w, TEMP[5], TEMP[6]\n"
> + "MAX TEMP[7].w, TEMP[5].wwww, TEMP[6].wwww\n"
>
> - "STORE IMAGE[0], TEMP[0], TEMP[7], 2D\n"
> + "STORE IMAGE[0], TEMP[0].xyyy, TEMP[7], 2D\n"
>"ENDIF\n"
>
>"END\n";
> @@ -124,7 +124,7 @@ const char *compute_shader_weave =
>"IMM[2] UINT32 { 1, 2, 4, 0}\n"
>"IMM[3] FLT32 { 0.25, 0.5, 0.125, 0.125}\n"
>
> -  "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"
> +  "UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n"
>
>/* Drawn area check */
>"USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
> @@ -133,22 +133,22 @@ const char *compute_shader_weave =
>"AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
>"AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"
>
> -  "UIF TEMP[1]\n"
> - "MOV TEMP[2], TEMP[0]\n"
> +  "UIF TEMP[1].xxxx\n"
> + "MOV TEMP[2].xy, TEMP[0].xyyy\n"
>   /* Translate */
> - "UADD TEMP[2].xy, TEMP[2], -CONST[5].xyxy\n"
> + "UADD TEMP[2].xy, TEMP[2].xyyy, -CONST[5].xyxy\n"
>
>   /* Top Y */
> - "U2F TEMP[2], TEMP[2]\n"
> + "U2F TEMP[2].xy, TEMP[2].xyyy\n"
>   "DIV TEMP[2].y, TEMP[2]., IMM[1].\n"
>   /* Down Y */
> - "MOV TEMP[12], TEMP[2]\n"
> + "MOV TEMP[12].xy, TEMP[2].xyyy\n"
>
>   /* Top UV */
> - "MOV TEMP[3], TEMP[2]\n"
> + "MOV TEMP[3].xy, TEMP[2].xyyy\n"
>   "DIV TEMP[3].xy, TEMP[3], IMM[1].\n"
>   /* Down UV */
> - "MOV TEMP[13], TEMP[3]\n"
> + "MOV TEMP[13].xy, TEMP[3].xyyy\n"
>
>   /* Texture offset */
>   "ADD TEMP[2].x, TEMP[2]., IMM[3].\n"
> @@ -162,10 +162,10 @@ const char *compute_shader_weave =
>   "ADD TEMP[13].y, TEMP[13]., IMM[3].\n"
>
>   /* Scale */
> - "DIV TEMP[2].xy, TEMP[2], CONST[3].zwzw\n"
> - "DIV TEMP[12].xy, TEMP[12], CONST[3].zwzw\n"
> - "DIV TEMP[3].xy, TEMP[3], CONST[3].zwzw\n"
> - "DIV TEMP[13].xy, TEMP[13], CONST[3].zwzw\n"
> + "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwzw\n"

[Mesa-dev] [PATCH 1/4] gallium/vl: fix compute tgsi shaders to not process undefined components

2019-07-25 Thread Ilia Mirkin
This caused nouveau's function handling logic to think that the MAIN
function was due to receive external parameters, and cascaded some
failures after that. Instead avoid having the undefined components in
the first place.

Fixes: f6ac0b5d71 (gallium/auxiliary/vl: Add compute shader to support video compositor render)
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111213
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111217
Signed-off-by: Ilia Mirkin 
---
 src/gallium/auxiliary/vl/vl_compositor_cs.c | 102 ++--
 1 file changed, 51 insertions(+), 51 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor_cs.c b/src/gallium/auxiliary/vl/vl_compositor_cs.c
index 485b4174b8e..d84df7240da 100644
--- a/src/gallium/auxiliary/vl/vl_compositor_cs.c
+++ b/src/gallium/auxiliary/vl/vl_compositor_cs.c
@@ -61,7 +61,7 @@ const char *compute_shader_video_buffer =
   "IMM[0] UINT32 { 8, 8, 1, 0}\n"
   "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"
 
-  "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"
+  "UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n"
 
   /* Drawn area check */
   "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
@@ -70,20 +70,20 @@ const char *compute_shader_video_buffer =
   "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
   "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"
 
-  "UIF TEMP[1]\n"
+  "UIF TEMP[1].xxxx\n"
  /* Translate */
  "UADD TEMP[2].xy, TEMP[0], -CONST[5].xyxy\n"
- "U2F TEMP[2], TEMP[2]\n"
- "DIV TEMP[3], TEMP[2], IMM[1].yyyy\n"
+ "U2F TEMP[2].xy, TEMP[2].xyyy\n"
+ "DIV TEMP[3].xy, TEMP[2].xyyy, IMM[1].yyyy\n"
 
  /* Scale */
- "DIV TEMP[2], TEMP[2], CONST[3].zwzw\n"
- "DIV TEMP[3], TEMP[3], CONST[3].zwzw\n"
+ "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwww\n"
+ "DIV TEMP[3].xy, TEMP[3].xyyy, CONST[3].zwww\n"
 
  /* Fetch texels */
- "TEX_LZ TEMP[4].x, TEMP[2], SAMP[0], RECT\n"
- "TEX_LZ TEMP[4].y, TEMP[3], SAMP[1], RECT\n"
- "TEX_LZ TEMP[4].z, TEMP[3], SAMP[2], RECT\n"
+ "TEX_LZ TEMP[4].x, TEMP[2].xyyy, SAMP[0], RECT\n"
+ "TEX_LZ TEMP[4].y, TEMP[3].xyyy, SAMP[1], RECT\n"
+ "TEX_LZ TEMP[4].z, TEMP[3].xyyy, SAMP[2], RECT\n"
 
  "MOV TEMP[4].w, IMM[1].xxxx\n"
 
@@ -93,12 +93,12 @@ const char *compute_shader_video_buffer =
  "DP4 TEMP[7].z, CONST[2], TEMP[4]\n"
 
  "MOV TEMP[5].w, TEMP[4].zzzz\n"
- "SLE TEMP[6].w, TEMP[5], CONST[3].xxxx\n"
- "SGT TEMP[5].w, TEMP[5], CONST[3].yyyy\n"
+ "SLE TEMP[6].w, TEMP[5].wwww, CONST[3].xxxx\n"
+ "SGT TEMP[5].w, TEMP[5].wwww, CONST[3].yyyy\n"
 
- "MAX TEMP[7].w, TEMP[5], TEMP[6]\n"
+ "MAX TEMP[7].w, TEMP[5].wwww, TEMP[6].wwww\n"
 
- "STORE IMAGE[0], TEMP[0], TEMP[7], 2D\n"
+ "STORE IMAGE[0], TEMP[0].xyyy, TEMP[7], 2D\n"
   "ENDIF\n"
 
   "END\n";
@@ -124,7 +124,7 @@ const char *compute_shader_weave =
   "IMM[2] UINT32 { 1, 2, 4, 0}\n"
   "IMM[3] FLT32 { 0.25, 0.5, 0.125, 0.125}\n"
 
-  "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"
+  "UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n"
 
   /* Drawn area check */
   "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
@@ -133,22 +133,22 @@ const char *compute_shader_weave =
   "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
   "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"
 
-  "UIF TEMP[1]\n"
- "MOV TEMP[2], TEMP[0]\n"
+  "UIF TEMP[1].xxxx\n"
+ "MOV TEMP[2].xy, TEMP[0].xyyy\n"
  /* Translate */
- "UADD TEMP[2].xy, TEMP[2], -CONST[5].xyxy\n"
+ "UADD TEMP[2].xy, TEMP[2].xyyy, -CONST[5].xyxy\n"
 
  /* Top Y */
- "U2F TEMP[2], TEMP[2]\n"
+ "U2F TEMP[2].xy, TEMP[2].xyyy\n"
  "DIV TEMP[2].y, TEMP[2]., IMM[1].\n"
  /* Down Y */
- "MOV TEMP[12], TEMP[2]\n"
+ "MOV TEMP[12].xy, TEMP[2].xyyy\n"
 
  /* Top UV */
- "MOV TEMP[3], TEMP[2]\n"
+ "MOV TEMP[3].xy, TEMP[2].xyyy\n"
  "DIV TEMP[3].xy, TEMP[3], IMM[1].\n"
  /* Down UV */
- "MOV TEMP[13], TEMP[3]\n"
+ "MOV TEMP[13].xy, TEMP[3].xyyy\n"
 
  /* Texture offset */
  "ADD TEMP[2].x, TEMP[2]., IMM[3].\n"
@@ -162,10 +162,10 @@ const char *compute_shader_weave =
  "ADD TEMP[13].y, TEMP[13]., IMM[3].\n"
 
  /* Scale */
- "DIV TEMP[2].xy, TEMP[2], CONST[3].zwzw\n"
- "DIV TEMP[12].xy, TEMP[12], CONST[3].zwzw\n"
- "DIV TEMP[3].xy, TEMP[3], CONST[3].zwzw\n"
- "DIV TEMP[13].xy, TEMP[13], CONST[3].zwzw\n"
+ "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwzw\n"
+ "DIV TEMP[12].xy, TEMP[12].xyyy, CONST[3].zwzw\n"
+ "DIV TEMP[3].xy, TEMP[3].xyyy, CONST[3].zwzw\n"
+ "DIV TEMP[13].xy, TEMP[13].xyyy, CONST[3].zwzw\n"
 
  /* Weave offset */
  "ADD TEMP[2].y, TEMP[2].,