Patches 1, 3 & 4 are

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

On 18.07.2016 14:14, Marek Olšák wrote:
From: Marek Olšák <marek.ol...@amd.com>

This effectively removes s_waitcnt instructions after FP16 exports.

Before:

     v_cvt_pkrtz_f16_f32_e32 v0, v0, v1   ; 5E000300
     v_cvt_pkrtz_f16_f32_e32 v1, v2, v3   ; 5E020702
     exp 15, 0, 1, 0, 0, v0, v1, v0, v0   ; F800040F 00000100
     s_waitcnt expcnt(0)                  ; BF8C0F0F
     v_cvt_pkrtz_f16_f32_e32 v0, v4, v5   ; 5E000B04
     v_cvt_pkrtz_f16_f32_e32 v1, v6, v7   ; 5E020F06
     exp 15, 1, 1, 0, 0, v0, v1, v0, v0   ; F800041F 00000100
     s_waitcnt expcnt(0)                  ; BF8C0F0F
     v_cvt_pkrtz_f16_f32_e32 v0, v8, v9   ; 5E001308
     v_cvt_pkrtz_f16_f32_e32 v1, v10, v11 ; 5E02170A
     exp 15, 2, 1, 0, 0, v0, v1, v0, v0   ; F800042F 00000100
     s_waitcnt expcnt(0)                  ; BF8C0F0F
     v_cvt_pkrtz_f16_f32_e32 v0, v12, v13 ; 5E001B0C
     v_cvt_pkrtz_f16_f32_e32 v1, v14, v15 ; 5E021F0E
     exp 15, 3, 1, 1, 1, v0, v1, v0, v0   ; F8001C3F 00000100
     s_endpgm                             ; BF810000

After:

     v_cvt_pkrtz_f16_f32_e32 v0, v0, v1   ; 5E000300
     v_cvt_pkrtz_f16_f32_e32 v1, v2, v3   ; 5E020702
     v_cvt_pkrtz_f16_f32_e32 v2, v4, v5   ; 5E040B04
     v_cvt_pkrtz_f16_f32_e32 v3, v6, v7   ; 5E060F06
     exp 15, 0, 1, 0, 0, v0, v1, v0, v0   ; F800040F 00000100
     v_cvt_pkrtz_f16_f32_e32 v4, v8, v9   ; 5E081308
     v_cvt_pkrtz_f16_f32_e32 v5, v10, v11 ; 5E0A170A
     exp 15, 1, 1, 0, 0, v2, v3, v0, v0   ; F800041F 00000302
     v_cvt_pkrtz_f16_f32_e32 v6, v12, v13 ; 5E0C1B0C
     v_cvt_pkrtz_f16_f32_e32 v7, v14, v15 ; 5E0E1F0E
     exp 15, 2, 1, 0, 0, v4, v5, v0, v0   ; F800042F 00000504
     exp 15, 3, 1, 1, 1, v6, v7, v0, v0   ; F8001C3F 00000706
     s_endpgm                             ; BF810000
---
  src/gallium/drivers/radeonsi/si_shader.c | 44 ++++++++++++++++++++++----------
  1 file changed, 31 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 41bcbd4..adf706c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2917,9 +2917,14 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base)
        FREE(outputs);
  }

+struct si_ps_exports {
+       unsigned num;
+       LLVMValueRef args[10][9];
+};
+
  static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
-                          LLVMValueRef depth, LLVMValueRef stencil,
-                          LLVMValueRef samplemask)
+                           LLVMValueRef depth, LLVMValueRef stencil,
+                           LLVMValueRef samplemask, struct si_ps_exports *exp)
  {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct lp_build_context *base = &bld_base->base;
@@ -2965,14 +2970,13 @@ static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
        /* Specify which components to enable */
        args[0] = lp_build_const_int32(base->gallivm, mask);

-       lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
-                          ctx->voidt, args, 9, 0);
+       memcpy(exp->args[exp->num++], args, sizeof(args));
  }

  static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
                                LLVMValueRef *color, unsigned index,
                                unsigned samplemask_param,
-                               bool is_last)
+                               bool is_last, struct si_ps_exports *exp)
  {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct lp_build_context *base = &bld_base->base;
@@ -3018,8 +3022,7 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
                        } else if (args[c][0] == bld_base->uint_bld.zero)
                                continue; /* unnecessary NULL export */

-                       lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
-                                          ctx->voidt, args[c], 9, 0);
+                       memcpy(exp->args[exp->num++], args[c], sizeof(args[c]));
                }
        } else {
                LLVMValueRef args[9];
@@ -3033,11 +3036,19 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
                } else if (args[0] == bld_base->uint_bld.zero)
                        return; /* unnecessary NULL export */

-               lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
-                                  ctx->voidt, args, 9, 0);
+               memcpy(exp->args[exp->num++], args, sizeof(args));
        }
  }

+static void si_emit_ps_exports(struct si_shader_context *ctx,
+                              struct si_ps_exports *exp)
+{
+       for (unsigned i = 0; i < exp->num; i++)
+               lp_build_intrinsic(ctx->radeon_bld.gallivm.builder,
+                                  "llvm.SI.export", ctx->voidt,
+                                  exp->args[i], 9, 0);
+}
+
  static void si_export_null(struct lp_build_tgsi_context *bld_base)
  {
        struct si_shader_context *ctx = si_shader_context(bld_base);
@@ -3069,6 +3080,7 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base)
        LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
        int last_color_export = -1;
        int i;
+       struct si_ps_exports exp = {};

        /* Determine the last export. If MRTZ is present, it's always last.
         * Otherwise, find the last color export.
@@ -3135,7 +3147,7 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base)

                        si_export_mrt_color(bld_base, color, semantic_index,
                                            SI_PARAM_SAMPLE_COVERAGE,
-                                           last_color_export == i);
+                                           last_color_export == i, &exp);
                        break;
                default:
                        fprintf(stderr,
@@ -3145,7 +3157,9 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base)
        }

        if (depth || stencil || samplemask)
-               si_export_mrt_z(bld_base, depth, stencil, samplemask);
+               si_export_mrt_z(bld_base, depth, stencil, samplemask, &exp);
+
+       si_emit_ps_exports(ctx, &exp);
  }

  /**
@@ -7495,6 +7509,7 @@ static bool si_compile_ps_epilog(struct si_screen *sscreen,
        LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
        int last_sgpr, num_params, i;
        bool status = true;
+       struct si_ps_exports exp = {};

        si_init_shader_ctx(&ctx, sscreen, &shader, tm);
        ctx.type = PIPE_SHADER_FRAGMENT;
@@ -7564,7 +7579,7 @@ static bool si_compile_ps_epilog(struct si_screen *sscreen,

                si_export_mrt_color(bld_base, color, mrt,
                                    num_params - 1,
-                                   mrt == last_color_export);
+                                   mrt == last_color_export, &exp);
        }

        /* Process depth, stencil, samplemask. */
@@ -7576,10 +7591,13 @@ static bool si_compile_ps_epilog(struct si_screen *sscreen,
                samplemask = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++);

        if (depth || stencil || samplemask)
-               si_export_mrt_z(bld_base, depth, stencil, samplemask);
+               si_export_mrt_z(bld_base, depth, stencil, samplemask, &exp);
        else if (last_color_export == -1)
                si_export_null(bld_base);

+       if (exp.num)
+               si_emit_ps_exports(&ctx, &exp);
+
        /* Compile. */
        LLVMBuildRetVoid(gallivm->builder);
        radeon_llvm_finalize_module(&ctx.radeon_bld);

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to