Re: [Mesa-dev] [PATCH 01/19] radeonsi: clean up passing the is_monolithic flag for compilation

Timothy Arceri Mon, 25 Jun 2018 17:49:28 -0700

On 26/06/18 09:51, Dieter Nützel wrote:

Hello Marek,
after this series landed I get this:

Making all in targets/pipe-loader
make[4]: Verzeichnis „/opt/mesa/src/gallium/targets/pipe-loader“ wirdbetreten
   CXXLD    pipe_r600.la
../../../../src/gallium/winsys/radeon/drm/.libs/libradeonwinsys.a(radeon_drm_surface.o):In function `radeon_winsys_surface_init':/opt/mesa/src/gallium/winsys/radeon/drm/radeon_drm_surface.c:307:undefined reference to `ac_compute_cmask'
collect2: error: ld returned 1 exit status
make[4]: *** [Makefile:970: pipe_r600.la] Fehler 1

Didn't have more time for digging, yet.

r600 probably doesn't get linked to the amd common (ac) code that isnormally just shared between radv and radeonsi.


Dieter

Am 23.06.2018 00:31, schrieb Marek Olšák:

From: Marek Olšák <marek.ol...@amd.com>

---
 src/gallium/drivers/radeonsi/si_shader.c      | 30 +++++++++----------
 src/gallium/drivers/radeonsi/si_shader.h      |  1 -
 .../drivers/radeonsi/si_shader_internal.h     |  3 --
 .../drivers/radeonsi/si_state_shaders.c       |  7 +++--
 4 files changed, 18 insertions(+), 23 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c
b/src/gallium/drivers/radeonsi/si_shader.c
index e7e2a12a7b0..677853af60b 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5047,22 +5047,21 @@ static void create_function(struct
si_shader_context *ctx)
         break;
     default:
         assert(0 && "unimplemented shader");
         return;
     }

     si_create_function(ctx, "main", returns, num_returns, &fninfo,
                si_get_max_workgroup_size(shader));

/* Reserve register locations for VGPR inputs the PS prolog mayneed. */

-    if (ctx->type == PIPE_SHADER_FRAGMENT &&
-        ctx->separate_prolog) {

+ if (ctx->type == PIPE_SHADER_FRAGMENT &&!ctx->shader->is_monolithic) {

         ac_llvm_add_target_dep_function_attr(ctx->main_fn,
                              "InitialPSInputAddr",
                              S_0286D0_PERSP_SAMPLE_ENA(1) |
                              S_0286D0_PERSP_CENTER_ENA(1) |
                              S_0286D0_PERSP_CENTROID_ENA(1) |
                              S_0286D0_LINEAR_SAMPLE_ENA(1) |
                              S_0286D0_LINEAR_CENTER_ENA(1) |
                              S_0286D0_LINEAR_CENTROID_ENA(1) |
                              S_0286D0_FRONT_FACE_ENA(1) |
                              S_0286D0_ANCILLARY_ENA(1) |
@@ -6049,22 +6048,21 @@ static void si_init_exec_from_input(struct
si_shader_context *ctx,
 }

 static bool si_vs_needs_prolog(const struct si_shader_selector *sel,
                    const struct si_vs_prolog_bits *key)
 {

/* VGPR initialization fixup for Vega10 and Raven is always donein the

      * VS prolog. */
     return sel->vs_needs_prolog || key->ls_vgpr_fix;
 }

-static bool si_compile_tgsi_main(struct si_shader_context *ctx,
-                 bool is_monolithic)
+static bool si_compile_tgsi_main(struct si_shader_context *ctx)
 {
     struct si_shader *shader = ctx->shader;
     struct si_shader_selector *sel = shader->selector;
     struct lp_build_tgsi_context *bld_base = &ctx->bld_base;

     // TODO clean all this up!
     switch (ctx->type) {
     case PIPE_SHADER_VERTEX:
         ctx->load_input = declare_input_vs;
         if (shader->key.as_ls)
@@ -6135,31 +6133,31 @@ static bool si_compile_tgsi_main(struct
si_shader_context *ctx,
      * - Add a barrier before the second shader.

* - In the second shader, reset EXEC to ~0 and wrap the mainpart in

      *   an if-statement. This is required for correctness in geometry
      *   shaders, to ensure that empty GS waves do not send GS_EMIT and
      *   GS_CUT messages.
      *
      * For monolithic merged shaders, the first shader is wrapped in an
      * if-block together with its prolog in si_build_wrapper_function.
      */
     if (ctx->screen->info.chip_class >= GFX9) {
-        if (!is_monolithic &&
+        if (!shader->is_monolithic &&
             sel->info.num_instructions > 1 && /* not empty shader */
             (shader->key.as_es || shader->key.as_ls) &&
             (ctx->type == PIPE_SHADER_TESS_EVAL ||
              (ctx->type == PIPE_SHADER_VERTEX &&
               !si_vs_needs_prolog(sel, &shader->key.part.vs.prolog)))) {
             si_init_exec_from_input(ctx,
                         ctx->param_merged_wave_info, 0);
         } else if (ctx->type == PIPE_SHADER_TESS_CTRL ||
                ctx->type == PIPE_SHADER_GEOMETRY) {
-            if (!is_monolithic)
+            if (!shader->is_monolithic)
                 ac_init_exec_full_mask(&ctx->ac);

             LLVMValueRef num_threads = si_unpack_param(ctx,
ctx->param_merged_wave_info, 8, 8);
             LLVMValueRef ena =
                 LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
                         ac_get_thread_id(&ctx->ac), num_threads, "");
             lp_build_if(&ctx->merged_wrap_if_state, &ctx->gallivm, ena);

             /* The barrier must execute for all shaders in a
              * threadgroup.
@@ -6766,71 +6764,69 @@ static void si_build_wrapper_function(struct
si_shader_context *ctx,
             }
         }
     }

     LLVMBuildRetVoid(builder);
 }

 int si_compile_tgsi_shader(struct si_screen *sscreen,
                struct si_compiler *compiler,
                struct si_shader *shader,
-               bool is_monolithic,
                struct pipe_debug_callback *debug)
 {
     struct si_shader_selector *sel = shader->selector;
     struct si_shader_context ctx;
     int r = -1;

     /* Dump TGSI code before doing TGSI->LLVM conversion in case the
      * conversion fails. */
     if (si_can_dump_shader(sscreen, sel->info.processor) &&
         !(sscreen->debug_flags & DBG(NO_TGSI))) {
         if (sel->tokens)
             tgsi_dump(sel->tokens, 0);
         else
             nir_print_shader(sel->nir, stderr);
         si_dump_streamout(&sel->so);
     }

     si_init_shader_ctx(&ctx, sscreen, compiler);
     si_llvm_context_set_tgsi(&ctx, shader);
-    ctx.separate_prolog = !is_monolithic;

     memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
            sizeof(shader->info.vs_output_param_offset));

     shader->info.uses_instanceid = sel->info.uses_instanceid;

-    if (!si_compile_tgsi_main(&ctx, is_monolithic)) {
+    if (!si_compile_tgsi_main(&ctx)) {
         si_llvm_dispose(&ctx);
         return -1;
     }

-    if (is_monolithic && ctx.type == PIPE_SHADER_VERTEX) {
+    if (shader->is_monolithic && ctx.type == PIPE_SHADER_VERTEX) {
         LLVMValueRef parts[2];
         bool need_prolog = sel->vs_needs_prolog;

         parts[1] = ctx.main_fn;

         if (need_prolog) {
             union si_shader_part_key prolog_key;
             si_get_vs_prolog_key(&sel->info,
                          shader->info.num_input_sgprs,
                          &shader->key.part.vs.prolog,
                          shader, &prolog_key);
             si_build_vs_prolog_function(&ctx, &prolog_key);
             parts[0] = ctx.main_fn;
         }

         si_build_wrapper_function(&ctx, parts + !need_prolog,
                       1 + need_prolog, need_prolog, 0);
-    } else if (is_monolithic && ctx.type == PIPE_SHADER_TESS_CTRL) {

+ } else if (shader->is_monolithic && ctx.type ==PIPE_SHADER_TESS_CTRL) {

         if (sscreen->info.chip_class >= GFX9) {
             struct si_shader_selector *ls = shader->key.part.tcs.ls;
             LLVMValueRef parts[4];
             bool vs_needs_prolog =
                 si_vs_needs_prolog(ls, &shader->key.part.tcs.ls_prolog);

             /* TCS main part */
             parts[2] = ctx.main_fn;

             /* TCS epilog */

@@ -6839,23 +6835,24 @@ int si_compile_tgsi_shader(struct si_screen*sscreen, tcs_epilog_key.tcs_epilog.states =shader->key.part.tcs.epilog;

             si_build_tcs_epilog_function(&ctx, &tcs_epilog_key);
             parts[3] = ctx.main_fn;

             /* VS as LS main part */
             struct si_shader shader_ls = {};
             shader_ls.selector = ls;
             shader_ls.key.as_ls = 1;
             shader_ls.key.mono = shader->key.mono;
             shader_ls.key.opt = shader->key.opt;
+            shader_ls.is_monolithic = true;
             si_llvm_context_set_tgsi(&ctx, &shader_ls);

-            if (!si_compile_tgsi_main(&ctx, true)) {
+            if (!si_compile_tgsi_main(&ctx)) {
                 si_llvm_dispose(&ctx);
                 return -1;
             }
             shader->info.uses_instanceid |= ls->info.uses_instanceid;
             parts[1] = ctx.main_fn;

             /* LS prolog */
             if (vs_needs_prolog) {
                 union si_shader_part_key vs_prolog_key;
                 si_get_vs_prolog_key(&ls->info,

@@ -6881,21 +6878,21 @@ int si_compile_tgsi_shader(struct si_screen*sscreen,


             parts[0] = ctx.main_fn;

             memset(&epilog_key, 0, sizeof(epilog_key));
             epilog_key.tcs_epilog.states = shader->key.part.tcs.epilog;
             si_build_tcs_epilog_function(&ctx, &epilog_key);
             parts[1] = ctx.main_fn;

             si_build_wrapper_function(&ctx, parts, 2, 0, 0);
         }
-    } else if (is_monolithic && ctx.type == PIPE_SHADER_GEOMETRY) {

+ } else if (shader->is_monolithic && ctx.type ==PIPE_SHADER_GEOMETRY) {

         if (ctx.screen->info.chip_class >= GFX9) {
             struct si_shader_selector *es = shader->key.part.gs.es;
             LLVMValueRef es_prolog = NULL;
             LLVMValueRef es_main = NULL;
             LLVMValueRef gs_prolog = NULL;
             LLVMValueRef gs_main = ctx.main_fn;

             /* GS prolog */
             union si_shader_part_key gs_prolog_key;
             memset(&gs_prolog_key, 0, sizeof(gs_prolog_key));

@@ -6903,23 +6900,24 @@ int si_compile_tgsi_shader(struct si_screen*sscreen,

             gs_prolog_key.gs_prolog.is_monolithic = true;
             si_build_gs_prolog_function(&ctx, &gs_prolog_key);
             gs_prolog = ctx.main_fn;

             /* ES main part */
             struct si_shader shader_es = {};
             shader_es.selector = es;
             shader_es.key.as_es = 1;
             shader_es.key.mono = shader->key.mono;
             shader_es.key.opt = shader->key.opt;
+            shader_es.is_monolithic = true;
             si_llvm_context_set_tgsi(&ctx, &shader_es);

-            if (!si_compile_tgsi_main(&ctx, true)) {
+            if (!si_compile_tgsi_main(&ctx)) {
                 si_llvm_dispose(&ctx);
                 return -1;
             }
             shader->info.uses_instanceid |= es->info.uses_instanceid;
             es_main = ctx.main_fn;

             /* ES prolog */
             if (es->vs_needs_prolog) {
                 union si_shader_part_key vs_prolog_key;
                 si_get_vs_prolog_key(&es->info,

@@ -6954,21 +6952,21 @@ int si_compile_tgsi_shader(struct si_screen*sscreen,


             parts[1] = ctx.main_fn;

             memset(&prolog_key, 0, sizeof(prolog_key));
             prolog_key.gs_prolog.states = shader->key.part.gs.prolog;
             si_build_gs_prolog_function(&ctx, &prolog_key);
             parts[0] = ctx.main_fn;

             si_build_wrapper_function(&ctx, parts, 2, 1, 0);
         }
-    } else if (is_monolithic && ctx.type == PIPE_SHADER_FRAGMENT) {

+ } else if (shader->is_monolithic && ctx.type ==PIPE_SHADER_FRAGMENT) {

         LLVMValueRef parts[3];
         union si_shader_part_key prolog_key;
         union si_shader_part_key epilog_key;
         bool need_prolog;

         si_get_ps_prolog_key(shader, &prolog_key, false);
         need_prolog = si_need_ps_prolog(&prolog_key);

         parts[need_prolog ? 1 : 0] = ctx.main_fn;

@@ -8062,21 +8060,21 @@ int si_shader_create(struct si_screen
*sscreen, struct si_compiler *compiler,
     /* LS, ES, VS are compiled on demand if the main part hasn't been
      * compiled for that stage.
      *
      * Vertex shaders are compiled on demand when a vertex fetch
      * workaround must be applied.
      */
     if (shader->is_monolithic) {
         /* Monolithic shader (compiled as a whole, has many variants,
          * may take a long time to compile).
          */

- r = si_compile_tgsi_shader(sscreen, compiler, shader, true,debug);

+        r = si_compile_tgsi_shader(sscreen, compiler, shader, debug);
         if (r)
             return r;
     } else {
         /* The shader consists of several parts:
          *
          * - the middle part is the user shader, it has 1 variant only
          *   and it was compiled during the creation of the shader
          *   selector
          * - the prolog part is inserted at the beginning
          * - the epilog part is inserted at the end
diff --git a/src/gallium/drivers/radeonsi/si_shader.h
b/src/gallium/drivers/radeonsi/si_shader.h
index e1f6b392fbe..fd2f71bed74 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -655,21 +655,20 @@ struct si_shader_part {

 /* si_shader.c */
 struct si_shader *
 si_generate_gs_copy_shader(struct si_screen *sscreen,
                struct si_compiler *compiler,
                struct si_shader_selector *gs_selector,
                struct pipe_debug_callback *debug);
 int si_compile_tgsi_shader(struct si_screen *sscreen,
                struct si_compiler *compiler,
                struct si_shader *shader,
-               bool is_monolithic,
                struct pipe_debug_callback *debug);

int si_shader_create(struct si_screen *sscreen, struct si_compiler*compiler,

              struct si_shader *shader,
              struct pipe_debug_callback *debug);
 void si_shader_destroy(struct si_shader *shader);
 unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name,
unsigned index);

unsigned si_shader_io_get_unique_index(unsigned semantic_name,unsigned index,

                        unsigned is_varying);
 int si_shader_binary_upload(struct si_screen *sscreen, struct
si_shader *shader);

void si_shader_dump(struct si_screen *sscreen, const struct si_shader*shader,

diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h
b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 0a347172d62..e528a56023f 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -55,23 +55,20 @@ struct si_shader_context {
     struct si_screen *screen;

     unsigned type; /* PIPE_SHADER_* specifies the type of shader. */

     /* For clamping the non-constant index in resource indexing: */
     unsigned num_const_buffers;
     unsigned num_shader_buffers;
     unsigned num_images;
     unsigned num_samplers;

-    /* Whether the prolog will be compiled separately. */
-    bool separate_prolog;
-
     struct ac_shader_abi abi;

/** This function is responsible for initilizing the inputs arrayand will be

       * called once for each input declared in the TGSI shader.
       */
     void (*load_input)(struct si_shader_context *,
                unsigned input_index,
                const struct tgsi_full_declaration *decl,
                LLVMValueRef out[4]);

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index f0498520ae8..ddd38dabbe6 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1573,24 +1573,24 @@ static bool si_check_missing_main_part(struct
si_screen *sscreen,
             return false;

         /* We can leave the fence as permanently signaled because the
          * main part becomes visible globally only after it has been
          * compiled. */
         util_queue_fence_init(&main_part->ready);

         main_part->selector = sel;
         main_part->key.as_es = key->as_es;
         main_part->key.as_ls = key->as_ls;
+        main_part->is_monolithic = false;

         if (si_compile_tgsi_shader(sscreen, compiler_state->compiler,
-                       main_part, false,
-                       &compiler_state->debug) != 0) {
+                       main_part, &compiler_state->debug) != 0) {
             FREE(main_part);
             return false;
         }
         *mainp = main_part;
     }
     return true;
 }

 /* Select the hw shader variant depending on the current state. */
 static int si_shader_select_with_key(struct si_screen *sscreen,
@@ -1875,39 +1875,40 @@ static void si_init_shader_selector_async(void
*job, int thread_index)
         if (!shader) {

fprintf(stderr, "radeonsi: can't allocate a main shaderpart\n");

             return;
         }

         /* We can leave the fence signaled because use of the default
          * main part is guarded by the selector's ready fence. */
         util_queue_fence_init(&shader->ready);

         shader->selector = sel;
+        shader->is_monolithic = false;
         si_parse_next_shader_property(&sel->info,
                           sel->so.num_outputs != 0,
                           &shader->key);

         if (sel->tokens || sel->nir)
             ir_binary = si_get_ir_binary(sel);

         /* Try to load the shader from the shader cache. */
         mtx_lock(&sscreen->shader_cache_mutex);

         if (ir_binary &&
             si_shader_cache_load_shader(sscreen, ir_binary, shader)) {
             mtx_unlock(&sscreen->shader_cache_mutex);
             si_shader_dump_stats_for_shader_db(shader, debug);
         } else {
             mtx_unlock(&sscreen->shader_cache_mutex);

/* Compile the shader if it hasn't been loaded from thecache. */

-            if (si_compile_tgsi_shader(sscreen, compiler, shader, false,
+            if (si_compile_tgsi_shader(sscreen, compiler, shader,
                            debug) != 0) {
                 FREE(shader);
                 FREE(ir_binary);

fprintf(stderr, "radeonsi: can't compile a mainshader part\n");

                 return;
             }

             if (ir_binary) {
                 mtx_lock(&sscreen->shader_cache_mutex);

if (!si_shader_cache_insert_shader(sscreen,ir_binary, shader, true))

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 01/19] radeonsi: clean up passing the is_monolithic flag for compilation

Reply via email to