Re: [Mesa-dev] [PATCH 14/25] radeonsi: add TCS epilog

2016-02-16 Thread Marek Olšák
On Tue, Feb 16, 2016 at 5:14 PM, Nicolai Hähnle  wrote:
> On 15.02.2016 18:59, Marek Olšák wrote:
>>
>> From: Marek Olšák 
>>
>> ---
>>   src/gallium/drivers/radeonsi/si_pipe.c   |   1 +
>>   src/gallium/drivers/radeonsi/si_pipe.h   |   1 +
>>   src/gallium/drivers/radeonsi/si_shader.c | 163 ---
>>   src/gallium/drivers/radeonsi/si_shader.h |   3 +
>>   4 files changed, 155 insertions(+), 13 deletions(-)
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c
>> b/src/gallium/drivers/radeonsi/si_pipe.c
>> index 2b5ce3a..645d418 100644
>> --- a/src/gallium/drivers/radeonsi/si_pipe.c
>> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
>> @@ -540,6 +540,7 @@ static void si_destroy_screen(struct pipe_screen*
>> pscreen)
>> struct si_shader_part *parts[] = {
>> sscreen->vs_prologs,
>> sscreen->vs_epilogs,
>> +   sscreen->tcs_epilogs,
>> };
>> unsigned i;
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
>> b/src/gallium/drivers/radeonsi/si_pipe.h
>> index 8d98779..d9175b9 100644
>> --- a/src/gallium/drivers/radeonsi/si_pipe.h
>> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
>> @@ -91,6 +91,7 @@ struct si_screen {
>> pipe_mutex  shader_parts_mutex;
>> struct si_shader_part   *vs_prologs;
>> struct si_shader_part   *vs_epilogs;
>> +   struct si_shader_part   *tcs_epilogs;
>>   };
>>
>>   struct si_blend_color {
>> diff --git a/src/gallium/drivers/radeonsi/si_shader.c
>> b/src/gallium/drivers/radeonsi/si_shader.c
>> index 0085c43..bc6f8cd 100644
>> --- a/src/gallium/drivers/radeonsi/si_shader.c
>> +++ b/src/gallium/drivers/radeonsi/si_shader.c
>> @@ -109,9 +109,11 @@ struct si_shader_context
>> LLVMTypeRef i1;
>> LLVMTypeRef i8;
>> LLVMTypeRef i32;
>> +   LLVMTypeRef i64;
>> LLVMTypeRef i128;
>> LLVMTypeRef f32;
>> LLVMTypeRef v16i8;
>> +   LLVMTypeRef v2i32;
>> LLVMTypeRef v4i32;
>> LLVMTypeRef v4f32;
>> LLVMTypeRef v8i32;
>> @@ -2078,14 +2080,51 @@ static void si_write_tess_factors(struct
>> lp_build_tgsi_context *bld_base,
>>   static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context
>> *bld_base)
>>   {
>> struct si_shader_context *ctx = si_shader_context(bld_base);
>> -   LLVMValueRef invocation_id;
>> +   LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
>>
>> +   rel_patch_id = get_rel_patch_id(ctx);
>> invocation_id = unpack_param(ctx, SI_PARAM_REL_IDS, 8, 5);
>> +   tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
>>
>> -   si_write_tess_factors(bld_base,
>> - get_rel_patch_id(ctx),
>> - invocation_id,
>> - get_tcs_out_current_patch_data_offset(ctx));
>> +   if (!ctx->is_monolithic) {
>> +   /* Return epilog parameters from this function. */
>> +   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
>> +   LLVMValueRef ret = ctx->return_value;
>> +   LLVMValueRef rw_buffers, rw0, rw1, tf_soffset;
>> +   unsigned vgpr;
>> +
>> +   /* RW_BUFFERS pointer */
>> +   rw_buffers = LLVMGetParam(ctx->radeon_bld.main_fn,
>> + SI_PARAM_RW_BUFFERS);
>> +   rw_buffers = LLVMBuildPtrToInt(builder, rw_buffers,
>> ctx->i64, "");
>> +   rw_buffers = LLVMBuildBitCast(builder, rw_buffers,
>> ctx->v2i32, "");
>> +   rw0 = LLVMBuildExtractElement(builder, rw_buffers,
>> + bld_base->uint_bld.zero,
>> "");
>> +   rw1 = LLVMBuildExtractElement(builder, rw_buffers,
>> + bld_base->uint_bld.one, "");
>> +   ret = LLVMBuildInsertValue(builder, ret, rw0, 0, "");
>> +   ret = LLVMBuildInsertValue(builder, ret, rw1, 1, "");
>
>
> Ugh, that's a bit ugly even if it ends up being a no-op in the final binary.
> Doesn't LLVM at least support vector return values or maybe even i64?

Yes, it's ugly.

LLVM only supports i32 and f32 return values for these shader functions.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 14/25] radeonsi: add TCS epilog

2016-02-16 Thread Nicolai Hähnle

On 15.02.2016 18:59, Marek Olšák wrote:

From: Marek Olšák 

---
  src/gallium/drivers/radeonsi/si_pipe.c   |   1 +
  src/gallium/drivers/radeonsi/si_pipe.h   |   1 +
  src/gallium/drivers/radeonsi/si_shader.c | 163 ---
  src/gallium/drivers/radeonsi/si_shader.h |   3 +
  4 files changed, 155 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 2b5ce3a..645d418 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -540,6 +540,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
struct si_shader_part *parts[] = {
sscreen->vs_prologs,
sscreen->vs_epilogs,
+   sscreen->tcs_epilogs,
};
unsigned i;

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 8d98779..d9175b9 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -91,6 +91,7 @@ struct si_screen {
pipe_mutex  shader_parts_mutex;
struct si_shader_part   *vs_prologs;
struct si_shader_part   *vs_epilogs;
+   struct si_shader_part   *tcs_epilogs;
  };

  struct si_blend_color {
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 0085c43..bc6f8cd 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -109,9 +109,11 @@ struct si_shader_context
LLVMTypeRef i1;
LLVMTypeRef i8;
LLVMTypeRef i32;
+   LLVMTypeRef i64;
LLVMTypeRef i128;
LLVMTypeRef f32;
LLVMTypeRef v16i8;
+   LLVMTypeRef v2i32;
LLVMTypeRef v4i32;
LLVMTypeRef v4f32;
LLVMTypeRef v8i32;
@@ -2078,14 +2080,51 @@ static void si_write_tess_factors(struct 
lp_build_tgsi_context *bld_base,
  static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
  {
struct si_shader_context *ctx = si_shader_context(bld_base);
-   LLVMValueRef invocation_id;
+   LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;

+   rel_patch_id = get_rel_patch_id(ctx);
invocation_id = unpack_param(ctx, SI_PARAM_REL_IDS, 8, 5);
+   tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);

-   si_write_tess_factors(bld_base,
- get_rel_patch_id(ctx),
- invocation_id,
- get_tcs_out_current_patch_data_offset(ctx));
+   if (!ctx->is_monolithic) {
+   /* Return epilog parameters from this function. */
+   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+   LLVMValueRef ret = ctx->return_value;
+   LLVMValueRef rw_buffers, rw0, rw1, tf_soffset;
+   unsigned vgpr;
+
+   /* RW_BUFFERS pointer */
+   rw_buffers = LLVMGetParam(ctx->radeon_bld.main_fn,
+ SI_PARAM_RW_BUFFERS);
+   rw_buffers = LLVMBuildPtrToInt(builder, rw_buffers, ctx->i64, 
"");
+   rw_buffers = LLVMBuildBitCast(builder, rw_buffers, ctx->v2i32, 
"");
+   rw0 = LLVMBuildExtractElement(builder, rw_buffers,
+ bld_base->uint_bld.zero, "");
+   rw1 = LLVMBuildExtractElement(builder, rw_buffers,
+ bld_base->uint_bld.one, "");
+   ret = LLVMBuildInsertValue(builder, ret, rw0, 0, "");
+   ret = LLVMBuildInsertValue(builder, ret, rw1, 1, "");


Ugh, that's a bit ugly even if it ends up being a no-op in the final 
binary. Doesn't LLVM at least support vector return values or maybe even 
i64?


Nicolai


+   /* Tess factor buffer soffset is after user SGPRs. */
+   tf_soffset = LLVMGetParam(ctx->radeon_bld.main_fn,
+ SI_PARAM_TESS_FACTOR_OFFSET);
+   ret = LLVMBuildInsertValue(builder, ret, tf_soffset,
+  SI_TCS_NUM_USER_SGPR, "");
+
+   /* VGPRs */
+   rel_patch_id = bitcast(bld_base, TGSI_TYPE_FLOAT, rel_patch_id);
+   invocation_id = bitcast(bld_base, TGSI_TYPE_FLOAT, 
invocation_id);
+   tf_lds_offset = bitcast(bld_base, TGSI_TYPE_FLOAT, 
tf_lds_offset);
+
+   vgpr = SI_TCS_NUM_USER_SGPR + 1;
+   ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, 
"");
+   ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, 
"");
+   ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, 
"");
+   ctx->return_value = ret;
+   return;
+   }
+
+   si_write_tess_factors(bld_base, rel_patch_id, invocation_id, 

[Mesa-dev] [PATCH 14/25] radeonsi: add TCS epilog

2016-02-15 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_pipe.c   |   1 +
 src/gallium/drivers/radeonsi/si_pipe.h   |   1 +
 src/gallium/drivers/radeonsi/si_shader.c | 163 ---
 src/gallium/drivers/radeonsi/si_shader.h |   3 +
 4 files changed, 155 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 2b5ce3a..645d418 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -540,6 +540,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
struct si_shader_part *parts[] = {
sscreen->vs_prologs,
sscreen->vs_epilogs,
+   sscreen->tcs_epilogs,
};
unsigned i;
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 8d98779..d9175b9 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -91,6 +91,7 @@ struct si_screen {
pipe_mutex  shader_parts_mutex;
struct si_shader_part   *vs_prologs;
struct si_shader_part   *vs_epilogs;
+   struct si_shader_part   *tcs_epilogs;
 };
 
 struct si_blend_color {
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 0085c43..bc6f8cd 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -109,9 +109,11 @@ struct si_shader_context
LLVMTypeRef i1;
LLVMTypeRef i8;
LLVMTypeRef i32;
+   LLVMTypeRef i64;
LLVMTypeRef i128;
LLVMTypeRef f32;
LLVMTypeRef v16i8;
+   LLVMTypeRef v2i32;
LLVMTypeRef v4i32;
LLVMTypeRef v4f32;
LLVMTypeRef v8i32;
@@ -2078,14 +2080,51 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
 static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
 {
struct si_shader_context *ctx = si_shader_context(bld_base);
-   LLVMValueRef invocation_id;
+   LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
 
+   rel_patch_id = get_rel_patch_id(ctx);
invocation_id = unpack_param(ctx, SI_PARAM_REL_IDS, 8, 5);
+   tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
 
-   si_write_tess_factors(bld_base,
- get_rel_patch_id(ctx),
- invocation_id,
- get_tcs_out_current_patch_data_offset(ctx));
+   if (!ctx->is_monolithic) {
+   /* Return epilog parameters from this function. */
+   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+   LLVMValueRef ret = ctx->return_value;
+   LLVMValueRef rw_buffers, rw0, rw1, tf_soffset;
+   unsigned vgpr;
+
+   /* RW_BUFFERS pointer */
+   rw_buffers = LLVMGetParam(ctx->radeon_bld.main_fn,
+ SI_PARAM_RW_BUFFERS);
+   rw_buffers = LLVMBuildPtrToInt(builder, rw_buffers, ctx->i64, "");
+   rw_buffers = LLVMBuildBitCast(builder, rw_buffers, ctx->v2i32, "");
+   rw0 = LLVMBuildExtractElement(builder, rw_buffers,
+ bld_base->uint_bld.zero, "");
+   rw1 = LLVMBuildExtractElement(builder, rw_buffers,
+ bld_base->uint_bld.one, "");
+   ret = LLVMBuildInsertValue(builder, ret, rw0, 0, "");
+   ret = LLVMBuildInsertValue(builder, ret, rw1, 1, "");
+
+   /* Tess factor buffer soffset is after user SGPRs. */
+   tf_soffset = LLVMGetParam(ctx->radeon_bld.main_fn,
+ SI_PARAM_TESS_FACTOR_OFFSET);
+   ret = LLVMBuildInsertValue(builder, ret, tf_soffset,
+  SI_TCS_NUM_USER_SGPR, "");
+
+   /* VGPRs */
+   rel_patch_id = bitcast(bld_base, TGSI_TYPE_FLOAT, rel_patch_id);
+   invocation_id = bitcast(bld_base, TGSI_TYPE_FLOAT, invocation_id);
+   tf_lds_offset = bitcast(bld_base, TGSI_TYPE_FLOAT, tf_lds_offset);
+
+   vgpr = SI_TCS_NUM_USER_SGPR + 1;
+   ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, "");
+   ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");
+   ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, "");
+   ctx->return_value = ret;
+   return;
+   }
+
+   si_write_tess_factors(bld_base, rel_patch_id, invocation_id, tf_lds_offset);
 }
 
 static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base)
@@ -3679,12 +3718,11 @@ static void create_function(struct si_shader_context 
*ctx)
struct