Re: [Mesa-dev] [PATCH 11/25] radeonsi: first bits for non-monolithic shaders

2016-02-16 Thread Nicolai Hähnle

On 16.02.2016 11:39, Marek Olšák wrote:

On Tue, Feb 16, 2016 at 5:01 PM, Nicolai Hähnle  wrote:

On 15.02.2016 18:59, Marek Olšák wrote:


From: Marek Olšák 

---
   src/gallium/drivers/radeonsi/si_pipe.c   |  1 +
   src/gallium/drivers/radeonsi/si_pipe.h   |  3 ++
   src/gallium/drivers/radeonsi/si_shader.c | 53

   src/gallium/drivers/radeonsi/si_shader.h |  2 +-
   4 files changed, 45 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c
b/src/gallium/drivers/radeonsi/si_pipe.c
index fa60732..448fe88 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -600,6 +600,7 @@ struct pipe_screen *radeonsi_screen_create(struct
radeon_winsys *ws)

 sscreen->b.has_cp_dma = true;
 sscreen->b.has_streamout = true;
+   sscreen->use_monolithic_shaders = true;

 if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE))
 sscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS |
DBG_PS | DBG_CS;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
b/src/gallium/drivers/radeonsi/si_pipe.h
index b5790d6..2a2455c 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -84,6 +84,9 @@ struct si_compute;
   struct si_screen {
 struct r600_common_screen   b;
 unsigned gs_table_depth;
+
+   /* Whether shaders are monolithic (1-part) or separate (3-part).
*/
+   bool use_monolithic_shaders;
   };

   struct si_blend_color {
diff --git a/src/gallium/drivers/radeonsi/si_shader.c
b/src/gallium/drivers/radeonsi/si_shader.c
index b058019..b74ed1e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -70,6 +70,12 @@ struct si_shader_context

 unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader.
*/
 bool is_gs_copy_shader;
+
+   /* Whether to generate the optimized shader variant compiled as a
whole
+* (without a prolog and epilog)
+*/
+   bool is_monolithic;
+
 int param_streamout_config;
 int param_streamout_write_index;
 int param_streamout_offset[4];
@@ -3657,8 +3663,10 @@ static void create_function(struct
si_shader_context *ctx)
 struct lp_build_tgsi_context *bld_base =
&ctx->radeon_bld.soa.bld_base;
 struct gallivm_state *gallivm = bld_base->base.gallivm;
 struct si_shader *shader = ctx->shader;
-   LLVMTypeRef params[SI_NUM_PARAMS], v2i32, v3i32;
+   LLVMTypeRef params[SI_NUM_PARAMS + SI_NUM_VERTEX_BUFFERS], v2i32,
v3i32;
+   LLVMTypeRef returns[16+32*4];



This is a bit of a magic number; I guess it is something like max parameters
plus attributes. Can you replace it with the appropriate defines?


There is not a single definition that would express this clearly.

The prolog has to return up to 16 input SGPRs and 4-20 input VGPRs.
Additionally, the prolog returns other data in VGPRs. That's up to
4+16 VGPRs (16 vertex load addresses) for the VS and 20+8 VGPRs (2
vec4 colors) for the PS. The PS epilog returns one SGPR (but in s10 or
so, so we need to allocate 11) and 9*4 VGPRs at most. This all can
change in the future, who knows.

16+32*4 is much more than we'll ever need, but it shouldn't overflow
at least. Assertions also check that we don't overflow.


Hmm, I see. I guess I can live with it, as well as with the casts in 
patch 14.


Nicolai


Marek


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/25] radeonsi: first bits for non-monolithic shaders

2016-02-16 Thread Marek Olšák
On Tue, Feb 16, 2016 at 5:01 PM, Nicolai Hähnle  wrote:
> On 15.02.2016 18:59, Marek Olšák wrote:
>>
>> From: Marek Olšák 
>>
>> ---
>>   src/gallium/drivers/radeonsi/si_pipe.c   |  1 +
>>   src/gallium/drivers/radeonsi/si_pipe.h   |  3 ++
>>   src/gallium/drivers/radeonsi/si_shader.c | 53
>> 
>>   src/gallium/drivers/radeonsi/si_shader.h |  2 +-
>>   4 files changed, 45 insertions(+), 14 deletions(-)
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c
>> b/src/gallium/drivers/radeonsi/si_pipe.c
>> index fa60732..448fe88 100644
>> --- a/src/gallium/drivers/radeonsi/si_pipe.c
>> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
>> @@ -600,6 +600,7 @@ struct pipe_screen *radeonsi_screen_create(struct
>> radeon_winsys *ws)
>>
>> sscreen->b.has_cp_dma = true;
>> sscreen->b.has_streamout = true;
>> +   sscreen->use_monolithic_shaders = true;
>>
>> if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE))
>> sscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS |
>> DBG_PS | DBG_CS;
>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
>> b/src/gallium/drivers/radeonsi/si_pipe.h
>> index b5790d6..2a2455c 100644
>> --- a/src/gallium/drivers/radeonsi/si_pipe.h
>> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
>> @@ -84,6 +84,9 @@ struct si_compute;
>>   struct si_screen {
>> struct r600_common_screen   b;
>> unsigned gs_table_depth;
>> +
>> +   /* Whether shaders are monolithic (1-part) or separate (3-part).
>> */
>> +   bool use_monolithic_shaders;
>>   };
>>
>>   struct si_blend_color {
>> diff --git a/src/gallium/drivers/radeonsi/si_shader.c
>> b/src/gallium/drivers/radeonsi/si_shader.c
>> index b058019..b74ed1e 100644
>> --- a/src/gallium/drivers/radeonsi/si_shader.c
>> +++ b/src/gallium/drivers/radeonsi/si_shader.c
>> @@ -70,6 +70,12 @@ struct si_shader_context
>>
>> unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader.
>> */
>> bool is_gs_copy_shader;
>> +
>> +   /* Whether to generate the optimized shader variant compiled as a
>> whole
>> +* (without a prolog and epilog)
>> +*/
>> +   bool is_monolithic;
>> +
>> int param_streamout_config;
>> int param_streamout_write_index;
>> int param_streamout_offset[4];
>> @@ -3657,8 +3663,10 @@ static void create_function(struct
>> si_shader_context *ctx)
>> struct lp_build_tgsi_context *bld_base =
>> &ctx->radeon_bld.soa.bld_base;
>> struct gallivm_state *gallivm = bld_base->base.gallivm;
>> struct si_shader *shader = ctx->shader;
>> -   LLVMTypeRef params[SI_NUM_PARAMS], v2i32, v3i32;
>> +   LLVMTypeRef params[SI_NUM_PARAMS + SI_NUM_VERTEX_BUFFERS], v2i32,
>> v3i32;
>> +   LLVMTypeRef returns[16+32*4];
>
>
> This is a bit of a magic number; I guess it is something like max parameters
> plus attributes. Can you replace it with the appropriate defines?

There is not a single definition that would express this clearly.

The prolog has to return up to 16 input SGPRs and 4-20 input VGPRs.
Additionally, the prolog returns other data in VGPRs. That's up to
4+16 VGPRs (16 vertex load addresses) for the VS and 20+8 VGPRs (2
vec4 colors) for the PS. The PS epilog returns one SGPR (but in s10 or
so, so we need to allocate 11) and 9*4 VGPRs at most. This all can
change in the future, who knows.

16+32*4 is much more than we'll ever need, but it shouldn't overflow
at least. Assertions also check that we don't overflow.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/25] radeonsi: first bits for non-monolithic shaders

2016-02-16 Thread Nicolai Hähnle

On 15.02.2016 18:59, Marek Olšák wrote:

From: Marek Olšák 

---
  src/gallium/drivers/radeonsi/si_pipe.c   |  1 +
  src/gallium/drivers/radeonsi/si_pipe.h   |  3 ++
  src/gallium/drivers/radeonsi/si_shader.c | 53 
  src/gallium/drivers/radeonsi/si_shader.h |  2 +-
  4 files changed, 45 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index fa60732..448fe88 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -600,6 +600,7 @@ struct pipe_screen *radeonsi_screen_create(struct 
radeon_winsys *ws)

sscreen->b.has_cp_dma = true;
sscreen->b.has_streamout = true;
+   sscreen->use_monolithic_shaders = true;

if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE))
sscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | 
DBG_CS;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index b5790d6..2a2455c 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -84,6 +84,9 @@ struct si_compute;
  struct si_screen {
struct r600_common_screen   b;
unsigned gs_table_depth;
+
+   /* Whether shaders are monolithic (1-part) or separate (3-part). */
+   bool use_monolithic_shaders;
  };

  struct si_blend_color {
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index b058019..b74ed1e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -70,6 +70,12 @@ struct si_shader_context

unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */
bool is_gs_copy_shader;
+
+   /* Whether to generate the optimized shader variant compiled as a whole
+* (without a prolog and epilog)
+*/
+   bool is_monolithic;
+
int param_streamout_config;
int param_streamout_write_index;
int param_streamout_offset[4];
@@ -3657,8 +3663,10 @@ static void create_function(struct si_shader_context 
*ctx)
struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
struct gallivm_state *gallivm = bld_base->base.gallivm;
struct si_shader *shader = ctx->shader;
-   LLVMTypeRef params[SI_NUM_PARAMS], v2i32, v3i32;
+   LLVMTypeRef params[SI_NUM_PARAMS + SI_NUM_VERTEX_BUFFERS], v2i32, v3i32;
+   LLVMTypeRef returns[16+32*4];


This is a bit of a magic number; I guess it is something like max parameters
plus attributes. Can you replace it with the appropriate defines?


Apart from this, patches 10-11 are

Reviewed-by: Nicolai Hähnle 


unsigned i, last_array_pointer, last_sgpr, num_params;
+   unsigned num_returns = 0;

v2i32 = LLVMVectorType(ctx->i32, 2);
v3i32 = LLVMVectorType(ctx->i32, 3);
@@ -3785,7 +3793,7 @@ static void create_function(struct si_shader_context *ctx)

assert(num_params <= Elements(params));

-   si_create_function(ctx, NULL, 0, params,
+   si_create_function(ctx, returns, num_returns, params,
   num_params, last_array_pointer, last_sgpr);

shader->num_input_sgprs = 0;
@@ -4492,9 +4500,11 @@ static void si_init_shader_ctx(struct si_shader_context 
*ctx,
bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
  }

-int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
-struct si_shader *shader,
-struct pipe_debug_callback *debug)
+static int si_compile_tgsi_shader(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct si_shader *shader,
+ bool is_monolithic,
+ struct pipe_debug_callback *debug)
  {
struct si_shader_selector *sel = shader->selector;
struct tgsi_token *tokens = sel->tokens;
@@ -4524,6 +4534,7 @@ int si_shader_create(struct si_screen *sscreen, 
LLVMTargetMachineRef tm,

si_init_shader_ctx(&ctx, sscreen, shader, tm,
   poly_stipple ? &stipple_shader_info : &sel->info);
+   ctx.is_monolithic = is_monolithic;

shader->uses_instanceid = sel->info.uses_instanceid;

@@ -4604,14 +4615,6 @@ int si_shader_create(struct si_screen *sscreen, 
LLVMTargetMachineRef tm,
goto out;
}

-   si_shader_dump(sscreen, shader, debug, ctx.type);
-
-   r = si_shader_binary_upload(sscreen, shader);
-   if (r) {
-   fprintf(stderr, "LLVM failed to upload shader\n");
-   goto out;
-   }
-
radeon_llvm_dispose(&ctx.radeon_bld);

/* Calculate the number of fragment input VGPRs. */
@@ -4675,6 +4678,30 @@ out:
return r;
  }

+int 

[Mesa-dev] [PATCH 11/25] radeonsi: first bits for non-monolithic shaders

2016-02-15 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_pipe.c   |  1 +
 src/gallium/drivers/radeonsi/si_pipe.h   |  3 ++
 src/gallium/drivers/radeonsi/si_shader.c | 53 
 src/gallium/drivers/radeonsi/si_shader.h |  2 +-
 4 files changed, 45 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index fa60732..448fe88 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -600,6 +600,7 @@ struct pipe_screen *radeonsi_screen_create(struct 
radeon_winsys *ws)
 
sscreen->b.has_cp_dma = true;
sscreen->b.has_streamout = true;
+   sscreen->use_monolithic_shaders = true;
 
if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE))
sscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | 
DBG_CS;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index b5790d6..2a2455c 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -84,6 +84,9 @@ struct si_compute;
 struct si_screen {
struct r600_common_screen   b;
unsigned gs_table_depth;
+
+   /* Whether shaders are monolithic (1-part) or separate (3-part). */
+   bool use_monolithic_shaders;
 };
 
 struct si_blend_color {
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index b058019..b74ed1e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -70,6 +70,12 @@ struct si_shader_context
 
unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */
bool is_gs_copy_shader;
+
+   /* Whether to generate the optimized shader variant compiled as a whole
+* (without a prolog and epilog)
+*/
+   bool is_monolithic;
+
int param_streamout_config;
int param_streamout_write_index;
int param_streamout_offset[4];
@@ -3657,8 +3663,10 @@ static void create_function(struct si_shader_context 
*ctx)
struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
struct gallivm_state *gallivm = bld_base->base.gallivm;
struct si_shader *shader = ctx->shader;
-   LLVMTypeRef params[SI_NUM_PARAMS], v2i32, v3i32;
+   LLVMTypeRef params[SI_NUM_PARAMS + SI_NUM_VERTEX_BUFFERS], v2i32, v3i32;
+   LLVMTypeRef returns[16+32*4];
unsigned i, last_array_pointer, last_sgpr, num_params;
+   unsigned num_returns = 0;
 
v2i32 = LLVMVectorType(ctx->i32, 2);
v3i32 = LLVMVectorType(ctx->i32, 3);
@@ -3785,7 +3793,7 @@ static void create_function(struct si_shader_context *ctx)
 
assert(num_params <= Elements(params));
 
-   si_create_function(ctx, NULL, 0, params,
+   si_create_function(ctx, returns, num_returns, params,
   num_params, last_array_pointer, last_sgpr);
 
shader->num_input_sgprs = 0;
@@ -4492,9 +4500,11 @@ static void si_init_shader_ctx(struct si_shader_context 
*ctx,
bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
 }
 
-int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
-struct si_shader *shader,
-struct pipe_debug_callback *debug)
+static int si_compile_tgsi_shader(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct si_shader *shader,
+ bool is_monolithic,
+ struct pipe_debug_callback *debug)
 {
struct si_shader_selector *sel = shader->selector;
struct tgsi_token *tokens = sel->tokens;
@@ -4524,6 +4534,7 @@ int si_shader_create(struct si_screen *sscreen, 
LLVMTargetMachineRef tm,
 
si_init_shader_ctx(&ctx, sscreen, shader, tm,
   poly_stipple ? &stipple_shader_info : &sel->info);
+   ctx.is_monolithic = is_monolithic;
 
shader->uses_instanceid = sel->info.uses_instanceid;
 
@@ -4604,14 +4615,6 @@ int si_shader_create(struct si_screen *sscreen, 
LLVMTargetMachineRef tm,
goto out;
}
 
-   si_shader_dump(sscreen, shader, debug, ctx.type);
-
-   r = si_shader_binary_upload(sscreen, shader);
-   if (r) {
-   fprintf(stderr, "LLVM failed to upload shader\n");
-   goto out;
-   }
-
radeon_llvm_dispose(&ctx.radeon_bld);
 
/* Calculate the number of fragment input VGPRs. */
@@ -4675,6 +4678,30 @@ out:
return r;
 }
 
+int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
+struct si_shader *shader,
+struct pipe_debug_callback *debug)
+{
+   int r;
+
+   /* Compile TGSI. */
+   r = si_compile_tgsi_shader(sscreen, tm, shader,
+