On Thu, Aug 08, 2013 at 02:20:54AM +0200, Marek Olšák wrote:
> ---
>  src/gallium/drivers/radeonsi/radeonsi_shader.c |  7 ++--
>  src/gallium/drivers/radeonsi/radeonsi_shader.h | 58 
> ++++++++++++++------------
>  src/gallium/drivers/radeonsi/si_state_draw.c   |  1 +
>  3 files changed, 36 insertions(+), 30 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c 
> b/src/gallium/drivers/radeonsi/radeonsi_shader.c
> index 18dde61..2806045 100644
> --- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
> +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
> @@ -1209,7 +1209,7 @@ static void create_function(struct si_shader_context 
> *si_shader_ctx)
>  {
>       struct lp_build_tgsi_context *bld_base = 
> &si_shader_ctx->radeon_bld.soa.bld_base;
>       struct gallivm_state *gallivm = bld_base->base.gallivm;
> -     LLVMTypeRef params[20], f32, i8, i32, v2i32, v3i32;
> +     LLVMTypeRef params[SI_MAX_PARAMS], f32, i8, i32, v2i32, v3i32;
>       unsigned i;
>  
>       i8 = LLVMInt8TypeInContext(gallivm->context);
> @@ -1221,6 +1221,7 @@ static void create_function(struct si_shader_context 
> *si_shader_ctx)
>       params[SI_PARAM_CONST] = LLVMPointerType(LLVMVectorType(i8, 16), 
> CONST_ADDR_SPACE);
>       params[SI_PARAM_SAMPLER] = params[SI_PARAM_CONST];
>       params[SI_PARAM_RESOURCE] = LLVMPointerType(LLVMVectorType(i8, 32), 
> CONST_ADDR_SPACE);
> +     params[SI_PARAM_FMASK_RESOURCE] = params[SI_PARAM_RESOURCE];
>  
>       if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
>               params[SI_PARAM_VERTEX_BUFFER] = params[SI_PARAM_SAMPLER];
> @@ -1229,7 +1230,7 @@ static void create_function(struct si_shader_context 
> *si_shader_ctx)
>               params[SI_PARAM_DUMMY_0] = i32;
>               params[SI_PARAM_DUMMY_1] = i32;
>               params[SI_PARAM_INSTANCE_ID] = i32;
> -             radeon_llvm_create_func(&si_shader_ctx->radeon_bld, params, 9);
> +             radeon_llvm_create_func(&si_shader_ctx->radeon_bld, params, 10);
>  
>       } else {
>               params[SI_PARAM_PRIM_MASK] = i32;
> @@ -1249,7 +1250,7 @@ static void create_function(struct si_shader_context 
> *si_shader_ctx)
>               params[SI_PARAM_ANCILLARY] = f32;
>               params[SI_PARAM_SAMPLE_COVERAGE] = f32;
>               params[SI_PARAM_POS_FIXED_PT] = f32;
> -             radeon_llvm_create_func(&si_shader_ctx->radeon_bld, params, 20);
> +             radeon_llvm_create_func(&si_shader_ctx->radeon_bld, params, 21);
>       }
>  
>       radeon_llvm_shader_type(si_shader_ctx->radeon_bld.main_fn, 
> si_shader_ctx->type);
> diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.h 
> b/src/gallium/drivers/radeonsi/radeonsi_shader.h
> index 2ce34b9..836b144 100644
> --- a/src/gallium/drivers/radeonsi/radeonsi_shader.h
> +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.h
> @@ -34,43 +34,47 @@
>  #define SI_SGPR_CONST                0
>  #define SI_SGPR_SAMPLER              2
>  #define SI_SGPR_RESOURCE     4
> -#define SI_SGPR_VERTEX_BUFFER        6
> -#define SI_SGPR_START_INSTANCE       8
> +#define SI_SGPR_FMASK_RESOURCE       6
> +#define SI_SGPR_VERTEX_BUFFER        8
> +#define SI_SGPR_START_INSTANCE       10
>  
> -#define SI_VS_NUM_USER_SGPR  9
> -#define SI_PS_NUM_USER_SGPR  6
> +#define SI_VS_NUM_USER_SGPR  11
> +#define SI_PS_NUM_USER_SGPR  8
>  
>  /* LLVM function parameter indices */
>  #define SI_PARAM_CONST               0
>  #define SI_PARAM_SAMPLER     1
>  #define SI_PARAM_RESOURCE    2
> +#define SI_PARAM_FMASK_RESOURCE      3
>  
>  /* VS only parameters */
> -#define SI_PARAM_VERTEX_BUFFER       3
> -#define SI_PARAM_START_INSTANCE      4
> -#define SI_PARAM_VERTEX_ID   5
> -#define SI_PARAM_DUMMY_0     6
> -#define SI_PARAM_DUMMY_1     7
> -#define SI_PARAM_INSTANCE_ID 8
> +#define SI_PARAM_VERTEX_BUFFER       4
> +#define SI_PARAM_START_INSTANCE      5
> +#define SI_PARAM_VERTEX_ID   6
> +#define SI_PARAM_DUMMY_0     7
> +#define SI_PARAM_DUMMY_1     8
> +#define SI_PARAM_INSTANCE_ID 9
>  
>  /* PS only parameters */
> -#define SI_PARAM_PRIM_MASK           3
> -#define SI_PARAM_PERSP_SAMPLE                4
> -#define SI_PARAM_PERSP_CENTER                5
> -#define SI_PARAM_PERSP_CENTROID              6
> -#define SI_PARAM_PERSP_PULL_MODEL    7
> -#define SI_PARAM_LINEAR_SAMPLE               8
> -#define SI_PARAM_LINEAR_CENTER               9
> -#define SI_PARAM_LINEAR_CENTROID     10
> -#define SI_PARAM_LINE_STIPPLE_TEX    11
> -#define SI_PARAM_POS_X_FLOAT         12
> -#define SI_PARAM_POS_Y_FLOAT         13
> -#define SI_PARAM_POS_Z_FLOAT         14
> -#define SI_PARAM_POS_W_FLOAT         15
> -#define SI_PARAM_FRONT_FACE          16
> -#define SI_PARAM_ANCILLARY           17
> -#define SI_PARAM_SAMPLE_COVERAGE     18
> -#define SI_PARAM_POS_FIXED_PT                19
> +#define SI_PARAM_PRIM_MASK           4
> +#define SI_PARAM_PERSP_SAMPLE                5
> +#define SI_PARAM_PERSP_CENTER                6
> +#define SI_PARAM_PERSP_CENTROID              7
> +#define SI_PARAM_PERSP_PULL_MODEL    8
> +#define SI_PARAM_LINEAR_SAMPLE               9
> +#define SI_PARAM_LINEAR_CENTER               10
> +#define SI_PARAM_LINEAR_CENTROID     11
> +#define SI_PARAM_LINE_STIPPLE_TEX    12
> +#define SI_PARAM_POS_X_FLOAT         13
> +#define SI_PARAM_POS_Y_FLOAT         14
> +#define SI_PARAM_POS_Z_FLOAT         15
> +#define SI_PARAM_POS_W_FLOAT         16
> +#define SI_PARAM_FRONT_FACE          17
> +#define SI_PARAM_ANCILLARY           18
> +#define SI_PARAM_SAMPLE_COVERAGE     19
> +#define SI_PARAM_POS_FIXED_PT                20
> +
> +#define SI_MAX_PARAMS                        21
>  
>  struct si_shader_io {
>       unsigned                name;
> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
> b/src/gallium/drivers/radeonsi/si_state_draw.c
> index 746ace6..4208fa7 100644
> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> @@ -241,6 +241,7 @@ static void si_pipe_shader_ps(struct pipe_context *ctx, 
> struct si_pipe_shader *s
>               /* Last 2 reserved SGPRs are used for VCC */
>               num_sgprs = num_user_sgprs + 2;
>       }
> +     num_sgprs += 1; /* XXX this fixes VM faults */

One problem is that the compiler is under-reporting the number of SGPRs
when there are unused USER_SGPRs in the shader.  It should always
report a number greater than or equal to the number of USER_SGPRs.

I think Michel mentioned this earlier, but there may also be a problem
with the way we determine usage of the VCC register in the shader; it
may be used by more instructions than we realize.

-Tom

>       assert(num_sgprs <= 104);
>  
>       si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
> -- 
> 1.8.1.2
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to