Signed-off-by: Sonny Jiang <sonny.ji...@amd.com> --- src/gallium/drivers/radeonsi/si_gfx_cs.c | 7 + src/gallium/drivers/radeonsi/si_state.h | 11 ++ .../drivers/radeonsi/si_state_shaders.c | 144 ++++++++++-------- 3 files changed, 98 insertions(+), 64 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index 2e10d766a6..8c1bee8ed6 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -371,6 +371,13 @@ void si_begin_new_gfx_cs(struct si_context *ctx) ctx->tracked_regs.reg_value[SI_TRACKED_SPI_VS_OUT_CONFIG] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_POS_FORMAT] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VTE_CNTL] = 0x00000000; + ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_INPUT_ENA] = 0x00000000; + ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_INPUT_ADDR] = 0x00000000; + ctx->tracked_regs.reg_value[SI_TRACKED_SPI_BARYC_CNTL] = 0x00000000; + ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_IN_CONTROL] = 0x00000002; + ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_Z_FORMAT] = 0x00000000; + ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_COL_FORMAT] = 0x00000000; + ctx->tracked_regs.reg_value[SI_TRACKED_CB_SHADER_MASK] = 0xffffffff; /* Set all saved registers state to saved. 
*/ ctx->tracked_regs.reg_saved = 0xffffffffffffffff; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index bf1ae9f18f..878b67f0ed 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -302,6 +302,17 @@ enum si_tracked_reg { SI_TRACKED_SPI_SHADER_POS_FORMAT, SI_TRACKED_PA_CL_VTE_CNTL, + SI_TRACKED_SPI_PS_INPUT_ENA, /* 2 consecutive registers */ + SI_TRACKED_SPI_PS_INPUT_ADDR, + + SI_TRACKED_SPI_BARYC_CNTL, + SI_TRACKED_SPI_PS_IN_CONTROL, + + SI_TRACKED_SPI_SHADER_Z_FORMAT, /* 2 consecutive registers */ + SI_TRACKED_SPI_SHADER_COL_FORMAT, + + SI_TRACKED_CB_SHADER_MASK, + SI_NUM_TRACKED_REGS, }; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 332fdae3b3..e5732f7920 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1100,12 +1100,88 @@ static unsigned si_get_spi_shader_col_format(struct si_shader *shader) return value; } -static void si_shader_ps(struct si_shader *shader) +static void si_emit_shader_ps(struct si_context *sctx) { + struct si_shader *shader = sctx->queued.named.ps->shader; + if (!shader) + return; + struct tgsi_shader_info *info = &shader->selector->info; - struct si_pm4_state *pm4; - unsigned spi_ps_in_control, spi_shader_col_format, cb_shader_mask; unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1); + unsigned spi_ps_in_control, spi_shader_col_format, cb_shader_mask; + + /* R_0286CC_SPI_PS_INPUT_ENA, R_0286D0_SPI_PS_INPUT_ADDR */ + radeon_opt_set_context_reg2(sctx, R_0286CC_SPI_PS_INPUT_ENA, + SI_TRACKED_SPI_PS_INPUT_ENA, + shader->config.spi_ps_input_ena, + shader->config.spi_ps_input_addr); + + /* SPI_BARYC_CNTL.POS_FLOAT_LOCATION + * Possible values: + * 0 -> Position = pixel center + * 1 -> Position = pixel centroid + * 2 -> Position = at sample position + * + * From GLSL 4.5 specification, section 7.1: + * "The 
variable gl_FragCoord is available as an input variable from + * within fragment shaders and it holds the window relative coordinates + * (x, y, z, 1/w) values for the fragment. If multi-sampling, this + * value can be for any location within the pixel, or one of the + * fragment samples. The use of centroid does not further restrict + * this value to be inside the current primitive." + * + * Meaning that centroid has no effect and we can return anything within + * the pixel. Thus, return the value at sample position, because that's + * the most accurate one shaders can get. + */ + spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2); + + if (info->properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] == + TGSI_FS_COORD_PIXEL_CENTER_INTEGER) + spi_baryc_cntl |= S_0286E0_POS_FLOAT_ULC(1); + + /* Set interpolation controls. */ + spi_ps_in_control = S_0286D8_NUM_INTERP(si_get_ps_num_interp(shader)); + + radeon_opt_set_context_reg(sctx, R_0286E0_SPI_BARYC_CNTL, + SI_TRACKED_SPI_BARYC_CNTL, spi_baryc_cntl); + radeon_opt_set_context_reg(sctx, R_0286D8_SPI_PS_IN_CONTROL, + SI_TRACKED_SPI_PS_IN_CONTROL, + spi_ps_in_control); + + spi_shader_col_format = si_get_spi_shader_col_format(shader); + cb_shader_mask = ac_get_cb_shader_mask(spi_shader_col_format); + + /* Ensure that some export memory is always allocated, for two reasons: + * + * 1) Correctness: The hardware ignores the EXEC mask if no export + * memory is allocated, so KILL and alpha test do not work correctly + * without this. + * 2) Performance: Every shader needs at least a NULL export, even when + * it writes no color/depth output. The NULL export instruction + * stalls without this setting. + * + * Don't add this to CB_SHADER_MASK. 
+ */ + if (!spi_shader_col_format && + !info->writes_z && !info->writes_stencil && !info->writes_samplemask) + spi_shader_col_format = V_028714_SPI_SHADER_32_R; + + /* R_028710_SPI_SHADER_Z_FORMAT, R_028714_SPI_SHADER_COL_FORMAT */ + radeon_opt_set_context_reg2(sctx, R_028710_SPI_SHADER_Z_FORMAT, + SI_TRACKED_SPI_SHADER_Z_FORMAT, + ac_get_spi_shader_z_format(info->writes_z, + info->writes_stencil, + info->writes_samplemask), + spi_shader_col_format); + + radeon_opt_set_context_reg(sctx, R_02823C_CB_SHADER_MASK, + SI_TRACKED_CB_SHADER_MASK, cb_shader_mask); +} + +static void si_shader_ps(struct si_shader *shader) +{ + struct si_pm4_state *pm4; uint64_t va; unsigned input_ena = shader->config.spi_ps_input_ena; @@ -1157,67 +1233,7 @@ static void si_shader_ps(struct si_shader *shader) if (!pm4) return; - /* SPI_BARYC_CNTL.POS_FLOAT_LOCATION - * Possible vaules: - * 0 -> Position = pixel center - * 1 -> Position = pixel centroid - * 2 -> Position = at sample position - * - * From GLSL 4.5 specification, section 7.1: - * "The variable gl_FragCoord is available as an input variable from - * within fragment shaders and it holds the window relative coordinates - * (x, y, z, 1/w) values for the fragment. If multi-sampling, this - * value can be for any location within the pixel, or one of the - * fragment samples. The use of centroid does not further restrict - * this value to be inside the current primitive." - * - * Meaning that centroid has no effect and we can return anything within - * the pixel. Thus, return the value at sample position, because that's - * the most accurate one shaders can get. 
- */ - spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2); - - if (info->properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] == - TGSI_FS_COORD_PIXEL_CENTER_INTEGER) - spi_baryc_cntl |= S_0286E0_POS_FLOAT_ULC(1); - - spi_shader_col_format = si_get_spi_shader_col_format(shader); - cb_shader_mask = ac_get_cb_shader_mask(spi_shader_col_format); - - /* Ensure that some export memory is always allocated, for two reasons: - * - * 1) Correctness: The hardware ignores the EXEC mask if no export - * memory is allocated, so KILL and alpha test do not work correctly - * without this. - * 2) Performance: Every shader needs at least a NULL export, even when - * it writes no color/depth output. The NULL export instruction - * stalls without this setting. - * - * Don't add this to CB_SHADER_MASK. - */ - if (!spi_shader_col_format && - !info->writes_z && !info->writes_stencil && !info->writes_samplemask) - spi_shader_col_format = V_028714_SPI_SHADER_32_R; - - si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, input_ena); - si_pm4_set_reg(pm4, R_0286D0_SPI_PS_INPUT_ADDR, - shader->config.spi_ps_input_addr); - - /* Set interpolation controls. */ - spi_ps_in_control = S_0286D8_NUM_INTERP(si_get_ps_num_interp(shader)); - - /* Set registers. 
*/ - si_pm4_set_reg(pm4, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl); - si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control); - - si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT, - ac_get_spi_shader_z_format(info->writes_z, - info->writes_stencil, - info->writes_samplemask)); - - si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT, spi_shader_col_format); - si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, cb_shader_mask); - + pm4->atom.emit = si_emit_shader_ps; va = shader->bo->gpu_address; si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY); si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8); -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev