Mesa (master): radv: Assert when setting 0 registers in a sequence.
Module: Mesa Branch: master Commit: 78ee8b3f849063e3e37db0767212397da522b6fa URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=78ee8b3f849063e3e37db0767212397da522b6fa Author: Bas NieuwenhuizenDate: Tue Mar 28 22:29:16 2017 +0200 radv: Assert when setting 0 registers in a sequence. To catch more of those hangs early. Signed-off-by: Bas Nieuwenhuizen Acked-by: Dave Airlie --- src/amd/vulkan/radv_cs.h | 4 1 file changed, 4 insertions(+) diff --git a/src/amd/vulkan/radv_cs.h b/src/amd/vulkan/radv_cs.h index 2c8935f306..0990270f5c 100644 --- a/src/amd/vulkan/radv_cs.h +++ b/src/amd/vulkan/radv_cs.h @@ -43,6 +43,7 @@ static inline void radeon_set_config_reg_seq(struct radeon_winsys_cs *cs, unsign { assert(reg < R600_CONTEXT_REG_OFFSET); assert(cs->cdw + 2 + num <= cs->max_dw); +assert(num); radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0)); radeon_emit(cs, (reg - R600_CONFIG_REG_OFFSET) >> 2); } @@ -57,6 +58,7 @@ static inline void radeon_set_context_reg_seq(struct radeon_winsys_cs *cs, unsig { assert(reg >= R600_CONTEXT_REG_OFFSET); assert(cs->cdw + 2 + num <= cs->max_dw); +assert(num); radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0)); radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2); } @@ -83,6 +85,7 @@ static inline void radeon_set_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned r { assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END); assert(cs->cdw + 2 + num <= cs->max_dw); + assert(num); radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0)); radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2); } @@ -97,6 +100,7 @@ static inline void radeon_set_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsig { assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END); assert(cs->cdw + 2 + num <= cs->max_dw); + assert(num); radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0)); radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2); } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): anv/cmd_buffer: Refactor flush_pipeline_select_*
Module: Mesa Branch: master Commit: f3673db3d61b77415a09ca5d44f976e6fb869ec7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f3673db3d61b77415a09ca5d44f976e6fb869ec7 Author: Jason EkstrandDate: Wed Mar 15 11:58:53 2017 -0700 anv/cmd_buffer: Refactor flush_pipeline_select_* While having the _3d and _gpgpu versions is nice, there's no reason why we need to have duplicated logic for tracking the current pipeline. Reviewed-by: Iago Toral Quiroga --- src/intel/vulkan/genX_cmd_buffer.c | 42 +++--- 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index d0ddc29f00..1ce549a202 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -2118,9 +2118,12 @@ void genX(CmdDispatchIndirect)( } static void -flush_pipeline_before_pipeline_select(struct anv_cmd_buffer *cmd_buffer, - uint32_t pipeline) +genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer, +uint32_t pipeline) { + if (cmd_buffer->state.current_pipeline == pipeline) + return; + #if GEN_GEN >= 8 && GEN_GEN < 10 /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT: * @@ -2160,40 +2163,27 @@ flush_pipeline_before_pipeline_select(struct anv_cmd_buffer *cmd_buffer, pc.InstructionCacheInvalidateEnable = true; pc.PostSyncOperation= NoWrite; } + + anv_batch_emit(_buffer->batch, GENX(PIPELINE_SELECT), ps) { +#if GEN_GEN >= 9 + ps.MaskBits = 3; +#endif + ps.PipelineSelection = pipeline; + } + + cmd_buffer->state.current_pipeline = pipeline; } void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer) { - if (cmd_buffer->state.current_pipeline != _3D) { - flush_pipeline_before_pipeline_select(cmd_buffer, _3D); - - anv_batch_emit(_buffer->batch, GENX(PIPELINE_SELECT), ps) { -#if GEN_GEN >= 9 - ps.MaskBits = 3; -#endif - ps.PipelineSelection = _3D; - } - - cmd_buffer->state.current_pipeline = _3D; - } + genX(flush_pipeline_select)(cmd_buffer, _3D); } void 
genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer) { - if (cmd_buffer->state.current_pipeline != GPGPU) { - flush_pipeline_before_pipeline_select(cmd_buffer, GPGPU); - - anv_batch_emit(_buffer->batch, GENX(PIPELINE_SELECT), ps) { -#if GEN_GEN >= 9 - ps.MaskBits = 3; -#endif - ps.PipelineSelection = GPGPU; - } - - cmd_buffer->state.current_pipeline = GPGPU; - } + genX(flush_pipeline_select)(cmd_buffer, GPGPU); } void ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): anv: Make anv_get_layerCount a macro
Module: Mesa Branch: master Commit: 1b8fa8dd794c22aba43b16470e75ecaebf902b11 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1b8fa8dd794c22aba43b16470e75ecaebf902b11 Author: Jason EkstrandDate: Fri Mar 24 16:20:18 2017 -0700 anv: Make anv_get_layerCount a macro Reviewed-by: Lionel Landwerlin Cc: "13.0 17.0" --- src/intel/vulkan/anv_private.h | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 27c887c65c..74e80e8d53 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1828,13 +1828,13 @@ anv_layout_to_aux_usage(const struct gen_device_info * const devinfo, const struct anv_image *image, const VkImageAspectFlags aspects, const VkImageLayout layout); -static inline uint32_t -anv_get_layerCount(const struct anv_image *image, - const VkImageSubresourceRange *range) -{ - return range->layerCount == VK_REMAINING_ARRAY_LAYERS ? - image->array_size - range->baseArrayLayer : range->layerCount; -} + +/* This is defined as a macro so that it works for both + * VkImageSubresourceRange and VkImageSubresourceLayers + */ +#define anv_get_layerCount(_image, _range) \ + ((_range)->layerCount == VK_REMAINING_ARRAY_LAYERS ? \ +(_image)->array_size - (_range)->baseArrayLayer : (_range)->layerCount) static inline uint32_t anv_get_levelCount(const struct anv_image *image, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): anv/blorp: Use anv_get_layerCount everywhere
Module: Mesa Branch: master Commit: 9319ef96fd5c2489754eae1b058e4087d7259341 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9319ef96fd5c2489754eae1b058e4087d7259341 Author: Jason EkstrandDate: Fri Mar 24 16:20:35 2017 -0700 anv/blorp: Use anv_get_layerCount everywhere Reviewed-by: Lionel Landwerlin Cc: "13.0 17.0" --- src/intel/vulkan/anv_blorp.c | 20 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index 16f1692ff5..72a468a744 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -233,7 +233,8 @@ void anv_CmdCopyImage( layer_count = pRegions[r].extent.depth; } else { dst_base_layer = pRegions[r].dstSubresource.baseArrayLayer; - layer_count = pRegions[r].dstSubresource.layerCount; + layer_count = +anv_get_layerCount(dst_image, [r].dstSubresource); } unsigned src_base_layer; @@ -241,7 +242,8 @@ void anv_CmdCopyImage( src_base_layer = pRegions[r].srcOffset.z; } else { src_base_layer = pRegions[r].srcSubresource.baseArrayLayer; - assert(pRegions[r].srcSubresource.layerCount == layer_count); + assert(layer_count == +anv_get_layerCount(src_image, [r].srcSubresource)); } assert(pRegions[r].srcSubresource.aspectMask == @@ -313,7 +315,8 @@ copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer, anv_sanitize_image_extent(anv_image->type, pRegions[r].imageExtent); if (anv_image->type != VK_IMAGE_TYPE_3D) { image.offset.z = pRegions[r].imageSubresource.baseArrayLayer; - extent.depth = pRegions[r].imageSubresource.layerCount; + extent.depth = +anv_get_layerCount(anv_image, [r].imageSubresource); } const enum isl_format buffer_format = @@ -467,7 +470,7 @@ void anv_CmdBlitImage( dst_end = pRegions[r].dstOffsets[1].z; } else { dst_start = dst_res->baseArrayLayer; - dst_end = dst_start + dst_res->layerCount; + dst_end = dst_start + anv_get_layerCount(dst_image, dst_res); } unsigned src_start, src_end; @@ -477,7 +480,7 @@ void anv_CmdBlitImage( src_end = 
pRegions[r].srcOffsets[1].z; } else { src_start = src_res->baseArrayLayer; - src_end = src_start + src_res->layerCount; + src_end = src_start + anv_get_layerCount(src_image, src_res); } bool flip_z = flip_coords(_start, _end, _start, _end); @@ -1407,10 +1410,11 @@ void anv_CmdResolveImage( for (uint32_t r = 0; r < regionCount; r++) { assert(pRegions[r].srcSubresource.aspectMask == pRegions[r].dstSubresource.aspectMask); - assert(pRegions[r].srcSubresource.layerCount == - pRegions[r].dstSubresource.layerCount); + assert(anv_get_layerCount(src_image, [r].srcSubresource) == + anv_get_layerCount(dst_image, [r].dstSubresource)); - const uint32_t layer_count = pRegions[r].dstSubresource.layerCount; + const uint32_t layer_count = + anv_get_layerCount(dst_image, [r].dstSubresource); for (uint32_t layer = 0; layer < layer_count; layer++) { resolve_image(, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): anv: Flush caches prior to PIPELINE_SELECT on all gens
Module: Mesa Branch: master Commit: 6baae9625d26d282a72481598f9431fcad3211f6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6baae9625d26d282a72481598f9431fcad3211f6 Author: Jason Ekstrand Date: Wed Mar 15 11:58:52 2017 -0700 anv: Flush caches prior to PIPELINE_SELECT on all gens The programming note that says we need to do this still exists in the SkyLake PRM and, from looking at the bspec, seems like it may apply to all hardware generations SNB+. Unfortunately, this isn't particularly clear cut since there is also language in the bspec that says you can skip the flushing and stall to get better throughput. Experimentation with the "Car Chase" benchmark in GL seems to indicate that some form of flushing is still needed. This commit makes us do the full set of flushes regardless of hardware generation. We can always reduce the flushing later. Reported-by: Topi Pohjolainen Reviewed-by: Iago Toral Quiroga Cc: "17.0 13.0" --- src/intel/vulkan/genX_cmd_buffer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 5d923a8c08..d0ddc29f00 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -2133,8 +2133,8 @@ flush_pipeline_before_pipeline_select(struct anv_cmd_buffer *cmd_buffer, */ if (pipeline == GPGPU) anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), t); +#endif -#elif GEN_GEN <= 7 /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction] * PIPELINE_SELECT [DevBWR+]": * @@ -2160,7 +2160,6 @@ flush_pipeline_before_pipeline_select(struct anv_cmd_buffer *cmd_buffer, pc.InstructionCacheInvalidateEnable = true; pc.PostSyncOperation= NoWrite; } -#endif } void ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): anv/cmd_buffer: Apply flush operations prior to executing secondaries
Module: Mesa Branch: master Commit: 01a65dc43be3a4bf6b8a901586f718f4b6b3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=01a65dc43be3a4bf6b8a901586f718f4b6b3 Author: Jason EkstrandDate: Fri Mar 24 16:30:24 2017 -0700 anv/cmd_buffer: Apply flush operations prior to executing secondaries This fixes rendering issues in the Vulkan port of skia on some hardware. Reviewed-by: Lionel Landwerlin Cc: "13.0 17.0" --- src/intel/vulkan/genX_cmd_buffer.c | 5 + 1 file changed, 5 insertions(+) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 39856b9af7..b87d8693fd 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -654,6 +654,11 @@ genX(CmdExecuteCommands)( */ genX(cmd_buffer_enable_pma_fix)(primary, false); + /* The secondary command buffer doesn't know which textures etc. have been +* flushed prior to their execution. Apply those flushes now. +*/ + genX(cmd_buffer_apply_pipe_flushes)(primary); + for (uint32_t i = 0; i < commandBufferCount; i++) { ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): anv/cmd_buffer: Fix bad indentation
Module: Mesa Branch: master Commit: 0fe3dcce4c3e8b86a60beefe4c5adc760f2d59f8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0fe3dcce4c3e8b86a60beefe4c5adc760f2d59f8 Author: Jason EkstrandDate: Wed Mar 15 11:58:51 2017 -0700 anv/cmd_buffer: Fix bad indentation A bunch of code was indented in such a way that it looked like it went with the if statement above but it definitely didn't. Reviewed-by: Iago Toral Quiroga Cc: "17.0 13.0" --- src/intel/vulkan/genX_cmd_buffer.c | 49 +++--- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index b87d8693fd..5d923a8c08 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -2133,32 +2133,33 @@ flush_pipeline_before_pipeline_select(struct anv_cmd_buffer *cmd_buffer, */ if (pipeline == GPGPU) anv_batch_emit(_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), t); + #elif GEN_GEN <= 7 - /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction] - * PIPELINE_SELECT [DevBWR+]": - * - * Project: DEVSNB+ - * - * Software must ensure all the write caches are flushed through a - * stalling PIPE_CONTROL command followed by another PIPE_CONTROL - * command to invalidate read only caches prior to programming - * MI_PIPELINE_SELECT command to change the Pipeline Select Mode. - */ - anv_batch_emit(_buffer->batch, GENX(PIPE_CONTROL), pc) { - pc.RenderTargetCacheFlushEnable = true; - pc.DepthCacheFlushEnable = true; - pc.DCFlushEnable = true; - pc.PostSyncOperation = NoWrite; - pc.CommandStreamerStallEnable= true; - } + /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction] +* PIPELINE_SELECT [DevBWR+]": +* +* Project: DEVSNB+ +* +* Software must ensure all the write caches are flushed through a +* stalling PIPE_CONTROL command followed by another PIPE_CONTROL +* command to invalidate read only caches prior to programming +* MI_PIPELINE_SELECT command to change the Pipeline Select Mode. 
+*/ + anv_batch_emit(_buffer->batch, GENX(PIPE_CONTROL), pc) { + pc.RenderTargetCacheFlushEnable = true; + pc.DepthCacheFlushEnable = true; + pc.DCFlushEnable = true; + pc.PostSyncOperation = NoWrite; + pc.CommandStreamerStallEnable= true; + } - anv_batch_emit(_buffer->batch, GENX(PIPE_CONTROL), pc) { - pc.TextureCacheInvalidationEnable = true; - pc.ConstantCacheInvalidationEnable = true; - pc.StateCacheInvalidationEnable = true; - pc.InstructionCacheInvalidateEnable = true; - pc.PostSyncOperation= NoWrite; - } + anv_batch_emit(_buffer->batch, GENX(PIPE_CONTROL), pc) { + pc.TextureCacheInvalidationEnable = true; + pc.ConstantCacheInvalidationEnable = true; + pc.StateCacheInvalidationEnable = true; + pc.InstructionCacheInvalidateEnable = true; + pc.PostSyncOperation= NoWrite; + } #endif } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: only emit ps_input_cntl if we have any to output
Module: Mesa Branch: master Commit: 93d61e494518a5dd170c2b098b2ed7a26465d049 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=93d61e494518a5dd170c2b098b2ed7a26465d049 Author: Dave Airlie Date: Tue Mar 28 20:09:36 2017 +0100 radv: only emit ps_input_cntl if we have any to output Otherwise we get GPU hangs. Reported-by: Alex Smith Signed-off-by: Dave Airlie --- src/amd/vulkan/radv_cmd_buffer.c | 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index e994df65fd..e6f098c208 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -697,9 +697,12 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer, radeon_set_context_reg(cmd_buffer->cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask); radeon_set_context_reg(cmd_buffer->cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask); - radeon_set_context_reg_seq(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0, pipeline->graphics.ps_input_cntl_num); - for (unsigned i = 0; i < pipeline->graphics.ps_input_cntl_num; i++) - radeon_emit(cmd_buffer->cs, pipeline->graphics.ps_input_cntl[i]); + if (pipeline->graphics.ps_input_cntl_num) { + radeon_set_context_reg_seq(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0, pipeline->graphics.ps_input_cntl_num); + for (unsigned i = 0; i < pipeline->graphics.ps_input_cntl_num; i++) { + radeon_emit(cmd_buffer->cs, pipeline->graphics.ps_input_cntl[i]); + } + } } static void ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): glx: Remove #include <GL/glxint.h>
Module: Mesa Branch: master Commit: f208bdc0d27cf7836420272738b707f2bad9c92a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f208bdc0d27cf7836420272738b707f2bad9c92a Author: Adam Jackson Date: Wed Mar 22 14:02:52 2017 -0400 glx: Remove #include <GL/glxint.h> We're not using anything in it, and we don't want to inherit struct definitions from some other package anyway. Signed-off-by: Adam Jackson --- src/glx/glxconfig.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/glx/glxconfig.c b/src/glx/glxconfig.c index e5718b143f..0e1643fcd8 100644 --- a/src/glx/glxconfig.c +++ b/src/glx/glxconfig.c @@ -32,7 +32,6 @@ */ #include -#include "GL/glxint.h" #include #include ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): si_compute: check NULL return from u_upload_alloc
Module: Mesa Branch: master Commit: 4a5e779b5f9d169fd043ffaead1525040af816f3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4a5e779b5f9d169fd043ffaead1525040af816f3 Author: Julien IsorceDate: Thu Mar 23 13:43:49 2017 + si_compute: check NULL return from u_upload_alloc Signed-off-by: Julien Isorce Reviewed-by: Marek Olšák --- src/gallium/drivers/radeonsi/si_compute.c | 14 +++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 46476b68be..913a2ddbfe 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -579,7 +579,7 @@ static void si_setup_user_sgprs_co_v2(struct si_context *sctx, } } -static void si_upload_compute_input(struct si_context *sctx, +static bool si_upload_compute_input(struct si_context *sctx, const amd_kernel_code_t *code_object, const struct pipe_grid_info *info) { @@ -602,6 +602,9 @@ static void si_upload_compute_input(struct si_context *sctx, _args_offset, (struct pipe_resource**)_buffer, _args_ptr); + if (unlikely(!kernel_args_ptr)) + return false; + kernel_args = (uint32_t*)kernel_args_ptr; kernel_args_va = input_buffer->gpu_address + kernel_args_offset; @@ -636,6 +639,8 @@ static void si_upload_compute_input(struct si_context *sctx, } r600_resource_reference(_buffer, NULL); + + return true; } static void si_setup_tgsi_grid(struct si_context *sctx, @@ -790,8 +795,11 @@ static void si_launch_grid( si_set_atom_dirty(sctx, sctx->atoms.s.render_cond, false); } - if (program->input_size || program->ir_type == PIPE_SHADER_IR_NATIVE) - si_upload_compute_input(sctx, code_object, info); + if ((program->input_size || +program->ir_type == PIPE_SHADER_IR_NATIVE) && + unlikely(!si_upload_compute_input(sctx, code_object, info))) { + return; + } /* Global buffers */ for (i = 0; i < MAX_GLOBAL_BUFFERS; i++) { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr: [rasterizer jitter] fix llvm-5.0.0 build bustage
Module: Mesa Branch: master Commit: 79d92a72d5866fb4a00188fc5cb48d4385c46bb9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=79d92a72d5866fb4a00188fc5cb48d4385c46bb9 Author: Tim RowleyDate: Mon Mar 27 13:29:31 2017 -0500 swr: [rasterizer jitter] fix llvm-5.0.0 build bustage Add CreateAlignmentAssumptionHelper to gen_llvm_ir_macros.py ignore list. Reviewed-by: Bruce Cherniak --- src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py | 7 +++ 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py index dbf56471ee..2ed2b2f61e 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py +++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py @@ -140,10 +140,9 @@ def parse_ir_builder(input_file): ignore = False # The following functions need to be ignored. -if func_name == 'CreateInsertNUWNSWBinOp': -ignore = True - -if func_name == 'CreateMaskedIntrinsic': +if (func_name == 'CreateInsertNUWNSWBinOp' or +func_name == 'CreateMaskedIntrinsic' or +func_name == 'CreateAlignmentAssumptionHelper'): ignore = True # Convert CamelCase to CAMEL_CASE ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): isl: Validate the calculated row pitch (v45)
Module: Mesa Branch: master Commit: 23802dafc2d5e04e6d2d444855961082b5887400 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=23802dafc2d5e04e6d2d444855961082b5887400 Author: Chad VersaceDate: Tue Mar 21 14:11:57 2017 -0700 isl: Validate the calculated row pitch (v45) Validate that isl_surf::row_pitch fits in the below bitfields, if applicable based on isl_surf::usage. RENDER_SURFACE_STATE::SurfacePitch RENDER_SURFACE_STATE::AuxiliarySurfacePitch 3DSTATE_DEPTH_BUFFER::SurfacePitch 3DSTATE_HIER_DEPTH_BUFFER::SurfacePitch v2: -Add a Makefile dependency on generated header genX_bits.h. v3: - Test ISL_SURF_USAGE_STORAGE_BIT too. [for jekstrand] - Drop explicity dependency on generated header. [for emil] v4: - Rebase for new gen_bits_header.py script. - Replace gen_10x with gen_device_info*. v5: - Drop FINISHME for validation of GEN9 1D row pitch. [for jekstrand] - Reformat bit tests. [for jekstrand] Reviewed-by: Jason Ekstrand (v4) --- src/intel/isl/isl.c | 70 - 1 file changed, 64 insertions(+), 6 deletions(-) diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c index 81f40b6a6f..749fcdf46b 100644 --- a/src/intel/isl/isl.c +++ b/src/intel/isl/isl.c @@ -25,6 +25,8 @@ #include #include +#include "genxml/genX_bits.h" + #include "isl.h" #include "isl_gen4.h" #include "isl_gen6.h" @@ -1089,18 +1091,72 @@ isl_calc_min_row_pitch(const struct isl_device *dev, } } -static uint32_t +/** + * Is `pitch` in the valid range for a hardware bitfield, if the bitfield's + * size is `bits` bits? + * + * Hardware pitch fields are offset by 1. For example, if the size of + * RENDER_SURFACE_STATE::SurfacePitch is B bits, then the range of valid + * pitches is [1, 2^b] inclusive. If the surface pitch is N, then + * RENDER_SURFACE_STATE::SurfacePitch must be set to N-1. 
+ */ +static bool +pitch_in_range(uint32_t n, uint32_t bits) +{ + assert(n != 0); + return likely(bits != 0 && 1 <= n && n <= (1 << bits)); +} + +static bool isl_calc_row_pitch(const struct isl_device *dev, const struct isl_surf_init_info *surf_info, const struct isl_tile_info *tile_info, enum isl_dim_layout dim_layout, - const struct isl_extent2d *phys_slice0_sa) + const struct isl_extent2d *phys_slice0_sa, + uint32_t *out_row_pitch) { const uint32_t alignment = isl_calc_row_pitch_alignment(surf_info, tile_info); - return isl_calc_min_row_pitch(dev, surf_info, tile_info, phys_slice0_sa, - alignment); + const uint32_t row_pitch = + isl_calc_min_row_pitch(dev, surf_info, tile_info, phys_slice0_sa, + alignment); + + const uint32_t row_pitch_tiles = row_pitch / tile_info->phys_extent_B.width; + + if (row_pitch == 0) + return false; + + if (dim_layout == ISL_DIM_LAYOUT_GEN9_1D) { + /* SurfacePitch is ignored for this layout. */ + goto done; + } + + if ((surf_info->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT | +ISL_SURF_USAGE_TEXTURE_BIT | +ISL_SURF_USAGE_STORAGE_BIT)) && + !pitch_in_range(row_pitch, RENDER_SURFACE_STATE_SurfacePitch_bits(dev->info))) + return false; + + if ((surf_info->usage & (ISL_SURF_USAGE_CCS_BIT | +ISL_SURF_USAGE_MCS_BIT)) && + !pitch_in_range(row_pitch_tiles, RENDER_SURFACE_STATE_AuxiliarySurfacePitch_bits(dev->info))) + return false; + + if ((surf_info->usage & ISL_SURF_USAGE_DEPTH_BIT) && + !pitch_in_range(row_pitch, _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info))) + return false; + + if ((surf_info->usage & ISL_SURF_USAGE_HIZ_BIT) && + !pitch_in_range(row_pitch, _3DSTATE_HIER_DEPTH_BUFFER_SurfacePitch_bits(dev->info))) + return false; + + if (surf_info->usage & ISL_SURF_USAGE_STENCIL_BIT) + isl_finishme("validate row pitch of stencil surfaces"); + + done: + *out_row_pitch = row_pitch; + return true; } /** @@ -1275,8 +1331,10 @@ isl_surf_init_s(const struct isl_device *dev, uint32_t pad_bytes; isl_apply_surface_padding(dev, info, _info, 
_h_el, _bytes); - const uint32_t row_pitch = isl_calc_row_pitch(dev, info, _info, - dim_layout, _slice0_sa); + uint32_t row_pitch; + if (!isl_calc_row_pitch(dev, info, _info, dim_layout, + _slice0_sa, _pitch)) + return false; uint32_t size, base_alignment; if (tiling == ISL_TILING_LINEAR) { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): genxml: New generated header genX_bits.h (v6)
Module: Mesa Branch: master Commit: f0eaf38db2c7ed5dd3cbc62ad078bf9d08924640 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f0eaf38db2c7ed5dd3cbc62ad078bf9d08924640 Author: Chad VersaceDate: Fri Mar 24 14:35:24 2017 -0700 genxml: New generated header genX_bits.h (v6) genX_bits.h contains the sizes of bitfields in genxml instructions, structures, and registers. It also defines some functions to query those sizes. isl_surf_init() will use the new header to validate that requested pitches fit in their destination bitfields. What's currently in genX_bits.h: - Each CONTAINER::Field from gen*.xml that has a bitsize has a macro in genX_bits.h: #define GEN{N}_CONTAINER_Field_bits {bitsize} - For each set of macros whose name, after stripping the GEN prefix, is the same, genX_bits.h contains a query function: static inline uint32_t __attribute__((pure)) CONTAINER_Field_bits(const struct gen_device_info *devinfo); v2 (Chad Versace): - Parse the XML instead of scraping the generated gen*_pack.h headers. v3 (Dylan Baker): - Port to Mako. v4 (Jason Ekstrand): - Make the _bits functions take a gen_device_info. v5 (Chad Versace): - Fix autotools out-of-tree build. - Fix Android build. Tested with git://github.com/android-ia/manifest. - Fix macro names. They were all missing the "_bits" suffix. - Fix macros names more. Remove all double-underscores. - Unindent all generated code. (It was floating in a sea of whitespace). - Reformat header to appear human-written not machine-generated. - Sort gens from high to low. Newest gens should come first because, when we read code, we likely want to read the gen8/9 code and ignore the gen4 code. So put the gen4 code at the bottom. - Replace 'const' attributes with 'pure', because the functions now have a pointer parameter. - Add --cpp-guard flag. Used by Android. - Kill class FieldCollection. After Jason's rewrite, it was just a dict. v6 (Chad Versace): - Replace `key not in d.keys()` with `key not in d`. 
[for dylan] Co-authored-by: Dylan Baker Co-authored-by: Jason Ekstrand Reviewed-by: Jason Ekstrand (v5) Reviewed-by: Dylan Baker (v6) --- src/intel/Android.genxml.mk | 9 +- src/intel/Makefile.genxml.am| 6 +- src/intel/Makefile.sources | 6 +- src/intel/genxml/.gitignore | 1 + src/intel/genxml/gen_bits_header.py | 281 5 files changed, 300 insertions(+), 3 deletions(-) diff --git a/src/intel/Android.genxml.mk b/src/intel/Android.genxml.mk index 79de784380..842d0e13a3 100644 --- a/src/intel/Android.genxml.mk +++ b/src/intel/Android.genxml.mk @@ -46,9 +46,16 @@ LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, $(GENXML_GENERATED_FIL define header-gen @mkdir -p $(dir $@) @echo "Gen Header: $(PRIVATE_MODULE) <= $(notdir $(@))" - $(hide) $(PRIVATE_SCRIPT) $(PRIVATE_XML) > $@ + $(hide) $(PRIVATE_SCRIPT) $(PRIVATE_SCRIPT_FLAGS) $(PRIVATE_XML) > $@ endef +$(intermediates)/genxml/genX_bits.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/genxml/gen_bits_header.py +$(intermediates)/genxml/genX_bits.h: PRIVATE_SCRIPT_FLAGS := --cpp-guard=GENX_BITS_H +$(intermediates)/genxml/genX_bits.h: PRIVATE_XML := $(addprefix $(LOCAL_PATH)/,$(GENXML_XML_FILES)) +$(intermediates)/genxml/genX_bits.h: $(LOCAL_PATH)/genxml/gen_bits_header.py +$(intermediates)/genxml/genX_bits.h: $(addprefix $(LOCAL_PATH)/,$(GENXML_XML_FILES)) + $(call header-gen) + $(intermediates)/genxml/gen4_pack.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/genxml/gen_pack_header.py $(intermediates)/genxml/gen4_pack.h: PRIVATE_XML := $(LOCAL_PATH)/genxml/gen4.xml $(intermediates)/genxml/gen4_pack.h: $(LOCAL_PATH)/genxml/gen4.xml $(LOCAL_PATH)/genxml/gen_pack_header.py diff --git a/src/intel/Makefile.genxml.am b/src/intel/Makefile.genxml.am index 01a02b63b4..474b751f5f 100644 --- a/src/intel/Makefile.genxml.am +++ b/src/intel/Makefile.genxml.am @@ -30,7 +30,7 @@ EXTRA_DIST += \ SUFFIXES = _pack.h _xml.h .xml -$(GENXML_GENERATED_FILES): genxml/gen_pack_header.py +$(GENXML_GENERATED_PACK_FILES): 
genxml/gen_pack_header.py .xml_pack.h: $(MKDIR_GEN) @@ -42,6 +42,10 @@ $(AUBINATOR_GENERATED_FILES): genxml/gen_zipped_file.py $(MKDIR_GEN) $(AM_V_GEN) $(PYTHON2) $(srcdir)/genxml/gen_zipped_file.py $< > $@ || ($(RM) $@; false) +genxml/genX_bits.h: genxml/gen_bits_header.py $(GENXML_XML_FILES) + $(MKDIR_GEN) + $(PYTHON_GEN) $< -o $@ $(addprefix $(srcdir)/,$(GENXML_XML_FILES)) + EXTRA_DIST += \ genxml/genX_pack.h \ genxml/gen_macros.h \ diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources index 88bcf60f6e..c56891643c 100644 --- a/src/intel/Makefile.sources +++ b/src/intel/Makefile.sources @@ -119,7 +119,7 @@
Mesa (master): intel: Fix requests for exact surface row pitch (v2)
Module: Mesa Branch: master Commit: 6cbc13d94c40f875926b8fd2129852759f314d14 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6cbc13d94c40f875926b8fd2129852759f314d14 Author: Chad VersaceDate: Fri Feb 24 17:15:43 2017 -0800 intel: Fix requests for exact surface row pitch (v2) All callers of isl_surf_init() that set 'min_row_pitch' wanted to request an *exact* row pitch, as evidenced by nearby asserts, but isl lacked API for doing so. Now that isl has an API for that, update the code to use it. v2: Assert that isl_surf_init() succeeds because the callers assume it. [for jekstrand] Reviewed-by: Nanley Chery (v1) Reviewed-by: Anuj Phogat (v1) Reviewed-by: Jason Ekstrand (v2) --- src/intel/blorp/blorp_blit.c | 8 +--- src/intel/vulkan/anv_blorp.c | 29 +++-- src/intel/vulkan/anv_image.c | 2 +- 3 files changed, 21 insertions(+), 18 deletions(-) diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c index 280b76ab70..691564c878 100644 --- a/src/intel/blorp/blorp_blit.c +++ b/src/intel/blorp/blorp_blit.c @@ -1375,6 +1375,8 @@ static void surf_convert_to_single_slice(const struct isl_device *isl_dev, struct brw_blorp_surface_info *info) { + bool ok UNUSED; + /* Just bail if we have nothing to do. */ if (info->surf.dim == ISL_SURF_DIM_2D && info->view.base_level == 0 && info->view.base_array_layer == 0 && @@ -1421,13 +1423,13 @@ surf_convert_to_single_slice(const struct isl_device *isl_dev, .levels = 1, .array_len = 1, .samples = info->surf.samples, - .min_pitch = info->surf.row_pitch, + .row_pitch = info->surf.row_pitch, .usage = info->surf.usage, .tiling_flags = 1 << info->surf.tiling, }; - isl_surf_init_s(isl_dev, >surf, _info); - assert(info->surf.row_pitch == init_info.min_pitch); + ok = isl_surf_init_s(isl_dev, >surf, _info); + assert(ok); /* The view is also different now. 
*/ info->view.base_level = 0; diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index 9b3910f1b0..16f1692ff5 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -133,6 +133,7 @@ get_blorp_surf_for_anv_buffer(struct anv_device *device, { const struct isl_format_layout *fmtl = isl_format_get_layout(format); + bool ok UNUSED; /* ASTC is the only format which doesn't support linear layouts. * Create an equivalently sized surface with ISL to get around this. @@ -155,20 +156,20 @@ get_blorp_surf_for_anv_buffer(struct anv_device *device, }, }; - isl_surf_init(>isl_dev, isl_surf, - .dim = ISL_SURF_DIM_2D, - .format = format, - .width = width, - .height = height, - .depth = 1, - .levels = 1, - .array_len = 1, - .samples = 1, - .min_pitch = row_pitch, - .usage = ISL_SURF_USAGE_TEXTURE_BIT | - ISL_SURF_USAGE_RENDER_TARGET_BIT, - .tiling_flags = ISL_TILING_LINEAR_BIT); - assert(isl_surf->row_pitch == row_pitch); + ok = isl_surf_init(>isl_dev, isl_surf, + .dim = ISL_SURF_DIM_2D, + .format = format, + .width = width, + .height = height, + .depth = 1, + .levels = 1, + .array_len = 1, + .samples = 1, + .row_pitch = row_pitch, + .usage = ISL_SURF_USAGE_TEXTURE_BIT | + ISL_SURF_USAGE_RENDER_TARGET_BIT, + .tiling_flags = ISL_TILING_LINEAR_BIT); + assert(ok); } static void diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 33499abca1..cf34dbe3b0 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -166,7 +166,7 @@ make_surface(const struct anv_device *dev, .array_len = vk_info->arrayLayers, .samples = vk_info->samples, .min_alignment = 0, - .min_pitch = anv_info->stride, + .row_pitch = anv_info->stride, .usage = choose_isl_surf_usage(image->usage, aspect), .tiling_flags = tiling_flags); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): isl: Drop unused isl_surf_init_info::min_pitch
Module: Mesa Branch: master Commit: d1032a047b5f8ef29a1175192436f4a2291e6ff6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d1032a047b5f8ef29a1175192436f4a2291e6ff6 Author: Chad VersaceDate: Fri Mar 10 13:58:13 2017 -0800 isl: Drop unused isl_surf_init_info::min_pitch Reviewed-by: Nanley Chery Reviewed-by: Anuj Phogat Reviewed-by: Jason Ekstrand --- src/intel/isl/isl.c | 13 +++-- src/intel/isl/isl.h | 3 --- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c index 98a1152c28..c7072d0902 100644 --- a/src/intel/isl/isl.c +++ b/src/intel/isl/isl.c @@ -1043,11 +1043,7 @@ isl_calc_linear_min_row_pitch(const struct isl_device *dev, assert(phys_slice0_sa->w % fmtl->bw == 0); - uint32_t min_row_pitch = bs * (phys_slice0_sa->w / fmtl->bw); - min_row_pitch = MAX2(min_row_pitch, info->min_pitch); - min_row_pitch = isl_align_npot(min_row_pitch, alignment); - - return min_row_pitch; + return isl_align_npot(bs * (phys_slice0_sa->w / fmtl->bw), alignment); } static uint32_t @@ -1068,11 +1064,8 @@ isl_calc_tiled_min_row_pitch(const struct isl_device *dev, isl_align_div(total_w_el * tile_el_scale, tile_info->logical_extent_el.width); - uint32_t min_row_pitch = total_w_tl * tile_info->phys_extent_B.width; - min_row_pitch = MAX2(min_row_pitch, surf_info->min_pitch); - min_row_pitch = isl_align_npot(min_row_pitch, alignment); - - return min_row_pitch; + assert(alignment == tile_info->phys_extent_B.width); + return total_w_tl * tile_info->phys_extent_B.width; } static uint32_t diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h index 012be7b98e..17b52cf2f4 100644 --- a/src/intel/isl/isl.h +++ b/src/intel/isl/isl.h @@ -810,9 +810,6 @@ struct isl_surf_init_info { /** Lower bound for isl_surf::alignment, in bytes. */ uint32_t min_alignment; - /** Lower bound for isl_surf::pitch, in bytes. */ - uint32_t min_pitch; - /** * Exact value for isl_surf::row_pitch. Ignored if zero. 
isl_surf_init() * will fail if this is misaligned or out of bounds. ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): isl: Let isl_surf_init's caller set the exact row pitch (v2)
Module: Mesa Branch: master Commit: e9017d58dcd0117c67788f7e2084b09f5d47a279 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e9017d58dcd0117c67788f7e2084b09f5d47a279 Author: Chad VersaceDate: Fri Feb 24 16:30:13 2017 -0800 isl: Let isl_surf_init's caller set the exact row pitch (v2) The caller does so by setting the new field isl_surf_init_info::row_pitch. v2: Validate the requested row_pitch. Reviewed-by: Jason Ekstrand (v2) --- src/intel/isl/isl.c | 14 +- src/intel/isl/isl.h | 6 ++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c index 749fcdf46b..98a1152c28 100644 --- a/src/intel/isl/isl.c +++ b/src/intel/isl/isl.c @@ -1118,10 +1118,22 @@ isl_calc_row_pitch(const struct isl_device *dev, const uint32_t alignment = isl_calc_row_pitch_alignment(surf_info, tile_info); - const uint32_t row_pitch = + const uint32_t min_row_pitch = isl_calc_min_row_pitch(dev, surf_info, tile_info, phys_slice0_sa, alignment); + uint32_t row_pitch = min_row_pitch; + + if (surf_info->row_pitch != 0) { + row_pitch = surf_info->row_pitch; + + if (row_pitch < min_row_pitch) + return false; + + if (row_pitch % alignment != 0) + return false; + } + const uint32_t row_pitch_tiles = row_pitch / tile_info->phys_extent_B.width; if (row_pitch == 0) diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h index 9d92906ca7..012be7b98e 100644 --- a/src/intel/isl/isl.h +++ b/src/intel/isl/isl.h @@ -813,6 +813,12 @@ struct isl_surf_init_info { /** Lower bound for isl_surf::pitch, in bytes. */ uint32_t min_pitch; + /** +* Exact value for isl_surf::row_pitch. Ignored if zero. isl_surf_init() +* will fail if this is misaligned or out of bounds. +*/ + uint32_t row_pitch; + isl_surf_usage_flags_t usage; /** Flags that alter how ISL selects isl_surf::tiling. */ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr: [rasterizer common] Use C++ thread_local keyword
Module: Mesa Branch: master Commit: 1c7224c85fddcbac64ee5a6595ec8608b4f00437 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1c7224c85fddcbac64ee5a6595ec8608b4f00437 Author: Tim RowleyDate: Wed Mar 22 18:55:13 2017 -0500 swr: [rasterizer common] Use C++ thread_local keyword Allows use of thread_local objects with constructors. Reviewed-by: George Kyriazis --- src/gallium/drivers/swr/rasterizer/common/os.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/common/os.h b/src/gallium/drivers/swr/rasterizer/common/os.h index 28e7ff54f9..71c4da3a59 100644 --- a/src/gallium/drivers/swr/rasterizer/common/os.h +++ b/src/gallium/drivers/swr/rasterizer/common/os.h @@ -47,7 +47,6 @@ #endif #define OSALIGN(RWORD, WIDTH) __declspec(align(WIDTH)) RWORD -#define THREAD __declspec(thread) #define INLINE __forceinline #define DEBUGBREAK __debugbreak() @@ -108,7 +107,6 @@ typedef unsigned intDWORD; #define MAX_PATH PATH_MAX #define OSALIGN(RWORD, WIDTH) RWORD __attribute__((aligned(WIDTH))) -#define THREAD __thread #ifndef INLINE #define INLINE __inline #endif @@ -242,6 +240,8 @@ pid_t gettid(void); #endif +#define THREAD thread_local + // Universal types typedef uint8_t KILOBYTE[1024]; typedef KILOBYTEMEGABYTE[1024]; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr: [rasterizer core] SIMD16 Frontend WIP
Module: Mesa Branch: master Commit: 549b9d2e9f1547af3fb061a7956b04fb30870a6d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=549b9d2e9f1547af3fb061a7956b04fb30870a6d Author: Tim RowleyDate: Mon Mar 20 12:17:07 2017 -0500 swr: [rasterizer core] SIMD16 Frontend WIP Fix GS and streamout. Reviewed-by: George Kyriazis --- src/gallium/drivers/swr/rasterizer/core/clip.h | 61 ++ .../drivers/swr/rasterizer/core/frontend.cpp | 97 +- 2 files changed, 136 insertions(+), 22 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h index eec65707e7..3a79d6a34c 100644 --- a/src/gallium/drivers/swr/rasterizer/core/clip.h +++ b/src/gallium/drivers/swr/rasterizer/core/clip.h @@ -376,7 +376,16 @@ public: const simdscalar vMask = _mm256_set_ps(0, -1, -1, -1, -1, -1, -1, -1); uint32_t numClippedPrims = 0; +#if USE_SIMD16_FRONTEND +const uint32_t numPrims = pa.NumPrims(); +const uint32_t numPrims_lo = std::min(numPrims, KNOB_SIMD_WIDTH); + +SWR_ASSERT(numPrims <= numPrims_lo); + +for (uint32_t inputPrim = 0; inputPrim < numPrims_lo; ++inputPrim) +#else for (uint32_t inputPrim = 0; inputPrim < pa.NumPrims(); ++inputPrim) +#endif { uint32_t numEmittedVerts = pVertexCount[inputPrim]; if (numEmittedVerts < NumVertsPerPrim) @@ -391,13 +400,28 @@ public: // tranpose clipper output so that each lane's vertices are in SIMD order // set aside space for 2 vertices, as the PA will try to read up to 16 verts // for triangle fan +#if USE_SIMD16_FRONTEND +simd16vertex transposedPrims[2]; +#else simdvertex transposedPrims[2]; +#endif // transpose pos uint8_t* pBase = (uint8_t*)([0].attrib[VERTEX_POSITION_SLOT]) + sizeof(float) * inputPrim; + +#if USE_SIMD16_FRONTEND +// TEMPORARY WORKAROUND for bizarre VS2015 code-gen bug - use dx11_clipping_03-09 failures to check for existence of bug +static const float *dummy = reinterpret_cast(pBase); +#endif + for (uint32_t c = 0; c < 4; ++c) { +#if USE_SIMD16_FRONTEND +simdscalar temp = 
_simd_mask_i32gather_ps(_simd_setzero_ps(), (const float *)pBase, vOffsets, vMask, 1); +transposedPrims[0].attrib[VERTEX_POSITION_SLOT][c] = _simd16_insert_ps(_simd16_setzero_ps(), temp, 0); +#else transposedPrims[0].attrib[VERTEX_POSITION_SLOT][c] = _simd_mask_i32gather_ps(_mm256_undefined_ps(), (const float*)pBase, vOffsets, vMask, 1); +#endif pBase += sizeof(simdscalar); } @@ -408,7 +432,12 @@ public: uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + attrib; for (uint32_t c = 0; c < 4; ++c) { +#if USE_SIMD16_FRONTEND +simdscalar temp = _simd_mask_i32gather_ps(_simd_setzero_ps(), (const float *)pBase, vOffsets, vMask, 1); +transposedPrims[0].attrib[attribSlot][c] = _simd16_insert_ps(_simd16_setzero_ps(), temp, 0); +#else transposedPrims[0].attrib[attribSlot][c] = _simd_mask_i32gather_ps(_mm256_undefined_ps(), (const float*)pBase, vOffsets, vMask, 1); +#endif pBase += sizeof(simdscalar); } } @@ -419,7 +448,12 @@ public: pBase = (uint8_t*)([0].attrib[VERTEX_CLIPCULL_DIST_LO_SLOT]) + sizeof(float) * inputPrim; for (uint32_t c = 0; c < 4; ++c) { +#if USE_SIMD16_FRONTEND +simdscalar temp = _simd_mask_i32gather_ps(_simd_setzero_ps(), (const float *)pBase, vOffsets, vMask, 1); +transposedPrims[0].attrib[VERTEX_CLIPCULL_DIST_LO_SLOT][c] = _simd16_insert_ps(_simd16_setzero_ps(), temp, 0); +#else transposedPrims[0].attrib[VERTEX_CLIPCULL_DIST_LO_SLOT][c] = _simd_mask_i32gather_ps(_mm256_undefined_ps(), (const float*)pBase, vOffsets, vMask, 1); +#endif pBase += sizeof(simdscalar); } } @@ -429,7 +463,12 @@ public: pBase = (uint8_t*)([0].attrib[VERTEX_CLIPCULL_DIST_HI_SLOT]) + sizeof(float) * inputPrim; for (uint32_t c = 0; c < 4; ++c) { +#if USE_SIMD16_FRONTEND +simdscalar temp = _simd_mask_i32gather_ps(_simd_setzero_ps(), (const float *)pBase, vOffsets, vMask, 1); +transposedPrims[0].attrib[VERTEX_CLIPCULL_DIST_HI_SLOT][c] = _simd16_insert_ps(_simd16_setzero_ps(), temp, 0); +#else transposedPrims[0].attrib[VERTEX_CLIPCULL_DIST_HI_SLOT][c] = 
_simd_mask_i32gather_ps(_mm256_undefined_ps(), (const float*)pBase, vOffsets, vMask, 1); +#endif pBase +=
Mesa (master): swr: [rasterizer codegen] Refactor codegen
Module: Mesa Branch: master Commit: fee3fc018b274af2913c2a9aa17e024b8eb293d0 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fee3fc018b274af2913c2a9aa17e024b8eb293d0 Author: Tim RowleyDate: Fri Mar 17 12:39:15 2017 -0500 swr: [rasterizer codegen] Refactor codegen Move common codegen functions into gen_common.py. v2: change gen_knobs.py to find the template file internally, like the rest of the gen scripts. Reviewed-by: Bruce Cherniak --- src/gallium/drivers/swr/Makefile.am| 20 ++- .../drivers/swr/rasterizer/codegen/gen_archrast.py | 30 +--- .../drivers/swr/rasterizer/codegen/gen_backends.py | 30 +--- .../drivers/swr/rasterizer/codegen/gen_common.py | 162 + .../drivers/swr/rasterizer/codegen/gen_knobs.py| 64 +++- .../swr/rasterizer/codegen/gen_llvm_ir_macros.py | 35 + .../swr/rasterizer/codegen/gen_llvm_types.py | 32 +--- 7 files changed, 215 insertions(+), 158 deletions(-) diff --git a/src/gallium/drivers/swr/Makefile.am b/src/gallium/drivers/swr/Makefile.am index 8ba9ac93da..515a9089cc 100644 --- a/src/gallium/drivers/swr/Makefile.am +++ b/src/gallium/drivers/swr/Makefile.am @@ -71,30 +71,28 @@ gen_swr_context_llvm.h: rasterizer/codegen/gen_llvm_types.py rasterizer/codegen/ --input $(srcdir)/swr_context.h \ --output ./gen_swr_context_llvm.h -rasterizer/codegen/gen_knobs.cpp: rasterizer/codegen/gen_knobs.py rasterizer/codegen/knob_defs.py rasterizer/codegen/templates/gen_knobs.cpp +rasterizer/codegen/gen_knobs.cpp: rasterizer/codegen/gen_knobs.py rasterizer/codegen/knob_defs.py rasterizer/codegen/templates/gen_knobs.cpp rasterizer/codegen/gen_common.py $(MKDIR_GEN) $(PYTHON_GEN) \ $(srcdir)/rasterizer/codegen/gen_knobs.py \ - --input $(srcdir)/rasterizer/codegen/templates/gen_knobs.cpp \ --output rasterizer/codegen/gen_knobs.cpp \ --gen_cpp -rasterizer/codegen/gen_knobs.h: rasterizer/codegen/gen_knobs.py rasterizer/codegen/knob_defs.py rasterizer/codegen/templates/gen_knobs.cpp +rasterizer/codegen/gen_knobs.h: rasterizer/codegen/gen_knobs.py 
rasterizer/codegen/knob_defs.py rasterizer/codegen/templates/gen_knobs.cpp rasterizer/codegen/gen_common.py $(MKDIR_GEN) $(PYTHON_GEN) \ $(srcdir)/rasterizer/codegen/gen_knobs.py \ - --input $(srcdir)/rasterizer/codegen/templates/gen_knobs.cpp \ --output rasterizer/codegen/gen_knobs.h \ --gen_h -rasterizer/jitter/gen_state_llvm.h: rasterizer/codegen/gen_llvm_types.py rasterizer/codegen/templates/gen_llvm.hpp rasterizer/core/state.h +rasterizer/jitter/gen_state_llvm.h: rasterizer/codegen/gen_llvm_types.py rasterizer/codegen/templates/gen_llvm.hpp rasterizer/core/state.h rasterizer/codegen/gen_common.py $(MKDIR_GEN) $(PYTHON_GEN) \ $(srcdir)/rasterizer/codegen/gen_llvm_types.py \ --input $(srcdir)/rasterizer/core/state.h \ --output rasterizer/jitter/gen_state_llvm.h -rasterizer/jitter/gen_builder.hpp: rasterizer/codegen/gen_llvm_ir_macros.py rasterizer/codegen/templates/gen_builder.hpp +rasterizer/jitter/gen_builder.hpp: rasterizer/codegen/gen_llvm_ir_macros.py rasterizer/codegen/templates/gen_builder.hpp rasterizer/codegen/gen_common.py $(MKDIR_GEN) $(PYTHON_GEN) \ $(srcdir)/rasterizer/codegen/gen_llvm_ir_macros.py \ @@ -102,14 +100,14 @@ rasterizer/jitter/gen_builder.hpp: rasterizer/codegen/gen_llvm_ir_macros.py rast --output rasterizer/jitter \ --gen_h -rasterizer/jitter/gen_builder_x86.hpp: rasterizer/codegen/gen_llvm_ir_macros.py rasterizer/codegen/templates/gen_builder.hpp +rasterizer/jitter/gen_builder_x86.hpp: rasterizer/codegen/gen_llvm_ir_macros.py rasterizer/codegen/templates/gen_builder.hpp rasterizer/codegen/gen_common.py $(MKDIR_GEN) $(PYTHON_GEN) \ $(srcdir)/rasterizer/codegen/gen_llvm_ir_macros.py \ --output rasterizer/jitter \ --gen_x86_h -rasterizer/archrast/gen_ar_event.hpp: rasterizer/codegen/gen_archrast.py rasterizer/codegen/templates/gen_ar_event.hpp rasterizer/archrast/events.proto +rasterizer/archrast/gen_ar_event.hpp: rasterizer/codegen/gen_archrast.py rasterizer/codegen/templates/gen_ar_event.hpp rasterizer/archrast/events.proto 
rasterizer/codegen/gen_common.py $(MKDIR_GEN) $(PYTHON_GEN) \ $(srcdir)/rasterizer/codegen/gen_archrast.py \ @@ -117,7 +115,7 @@ rasterizer/archrast/gen_ar_event.hpp: rasterizer/codegen/gen_archrast.py rasteri --output rasterizer/archrast/gen_ar_event.hpp \ --gen_event_h -rasterizer/archrast/gen_ar_event.cpp: rasterizer/codegen/gen_archrast.py rasterizer/codegen/templates/gen_ar_event.cpp
Mesa (master): swr: [rasterizer core] Don't bind single-threaded contexts
Module: Mesa Branch: master Commit: aea737e12e186091507d33b874ec101da0f69c5e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=aea737e12e186091507d33b874ec101da0f69c5e Author: Tim Rowley Date: Tue Mar 21 16:52:49 2017 -0500 swr: [rasterizer core] Don't bind single-threaded contexts Reviewed-by: George Kyriazis --- src/gallium/drivers/swr/rasterizer/core/threads.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp index ea29f66c88..e3ad2585c0 100644 --- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp @@ -274,7 +274,7 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread void bindThread(SWR_CONTEXT* pContext, uint32_t threadId, uint32_t procGroupId = 0, bool bindProcGroup=false) { // Only bind threads when MAX_WORKER_THREADS isn't set. -if (pContext->threadInfo.MAX_WORKER_THREADS && bindProcGroup == false) +if (pContext->threadInfo.SINGLE_THREADED || (pContext->threadInfo.MAX_WORKER_THREADS && bindProcGroup == false)) { return; } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr: [rasterizer core] Disable inline function expansion
Module: Mesa Branch: master Commit: 3974cfea2531b86fb394d8501b106e69f00c5f89 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3974cfea2531b86fb394d8501b106e69f00c5f89 Author: Tim RowleyDate: Wed Mar 22 19:20:42 2017 -0500 swr: [rasterizer core] Disable inline function expansion Disable expansion in windows Debug builds. Reviewed-by: George Kyriazis --- src/gallium/drivers/swr/rasterizer/common/os.h | 12 1 file changed, 12 insertions(+) diff --git a/src/gallium/drivers/swr/rasterizer/common/os.h b/src/gallium/drivers/swr/rasterizer/common/os.h index 71c4da3a59..ef00a255d3 100644 --- a/src/gallium/drivers/swr/rasterizer/common/os.h +++ b/src/gallium/drivers/swr/rasterizer/common/os.h @@ -47,7 +47,19 @@ #endif #define OSALIGN(RWORD, WIDTH) __declspec(align(WIDTH)) RWORD + +#if defined(_DEBUG) +// We compile Debug builds with inline function expansion enabled. This allows +// functions compiled with __forceinline to be inlined even in Debug builds. +// The inline_depth(0) pragma below will disable inline function expansion for +// normal INLINE / inline functions, but not for __forceinline functions. +// Our SIMD function wrappers (see simdlib.hpp) use __forceinline even in +// Debug builds. +#define INLINE inline +#pragma inline_depth(0) +#else #define INLINE __forceinline +#endif #define DEBUGBREAK __debugbreak() #define PRAGMA_WARNING_PUSH_DISABLE(...) \ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr: [rasterizer core] Enable SIMD16
Module: Mesa Branch: master Commit: 4cd0b1bb2c284609d2ac3413456b29f1a3e42d10 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4cd0b1bb2c284609d2ac3413456b29f1a3e42d10 Author: Tim RowleyDate: Tue Mar 21 15:32:34 2017 -0500 swr: [rasterizer core] Enable SIMD16 Make the AVX512 insert/extract intrinsics KNL-compatible Reviewed-by: George Kyriazis --- src/gallium/drivers/swr/rasterizer/common/simd16intrin.h | 14 +++--- src/gallium/drivers/swr/rasterizer/core/knobs.h | 2 +- src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp | 12 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h b/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h index 88814a58aa..3b43d510e6 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h +++ b/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h @@ -853,10 +853,10 @@ INLINE simd16scalari _simd16_set_epi32(int e7, int e6, int e5, int e4, int e3, i #define _simd16_broadcast_ps(m) _mm512_extload_ps(m, _MM_UPCONV_PS_NONE, _MM_BROADCAST_4X16, 0) #define _simd16_store_ps_mm512_store_ps #define _simd16_store_si_mm512_store_si512 -#define _simd16_extract_ps _mm512_extractf32x8_ps -#define _simd16_extract_si _mm512_extracti32x8_epi32 -#define _simd16_insert_ps _mm512_insertf32x8 -#define _simd16_insert_si _mm512_inserti32x8 +#define _simd16_extract_ps(a, imm8) _mm256_castsi256_ps(_mm512_extracti64x4_epi64(_mm512_castps_si512(a), imm8)) +#define _simd16_extract_si _mm512_extracti64x4_epi64 +#define _simd16_insert_ps(a, b, imm8) _mm512_castsi512_ps(_mm512_inserti64x4(_mm512_castps_si512(a), _mm256_castps_si256(b), imm8)) +#define _simd16_insert_si _mm512_inserti64x4 INLINE void _simd16_maskstore_ps(float *m, simd16scalari mask, simd16scalar a) { @@ -871,21 +871,21 @@ INLINE simd16scalar _simd16_blendv_ps(simd16scalar a, simd16scalar b, const simd { simd16mask k = _simd16_scalari2mask(_mm512_castps_si512(mask)); -_mm512_mask_blend_ps(k, a, b); +return 
_mm512_mask_blend_ps(k, a, b); } INLINE simd16scalari _simd16_blendv_epi32(simd16scalari a, simd16scalari b, const simd16scalar mask) { simd16mask k = _simd16_scalari2mask(_mm512_castps_si512(mask)); -_mm512_mask_blend_epi32(k, a, b); +return _mm512_mask_blend_epi32(k, a, b); } INLINE simd16scalari _simd16_blendv_epi32(simd16scalari a, simd16scalari b, const simd16scalari mask) { simd16mask k = _simd16_scalari2mask(mask); -_mm512_mask_blend_epi32(k, a, b); +return _mm512_mask_blend_epi32(k, a, b); } #define _simd16_mul_ps _mm512_mul_ps diff --git a/src/gallium/drivers/swr/rasterizer/core/knobs.h b/src/gallium/drivers/swr/rasterizer/core/knobs.h index 8e54f90526..7928f5d6d7 100644 --- a/src/gallium/drivers/swr/rasterizer/core/knobs.h +++ b/src/gallium/drivers/swr/rasterizer/core/knobs.h @@ -38,7 +38,7 @@ // AVX512 Support /// -#define ENABLE_AVX512_SIMD160 +#define ENABLE_AVX512_SIMD161 #define USE_8x2_TILE_BACKEND0 #define USE_SIMD16_FRONTEND 0 diff --git a/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp b/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp index 297f23a88c..511a1fc0df 100644 --- a/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp @@ -1297,7 +1297,19 @@ bool PaTriFan0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]) bool PaTriFan1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]) { +#if USE_SIMD16_FRONTEND const simd16vector = pa.leadingVertex.attrib[slot]; +#else +simd16vector a; + +{ +for (uint32_t i = 0; i < 4; i += 1) +{ +a[i] = _simd16_insert_ps(_simd16_setzero_ps(), pa.leadingVertex.attrib[slot][i], 0); +} +} + +#endif const simd16vector = PaGetSimdVector_simd16(pa, pa.prev, slot); const simd16vector = PaGetSimdVector_simd16(pa, pa.cur, slot); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr: [rasterizer jitter] Clean up EngineBuilder construction
Module: Mesa Branch: master Commit: ec51e8ecfea9d81313192fcd25f9767f8203a9ca URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ec51e8ecfea9d81313192fcd25f9767f8203a9ca Author: Tim RowleyDate: Mon Mar 20 19:44:49 2017 -0500 swr: [rasterizer jitter] Clean up EngineBuilder construction Reviewed-by: George Kyriazis --- src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp | 12 +--- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp index 79118f5f65..bdb8a52e2f 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp @@ -106,16 +106,10 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core) std::unique_ptr newModule(new Module(fnName.str(), mContext)); mpCurrentModule = newModule.get(); -auto & = EngineBuilder(std::move(newModule)); -EB.setTargetOptions(tOpts); -EB.setOptLevel(CodeGenOpt::Aggressive); - StringRef hostCPUName; hostCPUName = sys::getHostCPUName(); -EB.setMCPU(hostCPUName); - #if defined(_WIN32) // Needed for MCJIT on windows Triple hostTriple(sys::getProcessTriple()); @@ -123,7 +117,11 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core) mpCurrentModule->setTargetTriple(hostTriple.getTriple()); #endif // _WIN32 -mpExec = EB.create(); +mpExec = EngineBuilder(std::move(newModule)) +.setTargetOptions(tOpts) +.setOptLevel(CodeGenOpt::Aggressive) +.setMCPU(hostCPUName) +.create(); #if LLVM_USE_INTEL_JITEVENTS JITEventListener *vTune = JITEventListener::createIntelJITEventListener(); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): swr: [rasterizer core] SIMD16 Frontend WIP
Module: Mesa Branch: master Commit: aee5276375d79f5d73680d6038a1fd838894679a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=aee5276375d79f5d73680d6038a1fd838894679a Author: Tim RowleyDate: Wed Mar 22 12:36:49 2017 -0500 swr: [rasterizer core] SIMD16 Frontend WIP Implement widened clipper and binner interfaces for SIMD16. Reviewed-by: George Kyriazis --- src/gallium/drivers/swr/rasterizer/core/api.cpp| 24 src/gallium/drivers/swr/rasterizer/core/binner.cpp | 154 + src/gallium/drivers/swr/rasterizer/core/clip.cpp | 131 ++ src/gallium/drivers/swr/rasterizer/core/clip.h | 6 + src/gallium/drivers/swr/rasterizer/core/context.h | 3 + .../drivers/swr/rasterizer/core/frontend.cpp | 115 +-- src/gallium/drivers/swr/rasterizer/core/frontend.h | 7 + src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp | 12 ++ 8 files changed, 371 insertions(+), 81 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index bd63796d13..dabd0616d3 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -839,11 +839,18 @@ void SetupPipeline(DRAW_CONTEXT *pDC) } PFN_PROCESS_PRIMS pfnBinner; +#if USE_SIMD16_FRONTEND +PFN_PROCESS_PRIMS_SIMD16 pfnBinner_simd16; +#endif switch (pState->state.topology) { case TOP_POINT_LIST: pState->pfnProcessPrims = ClipPoints; pfnBinner = BinPoints; +#if USE_SIMD16_FRONTEND +pState->pfnProcessPrims_simd16 = ClipPoints_simd16; +pfnBinner_simd16 = BinPoints_simd16; +#endif break; case TOP_LINE_LIST: case TOP_LINE_STRIP: @@ -852,10 +859,18 @@ void SetupPipeline(DRAW_CONTEXT *pDC) case TOP_LISTSTRIP_ADJ: pState->pfnProcessPrims = ClipLines; pfnBinner = BinLines; +#if USE_SIMD16_FRONTEND +pState->pfnProcessPrims_simd16 = ClipLines_simd16; +pfnBinner_simd16 = BinLines_simd16; +#endif break; default: pState->pfnProcessPrims = ClipTriangles; pfnBinner = GetBinTrianglesFunc((rastState.conservativeRast > 0)); +#if USE_SIMD16_FRONTEND 
+pState->pfnProcessPrims_simd16 = ClipTriangles_simd16; +pfnBinner_simd16 = GetBinTrianglesFunc_simd16((rastState.conservativeRast > 0)); +#endif break; }; @@ -864,6 +879,9 @@ void SetupPipeline(DRAW_CONTEXT *pDC) if (pState->state.frontendState.vpTransformDisable) { pState->pfnProcessPrims = pfnBinner; +#if USE_SIMD16_FRONTEND +pState->pfnProcessPrims_simd16 = pfnBinner_simd16; +#endif } if ((pState->state.psState.pfnPixelShader == nullptr) && @@ -874,11 +892,17 @@ void SetupPipeline(DRAW_CONTEXT *pDC) (pState->state.backendState.numAttributes == 0)) { pState->pfnProcessPrims = nullptr; +#if USE_SIMD16_FRONTEND +pState->pfnProcessPrims_simd16 = nullptr; +#endif } if (pState->state.soState.rasterizerDisable == true) { pState->pfnProcessPrims = nullptr; +#if USE_SIMD16_FRONTEND +pState->pfnProcessPrims_simd16 = nullptr; +#endif } diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp index 490a86804f..63eab33ac0 100644 --- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp @@ -856,6 +856,58 @@ endBinTriangles: AR_END(FEBinTriangles, 1); } +#if USE_SIMD16_FRONTEND +inline uint32_t GetPrimMaskLo(uint32_t primMask) +{ +return primMask & 255; +} + +inline uint32_t GetPrimMaskHi(uint32_t primMask) +{ +return (primMask >> 8) & 255; +} + +template +void BinTriangles_simd16( +DRAW_CONTEXT *pDC, +PA_STATE& pa, +uint32_t workerId, +simd16vector tri[3], +uint32_t triMask, +simd16scalari primID, +simd16scalari viewportIdx) +{ +enum { VERTS_PER_PRIM = 3 }; + +simdvector verts[VERTS_PER_PRIM]; + +for (uint32_t i = 0; i < VERTS_PER_PRIM; i += 1) +{ +for (uint32_t j = 0; j < 4; j += 1) +{ +verts[i][j] = _simd16_extract_ps(tri[i][j], 0); +} +} + +pa.useAlternateOffset = false; +BinTriangles(pDC, pa, workerId, verts, GetPrimMaskLo(triMask), _simd16_extract_si(primID, 0), _simd16_extract_si(viewportIdx, 0)); + +if (GetPrimMaskHi(triMask)) +{ +for (uint32_t i = 0; i 
< VERTS_PER_PRIM; i += 1) +{ +for (uint32_t j = 0; j < 4; j += 1) +{ +verts[i][j] = _simd16_extract_ps(tri[i][j], 1); +} +} + +pa.useAlternateOffset = true; +BinTriangles(pDC, pa, workerId, verts, GetPrimMaskHi(triMask), _simd16_extract_si(primID, 1), _simd16_extract_si(viewportIdx, 1)); +} +} + +#endif struct
Mesa (master): swr: [rasterizer codegen] add cmdline to archrast gen files
Module: Mesa Branch: master Commit: 89b83f4b1e8b51cc23f7a998b81b486cf39f8d86 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=89b83f4b1e8b51cc23f7a998b81b486cf39f8d86 Author: Tim RowleyDate: Mon Mar 20 17:39:41 2017 -0500 swr: [rasterizer codegen] add cmdline to archrast gen files Reviewed-by: George Kyriazis --- src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py | 4 .../drivers/swr/rasterizer/codegen/templates/gen_ar_event.cpp| 3 +++ .../drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp| 3 +++ .../drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandler.hpp | 5 - .../swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp | 5 - 5 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py index efe42bb8db..06a3dea4ad 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py +++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py @@ -155,6 +155,7 @@ def main(): output_fullpath = os.sep.join([output_dir, output_filename]) MakoTemplateWriter.to_file(template_file, output_fullpath, +cmdline=sys.argv, filename=output_filename, protos=protos) @@ -165,6 +166,7 @@ def main(): output_fullpath = os.sep.join([output_dir, output_filename]) MakoTemplateWriter.to_file(template_file, output_fullpath, +cmdline=sys.argv, filename=output_filename, protos=protos) @@ -175,6 +177,7 @@ def main(): output_fullpath = os.sep.join([output_dir, output_filename]) MakoTemplateWriter.to_file(template_file, output_fullpath, +cmdline=sys.argv, filename=output_filename, event_header='gen_ar_event.hpp', protos=protos) @@ -186,6 +189,7 @@ def main(): output_fullpath = os.sep.join([output_dir, output_filename]) MakoTemplateWriter.to_file(template_file, output_fullpath, +cmdline=sys.argv, filename=output_filename, event_header='gen_ar_eventhandler.hpp', protos=protos) diff --git 
a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.cpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.cpp index b743b2f3d2..d48fda61c2 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.cpp +++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.cpp @@ -26,6 +26,9 @@ * * DO NOT EDIT * +* Generation Command Line: +* ${'\n*'.join(cmdline)} +* **/ #include "common/os.h" #include "gen_ar_event.hpp" diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp index 68926ea805..e792f5f63e 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp +++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp @@ -25,6 +25,9 @@ * @brief Definitions for events. auto-generated file * * DO NOT EDIT +* +* Generation Command Line: +* ${'\n*'.join(cmdline)} * **/ #pragma once diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandler.hpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandler.hpp index cfed2aded0..87d0ef47ca 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandler.hpp +++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandler.hpp @@ -25,7 +25,10 @@ * @brief Event handler interface. auto-generated file * * DO NOT EDIT -* +* +* Generation Command Line: +* ${'\n*'.join(cmdline)} +* **/ #pragma once diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp index 48ff0b0a95..3a618a124d 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp +++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp @@ -25,7 +25,10 @@ * @brief Event handler interface. 
auto-generated file * * DO NOT EDIT -* +* +* Generation Command Line: +* ${'\n*'.join(cmdline)} +* **/ #pragma once ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org
Mesa (master): tests/cache_test: allow crossing mount points
Module: Mesa Branch: master Commit: caa616ccc4384ea1479865e12b56cf816561a827 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=caa616ccc4384ea1479865e12b56cf816561a827 Author: Juan A. Suarez Romero Date: Tue Mar 28 18:00:39 2017 +0200 tests/cache_test: allow crossing mount points When using an overlayfs system (like a Docker container), rmrf_local() fails because part of the files to be removed are in different mount points (layouts). And thus cache-test fails. Allowing mount points to be crossed is not a big problem, especially because this is just for a test, not to be used in real code. Reviewed-by: Nicolai Hähnle --- src/compiler/glsl/tests/cache_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/glsl/tests/cache_test.c b/src/compiler/glsl/tests/cache_test.c index 537a81bc76..bec1d240e9 100644 --- a/src/compiler/glsl/tests/cache_test.c +++ b/src/compiler/glsl/tests/cache_test.c @@ -124,7 +124,7 @@ rmrf_local(const char *path) if (path == NULL || *path == '\0' || *path != '.') return -1; - return nftw(path, remove_entry, 64, FTW_DEPTH | FTW_PHYS | FTW_MOUNT); + return nftw(path, remove_entry, 64, FTW_DEPTH | FTW_PHYS); } static void ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): 23 new commits
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0f9a0cb5f55b432f58c9adbb9b1c63c748d1dfd0 Author: Emil Velikov Date: Tue Feb 28 13:29:06 2017 + glcpp/tests/glcpp-test-cr-lf: error out if we cannot find any tests Signed-off-by: Emil Velikov Acked-by: Kenneth Graunke Reviewed-by: Eric Engestrom URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d8096b75aa15fdda8433c2c8614ca0bf5de2c150 Author: Emil Velikov Date: Tue Feb 28 13:24:55 2017 + glcpp/tests/glcpp-test-cr-lf: correctly set/use srcdir/abs_builddir Otherwise manual invocation of the script from elsewhere than `dirname $0` will fail. With these all the artefacts should be created in the correct location, and thus we can remove the old (and slightly strange) clean-local line. Signed-off-by: Emil Velikov Acked-by: Kenneth Graunke Reviewed-by: Eric Engestrom URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cf77cdce839a06097b5f995118261eb98285ffc7 Author: Emil Velikov Date: Tue Feb 28 12:13:58 2017 + glcpp/tests: update testname in help string Rather than hardcoding glcpp/other use `basename "$0"` which expands appropriately. 
Signed-off-by: Emil Velikov Acked-by: Kenneth Graunke Reviewed-by: Eric Engestrom URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4ea4fbf93a5a2229af3d48dc7fb23a43c90adb7f Author: Emil Velikov Date: Tue Feb 28 12:10:41 2017 + glcpp/tests/glcpp-test: error out if we cannot find any tests Signed-off-by: Emil Velikov Acked-by: Kenneth Graunke Reviewed-by: Eric Engestrom URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=182d48ceb9e58eb53b52436b2cd6010de072d29b Author: Emil Velikov Date: Tue Feb 28 12:08:52 2017 + glcpp/tests/glcpp-test: print only the test basename Signed-off-by: Emil Velikov Acked-by: Kenneth Graunke Reviewed-by: Eric Engestrom URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=addf62946d6c73885dd261099cddc06d3c910f17 Author: Emil Velikov Date: Tue Feb 28 12:02:35 2017 + glcpp/tests/glcpp-test: set srcdir/abs_builddir variables Current definitions work fine for the manual invocation of the script, although the whole script does not consider that one can run it OOT. The latter will be handled with later patches, although it will be extensively using the two variables. Signed-off-by: Emil Velikov Acked-by: Kenneth Graunke Reviewed-by: Eric Engestrom URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ee8aea35725e4b582ed8af2866d0feffa2d13c6e Author: Emil Velikov Date: Mon Feb 27 18:58:06 2017 + glsl/tests/optimization-test: 'echo' only folders which has generators The current "let's print any folder which exists" is simply confusing. Signed-off-by: Emil Velikov Acked-by: Kenneth Graunke Reviewed-by: Eric Engestrom URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=79a95f19e64a1d2f855e3f8194b86dc0b2a78c3f Author: Emil Velikov Date: Mon Feb 27 18:56:38 2017 + glsl/tests/optimization-test: print only the test basedir/name The relative/absolute path brings little to no benefit in being printed as testname. Trim it out. 
Signed-off-by: Emil Velikov Acked-by: Kenneth Graunke Reviewed-by: Eric Engestrom URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=33cd136fa267a44931b8f0230c5d68259ebec2d5 Author: Emil Velikov Date: Sun Feb 26 20:43:05 2017 + glsl/tests/optimization-test: error if zero tests were executed We don't want to lie to ourselves that 'everything is fine' when no tests were found/ran. Signed-off-by: Emil Velikov Acked-by: Kenneth Graunke Reviewed-by: Eric Engestrom URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=421115a72939b7dbcdc9f714d85f3e7616323a3e Author: Emil Velikov Date: Sun Feb
Mesa (master): st/va: remove assert for single slice
Module: Mesa Branch: master Commit: 3472be2bfd8b9cbc931342cc99d0e1abdc48350b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3472be2bfd8b9cbc931342cc99d0e1abdc48350b Author: Nayan DeshmukhDate: Tue Mar 21 14:02:27 2017 +0530 st/va: remove assert for single slice we anyway allow for multiple slices v2: do not remove assert to check for buf->size Signed-off-by: Nayan Deshmukh Reviewed-by: Christian König --- src/gallium/state_trackers/va/picture_mpeg12.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/va/picture_mpeg12.c b/src/gallium/state_trackers/va/picture_mpeg12.c index 812e9e5b2a..1e5a9c7428 100644 --- a/src/gallium/state_trackers/va/picture_mpeg12.c +++ b/src/gallium/state_trackers/va/picture_mpeg12.c @@ -81,6 +81,6 @@ void vlVaHandleIQMatrixBufferMPEG12(vlVaContext *context, vlVaBuffer *buf) void vlVaHandleSliceParameterBufferMPEG12(vlVaContext *context, vlVaBuffer *buf) { - assert(buf->size >= sizeof(VASliceParameterBufferMPEG2) && buf->num_elements == 1); + assert(buf->size >= sizeof(VASliceParameterBufferMPEG2)); context->desc.mpeg12.num_slices += buf->num_elements; } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radeonsi: CP DMA clear supports unaligned destination addresses
Module: Mesa Branch: master Commit: f0d9af772e0fbb5854fc8293186a70ea3721748a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f0d9af772e0fbb5854fc8293186a70ea3721748a Author: Nicolai HähnleDate: Mon Feb 13 13:08:52 2017 +0100 radeonsi: CP DMA clear supports unaligned destination addresses Reviewed-by: Marek Olšák --- src/gallium/drivers/radeonsi/si_cp_dma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index b40f5cc587..0cf7b3b3cb 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -197,7 +197,7 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst, offset + size); /* Fallback for unaligned clears. */ - if (offset % 4 != 0 || size % 4 != 0) { + if (size % 4 != 0) { uint8_t *map = r600_buffer_map_sync_with_rings(>b, rdst, PIPE_TRANSFER_WRITE); map += offset; @@ -211,6 +211,7 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst, /* dma_clear_buffer can use clear_buffer on failure. Make sure that * doesn't happen. We don't want an infinite recursion: */ if (sctx->b.dma.cs && + (offset % 4 == 0) && /* CP DMA is very slow. Always use SDMA for big clears. This * alone improves DeusEx:MD performance by 70%. */ (size > 128 * 1024 || ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radeonsi: use DMA for clears with unaligned size
Module: Mesa Branch: master Commit: 21ba6543be0c979c5f5ae10e2623ba697292dccc URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=21ba6543be0c979c5f5ae10e2623ba697292dccc Author: Nicolai HähnleDate: Mon Feb 13 13:19:45 2017 +0100 radeonsi: use DMA for clears with unaligned size Only a small tail needs to be uploaded manually. This is only partly a performance measure (apps are expected to use aligned access). Mostly it is preparation for sparse buffers, which the old code would incorrectly have attempted to map directly. Reviewed-by: Marek Olšák --- src/gallium/drivers/radeonsi/si_cp_dma.c | 46 +++- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index 0cf7b3b3cb..812fcbc2b2 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -185,28 +185,19 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst, struct r600_resource *rdst = r600_resource(dst); unsigned tc_l2_flag = get_tc_l2_flag(sctx, coher); unsigned flush_flags = get_flush_flags(sctx, coher); + uint64_t dma_clear_size; bool is_first = true; if (!size) return; + dma_clear_size = size & ~3llu; + /* Mark the buffer range of destination as valid (initialized), * so that transfer_map knows it should wait for the GPU when mapping * that range. */ util_range_add(>valid_buffer_range, offset, - offset + size); - - /* Fallback for unaligned clears. */ - if (size % 4 != 0) { - uint8_t *map = r600_buffer_map_sync_with_rings(>b, rdst, - PIPE_TRANSFER_WRITE); - map += offset; - for (uint64_t i = 0; i < size; i++) { - unsigned byte_within_dword = (offset + i) % 4; - *map++ = (value >> (byte_within_dword * 8)) & 0xff; - } - return; - } + offset + dma_clear_size); /* dma_clear_buffer can use clear_buffer on failure. Make sure that * doesn't happen. 
We don't want an infinite recursion: */ @@ -223,25 +214,31 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst, * of them are moved to SDMA thanks to this. */ !ws->cs_is_buffer_referenced(sctx->b.gfx.cs, rdst->buf, RADEON_USAGE_READWRITE))) { - sctx->b.dma_clear_buffer(ctx, dst, offset, size, value); - } else { + sctx->b.dma_clear_buffer(ctx, dst, offset, dma_clear_size, value); + + offset += dma_clear_size; + size -= dma_clear_size; + } else if (dma_clear_size >= 4) { uint64_t va = rdst->gpu_address + offset; + offset += dma_clear_size; + size -= dma_clear_size; + /* Flush the caches. */ sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH | flush_flags; - while (size) { - unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT); + while (dma_clear_size) { + unsigned byte_count = MIN2(dma_clear_size, CP_DMA_MAX_BYTE_COUNT); unsigned dma_flags = tc_l2_flag | CP_DMA_CLEAR; - si_cp_dma_prepare(sctx, dst, NULL, byte_count, size, 0, + si_cp_dma_prepare(sctx, dst, NULL, byte_count, dma_clear_size, 0, _first, _flags); /* Emit the clear packet. */ si_emit_cp_dma(sctx, va, value, byte_count, dma_flags, coher); - size -= byte_count; + dma_clear_size -= byte_count; va += byte_count; } @@ -252,6 +249,17 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst, if (coher == R600_COHERENCY_SHADER) sctx->b.num_cp_dma_calls++; } + + if (size) { + /* Handle non-dword alignment. +* +* This function is called for embedded texture metadata clears, +* but those should always be properly aligned. */ + assert(dst->target == PIPE_BUFFER); + assert(size < 4); + + pipe_buffer_write(ctx, dst, offset, size, ); + } } /** ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radeonsi: remove the early-out for SDMA in si_clear_buffer
Module: Mesa Branch: master Commit: d9014952f5ca10a5292df3bb8c4bf1b7ccaed240 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d9014952f5ca10a5292df3bb8c4bf1b7ccaed240 Author: Nicolai HähnleDate: Mon Feb 13 12:51:36 2017 +0100 radeonsi: remove the early-out for SDMA in si_clear_buffer This allows the next patches to be simple while still being able to make use of SDMA even in some unusual cases. Reviewed-by: Marek Olšák --- src/gallium/drivers/radeonsi/si_cp_dma.c | 43 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index 1be7586d16..b40f5cc587 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -223,35 +223,34 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst, !ws->cs_is_buffer_referenced(sctx->b.gfx.cs, rdst->buf, RADEON_USAGE_READWRITE))) { sctx->b.dma_clear_buffer(ctx, dst, offset, size, value); - return; - } - - uint64_t va = rdst->gpu_address + offset; + } else { + uint64_t va = rdst->gpu_address + offset; - /* Flush the caches. */ - sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | -SI_CONTEXT_CS_PARTIAL_FLUSH | flush_flags; + /* Flush the caches. */ + sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | +SI_CONTEXT_CS_PARTIAL_FLUSH | flush_flags; - while (size) { - unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT); - unsigned dma_flags = tc_l2_flag | CP_DMA_CLEAR; + while (size) { + unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT); + unsigned dma_flags = tc_l2_flag | CP_DMA_CLEAR; - si_cp_dma_prepare(sctx, dst, NULL, byte_count, size, 0, - _first, _flags); + si_cp_dma_prepare(sctx, dst, NULL, byte_count, size, 0, + _first, _flags); - /* Emit the clear packet. */ - si_emit_cp_dma(sctx, va, value, byte_count, dma_flags, coher); + /* Emit the clear packet. 
*/ + si_emit_cp_dma(sctx, va, value, byte_count, dma_flags, coher); - size -= byte_count; - va += byte_count; - } + size -= byte_count; + va += byte_count; + } - if (tc_l2_flag) - rdst->TC_L2_dirty = true; + if (tc_l2_flag) + rdst->TC_L2_dirty = true; - /* If it's not a framebuffer fast clear... */ - if (coher == R600_COHERENCY_SHADER) - sctx->b.num_cp_dma_calls++; + /* If it's not a framebuffer fast clear... */ + if (coher == R600_COHERENCY_SHADER) + sctx->b.num_cp_dma_calls++; + } } /** ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: rework vertex/export shader output handling
Module: Mesa Branch: master Commit: 931a8d0c9a15df462f14ab40f9ae31c8ecf75376 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=931a8d0c9a15df462f14ab40f9ae31c8ecf75376 Author: Dave AirlieDate: Tue Mar 28 06:13:09 2017 +1000 radv: rework vertex/export shader output handling In order to faciliate adding tess support, split the vs/es output info into a separate block, so we make it easier to have the tess shaders export the same info. Reviewed-by: Bas Nieuwenhuizen Signed-off-by: Dave Airlie --- src/amd/common/ac_nir_to_llvm.c | 54 + src/amd/common/ac_nir_to_llvm.h | 30 --- src/amd/vulkan/radv_cmd_buffer.c | 64 ++-- src/amd/vulkan/radv_pipeline.c | 8 ++--- 4 files changed, 86 insertions(+), 70 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 6e36c192c3..cfbdeae1a3 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -4228,11 +4228,11 @@ handle_shader_output_decl(struct nir_to_llvm_context *ctx, int length = glsl_get_length(variable->type); if (idx == VARYING_SLOT_CLIP_DIST0) { if (ctx->stage == MESA_SHADER_VERTEX) - ctx->shader_info->vs.clip_dist_mask = (1 << length) - 1; + ctx->shader_info->vs.outinfo.clip_dist_mask = (1 << length) - 1; ctx->num_output_clips = length; } else if (idx == VARYING_SLOT_CULL_DIST0) { if (ctx->stage == MESA_SHADER_VERTEX) - ctx->shader_info->vs.cull_dist_mask = (1 << length) - 1; + ctx->shader_info->vs.outinfo.cull_dist_mask = (1 << length) - 1; ctx->num_output_culls = length; } if (length > 4) @@ -4448,7 +4448,8 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx, } static void -handle_vs_outputs_post(struct nir_to_llvm_context *ctx) +handle_vs_outputs_post(struct nir_to_llvm_context *ctx, + struct ac_vs_output_info *outinfo) { uint32_t param_count = 0; unsigned target; @@ -4461,14 +4462,14 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx) (1ull << VARYING_SLOT_CULL_DIST0) | (1ull << VARYING_SLOT_CULL_DIST1)); - 
ctx->shader_info->vs.prim_id_output = 0x; - ctx->shader_info->vs.layer_output = 0x; + outinfo->prim_id_output = 0x; + outinfo->layer_output = 0x; if (clip_mask) { LLVMValueRef slots[8]; unsigned j; - if (ctx->shader_info->vs.cull_dist_mask) - ctx->shader_info->vs.cull_dist_mask <<= ctx->num_output_clips; + if (outinfo->cull_dist_mask) + outinfo->cull_dist_mask <<= ctx->num_output_clips; i = VARYING_SLOT_CLIP_DIST0; for (j = 0; j < ctx->num_output_clips; j++) @@ -4513,25 +4514,25 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx) i == VARYING_SLOT_CULL_DIST1) { continue; } else if (i == VARYING_SLOT_PSIZ) { - ctx->shader_info->vs.writes_pointsize = true; + outinfo->writes_pointsize = true; psize_value = values[0]; continue; } else if (i == VARYING_SLOT_LAYER) { - ctx->shader_info->vs.writes_layer = true; + outinfo->writes_layer = true; layer_value = values[0]; - ctx->shader_info->vs.layer_output = param_count; + outinfo->layer_output = param_count; target = V_008DFC_SQ_EXP_PARAM + param_count; param_count++; } else if (i == VARYING_SLOT_VIEWPORT) { - ctx->shader_info->vs.writes_viewport_index = true; + outinfo->writes_viewport_index = true; viewport_index_value = values[0]; continue; } else if (i == VARYING_SLOT_PRIMITIVE_ID) { - ctx->shader_info->vs.prim_id_output = param_count; + outinfo->prim_id_output = param_count; target = V_008DFC_SQ_EXP_PARAM + param_count; param_count++; } else if (i >= VARYING_SLOT_VAR0) { - ctx->shader_info->vs.export_mask |= 1u <<
Mesa (master): radv: move shader_z_format calculation to pipeline.
Module: Mesa Branch: master Commit: 4b467c759ea1e9d5960a5e668a166f33ef03e9d6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4b467c759ea1e9d5960a5e668a166f33ef03e9d6 Author: Dave AirlieDate: Tue Mar 28 11:34:46 2017 +1000 radv: move shader_z_format calculation to pipeline. No need to recalculate this every time. Reviewed-by: Bas Nieuwenhuizen Signed-off-by: Dave Airlie --- src/amd/vulkan/radv_cmd_buffer.c | 5 + src/amd/vulkan/radv_pipeline.c | 6 ++ src/amd/vulkan/radv_private.h| 1 + 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 8e35dc5299..c3b141ea3a 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -709,10 +709,7 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer, radeon_set_context_reg(cmd_buffer->cs, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl); radeon_set_context_reg(cmd_buffer->cs, R_028710_SPI_SHADER_Z_FORMAT, - ps->info.fs.writes_sample_mask ? V_028710_SPI_SHADER_32_ABGR : - ps->info.fs.writes_stencil ? V_028710_SPI_SHADER_32_GR : - ps->info.fs.writes_z ? V_028710_SPI_SHADER_32_R : - V_028710_SPI_SHADER_ZERO); + pipeline->graphics.shader_z_format); radeon_set_context_reg(cmd_buffer->cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format); diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 42e8abd84e..550b773e9a 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -1666,6 +1666,12 @@ radv_pipeline_init(struct radv_pipeline *pipeline, S_02880C_EXEC_ON_HIER_FAIL(ps->info.fs.writes_memory) | S_02880C_EXEC_ON_NOOP(ps->info.fs.writes_memory); + pipeline->graphics.shader_z_format = + ps->info.fs.writes_sample_mask ? V_028710_SPI_SHADER_32_ABGR : + ps->info.fs.writes_stencil ? V_028710_SPI_SHADER_32_GR : + ps->info.fs.writes_z ? 
V_028710_SPI_SHADER_32_R : + V_028710_SPI_SHADER_ZERO; + const VkPipelineVertexInputStateCreateInfo *vi_info = pCreateInfo->pVertexInputState; for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 8e45e95b77..dff0aef832 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -956,6 +956,7 @@ struct radv_pipeline { struct radv_raster_state raster; struct radv_multisample_state ms; uint32_t db_shader_control; + uint32_t shader_z_format; unsigned prim; unsigned gs_out; uint32_t vgt_gs_mode; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: move db_shader_control calculation to pipeline.
Module: Mesa Branch: master Commit: 8996fdbf61e5341c321c802278ee388ac5001f50 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8996fdbf61e5341c321c802278ee388ac5001f50 Author: Dave AirlieDate: Tue Mar 28 11:34:19 2017 +1000 radv: move db_shader_control calculation to pipeline. There is no need to recalculate this every time. Reviewed-by: Bas Nieuwenhuizen Signed-off-by: Dave Airlie --- src/amd/vulkan/radv_cmd_buffer.c | 16 +--- src/amd/vulkan/radv_pipeline.c | 19 ++- src/amd/vulkan/radv_private.h| 1 + 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 195a82fef5..8e35dc5299 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -674,7 +674,6 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer, unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1); struct radv_blend_state *blend = >graphics.blend; unsigned ps_offset = 0; - unsigned z_order; struct ac_vs_output_info *outinfo; assert (pipeline->shaders[MESA_SHADER_FRAGMENT]); @@ -692,21 +691,8 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer, radeon_emit(cmd_buffer->cs, ps->rsrc1); radeon_emit(cmd_buffer->cs, ps->rsrc2); - if (ps->info.fs.early_fragment_test || !ps->info.fs.writes_memory) - z_order = V_02880C_EARLY_Z_THEN_LATE_Z; - else - z_order = V_02880C_LATE_Z; - - radeon_set_context_reg(cmd_buffer->cs, R_02880C_DB_SHADER_CONTROL, - S_02880C_Z_EXPORT_ENABLE(ps->info.fs.writes_z) | - S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.fs.writes_stencil) | - S_02880C_KILL_ENABLE(!!ps->info.fs.can_discard) | - S_02880C_MASK_EXPORT_ENABLE(ps->info.fs.writes_sample_mask) | - S_02880C_Z_ORDER(z_order) | - S_02880C_DEPTH_BEFORE_SHADER(ps->info.fs.early_fragment_test) | - S_02880C_EXEC_ON_HIER_FAIL(ps->info.fs.writes_memory) | - S_02880C_EXEC_ON_NOOP(ps->info.fs.writes_memory)); + pipeline->graphics.db_shader_control); radeon_set_context_reg(cmd_buffer->cs, 
R_0286CC_SPI_PS_INPUT_ENA, ps->config.spi_ps_input_ena); diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 752986a9c5..42e8abd84e 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -1641,14 +1641,31 @@ radv_pipeline_init(struct radv_pipeline *pipeline, * * Don't add this to CB_SHADER_MASK. */ + struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT]; if (!pipeline->graphics.blend.spi_shader_col_format) { - struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT]; if (!ps->info.fs.writes_z && !ps->info.fs.writes_stencil && !ps->info.fs.writes_sample_mask) pipeline->graphics.blend.spi_shader_col_format = V_028714_SPI_SHADER_32_R; } + unsigned z_order; + pipeline->graphics.db_shader_control = 0; + if (ps->info.fs.early_fragment_test || !ps->info.fs.writes_memory) + z_order = V_02880C_EARLY_Z_THEN_LATE_Z; + else + z_order = V_02880C_LATE_Z; + + pipeline->graphics.db_shader_control = + S_02880C_Z_EXPORT_ENABLE(ps->info.fs.writes_z) | + S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.fs.writes_stencil) | + S_02880C_KILL_ENABLE(!!ps->info.fs.can_discard) | + S_02880C_MASK_EXPORT_ENABLE(ps->info.fs.writes_sample_mask) | + S_02880C_Z_ORDER(z_order) | + S_02880C_DEPTH_BEFORE_SHADER(ps->info.fs.early_fragment_test) | + S_02880C_EXEC_ON_HIER_FAIL(ps->info.fs.writes_memory) | + S_02880C_EXEC_ON_NOOP(ps->info.fs.writes_memory); + const VkPipelineVertexInputStateCreateInfo *vi_info = pCreateInfo->pVertexInputState; for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index dcd738a54f..8e45e95b77 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -955,6 +955,7 @@ struct radv_pipeline { struct radv_depth_stencil_state ds; struct radv_raster_state raster; struct radv_multisample_state ms; + uint32_t db_shader_control; unsigned prim;
Mesa (master): radv: move shader stages calculation to pipeline.
Module: Mesa Branch: master Commit: 239a9224a33d280cd5703c29ce6eb9df2eab9b3d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=239a9224a33d280cd5703c29ce6eb9df2eab9b3d Author: Dave AirlieDate: Tue Mar 28 12:59:17 2017 +1000 radv: move shader stages calculation to pipeline. With tess this becomes a bit more complex. so move to pipeline for now. Reviewed-by: Bas Nieuwenhuizen Signed-off-by: Dave Airlie --- src/amd/vulkan/radv_cmd_buffer.c | 9 + src/amd/vulkan/radv_pipeline.c | 9 - src/amd/vulkan/radv_private.h| 1 + 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 04c28d6a29..e994df65fd 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -1286,14 +1286,7 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer, } if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) { - uint32_t stages = 0; - - if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) - stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | - S_028B54_GS_EN(1) | - S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER); - - radeon_set_context_reg(cmd_buffer->cs, R_028B54_VGT_SHADER_STAGES_EN, stages); + radeon_set_context_reg(cmd_buffer->cs, R_028B54_VGT_SHADER_STAGES_EN, pipeline->graphics.vgt_shader_stages_en); if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) { radeon_set_context_reg_idx(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config); diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 252808d7a7..07020e8c38 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -1771,7 +1771,14 @@ radv_pipeline_init(struct radv_pipeline *pipeline, calculate_pa_cl_vs_out_cntl(pipeline); calculate_ps_inputs(pipeline); - + + uint32_t stages = 0; + if (radv_pipeline_has_gs(pipeline)) + stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | + S_028B54_GS_EN(1) | + S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER); + 
pipeline->graphics.vgt_shader_stages_en = stages; + const VkPipelineVertexInputStateCreateInfo *vi_info = pCreateInfo->pVertexInputState; for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 0b8c86df79..f587ee3ffd 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -965,6 +965,7 @@ struct radv_pipeline { uint32_t ps_input_cntl[32]; uint32_t ps_input_cntl_num; uint32_t pa_cl_vs_out_cntl; + uint32_t vgt_shader_stages_en; struct radv_prim_vertex_count prim_vertex_count; } graphics; }; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: move vgt_gs_mode value to pipeline.
Module: Mesa Branch: master Commit: cd33a5c1cb68d8c7e67f4724cc19bb92a405c796 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cd33a5c1cb68d8c7e67f4724cc19bb92a405c796 Author: Dave AirlieDate: Tue Mar 28 11:33:35 2017 +1000 radv: move vgt_gs_mode value to pipeline. No need to recalculate this everytime. Reviewed-by: Bas Nieuwenhuizen Signed-off-by: Dave Airlie --- src/amd/vulkan/radv_cmd_buffer.c | 29 +++-- src/amd/vulkan/radv_pipeline.c | 27 ++- src/amd/vulkan/radv_private.h| 1 + 3 files changed, 30 insertions(+), 27 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index ce34204b8a..195a82fef5 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -599,27 +599,6 @@ radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer, radeon_set_context_reg(cmd_buffer->cs, R_028A84_VGT_PRIMITIVEID_EN, 0); } -static uint32_t si_vgt_gs_mode(struct radv_shader_variant *gs) -{ - unsigned gs_max_vert_out = gs->info.gs.vertices_out; - unsigned cut_mode; - - if (gs_max_vert_out <= 128) { - cut_mode = V_028A40_GS_CUT_128; - } else if (gs_max_vert_out <= 256) { - cut_mode = V_028A40_GS_CUT_256; - } else if (gs_max_vert_out <= 512) { - cut_mode = V_028A40_GS_CUT_512; - } else { - assert(gs_max_vert_out <= 1024); - cut_mode = V_028A40_GS_CUT_1024; - } - - return S_028A40_MODE(V_028A40_GS_SCENARIO_G) | - S_028A40_CUT_MODE(cut_mode)| - S_028A40_ES_WRITE_OPTIMIZE(1) | - S_028A40_GS_WRITE_OPTIMIZE(1); -} static void radv_emit_geometry_shader(struct radv_cmd_buffer *cmd_buffer, @@ -629,13 +608,11 @@ radv_emit_geometry_shader(struct radv_cmd_buffer *cmd_buffer, struct radv_shader_variant *gs; uint64_t va; + radeon_set_context_reg(cmd_buffer->cs, R_028A40_VGT_GS_MODE, pipeline->graphics.vgt_gs_mode); + gs = pipeline->shaders[MESA_SHADER_GEOMETRY]; - if (!gs) { - radeon_set_context_reg(cmd_buffer->cs, R_028A40_VGT_GS_MODE, 0); + if (!gs) return; - } - - radeon_set_context_reg(cmd_buffer->cs, R_028A40_VGT_GS_MODE, 
si_vgt_gs_mode(gs)); uint32_t gsvs_itemsize = gs->info.gs.max_gsvs_emit_size >> 2; diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 2c710f4eb8..752986a9c5 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -1505,6 +1505,28 @@ static const struct radv_prim_vertex_count prim_size_table[] = { [V_008958_DI_PT_2D_TRI_STRIP] = {0, 0}, }; +static uint32_t si_vgt_gs_mode(struct radv_shader_variant *gs) +{ + unsigned gs_max_vert_out = gs->info.gs.vertices_out; + unsigned cut_mode; + + if (gs_max_vert_out <= 128) { + cut_mode = V_028A40_GS_CUT_128; + } else if (gs_max_vert_out <= 256) { + cut_mode = V_028A40_GS_CUT_256; + } else if (gs_max_vert_out <= 512) { + cut_mode = V_028A40_GS_CUT_512; + } else { + assert(gs_max_vert_out <= 1024); + cut_mode = V_028A40_GS_CUT_1024; + } + + return S_028A40_MODE(V_028A40_GS_SCENARIO_G) | + S_028A40_CUT_MODE(cut_mode)| + S_028A40_ES_WRITE_OPTIMIZE(1) | + S_028A40_GS_WRITE_OPTIMIZE(1); +} + VkResult radv_pipeline_init(struct radv_pipeline *pipeline, struct radv_device *device, @@ -1559,7 +1581,10 @@ radv_pipeline_init(struct radv_pipeline *pipeline, pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_GEOMETRY); calculate_gs_ring_sizes(pipeline); - } + + pipeline->graphics.vgt_gs_mode = si_vgt_gs_mode(pipeline->shaders[MESA_SHADER_GEOMETRY]); + } else + pipeline->graphics.vgt_gs_mode = 0; if (!modules[MESA_SHADER_FRAGMENT]) { nir_builder fs_b; diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 433cba7d28..dcd738a54f 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -957,6 +957,7 @@ struct radv_pipeline { struct radv_multisample_state ms; unsigned prim; unsigned gs_out; + uint32_t vgt_gs_mode; bool prim_restart_enable; unsigned esgs_ring_size; unsigned gsvs_ring_size; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: move calculating fragment shader i/os to pipeline.
Module: Mesa Branch: master Commit: 92e9c14a6a8d536404ef5b41217662bb2286d946 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=92e9c14a6a8d536404ef5b41217662bb2286d946 Author: Dave AirlieDate: Tue Mar 28 11:43:48 2017 +1000 radv: move calculating fragment shader i/os to pipeline. There is no need to calculate this on each command submit. Reviewed-by: Bas Nieuwenhuizen Signed-off-by: Dave Airlie --- src/amd/vulkan/radv_cmd_buffer.c | 67 +++-- src/amd/vulkan/radv_pipeline.c | 71 src/amd/vulkan/radv_private.h| 2 ++ 3 files changed, 77 insertions(+), 63 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index c3b141ea3a..92e68efa86 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -669,18 +669,13 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline) { struct radeon_winsys *ws = cmd_buffer->device->ws; - struct radv_shader_variant *ps, *vs; + struct radv_shader_variant *ps; uint64_t va; unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1); struct radv_blend_state *blend = >graphics.blend; - unsigned ps_offset = 0; - struct ac_vs_output_info *outinfo; assert (pipeline->shaders[MESA_SHADER_FRAGMENT]); ps = pipeline->shaders[MESA_SHADER_FRAGMENT]; - vs = radv_pipeline_has_gs(pipeline) ? 
pipeline->gs_copy_shader : pipeline->shaders[MESA_SHADER_VERTEX]; - - outinfo = &vs->info.vs.outinfo; va = ws->buffer_get_va(ps->bo); ws->cs_add_buffer(cmd_buffer->cs, ps->bo, 8); @@ -716,63 +711,9 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer, radeon_set_context_reg(cmd_buffer->cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask); radeon_set_context_reg(cmd_buffer->cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask); - if (ps->info.fs.has_pcoord) { - unsigned val; - val = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20); - radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val); - ps_offset++; - } - - if (ps->info.fs.prim_id_input && (outinfo->prim_id_output != 0xffffffff)) { - unsigned vs_offset, flat_shade; - unsigned val; - vs_offset = outinfo->prim_id_output; - flat_shade = true; - val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade); - radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val); - ++ps_offset; - } - - if (ps->info.fs.layer_input && (outinfo->layer_output != 0xffffffff)) { - unsigned vs_offset, flat_shade; - unsigned val; - vs_offset = outinfo->layer_output; - flat_shade = true; - val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade); - radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val); - ++ps_offset; - } - - for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) { - unsigned vs_offset, flat_shade; - unsigned val; - - if (!(ps->info.fs.input_mask & (1u << i))) - continue; - - - if (!(outinfo->export_mask & (1u << i))) { - radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, - S_028644_OFFSET(0x20)); - ++ps_offset; - continue; - } - - vs_offset = util_bitcount(outinfo->export_mask & ((1u << i) - 1)); - if (outinfo->prim_id_output != 0xffffffff) { - if (vs_offset >= outinfo->prim_id_output) - vs_offset++; - } - if (outinfo->layer_output != 0xffffffff) { - if (vs_offset >= 
outinfo->layer_output) - vs_offset++; - } - flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset)); - - val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade); - radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val); - ++ps_offset; - } + radeon_set_context_reg_seq(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0, pipeline->graphics.ps_input_cntl_num); + for (unsigned i = 0; i < pipeline->graphics.ps_input_cntl_num; i++) + radeon_emit(cmd_buffer->cs, pipeline->graphics.ps_input_cntl[i]); } static
Mesa (master): radv: handle NULL multisample state.
Module: Mesa Branch: master Commit: a8b8e542c2e9ea97413095993cee5ec8faf2ee16 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a8b8e542c2e9ea97413095993cee5ec8faf2ee16 Author: Dave Airlie Date: Tue Mar 28 05:48:27 2017 +1000 radv: handle NULL multisample state. If rasterization is disabled, we can get a NULL multisample state. Reviewed-by: Bas Nieuwenhuizen Signed-off-by: Dave Airlie --- src/amd/vulkan/radv_pipeline.c | 20 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 45277b94fa..1becb65055 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -1163,10 +1163,13 @@ radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline, int ps_iter_samples = 1; uint32_t mask = 0xffff; - ms->num_samples = vkms->rasterizationSamples; + if (vkms) + ms->num_samples = vkms->rasterizationSamples; + else + ms->num_samples = 1; if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.force_persample) { - ps_iter_samples = vkms->rasterizationSamples; + ps_iter_samples = ms->num_samples; } ms->pa_sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1); @@ -1184,8 +1187,8 @@ radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline, EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1); - if (vkms->rasterizationSamples > 1) { - unsigned log_samples = util_logbase2(vkms->rasterizationSamples); + if (ms->num_samples > 1) { + unsigned log_samples = util_logbase2(ms->num_samples); unsigned log_ps_iter_samples = util_logbase2(util_next_power_of_two(ps_iter_samples)); ms->pa_sc_mode_cntl_0 = S_028A48_MSAA_ENABLE(1); ms->pa_sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1); /* CM_R_028BDC_PA_SC_LINE_CNTL */ @@ -1199,11 +1202,12 @@ radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline, ms->pa_sc_mode_cntl_1 |= EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1); } - if (vkms->alphaToCoverageEnable) - blend->db_alpha_to_mask |= 
S_028B70_ALPHA_TO_MASK_ENABLE(1); + if (vkms) { + if (vkms->alphaToCoverageEnable) + blend->db_alpha_to_mask |= S_028B70_ALPHA_TO_MASK_ENABLE(1); - if (vkms->pSampleMask) { - mask = vkms->pSampleMask[0] & 0xffff; + if (vkms->pSampleMask) + mask = vkms->pSampleMask[0] & 0xffff; } ms->pa_sc_aa_mask[0] = mask | (mask << 16); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: fix ia_multi_vgt_param for instanced vs indirect draw.
Module: Mesa Branch: master Commit: ae0551b4b3f7ca79148f0cb8384c0f1efc3faac2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ae0551b4b3f7ca79148f0cb8384c0f1efc3faac2 Author: Dave Airlie Date: Tue Mar 28 05:53:50 2017 +1000 radv: fix ia_multi_vgt_param for instanced vs indirect draw. The logic was different than radeonsi, fix it up before adding tess support. Reviewed-by: Bas Nieuwenhuizen Signed-off-by: Dave Airlie --- src/amd/vulkan/radv_cmd_buffer.c | 13 +++-- src/amd/vulkan/radv_private.h| 3 ++- src/amd/vulkan/si_cmd_buffer.c | 12 ++-- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 1b13ae7bc6..eb2a7b0dde 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -1317,7 +1317,8 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, } static void -radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer, bool instanced_or_indirect_draw, +radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer, + bool instanced_draw, bool indirect_draw, uint32_t draw_vertex_count) { struct radv_pipeline *pipeline = cmd_buffer->state.pipeline; @@ -1382,7 +1383,7 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer, bool instanced_o if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR)) radv_emit_scissor(cmd_buffer); - ia_multi_vgt_param = si_get_ia_multi_vgt_param(cmd_buffer, instanced_or_indirect_draw, draw_vertex_count); + ia_multi_vgt_param = si_get_ia_multi_vgt_param(cmd_buffer, instanced_draw, indirect_draw, draw_vertex_count); if (cmd_buffer->state.last_ia_multi_vgt_param != ia_multi_vgt_param) { if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) radeon_set_context_reg_idx(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param); @@ -2296,7 +2297,7 @@ void radv_CmdDraw( { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - radv_cmd_buffer_flush_state(cmd_buffer, 
(instanceCount > 1), vertexCount); + radv_cmd_buffer_flush_state(cmd_buffer, (instanceCount > 1), false, vertexCount); MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 10); @@ -2347,7 +2348,7 @@ void radv_CmdDrawIndexed( uint32_t index_max_size = (cmd_buffer->state.index_buffer->size - cmd_buffer->state.index_offset) / index_size; uint64_t index_va; - radv_cmd_buffer_flush_state(cmd_buffer, (instanceCount > 1), indexCount); + radv_cmd_buffer_flush_state(cmd_buffer, (instanceCount > 1), false, indexCount); radv_emit_primitive_reset_index(cmd_buffer); MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 15); @@ -2445,7 +2446,7 @@ radv_cmd_draw_indirect_count(VkCommandBuffer command uint32_t stride) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - radv_cmd_buffer_flush_state(cmd_buffer, true, 0); + radv_cmd_buffer_flush_state(cmd_buffer, false, true, 0); MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 14); @@ -2470,7 +2471,7 @@ radv_cmd_draw_indexed_indirect_count( int index_size = cmd_buffer->state.index_type ? 
4 : 2; uint32_t index_max_size = (cmd_buffer->state.index_buffer->size - cmd_buffer->state.index_offset) / index_size; uint64_t index_va; - radv_cmd_buffer_flush_state(cmd_buffer, true, 0); + radv_cmd_buffer_flush_state(cmd_buffer, false, true, 0); radv_emit_primitive_reset_index(cmd_buffer); index_va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->state.index_buffer->bo); diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index e4654bb4d4..433cba7d28 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -759,7 +759,8 @@ void si_write_viewport(struct radeon_winsys_cs *cs, int first_vp, void si_write_scissors(struct radeon_winsys_cs *cs, int first, int count, const VkRect2D *scissors); uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, - bool instanced_or_indirect_draw, uint32_t draw_vertex_count); + bool instanced_draw, bool indirect_draw, + uint32_t draw_vertex_count); void si_cs_emit_cache_flush(struct radeon_winsys_cs *cs, enum chip_class chip_class, bool is_mec,
Mesa (master): radv: add parameter to emit_waitcnt.
Module: Mesa Branch: master Commit: d43691ce775ed7bd525b5d195cc6e17b7c15574e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d43691ce775ed7bd525b5d195cc6e17b7c15574e Author: Dave Airlie Date: Tue Mar 28 08:46:35 2017 +1000 radv: add parameter to emit_waitcnt. This is just a precursor for tess support, which needs to pass different values here. Reviewed-by: Bas Nieuwenhuizen Signed-off-by: Dave Airlie --- src/amd/common/ac_nir_to_llvm.c | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index cfbdeae1a3..5a25487a30 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -2818,10 +2818,15 @@ static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx, return res; } -static void emit_waitcnt(struct nir_to_llvm_context *ctx) +#define NOOP_WAITCNT 0xf7f +#define LGKM_CNT 0x07f +#define VM_CNT 0xf70 + +static void emit_waitcnt(struct nir_to_llvm_context *ctx, +unsigned simm16) { LLVMValueRef args[1] = { - LLVMConstInt(ctx->i32, 0xf70, false), + LLVMConstInt(ctx->i32, simm16, false), }; ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.s.waitcnt", ctx->voidt, args, 1, 0); @@ -3297,7 +3302,7 @@ static void visit_intrinsic(struct nir_to_llvm_context *ctx, emit_discard_if(ctx, instr); break; case nir_intrinsic_memory_barrier: - emit_waitcnt(ctx); + emit_waitcnt(ctx, VM_CNT); break; case nir_intrinsic_barrier: emit_barrier(ctx); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: move pa_cl_vs_out_cntl calculation to pipeline
Module: Mesa Branch: master Commit: 0232ea8025d3da65295c0af1b8f4ca8fc97a74dd URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0232ea8025d3da65295c0af1b8f4ca8fc97a74dd Author: Dave Airlie Date: Tue Mar 28 11:48:38 2017 +1000 radv: move pa_cl_vs_out_cntl calculation to pipeline This also takes the side band setting code from radeonsi. Reviewed-by: Bas Nieuwenhuizen Signed-off-by: Dave Airlie --- src/amd/vulkan/radv_cmd_buffer.c | 16 +--- src/amd/vulkan/radv_pipeline.c | 31 ++- src/amd/vulkan/radv_private.h| 2 +- 3 files changed, 32 insertions(+), 17 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 92e68efa86..04c28d6a29 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -540,23 +540,9 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer, S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) | S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1)); - unsigned clip_dist_mask, cull_dist_mask, total_mask; - clip_dist_mask = outinfo->clip_dist_mask; - cull_dist_mask = outinfo->cull_dist_mask; - total_mask = clip_dist_mask | cull_dist_mask; radeon_set_context_reg(cmd_buffer->cs, R_02881C_PA_CL_VS_OUT_CNTL, - S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) | - S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) | - S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) | - S_02881C_VS_OUT_MISC_VEC_ENA(outinfo->writes_pointsize || - outinfo->writes_layer || - outinfo->writes_viewport_index) | - S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) | - S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) | - pipeline->graphics.raster.pa_cl_vs_out_cntl | - cull_dist_mask << 8 | - clip_dist_mask); + pipeline->graphics.pa_cl_vs_out_cntl); radeon_set_context_reg(cmd_buffer->cs, R_028AB4_VGT_REUSE_OFF, S_028AB4_REUSE_OFF(outinfo->writes_viewport_index)); diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 
c7d74805a2..252808d7a7 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -1126,7 +1126,7 @@ radv_pipeline_init_raster_state(struct radv_pipeline *pipeline, S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) | S_0286D4_PNT_SPRITE_TOP_1(0); // vulkan is top to bottom - 1.0 at bottom - raster->pa_cl_vs_out_cntl = S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1); + raster->pa_cl_clip_cntl = S_028810_PS_UCP_MODE(3) | S_028810_DX_CLIP_SPACE_DEF(1) | // vulkan uses DX conventions. S_028810_ZCLIP_NEAR_DISABLE(vkraster->depthClampEnable ? 1 : 0) | @@ -1527,6 +1527,33 @@ static uint32_t si_vgt_gs_mode(struct radv_shader_variant *gs) S_028A40_GS_WRITE_OPTIMIZE(1); } +static void calculate_pa_cl_vs_out_cntl(struct radv_pipeline *pipeline) +{ + struct radv_shader_variant *vs; + vs = radv_pipeline_has_gs(pipeline) ? pipeline->gs_copy_shader : pipeline->shaders[MESA_SHADER_VERTEX]; + + struct ac_vs_output_info *outinfo = &vs->info.vs.outinfo; + + unsigned clip_dist_mask, cull_dist_mask, total_mask; + clip_dist_mask = outinfo->clip_dist_mask; + cull_dist_mask = outinfo->cull_dist_mask; + total_mask = clip_dist_mask | cull_dist_mask; + + bool misc_vec_ena = outinfo->writes_pointsize || + outinfo->writes_layer || + outinfo->writes_viewport_index; + pipeline->graphics.pa_cl_vs_out_cntl = + S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) | + S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) | + S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) | + S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) | + S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) | + S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) | + S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) | + cull_dist_mask << 8 | + clip_dist_mask; + +} static void calculate_ps_inputs(struct radv_pipeline *pipeline) { struct radv_shader_variant *ps, *vs; @@ -1742,7 +1769,9 @@ radv_pipeline_init(struct radv_pipeline *pipeline, ps->info.fs.writes_z ?