Re: [Mesa-dev] [PATCH 2/2] r600g/compute: Decrement map_count when unmapping items
On Thu, Aug 07, 2014 at 12:14:24PM +0200, Bruno Jiménez wrote: This patch adds a new struct: r600_transfer_global. It will act as a wrapper around an r600_resource_global and an r600_transfer. It will be used for calling r600_compute_global_transfer_unmap when transfer_unmap is called. And at the same time, keep all the transfer information, so we can call r600_buffer_transfer_unmap with the 'real' transfer. --- src/gallium/drivers/r600/evergreen_compute.c | 46 +--- src/gallium/drivers/r600/evergreen_compute.h | 5 +++ 2 files changed, 40 insertions(+), 11 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index f50f94a..ac72256 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -970,10 +970,16 @@ void *r600_compute_global_transfer_map( struct r600_resource_global* buffer = (struct r600_resource_global*)resource; + struct r600_transfer_global *trans = NULL; + uint8_t *data; + struct compute_memory_item *item = buffer-chunk; struct pipe_resource *dst = NULL; unsigned offset = box-x; + trans = CALLOC(1, sizeof(struct r600_transfer_global)); + trans-resource = resource; + if (is_item_in_pool(item)) { compute_memory_demote_item(pool, item, ctx_); } @@ -1004,8 +1010,11 @@ void *r600_compute_global_transfer_map( assert(box-z == 0); ///TODO: do it better, mapping is not possible if the pool is too big - return pipe_buffer_map_range(ctx_, dst, - offset, box-width, usage, ptransfer); + data = pipe_buffer_map_range(ctx_, dst, + offset, box-width, usage, trans-ptransfer); + + *ptransfer = (struct pipe_transfer *)trans; + return data; } void r600_compute_global_transfer_unmap( @@ -1013,16 +1022,31 @@ void r600_compute_global_transfer_unmap( struct pipe_transfer* transfer) { /* struct r600_resource_global are not real resources, they just map - * to an offset within the compute memory pool. 
The function - * r600_compute_global_transfer_map() maps the memory pool - * resource rather than the struct r600_resource_global passed to - * it as an argument and then initalizes ptransfer-resource with - * the memory pool resource (via pipe_buffer_map_range). - * When transfer_unmap is called it uses the memory pool's - * vtable which calls r600_buffer_transfer_map() rather than - * this function. + * to an offset within the compute memory pool. The function + * r600_compute_global_transfer_map() creates a struct + * r600_transfer_global, which has as resource an r600_global_resource + * and an r600_transfer which will act as the 'real' pipe_transfer + * that will be passed to pipe_buffer_map_range. + * + * This allows us to use an r600_resource_global vtable when transfer_unmap + * is called, and still have the full information about the transfer, + * which will be used to actually unmap the resource. */ - assert (!This function should not be called); + + struct r600_context *rctx = (struct r600_context *)ctx_; + struct r600_transfer_global *trans = + (struct r600_transfer_global *)transfer; + struct r600_resource_global *buffer = + (struct r600_resource_global *)trans-resource; + struct compute_memory_item *item = buffer-chunk; + + COMPUTE_DBG(rctx-screen, * r600_compute_global_transfer_unmap()\n + Unmaping Buffer: %u\n, item-id); + + ctx_-transfer_unmap(ctx_, trans-ptransfer); + item-map_count--; + + FREE(trans); } void r600_compute_global_transfer_flush_region( diff --git a/src/gallium/drivers/r600/evergreen_compute.h b/src/gallium/drivers/r600/evergreen_compute.h index 4fb53a1..842e5e4 100644 --- a/src/gallium/drivers/r600/evergreen_compute.h +++ b/src/gallium/drivers/r600/evergreen_compute.h @@ -38,6 +38,11 @@ struct r600_resource_global { struct compute_memory_item *chunk; }; +struct r600_transfer_global { + struct pipe_resource *resource; + struct pipe_transfer *ptransfer; This still looks wrong. 
ptransfer should be the first member, and it should not be a pointer. -Tom +}; + void *evergreen_create_compute_state(struct pipe_context *ctx, const struct pipe_compute_state *cso); void evergreen_delete_compute_state(struct pipe_context *ctx, void *state); void evergreen_compute_upload_input(struct pipe_context *context, const uint *block_layout, const uint *grid_layout, const void *input); -- 2.0.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallium/r300: Fix a link error in the tests
On Tue, Aug 12, 2014 at 11:14:06AM -0700, Jason Ekstrand wrote: The link error occurs because the static libraries are linked in the wrong order. This fixes it. Reviewed-by: Tom Stellard thomas.stell...@amd.com Signed-off-by: Jason Ekstrand jason.ekstr...@intel.com Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82483 --- src/gallium/drivers/r300/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/r300/Makefile.am b/src/gallium/drivers/r300/Makefile.am index ae6e8d2..e74e445 100644 --- a/src/gallium/drivers/r300/Makefile.am +++ b/src/gallium/drivers/r300/Makefile.am @@ -18,8 +18,8 @@ check_PROGRAMS = r300_compiler_tests TESTS = r300_compiler_tests r300_compiler_tests_LDADD = libr300.la libr300-helper.la \ - $(top_builddir)/src/util/libmesautil.la \ $(top_builddir)/src/gallium/auxiliary/libgallium.la \ + $(top_builddir)/src/util/libmesautil.la \ $(GALLIUM_COMMON_LIB_DEPS) r300_compiler_tests_CPPFLAGS = \ -I$(top_srcdir)/src/gallium/drivers/r300/compiler -- 2.0.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/7] radeonsi/compute: Call si_pm4_free_state() after emitting compute state
This will decrement the reference count for buffers referenced in the command stream, which will prevent us from leaking them. CC: 10.2 mesa-sta...@lists.freedesktop.org --- src/gallium/drivers/radeonsi/si_compute.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 482d475..e8fc8eb 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -374,8 +374,8 @@ static void si_launch_grid( } #endif - FREE(pm4); FREE(kernel_args); + si_pm4_free_state(sctx, pm4, ~0); } -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 7/7] clover: Flush the command queue in clReleaseCommandQueue()
This is required by the spec. CC: 10.2 mesa-sta...@lists.freedesktop.org --- src/gallium/state_trackers/clover/api/queue.cpp | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/clover/api/queue.cpp b/src/gallium/state_trackers/clover/api/queue.cpp index a136018..06a2863 100644 --- a/src/gallium/state_trackers/clover/api/queue.cpp +++ b/src/gallium/state_trackers/clover/api/queue.cpp @@ -58,7 +58,11 @@ clRetainCommandQueue(cl_command_queue d_q) try { CLOVER_API cl_int clReleaseCommandQueue(cl_command_queue d_q) try { - if (obj(d_q).release()) + auto q = obj(d_q); + + q.flush(); + + if (q.release()) delete pobj(d_q); return CL_SUCCESS; -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/7] radeonsi/compute: Stop leaking the input buffer
We were leaking the input buffer used for kernel arguments and since we were allocating it using si_upload_const_buffer() we were leaking 1 MB per kernel invocation. CC: 10.2 mesa-sta...@lists.freedesktop.org --- src/gallium/drivers/radeonsi/si_compute.c | 22 ++ 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index dff5ddd..01aa0c6 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -48,6 +48,7 @@ struct si_pipe_compute { struct si_pipe_shader *kernels; unsigned num_user_sgprs; + struct r600_resource *input_buffer; struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS]; LLVMContextRef llvm_ctx; @@ -85,6 +86,9 @@ static void *si_create_compute_state( LLVMDisposeModule(mod); } + program-input_buffer = si_resource_create_custom(sctx-b.b.screen, + PIPE_USAGE_IMMUTABLE, program-input_size); + return program; } @@ -167,7 +171,7 @@ static void si_launch_grid( struct si_context *sctx = (struct si_context*)ctx; struct si_pipe_compute *program = sctx-cs_shader_state.program; struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); - struct r600_resource *kernel_args_buffer = NULL; + struct r600_resource *input_buffer = program-input_buffer; unsigned kernel_args_size; unsigned num_work_size_bytes = 36; uint32_t kernel_args_offset = 0; @@ -199,7 +203,8 @@ static void si_launch_grid( /* The extra num_work_size_bytes are for work group / work item size information */ kernel_args_size = program-input_size + num_work_size_bytes + 8 /* For scratch va */; - kernel_args = MALLOC(kernel_args_size); + kernel_args = sctx-b.ws-buffer_map(input_buffer-cs_buf, + sctx-b.rings.gfx.cs, PIPE_TRANSFER_WRITE); for (i = 0; i 3; i++) { kernel_args[i] = grid_layout[i]; kernel_args[i + 3] = grid_layout[i] * block_layout[i]; @@ -236,13 +241,13 @@ static void si_launch_grid( kernel_args[i]); } - si_upload_const_buffer(sctx, kernel_args_buffer, 
(uint8_t*)kernel_args, - kernel_args_size, kernel_args_offset); - kernel_args_va = r600_resource_va(ctx-screen, - (struct pipe_resource*)kernel_args_buffer); + sctx-b.ws-buffer_unmap(input_buffer-cs_buf); + + kernel_args_va = r600_resource_va(ctx-screen, input_buffer-b.b); kernel_args_va += kernel_args_offset; - si_pm4_add_bo(pm4, kernel_args_buffer, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); + si_pm4_add_bo(pm4, input_buffer, RADEON_USAGE_READ, + RADEON_PRIO_SHADER_DATA); si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va); si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4, S_008F04_BASE_ADDRESS_HI (kernel_args_va 32) | S_008F04_STRIDE(0)); @@ -374,7 +379,6 @@ static void si_launch_grid( } #endif - FREE(kernel_args); si_pm4_free_state(sctx, pm4, ~0); } @@ -398,6 +402,8 @@ static void si_delete_compute_state(struct pipe_context *ctx, void* state){ if (program-llvm_ctx){ LLVMContextDispose(program-llvm_ctx); } + pipe_resource_reference( + (struct pipe_resource **)program-input_buffer, NULL); //And then free the program itself. FREE(program); -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/7] radeon/compute: Fix reported values for MAX_GLOBAL_SIZE and MAX_MEM_ALLOC_SIZE
There is a hard limit in older kernels of 256 MB for buffer allocations, so report this value as MAX_MEM_ALLOC_SIZE and adjust MAX_GLOBAL_SIZE to satisfy requirements of OpenCL. CC: 10.2 mesa-sta...@lists.freedesktop.org --- src/gallium/drivers/radeon/r600_pipe_common.c | 32 --- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 3476021..0886b02 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -474,13 +474,21 @@ static int r600_get_compute_param(struct pipe_screen *screen, case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: if (ret) { uint64_t *max_global_size = ret; - /* XXX: This is what the proprietary driver reports, we -* may want to use a different value. */ - /* XXX: Not sure what to put here for SI. */ - if (rscreen-chip_class = SI) - *max_global_size = 20; - else - *max_global_size = 201326592; + uint64_t max_mem_alloc_size; + + r600_get_compute_param(screen, + PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, + max_mem_alloc_size); + + /* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least +* 1/4 of the MAX_GLOBAL_SIZE. Since the +* MAX_MEM_ALLOC_SIZE is fixed for older kernels, +* make sure we never report more than +* 4 * MAX_MEM_ALLOC_SIZE. +*/ + *max_global_size = MIN2(4 * max_mem_alloc_size, + rscreen-info.gart_size + + rscreen-info.vram_size); } return sizeof(uint64_t); @@ -504,13 +512,11 @@ static int r600_get_compute_param(struct pipe_screen *screen, if (ret) { uint64_t max_global_size; uint64_t *max_mem_alloc_size = ret; - r600_get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE, max_global_size); - /* OpenCL requres this value be at least -* max(MAX_GLOBAL_SIZE / 4, 128 * 1024 *1024) -* I'm really not sure what value to report here, but -* MAX_GLOBAL_SIZE / 4 seems resonable. + + /* XXX: The limit in older kernels is 256 MB. We +* should add a query here for newer kernels. 
*/ - *max_mem_alloc_size = max_global_size / 4; + *max_mem_alloc_size = 256 * 1024 * 1024; } return sizeof(uint64_t); -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/7] radeonsi/compute: Whitespace fixes
CC: 10.2 mesa-sta...@lists.freedesktop.org --- src/gallium/drivers/radeonsi/si_compute.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index e8fc8eb..dff5ddd 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -48,7 +48,7 @@ struct si_pipe_compute { struct si_pipe_shader *kernels; unsigned num_user_sgprs; -struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS]; + struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS]; LLVMContextRef llvm_ctx; }; @@ -392,7 +392,6 @@ static void si_delete_compute_state(struct pipe_context *ctx, void* state){ si_pipe_shader_destroy(ctx, program-kernels[i]); } } - FREE(program-kernels); } -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/7] radeon/compute: Report a value for PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE
CC: 10.2 mesa-sta...@lists.freedesktop.org --- src/gallium/drivers/r600/r600_pipe.c | 11 ++- src/gallium/drivers/radeonsi/si_pipe.c | 7 +++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index a08e70e..7ace671 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -421,7 +421,16 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e /* XXX Isn't this equal to TEMPS? */ return 1; /* Max native address registers */ case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: - return R600_MAX_CONST_BUFFER_SIZE; + if (shader == PIPE_SHADER_COMPUTE) { + uint64_t max_const_buffer_size; + pscreen-get_compute_param(pscreen, + PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, + max_const_buffer_size); + return max_const_buffer_size; + + } else { + return R600_MAX_CONST_BUFFER_SIZE; + } case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: return R600_MAX_USER_CONST_BUFFERS; case PIPE_SHADER_CAP_MAX_PREDS: diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 635b37d..791838f 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -327,6 +327,13 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu case PIPE_SHADER_CAP_DOUBLES: return 0; /* XXX: Enable doubles once the compiler can handle them. */ + case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: { + uint64_t max_const_buffer_size; + pscreen-get_compute_param(pscreen, + PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, + max_const_buffer_size); + return max_const_buffer_size; + } default: return 0; } -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/7] radeonsi/compute: Update reference counts for buffers in si_set_global_binding()
CC: 10.2 mesa-sta...@lists.freedesktop.org --- src/gallium/drivers/radeonsi/si_compute.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 42e4fec..482d475 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -105,7 +105,7 @@ static void si_set_global_binding( if (!resources) { for (i = first; i first + n; i++) { - program-global_buffers[i] = NULL; + pipe_resource_reference(program-global_buffers[i], NULL); } return; } @@ -113,7 +113,7 @@ static void si_set_global_binding( for (i = first; i first + n; i++) { uint64_t va; uint32_t offset; - program-global_buffers[i] = resources[i]; + pipe_resource_reference(program-global_buffers[i], resources[i]); va = r600_resource_va(ctx-screen, resources[i]); offset = util_le32_to_cpu(*handles[i]); va += offset; -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] radeonsi/compute: Memory usage fixes
Hi, This series contains fixes for applications which allocate large amounts of memory. The first two patches fix the values reported for PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE, PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, and PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE so that applications don't allocate more memory than is available. The next five patches eliminate some GPU buffer leaks which should fix long running applications that launch a lot of kernels. Please Review. Thanks, Tom ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/3] cl workdim v2
On Thu, Aug 07, 2014 at 04:02:40PM +0300, Francisco Jerez wrote: Jan Vesely jan.ves...@rutgers.edu writes: This respin includes Francisco's approach of providing implicit in the arg vector passed from clover, and Tom's idea of appending implicit args after the kernel args. Hmmm... Maybe it would make sense to add some sort of versioning (e.g. as part of the target triple) to the binary interface between clover and the kernel instead, so we can handle this sort of non-backwards compatible changes and the compiler back-end and libclc have some way to find out whether some specific feature is available and e.g. some specific extension should be enabled. I was thinking the way to do this would be to use calling conventions on the kernel functions to specify which binary interface to use. However, I don't want to change the binary interface right now, because it is still missing a lot of things, and I don't want to have to change it every time we add something new. I think we should keep the current interface of: Offset | Data -|-- 0: Kernel Arguments sizeof(Kernel Inputs): work_dim sizeof(Kernel Inputs) + 4: ... We can always revisit this once clover is more mature and we think we have a binary interface that won't change. Although, personally I prefer adding implicit inputs to the end of the kernel arguments rather than having them somewhere else. -Tom I assumed it's not safe to modify exec.input, so the input vector is copied before appending work dim. Why wouldn't it be safe? You just need to make sure they're appended before the compute state is created. Passes get-work-dim piglit on turks without any regression, I have not tested SI as I don't have the hw. 
jan Jan Vesely (3): gallium: Pass input data size to launch_grid clover: Add work dimension implicit param to input r600,radeonsi: Copy implicit args provided by clover src/gallium/drivers/ilo/ilo_gpgpu.c | 2 +- src/gallium/drivers/nouveau/nvc0/nvc0_compute.c | 2 +- src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 4 +- src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 2 +- src/gallium/drivers/r600/evergreen_compute.c | 14 +- src/gallium/drivers/r600/evergreen_compute.h | 1 - src/gallium/drivers/radeonsi/si_compute.c | 6 +- src/gallium/include/pipe/p_context.h | 2 +- src/gallium/state_trackers/clover/core/kernel.cpp | 162 -- src/gallium/tests/trivial/compute.c | 40 +++--- 10 files changed, 122 insertions(+), 113 deletions(-) -- 1.9.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Is the r600 llvm backend supposed to behave exactly the same as the radeonsi llvm?
On Fri, Aug 08, 2014 at 04:53:46AM +0300, Kertesz Laszlo wrote: Is the r600 llvm backend supposed to behave exactly the same as the radeonsi llvm? Support for r600 in LLVM is experimental, but for radeonsi it should work well. -Tom -- O zi buna, Kertesz Laszlo ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] r300/compiler: recursive look for RC_OPCODE_S**
On Fri, Aug 01, 2014 at 03:55:56PM +0200, Marek Olšák wrote: From: David Heidelberger david.heidelber...@ixit.cz Get rid of error Failed to build loop info by fixing failure in cases like 4: SGE temp[2].x, temp[0]., const[0].; 5: CMP temp[1].x, -temp[2]., const[0]., temp[1].; 6: IF temp[1].; On RS690 - fixes piglit glean do-loop with continue and break - changes error from Failed to build loop info - Not a native swizzle: 0e89 r300_fragprog_emit.c::begin_tex(): Too many texture indirections for discard statement in for loop - hide Failed to build loop info for precision log2, while-loop with continue, for-loop with continue and return 1 1 1 1 instead of 0 0 0 1 Signed-off-by: David Heidelberger david.heidelber...@ixit.cz --- I'm sending this on behalf of David. I'll commit it soon if there is no review. .../drivers/r300/compiler/radeon_emulate_loops.c | 27 +- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c index 91ed9d2..d1fae9a 100644 --- a/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c +++ b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c @@ -376,18 +376,23 @@ static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop, loop-Brk = ptr; loop-If = ptr-Prev; loop-EndIf = ptr-Next; - switch(loop-If-Prev-U.I.Opcode){ - case RC_OPCODE_SLT: - case RC_OPCODE_SGE: - case RC_OPCODE_SGT: - case RC_OPCODE_SLE: - case RC_OPCODE_SEQ: - case RC_OPCODE_SNE: - break; - default: - return 0; - } + loop-Cond = loop-If-Prev; + while (loop-Cond-U.I.Opcode != RC_OPCODE_BGNLOOP) { + switch(loop-Cond-U.I.Opcode){ + case RC_OPCODE_SLT: + case RC_OPCODE_SGE: + case RC_OPCODE_SGT: + case RC_OPCODE_SLE: + case RC_OPCODE_SEQ: + case RC_OPCODE_SNE: + goto found; + default: You can't just look for any S* instruction here, you need to look for the one that is defining the register used by IF. 
I know I posted a patch that does this, but I forgot what the problem was with it. -Tom + loop-Cond = loop-Cond-Prev; + break; + } + } + found: break; case RC_OPCODE_ENDLOOP: -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] r600g: Pass dimension parameter to compute shader.
On Wed, Jul 30, 2014 at 07:11:35PM -0400, Jan Vesely wrote: Make the function static. No need to cc llvm-commits on these mesa patches. Reviewers follow both lists. This needs corresponding change in LLVM otherwise it breaks parameter passing CC: Tom Stellard t...@stellard.net CC: Matt Arsenault matthew.arsena...@amd.com Signed-off-by: Jan Vesely jan.ves...@rutgers.edu --- src/gallium/drivers/r600/evergreen_compute.c | 26 +- src/gallium/drivers/r600/evergreen_compute.h | 1 - 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 3928676..150bc5c 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -266,24 +266,31 @@ static void evergreen_bind_compute_state(struct pipe_context *ctx_, void *state) * DWORDS 3-5: Number of global work items in each dimension (x,y,z) * DWORDS 6-8: Number of work items within each work group in each dimension * (x,y,z) - * DWORDS 9+ : Kernel parameters + * DWORD 9 : work dimension (needs new enough llvm) + * DWORDS 10+: Kernel parameters I would prefer to add new parameters after the kernel arguments, so we don't need to break compatibility with LLVM every time we add a new parameter. -Tom */ -void evergreen_compute_upload_input( +static void evergreen_compute_upload_input( struct pipe_context *ctx_, const uint *block_layout, const uint *grid_layout, - const void *input) + const void *input, + unsigned dimensions) { struct r600_context *ctx = (struct r600_context *)ctx_; struct r600_pipe_compute *shader = ctx-cs_shader_state.shader; unsigned i; - /* We need to reserve 9 dwords (36 bytes) for implicit kernel + /* We need to reserve 10 dwords (40 bytes) for implicit kernel * parameters. 
*/ +#if HAVE_LLVM = 0x0306 + unsigned input_size = shader-input_size + 40; +#else unsigned input_size = shader-input_size + 36; +#endif uint32_t * num_work_groups_start; uint32_t * global_size_start; uint32_t * local_size_start; + uint32_t * work_dim; uint32_t * kernel_parameters_start; struct pipe_box box; struct pipe_transfer *transfer = NULL; @@ -306,7 +313,14 @@ void evergreen_compute_upload_input( box, transfer); global_size_start = num_work_groups_start + (3 * (sizeof(uint) /4)); local_size_start = global_size_start + (3 * (sizeof(uint)) / 4); +#if HAVE_LLVM = 0x0306 + work_dim = local_size_start + (3 * (sizeof(uint)) / 4); + work_dim[0] = dimensions; + + kernel_parameters_start = work_dim + (1 * (sizeof(uint)) / 4); +#else kernel_parameters_start = local_size_start + (3 * (sizeof(uint)) / 4); +#endif /* Copy the work group size */ memcpy(num_work_groups_start, grid_layout, 3 * sizeof(uint)); @@ -319,6 +333,7 @@ void evergreen_compute_upload_input( /* Copy the local dimensions */ memcpy(local_size_start, block_layout, 3 * sizeof(uint)); + /* Copy the kernel inputs */ memcpy(kernel_parameters_start, input, shader-input_size); @@ -584,7 +599,8 @@ static void evergreen_launch_grid( #endif shader-active_kernel = kernel; ctx-cs_shader_state.kernel_index = pc; - evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input); + evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input, +dimensions); compute_emit_cs(ctx, block_layout, grid_layout); } diff --git a/src/gallium/drivers/r600/evergreen_compute.h b/src/gallium/drivers/r600/evergreen_compute.h index 4fb53a1..570ab2a 100644 --- a/src/gallium/drivers/r600/evergreen_compute.h +++ b/src/gallium/drivers/r600/evergreen_compute.h @@ -40,7 +40,6 @@ struct r600_resource_global { void *evergreen_create_compute_state(struct pipe_context *ctx, const struct pipe_compute_state *cso); void evergreen_delete_compute_state(struct pipe_context *ctx, void *state); -void evergreen_compute_upload_input(struct 
pipe_context *context, const uint *block_layout, const uint *grid_layout, const void *input); void evergreen_init_atom_start_compute_cs(struct r600_context *rctx); void evergreen_init_compute_state_functions(struct r600_context *rctx); void evergreen_emit_cs_shader(struct r600_context *rctx, struct r600_atom * atom); -- 1.9.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] r600g: Pass dimension parameter to compute shader.
On Thu, Jul 31, 2014 at 01:28:45PM -0400, Jan Vesely wrote: On Thu, 2014-07-31 at 11:02 -0400, Tom Stellard wrote: On Wed, Jul 30, 2014 at 07:11:35PM -0400, Jan Vesely wrote: Make the function static. No need to cc llvm-commits on these mesa patches. Reviewers follow both lists. sorry about that This needs corresponding change in LLVM otherwise it breaks parameter passing CC: Tom Stellard t...@stellard.net CC: Matt Arsenault matthew.arsena...@amd.com Signed-off-by: Jan Vesely jan.ves...@rutgers.edu --- src/gallium/drivers/r600/evergreen_compute.c | 26 +- src/gallium/drivers/r600/evergreen_compute.h | 1 - 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 3928676..150bc5c 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -266,24 +266,31 @@ static void evergreen_bind_compute_state(struct pipe_context *ctx_, void *state) * DWORDS 3-5: Number of global work items in each dimension (x,y,z) * DWORDS 6-8: Number of work items within each work group in each dimension * (x,y,z) - * DWORDS 9+ : Kernel parameters + * DWORD 9 : work dimension (needs new enough llvm) + * DWORDS 10+: Kernel parameters I would prefer to add new parameters after the kernel arguments, so we don't need to break compatibility with LLVM every time we add a new parameter. How would this work with multiple kernel in one module? Is it possible to know what kernel is the lowered code going to end up in? or did you have a static offset in mind? I was thinking of using a static offset from the end of the kernel arguments. So KernelArgSize + 0 would be number of dimensions and it would be available to all kernels. Then the compiler could lower any use of get_dims to a load from the input buffer at the correct offset. 
-Tom jan -Tom */ -void evergreen_compute_upload_input( +static void evergreen_compute_upload_input( struct pipe_context *ctx_, const uint *block_layout, const uint *grid_layout, - const void *input) + const void *input, + unsigned dimensions) { struct r600_context *ctx = (struct r600_context *)ctx_; struct r600_pipe_compute *shader = ctx-cs_shader_state.shader; unsigned i; - /* We need to reserve 9 dwords (36 bytes) for implicit kernel + /* We need to reserve 10 dwords (40 bytes) for implicit kernel * parameters. */ +#if HAVE_LLVM = 0x0306 + unsigned input_size = shader-input_size + 40; +#else unsigned input_size = shader-input_size + 36; +#endif uint32_t * num_work_groups_start; uint32_t * global_size_start; uint32_t * local_size_start; + uint32_t * work_dim; uint32_t * kernel_parameters_start; struct pipe_box box; struct pipe_transfer *transfer = NULL; @@ -306,7 +313,14 @@ void evergreen_compute_upload_input( box, transfer); global_size_start = num_work_groups_start + (3 * (sizeof(uint) /4)); local_size_start = global_size_start + (3 * (sizeof(uint)) / 4); +#if HAVE_LLVM = 0x0306 + work_dim = local_size_start + (3 * (sizeof(uint)) / 4); + work_dim[0] = dimensions; + + kernel_parameters_start = work_dim + (1 * (sizeof(uint)) / 4); +#else kernel_parameters_start = local_size_start + (3 * (sizeof(uint)) / 4); +#endif /* Copy the work group size */ memcpy(num_work_groups_start, grid_layout, 3 * sizeof(uint)); @@ -319,6 +333,7 @@ void evergreen_compute_upload_input( /* Copy the local dimensions */ memcpy(local_size_start, block_layout, 3 * sizeof(uint)); + /* Copy the kernel inputs */ memcpy(kernel_parameters_start, input, shader-input_size); @@ -584,7 +599,8 @@ static void evergreen_launch_grid( #endif shader-active_kernel = kernel; ctx-cs_shader_state.kernel_index = pc; - evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input); + evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input, +dimensions); compute_emit_cs(ctx, block_layout, 
grid_layout); } diff --git a/src/gallium/drivers/r600/evergreen_compute.h b/src/gallium/drivers/r600/evergreen_compute.h index 4fb53a1..570ab2a 100644 --- a/src/gallium/drivers/r600/evergreen_compute.h +++ b/src/gallium/drivers/r600/evergreen_compute.h @@ -40,7 +40,6 @@ struct r600_resource_global { void *evergreen_create_compute_state(struct pipe_context *ctx, const struct pipe_compute_state *cso); void evergreen_delete_compute_state(struct pipe_context *ctx, void *state); -void evergreen_compute_upload_input(struct pipe_context *context, const uint *block_layout
Re: [Mesa-dev] Clover Platform Naming
On Tue, Jul 29, 2014 at 09:50:23AM -0500, Aaron Watry wrote: Hi list, I was starting to look into getting cppamp-driver-ng working with mesa/clover, and I quickly ran into a question... cppamp-driver-ng explicitly lists which platforms are supported in its OpenCL back-end's source code. I went to add in the Mesa/Clover entries, and discovered that we're actually reporting ourselves as: CL_PLATFORM_VENDOR=Mesa CL_PLATFORM_NAME=Default What do other implementations return for CL_PLATFORM_NAME? -Tom The way that cppamp-driver-ng is written, they don't really care about the vendor so much as the platform name, and "Default" is about as generic as you can get. At this point in the game, are we stuck with this platform name, or would we be able to change this to "Clover" (or something more appropriate)? Just wanted to ask around and see what people thought. --Aaron ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 1/2] r600g/compute: Add documentation to compute_memory_pool
On Sun, Jul 27, 2014 at 01:56:15PM +0200, Bruno Jiménez wrote: v2: Rebased on top of master I've pushed both of these patches, thanks! -Tom --- src/gallium/drivers/r600/compute_memory_pool.c | 59 +- src/gallium/drivers/r600/compute_memory_pool.h | 58 - 2 files changed, 86 insertions(+), 31 deletions(-) diff --git a/src/gallium/drivers/r600/compute_memory_pool.c b/src/gallium/drivers/r600/compute_memory_pool.c index d53587f..928618c 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.c +++ b/src/gallium/drivers/r600/compute_memory_pool.c @@ -44,7 +44,7 @@ #define ITEM_ALIGNMENT 1024 /** - * Creates a new pool + * Creates a new pool. */ struct compute_memory_pool* compute_memory_pool_new( struct r600_screen * rscreen) @@ -66,6 +66,12 @@ struct compute_memory_pool* compute_memory_pool_new( return pool; } +/** + * Initializes the pool with a size of \a initial_size_in_dw. + * \param pool The pool to be initialized. + * \param initial_size_in_dw The initial size. + * \see compute_memory_grow_defrag_pool + */ static void compute_memory_pool_init(struct compute_memory_pool * pool, unsigned initial_size_in_dw) { @@ -79,7 +85,7 @@ static void compute_memory_pool_init(struct compute_memory_pool * pool, } /** - * Frees all stuff in the pool and the pool struct itself too + * Frees all stuff in the pool and the pool struct itself too. */ void compute_memory_pool_delete(struct compute_memory_pool* pool) { @@ -94,7 +100,9 @@ void compute_memory_pool_delete(struct compute_memory_pool* pool) /** * Searches for an empty space in the pool, return with the pointer to the - * allocatable space in the pool, returns -1 on failure. + * allocatable space in the pool. + * \param size_in_dw The size of the space we are looking for. + * \return -1 on failure */ int64_t compute_memory_prealloc_chunk( struct compute_memory_pool* pool, @@ -126,6 +134,8 @@ int64_t compute_memory_prealloc_chunk( /** * Search for the chunk where we can link our new chunk after it. 
+ * \param start_in_dw The position of the item we want to add to the pool. + * \return The item that is just before the passed position */ struct list_head *compute_memory_postalloc_chunk( struct compute_memory_pool* pool, @@ -166,8 +176,9 @@ struct list_head *compute_memory_postalloc_chunk( } /** - * Reallocates pool, conserves data. - * @returns -1 if it fails, 0 otherwise + * Reallocates and defragments the pool, conserves data. + * \returns -1 if it fails, 0 otherwise + * \see compute_memory_finalize_pending */ int compute_memory_grow_defrag_pool(struct compute_memory_pool *pool, struct pipe_context *pipe, int new_size_in_dw) @@ -234,6 +245,8 @@ int compute_memory_grow_defrag_pool(struct compute_memory_pool *pool, /** * Copy pool from device to host, or host to device. + * \param device_to_host 1 for device-host, 0 for host-device + * \see compute_memory_grow_defrag_pool */ void compute_memory_shadow(struct compute_memory_pool* pool, struct pipe_context * pipe, int device_to_host) @@ -251,8 +264,10 @@ void compute_memory_shadow(struct compute_memory_pool* pool, } /** - * Allocates pending allocations in the pool - * @returns -1 if it fails, 0 otherwise + * Moves all the items marked for promotion from the \a unallocated_list + * to the \a item_list. + * \return -1 if it fails, 0 otherwise + * \see evergreen_set_global_binding */ int compute_memory_finalize_pending(struct compute_memory_pool* pool, struct pipe_context * pipe) @@ -323,6 +338,9 @@ int compute_memory_finalize_pending(struct compute_memory_pool* pool, /** * Defragments the pool, so that there's no gap between items. 
* \param pool The pool to be defragmented + * \param srcThe origin resource + * \param dstThe destination resource + * \see compute_memory_grow_defrag_pool and compute_memory_finalize_pending */ void compute_memory_defrag(struct compute_memory_pool *pool, struct pipe_resource *src, struct pipe_resource *dst, @@ -348,6 +366,12 @@ void compute_memory_defrag(struct compute_memory_pool *pool, pool-status = ~POOL_FRAGMENTED; } +/** + * Moves an item from the \a unallocated_list to the \a item_list. + * \param item The item that will be promoted. + * \return -1 if it fails, 0 otherwise + * \see compute_memory_finalize_pending + */ int compute_memory_promote_item(struct compute_memory_pool *pool, struct compute_memory_item *item, struct pipe_context *pipe, int64_t start_in_dw) @@ -390,6 +414,11 @@ int compute_memory_promote_item(struct compute_memory_pool *pool, return 0; } +/** + * Moves an item from the \a item_list to the \a unallocated_list. + * \param item The item that
Re: [Mesa-dev] [PATCH 3/3] r600g/compute: Defrag the pool at the same time as we grow it
On Sat, Jul 19, 2014 at 07:35:51PM +0200, Bruno Jiménez wrote: This allows us two things: we now need less item copies when we have to defrag+grow the pool (to just one copy per item) and, even in the case where we don't need to defrag the pool, we reduce the data copied to just the useful data that the items use. Note: The fallback path is a bit ugly now, but hopefully we won't need it much. Hi, I pushed the first two patches, but I couldn't get this one to apply. Could you send an updated version rebased on master? Thanks, Tom --- src/gallium/drivers/r600/compute_memory_pool.c | 40 -- src/gallium/drivers/r600/compute_memory_pool.h | 2 +- 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/src/gallium/drivers/r600/compute_memory_pool.c b/src/gallium/drivers/r600/compute_memory_pool.c index ca36240..32f5892 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.c +++ b/src/gallium/drivers/r600/compute_memory_pool.c @@ -169,10 +169,12 @@ struct list_head *compute_memory_postalloc_chunk( * Reallocates pool, conserves data. 
* @returns -1 if it fails, 0 otherwise */ -int compute_memory_grow_pool(struct compute_memory_pool* pool, - struct pipe_context * pipe, int new_size_in_dw) +int compute_memory_grow_defrag_pool(struct compute_memory_pool *pool, + struct pipe_context *pipe, int new_size_in_dw) { - COMPUTE_DBG(pool-screen, * compute_memory_grow_pool() + new_size_in_dw = align(new_size_in_dw, ITEM_ALIGNMENT); + + COMPUTE_DBG(pool-screen, * compute_memory_grow_defrag_pool() new_size_in_dw = %d (%d bytes)\n, new_size_in_dw, new_size_in_dw * 4); @@ -183,27 +185,17 @@ int compute_memory_grow_pool(struct compute_memory_pool* pool, } else { struct r600_resource *temp = NULL; - new_size_in_dw = align(new_size_in_dw, ITEM_ALIGNMENT); - - COMPUTE_DBG(pool-screen, Aligned size = %d (%d bytes)\n, - new_size_in_dw, new_size_in_dw * 4); - temp = (struct r600_resource *)r600_compute_buffer_alloc_vram( pool-screen, new_size_in_dw * 4); if (temp != NULL) { - struct r600_context *rctx = (struct r600_context *)pipe; struct pipe_resource *src = (struct pipe_resource *)pool-bo; struct pipe_resource *dst = (struct pipe_resource *)temp; - struct pipe_box box; - COMPUTE_DBG(pool-screen, Growing the pool using a temporary resource\n); + COMPUTE_DBG(pool-screen, Growing and defragmenting the pool + using a temporary resource\n); - u_box_1d(0, pool-size_in_dw * 4, box); - - rctx-b.b.resource_copy_region(pipe, - dst, 0, 0, 0 ,0, - src, 0, box); + compute_memory_defrag(pool, src, dst, pipe); pool-screen-b.b.resource_destroy( (struct pipe_screen *)pool-screen, @@ -229,6 +221,11 @@ int compute_memory_grow_pool(struct compute_memory_pool* pool, pool-screen, pool-size_in_dw * 4); compute_memory_shadow(pool, pipe, 0); + + if (pool-status POOL_FRAGMENTED) { + struct pipe_resource *src = (struct pipe_resource *)pool-bo; + compute_memory_defrag(pool, src, src, pipe); + } } } @@ -292,16 +289,15 @@ int compute_memory_finalize_pending(struct compute_memory_pool* pool, return 0; } - if (pool-status POOL_FRAGMENTED) { - 
struct pipe_resource *src = (struct pipe_resource *)pool-bo; - compute_memory_defrag(pool, src, src, pipe); - } - if (pool-size_in_dw allocated + unallocated) { - err = compute_memory_grow_pool(pool, pipe, allocated + unallocated); + err = compute_memory_grow_defrag_pool(pool, pipe, allocated + unallocated); if (err == -1) return -1; } + else if (pool-status POOL_FRAGMENTED) { + struct pipe_resource *src = (struct pipe_resource *)pool-bo; + compute_memory_defrag(pool, src, src, pipe); + } /* After defragmenting the pool, allocated is equal to the first available * position for new items in the pool */ diff --git a/src/gallium/drivers/r600/compute_memory_pool.h b/src/gallium/drivers/r600/compute_memory_pool.h index 5f1d72b..c7eb237 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.h +++
Re: [Mesa-dev] [PATCH 1/3] r600g/compute: Fix signed/unsigned comparison compiler warnings.
On Fri, Jul 25, 2014 at 10:33:42AM -0400, Jan Vesely wrote: The iteration variables go from 0 anyway. Signed-off-by: Jan Vesely jan.ves...@rutgers.edu Thanks, I pushed patch #1. -Tom --- A collection of fixes for gcc warnings I ran across. src/gallium/drivers/r600/evergreen_compute.c | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 12e9c85..1970414 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -100,7 +100,7 @@ struct r600_resource* r600_compute_buffer_alloc_vram( static void evergreen_set_rat( struct r600_pipe_compute *pipe, - int id, + unsigned id, struct r600_resource* bo, int start, int size) @@ -276,7 +276,7 @@ void evergreen_compute_upload_input( { struct r600_context *ctx = (struct r600_context *)ctx_; struct r600_pipe_compute *shader = ctx-cs_shader_state.shader; - int i; + unsigned i; /* We need to reserve 9 dwords (36 bytes) for implicit kernel * parameters. */ @@ -405,7 +405,7 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, const uint *grid_layout) { struct radeon_winsys_cs *cs = ctx-b.rings.gfx.cs; - int i; + unsigned i; /* make sure that the gfx ring is only one active */ if (ctx-b.rings.dma.cs ctx-b.rings.dma.cs-cdw) { @@ -598,7 +598,7 @@ static void evergreen_set_compute_resources(struct pipe_context * ctx_, COMPUTE_DBG(ctx-screen, *** evergreen_set_compute_resources: start = %u count = %u\n, start, count); - for (int i = 0; i count; i++) { + for (unsigned i = 0; i count; i++) { /* The First two vertex buffers are reserved for parameters and * global buffers. 
*/ unsigned vtx_id = 2 + i; @@ -629,7 +629,7 @@ void evergreen_set_cs_sampler_view(struct pipe_context *ctx_, struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; - for (int i = 0; i count; i++) { + for (unsigned i = 0; i count; i++){ if (resource[i]) { assert(i+1 12); /* XXX: Implement */ @@ -661,7 +661,7 @@ static void evergreen_set_global_binding( /* We mark these items for promotion to the pool if they * aren't already there */ - for (int i = 0; i n; i++) { + for (unsigned i = 0; i n; i++) { struct compute_memory_item *item = buffers[i]-chunk; if (!is_item_in_pool(item)) @@ -673,7 +673,7 @@ static void evergreen_set_global_binding( return; } - for (int i = 0; i n; i++) + for (unsigned i = 0; i n; i++) { uint32_t buffer_offset; uint32_t handle; -- 1.9.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/3] r600g/compute: Defrag the pool at the same time as we grow it
On Fri, Jul 25, 2014 at 11:28:19PM +0200, Bruno Jimenez wrote: On Fri, 2014-07-25 at 12:46 -0400, Tom Stellard wrote: On Sat, Jul 19, 2014 at 07:35:51PM +0200, Bruno Jiménez wrote: This allows us two things: we now need less item copies when we have to defrag+grow the pool (to just one copy per item) and, even in the case where we don't need to defrag the pool, we reduce the data copied to just the useful data that the items use. Note: The fallback path is a bit ugly now, but hopefully we won't need it much. Hi, I pushed the first two patches, but I couldn't get this one to apply. Could you send an updated version rebased on master? Hi, This patch needs this other patch first: http://lists.freedesktop.org/archives/mesa-dev/2014-July/062923.html As I mentioned it in the cover letter and you reviewed it when I sent it I thought that you would push it too. Although I forgot to say that it wasn't pushed, sorry. Sorry, I missed that. I've pushed both patches. -Tom I can squash them and send it if needed. Sorry for any inconvenience. Bruno Thanks, Tom --- src/gallium/drivers/r600/compute_memory_pool.c | 40 -- src/gallium/drivers/r600/compute_memory_pool.h | 2 +- 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/src/gallium/drivers/r600/compute_memory_pool.c b/src/gallium/drivers/r600/compute_memory_pool.c index ca36240..32f5892 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.c +++ b/src/gallium/drivers/r600/compute_memory_pool.c @@ -169,10 +169,12 @@ struct list_head *compute_memory_postalloc_chunk( * Reallocates pool, conserves data. 
* @returns -1 if it fails, 0 otherwise */ -int compute_memory_grow_pool(struct compute_memory_pool* pool, - struct pipe_context * pipe, int new_size_in_dw) +int compute_memory_grow_defrag_pool(struct compute_memory_pool *pool, + struct pipe_context *pipe, int new_size_in_dw) { - COMPUTE_DBG(pool-screen, * compute_memory_grow_pool() + new_size_in_dw = align(new_size_in_dw, ITEM_ALIGNMENT); + + COMPUTE_DBG(pool-screen, * compute_memory_grow_defrag_pool() new_size_in_dw = %d (%d bytes)\n, new_size_in_dw, new_size_in_dw * 4); @@ -183,27 +185,17 @@ int compute_memory_grow_pool(struct compute_memory_pool* pool, } else { struct r600_resource *temp = NULL; - new_size_in_dw = align(new_size_in_dw, ITEM_ALIGNMENT); - - COMPUTE_DBG(pool-screen, Aligned size = %d (%d bytes)\n, - new_size_in_dw, new_size_in_dw * 4); - temp = (struct r600_resource *)r600_compute_buffer_alloc_vram( pool-screen, new_size_in_dw * 4); if (temp != NULL) { - struct r600_context *rctx = (struct r600_context *)pipe; struct pipe_resource *src = (struct pipe_resource *)pool-bo; struct pipe_resource *dst = (struct pipe_resource *)temp; - struct pipe_box box; - COMPUTE_DBG(pool-screen, Growing the pool using a temporary resource\n); + COMPUTE_DBG(pool-screen, Growing and defragmenting the pool + using a temporary resource\n); - u_box_1d(0, pool-size_in_dw * 4, box); - - rctx-b.b.resource_copy_region(pipe, - dst, 0, 0, 0 ,0, - src, 0, box); + compute_memory_defrag(pool, src, dst, pipe); pool-screen-b.b.resource_destroy( (struct pipe_screen *)pool-screen, @@ -229,6 +221,11 @@ int compute_memory_grow_pool(struct compute_memory_pool* pool, pool-screen, pool-size_in_dw * 4); compute_memory_shadow(pool, pipe, 0); + + if (pool-status POOL_FRAGMENTED) { + struct pipe_resource *src = (struct pipe_resource *)pool-bo; + compute_memory_defrag(pool, src, src, pipe); + } } } @@ -292,16 +289,15 @@ int compute_memory_finalize_pending(struct compute_memory_pool* pool, return 0; } - if (pool-status POOL_FRAGMENTED) { - 
struct pipe_resource *src = (struct pipe_resource *)pool-bo; - compute_memory_defrag(pool, src, src, pipe); - } - if (pool-size_in_dw allocated + unallocated) { - err = compute_memory_grow_pool(pool, pipe, allocated + unallocated); + err = compute_memory_grow_defrag_pool(pool, pipe, allocated + unallocated); if (err == -1
[Mesa-dev] [PATCH] clover: Add checks for image support to the image functions v2
Most image functions are required to return a CL_INVALID_OPERATION error when used on devices without image support. v2: - Simplified the code --- src/gallium/state_trackers/clover/api/memory.cpp | 6 ++ src/gallium/state_trackers/clover/api/sampler.cpp | 3 +++ src/gallium/state_trackers/clover/api/transfer.cpp | 3 +++ 3 files changed, 12 insertions(+) diff --git a/src/gallium/state_trackers/clover/api/memory.cpp b/src/gallium/state_trackers/clover/api/memory.cpp index d26b1c6..a094e74 100644 --- a/src/gallium/state_trackers/clover/api/memory.cpp +++ b/src/gallium/state_trackers/clover/api/memory.cpp @@ -106,6 +106,9 @@ clCreateImage2D(cl_context d_ctx, cl_mem_flags flags, void *host_ptr, cl_int *r_errcode) try { auto ctx = obj(d_ctx); + if (!any_of(std::mem_fn(device::image_support), ctx.devices())) + throw error(CL_INVALID_OPERATION); + if (flags ~(CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR)) @@ -141,6 +144,9 @@ clCreateImage3D(cl_context d_ctx, cl_mem_flags flags, void *host_ptr, cl_int *r_errcode) try { auto ctx = obj(d_ctx); + if (!any_of(std::mem_fn(device::image_support), ctx.devices())) + throw error(CL_INVALID_OPERATION); + if (flags ~(CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR)) diff --git a/src/gallium/state_trackers/clover/api/sampler.cpp b/src/gallium/state_trackers/clover/api/sampler.cpp index 403892b..482e55a 100644 --- a/src/gallium/state_trackers/clover/api/sampler.cpp +++ b/src/gallium/state_trackers/clover/api/sampler.cpp @@ -31,6 +31,9 @@ clCreateSampler(cl_context d_ctx, cl_bool norm_mode, cl_int *r_errcode) try { auto ctx = obj(d_ctx); + if (!any_of(std::mem_fn(device::image_support), ctx.devices())) + throw error(CL_INVALID_OPERATION); + ret_error(r_errcode, CL_SUCCESS); return new sampler(ctx, norm_mode, addr_mode, filter_mode); diff --git 
a/src/gallium/state_trackers/clover/api/transfer.cpp b/src/gallium/state_trackers/clover/api/transfer.cpp index 404ceb0..07d8a73 100644 --- a/src/gallium/state_trackers/clover/api/transfer.cpp +++ b/src/gallium/state_trackers/clover/api/transfer.cpp @@ -101,6 +101,9 @@ namespace { const vector_t orig, const vector_t region) { vector_t size = { img.width(), img.height(), img.depth() }; + if (!q.device().image_support()) + throw error(CL_INVALID_OPERATION); + if (img.context() != q.context()) throw error(CL_INVALID_CONTEXT); -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/3] gallium: Add PIPE_COMPUTE_CAP_MAX_CONSTANT_BUFFER_SIZE
On Thu, Jul 24, 2014 at 01:09:49PM +0200, Marek Olšák wrote: Isn't this redundant with get_shader_param(PIPE_SHADER_COMPUTE, PIPE_SHADER_CAP_MAX_CONSTS) * 16? This is what clover was using, but I was confused about what the value was supposed to represent. Now, I think I understand (number of 4 x 32-bit constants). I can use this instead. -Tom Marek On Thu, Jul 24, 2014 at 3:05 AM, Tom Stellard thomas.stell...@amd.com wrote: --- src/gallium/docs/source/screen.rst | 2 ++ src/gallium/include/pipe/p_defines.h | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 830a1a5..219c9f9 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -334,6 +334,8 @@ pipe_screen::get_compute_param. Value type: ``uint32_t`` * ``PIPE_COMPUTE_CAP_IMAGES_SUPPORTED``: Whether images are supported non-zero means yes, zero means no. Value type: ``uint32_t`` +* ``PIPE_COMPUTE_CAP_MAX_CONSTANT_BUFFER_SIZE``: The maximum size in bytes + of a constant buffer. Value type: ``uint64_t`` .. _pipe_bind: diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 43bb1f5..78709b9 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -651,7 +651,8 @@ enum pipe_compute_cap PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY, PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS, - PIPE_COMPUTE_CAP_IMAGES_SUPPORTED + PIPE_COMPUTE_CAP_IMAGES_SUPPORTED, + PIPE_COMPUTE_CAP_MAX_CONSTANT_BUFFER_SIZE }; /** -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] r600g: Use hardware sqrt instruction
On Fri, Jul 18, 2014 at 12:35:24PM -0400, Alex Deucher wrote: On Fri, Jul 18, 2014 at 3:54 AM, Glenn Kennard glenn.kenn...@gmail.com wrote: Piglit quick tests including sqrt pass, no other regressions, tested on radeon 6670. --- Should be slightly more precise than the invsqrt/recip/mul combination used previously, I reckon up to about 2 bits of mantissa, and saves two instructions per sqrt emitted. It would be good if someone could test this on Cayman since it uses a slightly different codepath. Reviewed-by: Alex Deucher alexander.deuc...@amd.com I've pushed this patch, thanks! -Tom src/gallium/drivers/r600/r600_pipe.c | 2 +- src/gallium/drivers/r600/r600_shader.c | 9 +++-- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 5bf9c00..ee6a416 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -428,7 +428,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: return 1; case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: - return 0; + return 1; case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index db928f3..907547d 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -6498,8 +6498,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2}, {TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp}, {TGSI_OPCODE_CND, 0, ALU_OP0_NOP, tgsi_unsupported}, - /* gap */ - {20,0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported}, /* gap */ {22,0, ALU_OP0_NOP, tgsi_unsupported}, @@ -6693,8 +6692,7 @@ static 
struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2}, {TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp}, {TGSI_OPCODE_CND, 0, ALU_OP0_NOP, tgsi_unsupported}, - /* gap */ - {20,0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported}, /* gap */ {22,0, ALU_OP0_NOP, tgsi_unsupported}, @@ -6888,8 +6886,7 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { {TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2}, {TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp}, {TGSI_OPCODE_CND, 0, ALU_OP0_NOP, tgsi_unsupported}, - /* gap */ - {20,0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, cayman_emit_float_instr}, {TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported}, /* gap */ {22,0, ALU_OP0_NOP, tgsi_unsupported}, -- 1.8.3.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/5] [RFC] r600g/compute: Adding support for defragmenting compute_memory_pool
On Fri, Jul 18, 2014 at 01:09:03PM +0200, Bruno Jimenez wrote: On Thu, 2014-07-17 at 22:56 -0400, Tom Stellard wrote: On Wed, Jul 16, 2014 at 11:12:42PM +0200, Bruno Jiménez wrote: Hi, This series finally adds support for defragmenting the pool for OpenCL buffers in the r600g driver. It is mostly a rewrite of the series that I wrote some months ago. For defragmenting the pool I have thought of two different possibilities: - Creating a new pool and moving every item here in the correct position. This has the advantage of being very simple to implement and that it allows the pool to be grown at the same time. But it has a couple of problems, namely that it has a high memory peak usage (sum of current pool + new pool) and that in the case of having a pool not very fragmented you have to copy every item to its new place. - Using the same pool by moving the items in it. This has the advantage of using less memory (sum of current pool + biggest item in it) and that it is easier to handle the case of only having few elements out of place. The disadvantages are that it doesn't allow growing the pool at the same time and that it may involve twice the number of item-copies in the worst case. I have chosen to implement the second option, but if you think that the first one is better I can rewrite the series for it. (^_^) The worst case I have mentioned is this: Imagine that you have a series of items in which the first is, at least, 1 'unit' smaller than the rest. You now free this item and create a new one with the same size [why would anyone do this? I don't know] For now, the defragmenter code is so dumb that it will move every item to the front of the pool without trying first to put this new item in the available space. Hopefully situations like this won't be very common. If you want me to explain any detail about any of the patches just ask. And as said, if you prefer the first version of the defragmenter, just ask. 
[In fact, after having written this, I may add it for the case grow+defrag] Also, no regressions found in piglit. Thanks in advance! Bruno Bruno Jiménez (5): r600g/compute: Add a function for moving items in the pool r600g/compute: Add a function for defragmenting the pool r600g/compute: Defrag the pool if it's necessary r600g/compute: Quick exit if there's nothing to add to the pool r600g/compute: Remove unneeded code from compute_memory_promote_item src/gallium/drivers/r600/compute_memory_pool.c | 196 ++--- src/gallium/drivers/r600/compute_memory_pool.h | 13 +- 2 files changed, 156 insertions(+), 53 deletions(-) Hi, I took a brief look at these patches and they look pretty good. I will look at them again tomorrow and then commit if I don't see any issues. I've pushed these patches, thanks! -Tom Hi, Thanks, if you have any doubt about any of the patches just ask. I have just finished writing a follow-up series for doing grow + defrag at the same time. I still have to test it, but if no problems arise I'll send it to the list as soon as possible. This new series is based on the patch that I sent here: http://lists.freedesktop.org/archives/mesa-dev/2014-July/062923.html If you think it's good, could you push it to master? Thanks in advance! Bruno -Tom -- 2.0.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] radeon/llvm: enable unsafe math for graphics shaders
On Tue, Jul 22, 2014 at 12:36:33AM +0200, Grigori Goronzy wrote: On 17.07.2014 21:24, Tom Stellard wrote: On Thu, Jul 17, 2014 at 06:44:25PM +0200, Grigori Goronzy wrote: Accuracy of some operations was recently improved in the R600 backend, at the cost of slower code. This is required for compute shaders, but not for graphics shaders. Add unsafe-fp-math hint to make LLVM generate faster but possibly less accurate code. Piglit didn't indicate any regressions. Both patches are: Reviewed-by: Tom Stellard thomas.stell...@amd.com Can you please commit the patches for me? My account request is still pending. I just pushed these, thanks! -Tom Grigori --- src/gallium/drivers/radeon/radeon_llvm_emit.c | 5 + 1 file changed, 5 insertions(+) diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c b/src/gallium/drivers/radeon/radeon_llvm_emit.c index 1b17dd4..171ccaa 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_emit.c +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c @@ -26,6 +26,7 @@ #include radeon_llvm_emit.h #include radeon_elf_util.h #include util/u_memory.h +#include pipe/p_shader_tokens.h #include llvm-c/Target.h #include llvm-c/TargetMachine.h @@ -50,6 +51,10 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type) sprintf(Str, %1d, type); LLVMAddTargetDependentFunctionAttr(F, ShaderType, Str); + + if (type != TGSI_PROCESSOR_COMPUTE) { +LLVMAddTargetDependentFunctionAttr(F, unsafe-fp-math, true); + } } static void init_r600_target() { -- 1.8.3.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/3] r600g/compute: Allow compute_memory_move_item to move items between resources
On Sat, Jul 19, 2014 at 07:35:49PM +0200, Bruno Jiménez wrote: --- src/gallium/drivers/r600/compute_memory_pool.c | 43 ++ src/gallium/drivers/r600/compute_memory_pool.h | 1 + 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/src/gallium/drivers/r600/compute_memory_pool.c b/src/gallium/drivers/r600/compute_memory_pool.c index 254c1d7..1ad77ad 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.c +++ b/src/gallium/drivers/r600/compute_memory_pool.c @@ -331,6 +331,7 @@ void compute_memory_defrag(struct compute_memory_pool *pool, struct pipe_context *pipe) { struct compute_memory_item *item; + struct pipe_resource *src = (struct pipe_resource *)pool-bo; int64_t last_pos; COMPUTE_DBG(pool-screen, * compute_memory_defrag()\n); @@ -340,7 +341,8 @@ void compute_memory_defrag(struct compute_memory_pool *pool, if (item-start_in_dw != last_pos) { assert(last_pos item-start_in_dw); - compute_memory_move_item(pool, item, last_pos, pipe); + compute_memory_move_item(pool, src, src, + item, last_pos, pipe); } last_pos += align(item-size_in_dw, ITEM_ALIGNMENT); @@ -431,7 +433,8 @@ void compute_memory_demote_item(struct compute_memory_pool *pool, } /** - * Moves the item \a item forward in the pool to \a new_start_in_dw + * Moves the item \a item forward from the resource \a src to the + * resource \a dst at \a new_start_in_dw * * This function assumes two things: * 1) The item is \b only moved forward @@ -442,13 +445,14 @@ void compute_memory_demote_item(struct compute_memory_pool *pool, * \see compute_memory_defrag */ void compute_memory_move_item(struct compute_memory_pool *pool, + struct pipe_resource *src, struct pipe_resource *dst, struct compute_memory_item *item, uint64_t new_start_in_dw, struct pipe_context *pipe) { struct pipe_screen *screen = (struct pipe_screen *)pool-screen; struct r600_context *rctx = (struct r600_context *)pipe; - struct pipe_resource *src = (struct pipe_resource *)pool-bo; - struct pipe_resource *dst; + struct pipe_resource *src_ 
= src; + struct pipe_resource *dst_; I think it is confusing to have variables named src_ and src. Could you rename one of them to something more descriptive? struct pipe_box box; struct compute_memory_item *prev; @@ -465,34 +469,35 @@ void compute_memory_move_item(struct compute_memory_pool *pool, u_box_1d(item-start_in_dw * 4, item-size_in_dw * 4, box); - /* If the ranges don't overlap, we can just copy the item directly */ - if (new_start_in_dw + item-size_in_dw = item-start_in_dw) { - dst = (struct pipe_resource *)pool-bo; + /* If the ranges don't overlap, or we are copying from one resource + * to another, we can just copy the item directly */ + if (src != dst || new_start_in_dw + item-size_in_dw = item-start_in_dw) { + dst_ = dst; rctx-b.b.resource_copy_region(pipe, - dst, 0, new_start_in_dw * 4, 0, 0, - src, 0, box); + dst_, 0, new_start_in_dw * 4, 0, 0, + src_, 0, box); } else { /* The ranges overlap, we will try first to use an intermediate * resource to move the item */ - dst = (struct pipe_resource *)r600_compute_buffer_alloc_vram( + dst_ = (struct pipe_resource *)r600_compute_buffer_alloc_vram( pool-screen, item-size_in_dw * 4); - if (dst != NULL) { + if (dst_ != NULL) { rctx-b.b.resource_copy_region(pipe, - dst, 0, 0, 0, 0, - src, 0, box); + dst_, 0, 0, 0, 0, + src_, 0, box); - src = dst; - dst = (struct pipe_resource *)pool-bo; + src_ = dst_; + dst_ = dst; box.x = 0; rctx-b.b.resource_copy_region(pipe, - dst, 0, new_start_in_dw * 4, 0, 0, - src, 0, box); + dst_, 0, new_start_in_dw * 4, 0, 0, + src_, 0, box); - pool-screen-b.b.resource_destroy(screen, src); + pool-screen-b.b.resource_destroy(screen, src_); } else { /* The allocation of the temporary resource failed, @@ -505,7 +510,7 @@ void compute_memory_move_item(struct compute_memory_pool *pool, u_box_1d(new_start_in_dw * 4, (offset +
Re: [Mesa-dev] [PATCH 2/3] r600g/compute: Allow compute_memory_defrag to defragment between resources
On Sat, Jul 19, 2014 at 07:35:50PM +0200, Bruno Jiménez wrote: This will be used in the following patch to avoid duplicated code --- Reviewed-by: Tom Stellard thomas.stell...@amd.com src/gallium/drivers/r600/compute_memory_pool.c | 11 ++- src/gallium/drivers/r600/compute_memory_pool.h | 1 + 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/r600/compute_memory_pool.c b/src/gallium/drivers/r600/compute_memory_pool.c index 1ad77ad..ca36240 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.c +++ b/src/gallium/drivers/r600/compute_memory_pool.c @@ -293,7 +293,8 @@ int compute_memory_finalize_pending(struct compute_memory_pool* pool, } if (pool-status POOL_FRAGMENTED) { - compute_memory_defrag(pool, pipe); + struct pipe_resource *src = (struct pipe_resource *)pool-bo; + compute_memory_defrag(pool, src, src, pipe); } if (pool-size_in_dw allocated + unallocated) { @@ -328,20 +329,20 @@ int compute_memory_finalize_pending(struct compute_memory_pool* pool, * \param pool The pool to be defragmented */ void compute_memory_defrag(struct compute_memory_pool *pool, + struct pipe_resource *src, struct pipe_resource *dst, struct pipe_context *pipe) { struct compute_memory_item *item; - struct pipe_resource *src = (struct pipe_resource *)pool-bo; int64_t last_pos; COMPUTE_DBG(pool-screen, * compute_memory_defrag()\n); last_pos = 0; LIST_FOR_EACH_ENTRY(item, pool-item_list, link) { - if (item-start_in_dw != last_pos) { - assert(last_pos item-start_in_dw); + if (src != dst || item-start_in_dw != last_pos) { + assert(last_pos = item-start_in_dw); - compute_memory_move_item(pool, src, src, + compute_memory_move_item(pool, src, dst, item, last_pos, pipe); } diff --git a/src/gallium/drivers/r600/compute_memory_pool.h b/src/gallium/drivers/r600/compute_memory_pool.h index 822bfbe..5f1d72b 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.h +++ b/src/gallium/drivers/r600/compute_memory_pool.h @@ -91,6 +91,7 @@ int 
compute_memory_finalize_pending(struct compute_memory_pool* pool, struct pipe_context * pipe); void compute_memory_defrag(struct compute_memory_pool *pool, + struct pipe_resource *src, struct pipe_resource *dst, struct pipe_context *pipe); int compute_memory_promote_item(struct compute_memory_pool *pool, -- 2.0.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/3] r600g/compute: Defrag the pool at the same time as we grow it
On Sat, Jul 19, 2014 at 07:35:51PM +0200, Bruno Jiménez wrote: This allows us two things: we now need less item copies when we have to defrag+grow the pool (to just one copy per item) and, even in the case where we don't need to defrag the pool, we reduce the data copied to just the useful data that the items use. Note: The fallback path is a bit ugly now, but hopefully we won't need it much. Reviewed-by: Tom Stellard thomas.stell...@amd.com --- src/gallium/drivers/r600/compute_memory_pool.c | 40 -- src/gallium/drivers/r600/compute_memory_pool.h | 2 +- 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/src/gallium/drivers/r600/compute_memory_pool.c b/src/gallium/drivers/r600/compute_memory_pool.c index ca36240..32f5892 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.c +++ b/src/gallium/drivers/r600/compute_memory_pool.c @@ -169,10 +169,12 @@ struct list_head *compute_memory_postalloc_chunk( * Reallocates pool, conserves data. * @returns -1 if it fails, 0 otherwise */ -int compute_memory_grow_pool(struct compute_memory_pool* pool, - struct pipe_context * pipe, int new_size_in_dw) +int compute_memory_grow_defrag_pool(struct compute_memory_pool *pool, + struct pipe_context *pipe, int new_size_in_dw) { - COMPUTE_DBG(pool-screen, * compute_memory_grow_pool() + new_size_in_dw = align(new_size_in_dw, ITEM_ALIGNMENT); + + COMPUTE_DBG(pool-screen, * compute_memory_grow_defrag_pool() new_size_in_dw = %d (%d bytes)\n, new_size_in_dw, new_size_in_dw * 4); @@ -183,27 +185,17 @@ int compute_memory_grow_pool(struct compute_memory_pool* pool, } else { struct r600_resource *temp = NULL; - new_size_in_dw = align(new_size_in_dw, ITEM_ALIGNMENT); - - COMPUTE_DBG(pool-screen, Aligned size = %d (%d bytes)\n, - new_size_in_dw, new_size_in_dw * 4); - temp = (struct r600_resource *)r600_compute_buffer_alloc_vram( pool-screen, new_size_in_dw * 4); if (temp != NULL) { - struct r600_context *rctx = (struct r600_context *)pipe; struct pipe_resource *src = (struct 
pipe_resource *)pool-bo; struct pipe_resource *dst = (struct pipe_resource *)temp; - struct pipe_box box; - COMPUTE_DBG(pool-screen, Growing the pool using a temporary resource\n); + COMPUTE_DBG(pool-screen, Growing and defragmenting the pool + using a temporary resource\n); - u_box_1d(0, pool-size_in_dw * 4, box); - - rctx-b.b.resource_copy_region(pipe, - dst, 0, 0, 0 ,0, - src, 0, box); + compute_memory_defrag(pool, src, dst, pipe); pool-screen-b.b.resource_destroy( (struct pipe_screen *)pool-screen, @@ -229,6 +221,11 @@ int compute_memory_grow_pool(struct compute_memory_pool* pool, pool-screen, pool-size_in_dw * 4); compute_memory_shadow(pool, pipe, 0); + + if (pool-status POOL_FRAGMENTED) { + struct pipe_resource *src = (struct pipe_resource *)pool-bo; + compute_memory_defrag(pool, src, src, pipe); + } } } @@ -292,16 +289,15 @@ int compute_memory_finalize_pending(struct compute_memory_pool* pool, return 0; } - if (pool-status POOL_FRAGMENTED) { - struct pipe_resource *src = (struct pipe_resource *)pool-bo; - compute_memory_defrag(pool, src, src, pipe); - } - if (pool-size_in_dw allocated + unallocated) { - err = compute_memory_grow_pool(pool, pipe, allocated + unallocated); + err = compute_memory_grow_defrag_pool(pool, pipe, allocated + unallocated); if (err == -1) return -1; } + else if (pool-status POOL_FRAGMENTED) { + struct pipe_resource *src = (struct pipe_resource *)pool-bo; + compute_memory_defrag(pool, src, src, pipe); + } /* After defragmenting the pool, allocated is equal to the first available * position for new items in the pool */ diff --git a/src/gallium/drivers/r600/compute_memory_pool.h b/src/gallium/drivers/r600/compute_memory_pool.h index 5f1d72b..c7eb237 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.h +++ b/src/gallium/drivers/r600/compute_memory_pool.h @@ -81,7 +81,7 @@ int64_t compute_memory_prealloc_chunk
[Mesa-dev] [PATCH 3/3] clover: Add checks for image support to the image functions
Most image functions are required to return a CL_INVALID_OPERATION error when used on devices without image support. --- src/gallium/state_trackers/clover/api/memory.cpp | 6 ++ src/gallium/state_trackers/clover/api/sampler.cpp | 3 +++ src/gallium/state_trackers/clover/api/transfer.cpp | 17 + src/gallium/state_trackers/clover/core/context.cpp | 9 + src/gallium/state_trackers/clover/core/context.hpp | 2 ++ 5 files changed, 37 insertions(+) diff --git a/src/gallium/state_trackers/clover/api/memory.cpp b/src/gallium/state_trackers/clover/api/memory.cpp index d26b1c6..77f8b96 100644 --- a/src/gallium/state_trackers/clover/api/memory.cpp +++ b/src/gallium/state_trackers/clover/api/memory.cpp @@ -106,6 +106,9 @@ clCreateImage2D(cl_context d_ctx, cl_mem_flags flags, void *host_ptr, cl_int *r_errcode) try { auto ctx = obj(d_ctx); + if (!ctx.image_support()) + throw error(CL_INVALID_OPERATION); + if (flags ~(CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR)) @@ -141,6 +144,9 @@ clCreateImage3D(cl_context d_ctx, cl_mem_flags flags, void *host_ptr, cl_int *r_errcode) try { auto ctx = obj(d_ctx); + if (!ctx.image_support()) + throw error(CL_INVALID_OPERATION); + if (flags ~(CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR)) diff --git a/src/gallium/state_trackers/clover/api/sampler.cpp b/src/gallium/state_trackers/clover/api/sampler.cpp index 403892b..7f2e04d 100644 --- a/src/gallium/state_trackers/clover/api/sampler.cpp +++ b/src/gallium/state_trackers/clover/api/sampler.cpp @@ -31,6 +31,9 @@ clCreateSampler(cl_context d_ctx, cl_bool norm_mode, cl_int *r_errcode) try { auto ctx = obj(d_ctx); + if (!ctx.image_support()) + throw error(CL_INVALID_OPERATION); + ret_error(r_errcode, CL_SUCCESS); return new sampler(ctx, norm_mode, addr_mode, filter_mode); diff --git a/src/gallium/state_trackers/clover/api/transfer.cpp 
b/src/gallium/state_trackers/clover/api/transfer.cpp index 404ceb0..da12d2b 100644 --- a/src/gallium/state_trackers/clover/api/transfer.cpp +++ b/src/gallium/state_trackers/clover/api/transfer.cpp @@ -457,6 +457,8 @@ clEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking, auto src_origin = vector(p_origin); auto src_pitch = pitch(region, {{ img.pixel_size(), img.row_pitch(), img.slice_pitch() }}); + if (!q.device().image_support()) + throw error(CL_INVALID_OPERATION); validate_common(q, deps); validate_object(q, ptr, {}, dst_pitch, region); @@ -491,6 +493,9 @@ clEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking, auto src_pitch = pitch(region, {{ img.pixel_size(), row_pitch, slice_pitch }}); + if (!q.device().image_support()) + throw error(CL_INVALID_OPERATION); + validate_common(q, deps); validate_object(q, img, dst_origin, region); validate_object(q, ptr, {}, src_pitch, region); @@ -522,6 +527,9 @@ clEnqueueCopyImage(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem, auto dst_origin = vector(p_dst_origin); auto src_origin = vector(p_src_origin); + if (!q.device().image_support()) + throw error(CL_INVALID_OPERATION); + validate_common(q, deps); validate_object(q, dst_img, dst_origin, region); validate_object(q, src_img, src_origin, region); @@ -559,6 +567,9 @@ clEnqueueCopyImageToBuffer(cl_command_queue d_q, src_img.row_pitch(), src_img.slice_pitch() }}); + if (!q.device().image_support()) + throw error(CL_INVALID_OPERATION); + validate_common(q, deps); validate_object(q, dst_mem, dst_origin, dst_pitch, region); validate_object(q, src_img, src_origin, region); @@ -595,6 +606,9 @@ clEnqueueCopyBufferToImage(cl_command_queue d_q, vector_t src_origin = { src_offset }; auto src_pitch = pitch(region, {{ dst_img.pixel_size() }}); + if (!q.device().image_support()) + throw error(CL_INVALID_OPERATION); + validate_common(q, deps); validate_object(q, dst_img, dst_origin, region); validate_object(q, src_mem, src_origin, src_pitch, 
region); @@ -651,6 +665,9 @@ clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking, auto region = vector(p_region); auto origin = vector(p_origin); + if (!q.device().image_support()) + throw error(CL_INVALID_OPERATION); + validate_common(q, deps); validate_object(q, img, origin, region); diff --git a/src/gallium/state_trackers/clover/core/context.cpp b/src/gallium/state_trackers/clover/core/context.cpp index bf4df39..722c97d 100644 ---
[Mesa-dev] [PATCH 1/3] gallium: Add PIPE_COMPUTE_CAP_IMAGES_SUPPORTED
--- src/gallium/docs/source/screen.rst| 2 ++ src/gallium/drivers/radeon/r600_pipe_common.c | 7 +++ src/gallium/include/pipe/p_defines.h | 3 ++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index ba583fe..830a1a5 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -332,6 +332,8 @@ pipe_screen::get_compute_param. clock in MHz. Value type: ``uint32_t`` * ``PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS``: Maximum number of compute units Value type: ``uint32_t`` +* ``PIPE_COMPUTE_CAP_IMAGES_SUPPORTED``: Whether images are supported + non-zero means yes, zero means no. Value type: ``uint32_t`` .. _pipe_bind: diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 6535992..bf0585d 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -519,6 +519,13 @@ static int r600_get_compute_param(struct pipe_screen *screen, *max_compute_units = MAX2(rscreen-info.max_compute_units, 1); } return sizeof(uint32_t); + + case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: + if (ret) { + uint32_t *images_supported = ret; + *images_supported = 0; + } + return sizeof(uint32_t); } fprintf(stderr, unknown PIPE_COMPUTE_CAP %d\n, param); diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index d9b6e5a..43bb1f5 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -650,7 +650,8 @@ enum pipe_compute_cap PIPE_COMPUTE_CAP_MAX_INPUT_SIZE, PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY, - PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS + PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS, + PIPE_COMPUTE_CAP_IMAGES_SUPPORTED }; /** -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] clover: Query the device to see if images are supported
--- src/gallium/state_trackers/clover/api/device.cpp | 2 +- src/gallium/state_trackers/clover/core/device.cpp | 6 ++ src/gallium/state_trackers/clover/core/device.hpp | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/clover/api/device.cpp b/src/gallium/state_trackers/clover/api/device.cpp index 97b2cf9..e825468 100644 --- a/src/gallium/state_trackers/clover/api/device.cpp +++ b/src/gallium/state_trackers/clover/api/device.cpp @@ -184,7 +184,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, break; case CL_DEVICE_IMAGE_SUPPORT: - buf.as_scalarcl_bool() = CL_TRUE; + buf.as_scalarcl_bool() = dev.image_support(); break; case CL_DEVICE_MAX_PARAMETER_SIZE: diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp index b6078db..63aa193 100644 --- a/src/gallium/state_trackers/clover/core/device.cpp +++ b/src/gallium/state_trackers/clover/core/device.cpp @@ -169,6 +169,12 @@ device::max_compute_units() const { PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS)[0]; } +bool +device::image_support() const { + return get_compute_paramuint32_t(pipe, + PIPE_COMPUTE_CAP_IMAGES_SUPPORTED)[0]; +} + std::vectorsize_t device::max_block_size() const { auto v = get_compute_paramuint64_t(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE); diff --git a/src/gallium/state_trackers/clover/core/device.hpp b/src/gallium/state_trackers/clover/core/device.hpp index 731c31e..2201700 100644 --- a/src/gallium/state_trackers/clover/core/device.hpp +++ b/src/gallium/state_trackers/clover/core/device.hpp @@ -63,6 +63,7 @@ namespace clover { cl_ulong max_mem_alloc_size() const; cl_uint max_clock_frequency() const; cl_uint max_compute_units() const; + bool image_support() const; std::vectorsize_t max_block_size() const; std::string device_name() const; -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/3] gallium: Add PIPE_COMPUTE_CAP_MAX_CONSTANT_BUFFER_SIZE
--- src/gallium/docs/source/screen.rst | 2 ++ src/gallium/include/pipe/p_defines.h | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 830a1a5..219c9f9 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -334,6 +334,8 @@ pipe_screen::get_compute_param. Value type: ``uint32_t`` * ``PIPE_COMPUTE_CAP_IMAGES_SUPPORTED``: Whether images are supported non-zero means yes, zero means no. Value type: ``uint32_t`` +* ``PIPE_COMPUTE_CAP_MAX_CONSTANT_BUFFER_SIZE``: The maximum size in bytes + of a constant buffer. Value type: ``uint64_t`` .. _pipe_bind: diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 43bb1f5..78709b9 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -651,7 +651,8 @@ enum pipe_compute_cap PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY, PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS, - PIPE_COMPUTE_CAP_IMAGES_SUPPORTED + PIPE_COMPUTE_CAP_IMAGES_SUPPORTED, + PIPE_COMPUTE_CAP_MAX_CONSTANT_BUFFER_SIZE }; /** -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/3] radeon/compute: Return a value for PIPE_COMPUTE_CAP_MAX_CONSTANT_BUFFER_SIZE
--- src/gallium/drivers/radeon/r600_pipe_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index bf0585d..2ea8f3d 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -492,6 +492,7 @@ static int r600_get_compute_param(struct pipe_screen *screen, } return sizeof(uint64_t); + case PIPE_COMPUTE_CAP_MAX_CONSTANT_BUFFER_SIZE: case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: if (ret) { uint64_t max_global_size; -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] clover: Use correct query for CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
--- src/gallium/state_trackers/clover/core/device.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp index 63aa193..ada5267 100644 --- a/src/gallium/state_trackers/clover/core/device.cpp +++ b/src/gallium/state_trackers/clover/core/device.cpp @@ -135,8 +135,8 @@ device::max_mem_input() const { cl_ulong device::max_const_buffer_size() const { - return pipe-get_shader_param(pipe, PIPE_SHADER_COMPUTE, - PIPE_SHADER_CAP_MAX_CONSTS) * 16; + return get_compute_paramuint64_t(pipe, + PIPE_COMPUTE_CAP_MAX_CONSTANT_BUFFER_SIZE)[0]; } cl_uint -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] radeonsi/compute: Bump number of user sgprs for LLVM 3.5
--- src/gallium/drivers/radeonsi/si_compute.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 3a9f00f..a7d61e7 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -32,6 +32,11 @@ #include radeon_llvm_util.h #define MAX_GLOBAL_BUFFERS 20 +#if HAVE_LLVM 0x0305 +#define NUM_USER_SGPRS 2 +#else +#define NUM_USER_SGPRS 4 +#endif struct si_pipe_compute { struct si_context *ctx; @@ -132,7 +137,7 @@ static void si_launch_grid( uint32_t *kernel_args; uint64_t kernel_args_va; uint64_t shader_va; - unsigned arg_user_sgpr_count = 2; + unsigned arg_user_sgpr_count = NUM_USER_SGPRS; unsigned i; struct si_pipe_shader *shader = program-kernels[pc]; unsigned lds_blocks; -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/3] winsys/radeon: Query the kernel for the number of SEs and SHs per SE
--- src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 6 ++ src/gallium/winsys/radeon/drm/radeon_winsys.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 576fea5..7cda70a 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -389,6 +389,12 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws) radeon_get_drm_value(ws-fd, RADEON_INFO_ACTIVE_CU_COUNT, NULL, ws-info.max_compute_units); +radeon_get_drm_value(ws-fd, RADEON_INFO_MAX_SE, NULL, + ws-info.max_se); + +radeon_get_drm_value(ws-fd, RADEON_INFO_MAX_SH_PER_SE, NULL, + ws-info.max_sh_per_se); + if (radeon_get_drm_value(ws-fd, RADEON_INFO_SI_TILE_MODE_ARRAY, NULL, ws-info.si_tile_mode_array)) { ws-info.si_tile_mode_array_valid = TRUE; diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 6df1987..a63a50b 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -199,6 +199,8 @@ struct radeon_info { uint32_tvram_size; uint32_tmax_sclk; uint32_tmax_compute_units; +uint32_tmax_se; +uint32_tmax_sh_per_se; uint32_tdrm_major; /* version */ uint32_tdrm_minor; -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/3] radeonsi/compute: Add scratch buffer support
The scratch buffer will be used for private memory and also register spilling. --- src/gallium/drivers/radeonsi/si_compute.c | 85 ++- src/gallium/drivers/radeonsi/si_shader.c | 5 ++ src/gallium/drivers/radeonsi/si_shader.h | 2 + 3 files changed, 90 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index a7d61e7..d6cbbf4 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -122,6 +122,43 @@ static void si_set_global_binding( } } +/** + * This function computes the value for R_00B860_COMPUTE_TMPRING_SIZE.WAVES + * /p block_layout is the number of threads in each work group. + * /p grid layout is the number of work groups. + */ +static unsigned compute_num_waves_for_scratch( + const struct radeon_info *info; + const uint *block_layout, + const uint *grid_layout) +{ + unsigned num_sh = MAX2(info-max_sh_per_se, 1); + unsigned num_se = MAX2(info-max_se, 1); + unsigned num_blocks = 1; + unsigned threads_per_block = 1; + unsigned waves_per_block; + unsigned waves_per_sh; + unsigned waves; + unsigned scratch_waves; + unsigned i; + + for (i = 0; i 3; i++) { + threads_per_block *= block_layout[i]; + num_blocks *= grid_layout[i]; + } + + waves_per_block = align(threads_per_block, 64) / 64; + waves = waves_per_block * num_blocks; + waves_per_sh = align(waves, num_sh * num_se) / (num_sh * num_se); + scratch_waves = waves_per_sh * num_sh * num_se; + + if (waves_per_block waves_per_sh) { + scratch_waves = waves_per_block * num_sh * num_se; + } + + return scratch_waves; +} + static void si_launch_grid( struct pipe_context *ctx, const uint *block_layout, const uint *grid_layout, @@ -134,13 +171,16 @@ static void si_launch_grid( unsigned kernel_args_size; unsigned num_work_size_bytes = 36; uint32_t kernel_args_offset = 0; + uint32_t scratch_offset = 0; uint32_t *kernel_args; uint64_t kernel_args_va; + uint64_t scratch_buffer_va = 0; uint64_t shader_va; 
unsigned arg_user_sgpr_count = NUM_USER_SGPRS; unsigned i; struct si_pipe_shader *shader = program-kernels[pc]; unsigned lds_blocks; + unsigned num_waves_for_scratch; pm4-compute_pkt = true; si_cmd_context_control(pm4); @@ -158,7 +198,9 @@ static void si_launch_grid( /* Upload the kernel arguments */ /* The extra num_work_size_bytes are for work group / work item size information */ - kernel_args_size = program-input_size + num_work_size_bytes; + kernel_args_size = program-input_size + num_work_size_bytes + 8 /* For scratch va */; + scratch_offset = program-input_size + num_work_size_bytes; + kernel_args = MALLOC(kernel_args_size); for (i = 0; i 3; i++) { kernel_args[i] = grid_layout[i]; @@ -166,8 +208,34 @@ static void si_launch_grid( kernel_args[i + 6] = block_layout[i]; } + num_waves_for_scratch = compute_num_waves_for_scratch( + stcx-screen.info, block_layout, grid_layout); + memcpy(kernel_args + (num_work_size_bytes / 4), input, program-input_size); + if (shader-scratch_bytes_per_wave 0) { + float *ptr; + + COMPUTE_DBG(sctx-screen, Waves: %u; Scratch per wave: %u bytes; + Total Scratch: %u bytes\n, num_waves_for_scratch, + shader-scratch_bytes_per_wave, info.width0); + if (!shader-scratch_bo) { + shader-scratch_bo = (struct r600_resource*) + si_resource_create_custom(sctx-b.b.screen, + PIPE_USAGE_DEFAULT, info.width0); + } + ptr = sctx-b.ws-buffer_map(shader-scratch_bo-cs_buf, sctx-b.rings.gfx.cs, + PIPE_TRANSFER_READ); + scratch_buffer_va = r600_resource_va(ctx-screen, + (struct pipe_resource*)shader-scratch_bo); + si_pm4_add_bo(pm4, shader-scratch_bo, + RADEON_USAGE_READWRITE, + RADEON_PRIO_SHADER_RESOURCE_RW); + + } + memcpy(kernel_args + (scratch_offset / 4), scratch_buffer_va, + sizeof(scratch_buffer_va)); + for (i = 0; i (kernel_args_size / 4); i++) { COMPUTE_DBG(sctx-screen, input %u : %u\n, i, kernel_args[i]); @@ -183,6 +251,10 @@ static void si_launch_grid( si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va); si_pm4_set_reg(pm4,
[Mesa-dev] [PATCH 3/3] radeonsi: Read rodata from ELF and append it to the end of shaders
This is used for programs that have arrays of constants that are accessed using dynamic indices. The shader will compute the base address of the constants and then access them using SMRD instructions. --- src/gallium/drivers/radeon/r600_pipe_common.h | 5 + src/gallium/drivers/radeon/radeon_elf_util.c | 5 + src/gallium/drivers/radeonsi/si_shader.c | 16 +--- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index d82adf5..8f1a0a5 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -108,6 +108,11 @@ struct radeon_shader_binary { unsigned char *config; unsigned config_size; + /** Constant data accessed by the shader. This will be uploaded +* into a constant buffer. */ + unsigned char *rodata; + unsigned rodata_size; + /** Set to 1 if the disassembly for this binary has been dumped to * stderr. */ int disassembled; diff --git a/src/gallium/drivers/radeon/radeon_elf_util.c b/src/gallium/drivers/radeon/radeon_elf_util.c index 7d92962..7c5f93e 100644 --- a/src/gallium/drivers/radeon/radeon_elf_util.c +++ b/src/gallium/drivers/radeon/radeon_elf_util.c @@ -80,6 +80,11 @@ void radeon_elf_read(const char *elf_data, unsigned elf_size, fprintf(stderr, \nShader Disassembly:\n\n); fprintf(stderr, %.*s\n, (int)section_data-d_size, (char *)section_data-d_buf); + } else if (!strncmp(name, .rodata, 7)) { + section_data = elf_getdata(section, section_data); + binary-rodata_size = section_data-d_size; + binary-rodata = MALLOC(binary-rodata_size * sizeof(unsigned char)); + memcpy(binary-rodata, section_data-d_buf, binary-rodata_size); } } diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 8593bca..641e563 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2499,11 +2499,12 @@ int si_compile_llvm(struct si_context *sctx, 
struct si_pipe_shader *shader, { unsigned r; /* llvm_compile result */ unsigned i; - uint32_t *ptr; + unsigned char *ptr; struct radeon_shader_binary binary; bool dump = r600_can_dump_shader(sctx-screen-b, shader-selector ? shader-selector-tokens : NULL); const char * gpu_family = r600_get_llvm_processor_name(sctx-screen-b.family); + unsigned code_size; /* Use LLVM to compile shader */ memset(binary, 0, sizeof(binary)); @@ -2551,19 +2552,28 @@ int si_compile_llvm(struct si_context *sctx, struct si_pipe_shader *shader, } /* copy new shader */ + code_size = binary.code_size + binary.rodata_size; r600_resource_reference(shader-bo, NULL); shader-bo = si_resource_create_custom(sctx-b.b.screen, PIPE_USAGE_IMMUTABLE, - binary.code_size); + code_size); if (shader-bo == NULL) { return -ENOMEM; } - ptr = (uint32_t*)sctx-b.ws-buffer_map(shader-bo-cs_buf, sctx-b.rings.gfx.cs, PIPE_TRANSFER_WRITE); + ptr = sctx-b.ws-buffer_map(shader-bo-cs_buf, sctx-b.rings.gfx.cs, + PIPE_TRANSFER_WRITE); util_memcpy_cpu_to_le32(ptr, binary.code, binary.code_size); + /* Copy read only data if any. */ + if (binary.rodata_size 0) { + ptr += binary.code_size; + util_memcpy_cpu_to_le32(ptr, binary.rodata, binary.rodata_size); + } + sctx-b.ws-buffer_unmap(shader-bo-cs_buf); free(binary.code); free(binary.config); + free(binary.rodata); return r; } -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/3] util: Add util_memcpy_cpu_to_le32() v2
v2: - Preserve word boundaries. --- src/gallium/auxiliary/util/u_math.h | 17 + 1 file changed, 17 insertions(+) diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index b9ed197..5de181a 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -812,6 +812,23 @@ util_bswap16(uint16_t n) (n 8); } +static INLINE void* +util_memcpy_cpu_to_le32(void *dest, void *src, size_t n) +{ +#ifdef PIPE_ARCH_BIG_ENDIAN + size_t i, e; + asset(n % 4 == 0); + + for (i = 0, e = n / 4; i e; i++) { + uint32_t *d = (uint32_t*)dest; + uint32_t *s = (uint32_t*)src; + d[i] = util_bswap32(s[i]); + } + return dest; +#else + return memcpy(dest, src, n); +#endif +} /** * Clamp X to [MIN, MAX]. -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] radeonsi: Use util_memcpy_cpu_to_le32()
--- src/gallium/drivers/radeonsi/si_descriptors.c | 4 +--- src/gallium/drivers/radeonsi/si_shader.c | 8 +--- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 38ad077..41c1b67 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -451,9 +451,7 @@ void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuf return; } - for (i = 0; i size / 4; ++i) { - tmpPtr[i] = util_cpu_to_le32(((uint32_t *)ptr)[i]); - } + util_memcpy_cpu_to_le32(tmpPtr, ptr, size); u_upload_data(sctx-b.uploader, 0, size, tmpPtr, const_offset, (struct pipe_resource**)rbuffer); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index f0650f4..8593bca 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2559,13 +2559,7 @@ int si_compile_llvm(struct si_context *sctx, struct si_pipe_shader *shader, } ptr = (uint32_t*)sctx-b.ws-buffer_map(shader-bo-cs_buf, sctx-b.rings.gfx.cs, PIPE_TRANSFER_WRITE); - if (SI_BIG_ENDIAN) { - for (i = 0; i binary.code_size / 4; ++i) { - ptr[i] = util_cpu_to_le32((*(uint32_t*)(binary.code + i*4))); - } - } else { - memcpy(ptr, binary.code, binary.code_size); - } + util_memcpy_cpu_to_le32(ptr, binary.code, binary.code_size); sctx-b.ws-buffer_unmap(shader-bo-cs_buf); free(binary.code); -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] radeonsi/compute: Share COMPUTE_DBG macro with r600g
--- src/gallium/drivers/r600/evergreen_compute.h | 13 - src/gallium/drivers/radeon/r600_pipe_common.h | 5 + src/gallium/drivers/radeonsi/si_compute.c | 5 + 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_compute.h b/src/gallium/drivers/r600/evergreen_compute.h index e760790..4fb53a1 100644 --- a/src/gallium/drivers/r600/evergreen_compute.h +++ b/src/gallium/drivers/r600/evergreen_compute.h @@ -59,17 +59,4 @@ void r600_compute_global_transfer_flush_region( struct pipe_context *, struct pi void r600_compute_global_transfer_inline_write( struct pipe_context *, struct pipe_resource *, unsigned level, unsigned usage, const struct pipe_box *, const void *data, unsigned stride, unsigned layer_stride); - -static inline void COMPUTE_DBG(struct r600_screen *rscreen, const char *fmt, ...) -{ - if (!(rscreen-b.debug_flags DBG_COMPUTE)) { - return; - } - - va_list ap; - va_start(ap, fmt); - _debug_vprintf(fmt, ap); - va_end(ap); -} - #endif diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 8f1a0a5..cee9622 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -515,6 +515,11 @@ static inline unsigned r600_tex_aniso_filter(unsigned filter) /* else */return 4; } +#define COMPUTE_DBG(rscreen, fmt, args...) \ + do { \ + if ((rscreen-b.debug_flags DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \ + } while (0); + #define R600_ERR(fmt, args...) 
\ fprintf(stderr, EE %s:%d %s - fmt, __FILE__, __LINE__, __func__, ##args) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index c0637f6..3a9f00f 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -163,6 +163,11 @@ static void si_launch_grid( memcpy(kernel_args + (num_work_size_bytes / 4), input, program-input_size); + for (i = 0; i (kernel_args_size / 4); i++) { + COMPUTE_DBG(sctx-screen, input %u : %u\n, i, + kernel_args[i]); + } + si_upload_const_buffer(sctx, kernel_args_buffer, (uint8_t*)kernel_args, kernel_args_size, kernel_args_offset); kernel_args_va = r600_resource_va(ctx-screen, -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] util: Add util_memcpy_cpu_to_le32() v3
v2: - Preserve word boundaries. v3: - Use const and restrict. - Fix indentation. --- src/gallium/auxiliary/util/u_math.h | 17 + 1 file changed, 17 insertions(+) diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index b9ed197..f6dcb22 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -812,6 +812,23 @@ util_bswap16(uint16_t n) (n 8); } +static INLINE void* +util_memcpy_cpu_to_le32(void * restrict dest, const void * restrict src, size_t n) +{ +#ifdef PIPE_ARCH_BIG_ENDIAN + size_t i, e; + asset(n % 4 == 0); + + for (i = 0, e = n / 4; i e; i++) { + uint32_t * restrict d = (uint32_t* restrict)dest; + const uint32_t * restrict s = (const uint32_t* restrict)src; + d[i] = util_bswap32(s[i]); + } + return dest; +#else + return memcpy(dest, src, n); +#endif +} /** * Clamp X to [MIN, MAX]. -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] radeonsi/compute: Add scratch buffer support v2
The scratch buffer will be used for private memory and also register spilling. v2: - Code cleanups --- I had some uncommitted changes left in my tree when I generated v1 of this patch. src/gallium/drivers/radeonsi/si_compute.c | 80 ++- src/gallium/drivers/radeonsi/si_shader.c | 5 ++ src/gallium/drivers/radeonsi/si_shader.h | 2 + 3 files changed, 85 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index a7d61e7..42e4fec 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -122,6 +122,43 @@ static void si_set_global_binding( } } +/** + * This function computes the value for R_00B860_COMPUTE_TMPRING_SIZE.WAVES + * /p block_layout is the number of threads in each work group. + * /p grid layout is the number of work groups. + */ +static unsigned compute_num_waves_for_scratch( + const struct radeon_info *info, + const uint *block_layout, + const uint *grid_layout) +{ + unsigned num_sh = MAX2(info-max_sh_per_se, 1); + unsigned num_se = MAX2(info-max_se, 1); + unsigned num_blocks = 1; + unsigned threads_per_block = 1; + unsigned waves_per_block; + unsigned waves_per_sh; + unsigned waves; + unsigned scratch_waves; + unsigned i; + + for (i = 0; i 3; i++) { + threads_per_block *= block_layout[i]; + num_blocks *= grid_layout[i]; + } + + waves_per_block = align(threads_per_block, 64) / 64; + waves = waves_per_block * num_blocks; + waves_per_sh = align(waves, num_sh * num_se) / (num_sh * num_se); + scratch_waves = waves_per_sh * num_sh * num_se; + + if (waves_per_block waves_per_sh) { + scratch_waves = waves_per_block * num_sh * num_se; + } + + return scratch_waves; +} + static void si_launch_grid( struct pipe_context *ctx, const uint *block_layout, const uint *grid_layout, @@ -136,11 +173,13 @@ static void si_launch_grid( uint32_t kernel_args_offset = 0; uint32_t *kernel_args; uint64_t kernel_args_va; + uint64_t scratch_buffer_va = 0; uint64_t 
shader_va; unsigned arg_user_sgpr_count = NUM_USER_SGPRS; unsigned i; struct si_pipe_shader *shader = program-kernels[pc]; unsigned lds_blocks; + unsigned num_waves_for_scratch; pm4-compute_pkt = true; si_cmd_context_control(pm4); @@ -158,7 +197,8 @@ static void si_launch_grid( /* Upload the kernel arguments */ /* The extra num_work_size_bytes are for work group / work item size information */ - kernel_args_size = program-input_size + num_work_size_bytes; + kernel_args_size = program-input_size + num_work_size_bytes + 8 /* For scratch va */; + kernel_args = MALLOC(kernel_args_size); for (i = 0; i 3; i++) { kernel_args[i] = grid_layout[i]; @@ -166,8 +206,31 @@ static void si_launch_grid( kernel_args[i + 6] = block_layout[i]; } + num_waves_for_scratch = compute_num_waves_for_scratch( + sctx-screen-b.info, block_layout, grid_layout); + memcpy(kernel_args + (num_work_size_bytes / 4), input, program-input_size); + if (shader-scratch_bytes_per_wave 0) { + unsigned scratch_bytes = shader-scratch_bytes_per_wave * + num_waves_for_scratch; + + COMPUTE_DBG(sctx-screen, Waves: %u; Scratch per wave: %u bytes; + Total Scratch: %u bytes\n, num_waves_for_scratch, + shader-scratch_bytes_per_wave, scratch_bytes); + if (!shader-scratch_bo) { + shader-scratch_bo = (struct r600_resource*) + si_resource_create_custom(sctx-b.b.screen, + PIPE_USAGE_DEFAULT, scratch_bytes); + } + scratch_buffer_va = r600_resource_va(ctx-screen, + (struct pipe_resource*)shader-scratch_bo); + si_pm4_add_bo(pm4, shader-scratch_bo, + RADEON_USAGE_READWRITE, + RADEON_PRIO_SHADER_RESOURCE_RW); + + } + for (i = 0; i (kernel_args_size / 4); i++) { COMPUTE_DBG(sctx-screen, input %u : %u\n, i, kernel_args[i]); @@ -183,6 +246,10 @@ static void si_launch_grid( si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va); si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4, S_008F04_BASE_ADDRESS_HI (kernel_args_va 32) | S_008F04_STRIDE(0)); + si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 8, 
scratch_buffer_va); + si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0
Re: [Mesa-dev] [PATCH 1/2] radeon/llvm: enable unsafe math for graphics shaders
On Thu, Jul 17, 2014 at 06:44:25PM +0200, Grigori Goronzy wrote: Accuracy of some operations was recently improved in the R600 backend, at the cost of slower code. This is required for compute shaders, but not for graphics shaders. Add unsafe-fp-math hint to make LLVM generate faster but possibly less accurate code. Piglit didn't indicate any regressions. Both patches are: Reviewed-by: Tom Stellard thomas.stell...@amd.com --- src/gallium/drivers/radeon/radeon_llvm_emit.c | 5 + 1 file changed, 5 insertions(+) diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c b/src/gallium/drivers/radeon/radeon_llvm_emit.c index 1b17dd4..171ccaa 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_emit.c +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c @@ -26,6 +26,7 @@ #include radeon_llvm_emit.h #include radeon_elf_util.h #include util/u_memory.h +#include pipe/p_shader_tokens.h #include llvm-c/Target.h #include llvm-c/TargetMachine.h @@ -50,6 +51,10 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type) sprintf(Str, %1d, type); LLVMAddTargetDependentFunctionAttr(F, ShaderType, Str); + + if (type != TGSI_PROCESSOR_COMPUTE) { +LLVMAddTargetDependentFunctionAttr(F, unsafe-fp-math, true); + } } static void init_r600_target() { -- 1.8.3.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/5] [RFC] r600g/compute: Adding support for defragmenting compute_memory_pool
On Wed, Jul 16, 2014 at 11:12:42PM +0200, Bruno Jiménez wrote: Hi, This series finally adds support for defragmenting the pool for OpenCL buffers in the r600g driver. It is mostly a rewrite of the series that I wrote some months ago. For defragmenting the pool I have thought of two different possibilities: - Creating a new pool and moving every item into it at the correct position. This has the advantage of being very simple to implement and that it allows the pool to be grown at the same time. But it has a couple of problems, namely that it has a high peak memory usage (sum of current pool + new pool) and that, even when the pool is not very fragmented, you have to copy every item to its new place. - Using the same pool by moving the items in it. This has the advantage of using less memory (sum of current pool + biggest item in it) and that it is easier to handle the case of only having a few elements out of place. The disadvantages are that it doesn't allow growing the pool at the same time and that it may involve twice the number of item-copies in the worst case. I have chosen to implement the second option, but if you think that the first one is better I can rewrite the series for it. (^_^) The worst case I have mentioned is this: Imagine that you have a series of items in which the first is, at least, 1 'unit' smaller than the rest. You now free this item and create a new one with the same size [why would anyone do this? I don't know] For now, the defragmenter code is so dumb that it will move every item to the front of the pool without trying first to put this new item in the available space. Hopefully situations like this won't be very common. If you want me to explain any detail about any of the patches just ask. And as said, if you prefer the first version of the defragmenter, just ask. [In fact, after having written this, I may add it for the case grow+defrag] Also, no regressions found in piglit. Thanks in advance! 
Bruno Bruno Jiménez (5): r600g/compute: Add a function for moving items in the pool r600g/compute: Add a function for defragmenting the pool r600g/compute: Defrag the pool if it's necesary r600g/compute: Quick exit if there's nothing to add to the pool r600g/compute: Remove unneeded code from compute_memory_promote_item src/gallium/drivers/r600/compute_memory_pool.c | 196 ++--- src/gallium/drivers/r600/compute_memory_pool.h | 13 +- 2 files changed, 156 insertions(+), 53 deletions(-) Hi, I took a brief look at these patches and they look pretty good. I will look at them again tomorrow and then commit if I don't see any issues. -Tom -- 2.0.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] clover: Call end_query before getting timestamp result
Also change the wait parameter from false to true. --- I'm really not sure what is correct here, but this patch fixes event profiling on SI. src/gallium/state_trackers/clover/core/timestamp.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/clover/core/timestamp.cpp b/src/gallium/state_trackers/clover/core/timestamp.cpp index 481c4f9..a6edaf6 100644 --- a/src/gallium/state_trackers/clover/core/timestamp.cpp +++ b/src/gallium/state_trackers/clover/core/timestamp.cpp @@ -47,7 +47,8 @@ cl_ulong timestamp::query::operator()() const { pipe_query_result result; - if (!q().pipe-get_query_result(q().pipe, _query, false, result)) + q().pipe-end_query(q().pipe, _query); + if (!q().pipe-get_query_result(q().pipe, _query, true, result)) throw error(CL_PROFILING_INFO_NOT_AVAILABLE); return result.u64; -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] clover: Use 1 as default value for CL_DEVICE_PROFILING_TIMER_RESOLUTION
We use PIPE_QUERY_TIMESTAMP for profiling events, and gallium specifies that the timestamp be in nanoseconds. --- src/gallium/state_trackers/clover/api/device.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/clover/api/device.cpp b/src/gallium/state_trackers/clover/api/device.cpp index 1176668..25d29f5 100644 --- a/src/gallium/state_trackers/clover/api/device.cpp +++ b/src/gallium/state_trackers/clover/api/device.cpp @@ -249,7 +249,9 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, break; case CL_DEVICE_PROFILING_TIMER_RESOLUTION: - buf.as_scalarsize_t() = 0; + // PIPE_QUERY_TIMESTAMP returns a timestamp in units of nanoseconds, + // so we default to 1 here. + buf.as_scalarsize_t() = 1; break; case CL_DEVICE_ENDIAN_LITTLE: -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] clover: Call end_query before getting timestamp result v2
v2: - Move the end_query() call into the timestamp constructor. - Still pass false as the wait parameter to get_query_result(). --- src/gallium/state_trackers/clover/core/timestamp.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/state_trackers/clover/core/timestamp.cpp b/src/gallium/state_trackers/clover/core/timestamp.cpp index 481c4f9..3fd341f 100644 --- a/src/gallium/state_trackers/clover/core/timestamp.cpp +++ b/src/gallium/state_trackers/clover/core/timestamp.cpp @@ -30,6 +30,7 @@ using namespace clover; timestamp::query::query(command_queue q) : q(q), _query(q.pipe-create_query(q.pipe, PIPE_QUERY_TIMESTAMP, 0)) { + q.pipe-end_query(q.pipe, _query); } timestamp::query::query(query other) : -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] util: Add util_memcpy_cpu_to_le()
--- src/gallium/auxiliary/util/u_math.h | 22 ++ src/gallium/drivers/radeonsi/si_shader.c | 8 +--- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index b9ed197..cd3cf04 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -812,6 +812,28 @@ util_bswap16(uint16_t n) (n 8); } +static INLINE void* +util_memcpy_cpu_to_le(void *dest, void *src, size_t n) +{ +#ifdef PIPE_ARCH_BIG_ENDIAN + size_t i, e; + for (i = 0, e = n % 8; i e; i++) { + char *d = (char*)dest; + char *s = (char*)src; + d[i] = s[e - i - 1]; + } + dest += i; + n -= i; + for (i = 0, e = n / 8; i e; i++) { + uint64_t *d = (uint64_t*)dest; + uint64_t *s = (uint64_t*)src; + d[i] = util_bswap64(s[e - i - 1]); + } + return dest; +#else + return memcpy(dest, src, n); +#endif +} /** * Clamp X to [MIN, MAX]. diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index f0650f4..6f0504b 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2559,13 +2559,7 @@ int si_compile_llvm(struct si_context *sctx, struct si_pipe_shader *shader, } ptr = (uint32_t*)sctx-b.ws-buffer_map(shader-bo-cs_buf, sctx-b.rings.gfx.cs, PIPE_TRANSFER_WRITE); - if (SI_BIG_ENDIAN) { - for (i = 0; i binary.code_size / 4; ++i) { - ptr[i] = util_cpu_to_le32((*(uint32_t*)(binary.code + i*4))); - } - } else { - memcpy(ptr, binary.code, binary.code_size); - } + util_memcpy_cpu_to_le(ptr, binary.code, binary.code_size); sctx-b.ws-buffer_unmap(shader-bo-cs_buf); free(binary.code); -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] radeonsi: Read rodata from ELF and append it to the end of shaders
This is used for programs that have arrays of constants that are accessed using dynamic indices. The shader will compute the base address of the constants and then access them using SMRD instructions. --- src/gallium/drivers/radeon/r600_pipe_common.h | 5 + src/gallium/drivers/radeon/radeon_elf_util.c | 5 + src/gallium/drivers/radeonsi/si_shader.c | 16 +--- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index d82adf5..8f1a0a5 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -108,6 +108,11 @@ struct radeon_shader_binary { unsigned char *config; unsigned config_size; + /** Constant data accessed by the shader. This will be uploaded +* into a constant buffer. */ + unsigned char *rodata; + unsigned rodata_size; + /** Set to 1 if the disassembly for this binary has been dumped to * stderr. */ int disassembled; diff --git a/src/gallium/drivers/radeon/radeon_elf_util.c b/src/gallium/drivers/radeon/radeon_elf_util.c index 7d92962..7c5f93e 100644 --- a/src/gallium/drivers/radeon/radeon_elf_util.c +++ b/src/gallium/drivers/radeon/radeon_elf_util.c @@ -80,6 +80,11 @@ void radeon_elf_read(const char *elf_data, unsigned elf_size, fprintf(stderr, \nShader Disassembly:\n\n); fprintf(stderr, %.*s\n, (int)section_data-d_size, (char *)section_data-d_buf); + } else if (!strncmp(name, .rodata, 7)) { + section_data = elf_getdata(section, section_data); + binary-rodata_size = section_data-d_size; + binary-rodata = MALLOC(binary-rodata_size * sizeof(unsigned char)); + memcpy(binary-rodata, section_data-d_buf, binary-rodata_size); } } diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 6f0504b..f07dbab 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2499,11 +2499,12 @@ int si_compile_llvm(struct si_context *sctx, 
struct si_pipe_shader *shader, { unsigned r; /* llvm_compile result */ unsigned i; - uint32_t *ptr; + unsigned char *ptr; struct radeon_shader_binary binary; bool dump = r600_can_dump_shader(sctx-screen-b, shader-selector ? shader-selector-tokens : NULL); const char * gpu_family = r600_get_llvm_processor_name(sctx-screen-b.family); + unsigned code_size; /* Use LLVM to compile shader */ memset(binary, 0, sizeof(binary)); @@ -2551,19 +2552,28 @@ int si_compile_llvm(struct si_context *sctx, struct si_pipe_shader *shader, } /* copy new shader */ + code_size = binary.code_size + binary.rodata_size; r600_resource_reference(shader-bo, NULL); shader-bo = si_resource_create_custom(sctx-b.b.screen, PIPE_USAGE_IMMUTABLE, - binary.code_size); + code_size); if (shader-bo == NULL) { return -ENOMEM; } - ptr = (uint32_t*)sctx-b.ws-buffer_map(shader-bo-cs_buf, sctx-b.rings.gfx.cs, PIPE_TRANSFER_WRITE); + ptr = sctx-b.ws-buffer_map(shader-bo-cs_buf, sctx-b.rings.gfx.cs, + PIPE_TRANSFER_WRITE); util_memcpy_cpu_to_le(ptr, binary.code, binary.code_size); + /* Copy read only data if any. */ + if (binary.rodata_size 0) { + ptr += binary.code_size; + util_memcpy_cpu_to_le(ptr, binary.rodata, binary.rodata_size); + } + sctx-b.ws-buffer_unmap(shader-bo-cs_buf); free(binary.code); free(binary.config); + free(binary.rodata); return r; } -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] radeon/llvm: Fix LLVM diagnostic error reporting
We were trying to print the error message after disposing the message object. --- src/gallium/drivers/radeon/radeon_llvm_emit.c | 11 --- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c b/src/gallium/drivers/radeon/radeon_llvm_emit.c index 6a394b2..1b17dd4 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_emit.c +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c @@ -83,16 +83,13 @@ static LLVMTargetRef get_r600_target() { #if HAVE_LLVM = 0x0305 static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context) { - unsigned int *diagnosticflag; - char *diaginfo_message; - - diaginfo_message = LLVMGetDiagInfoDescription(di); - LLVMDisposeMessage(diaginfo_message); - - diagnosticflag = (unsigned int *)context; if (LLVMGetDiagInfoSeverity(di) == LLVMDSError) { + unsigned int *diagnosticflag = (unsigned int *)context; + char *diaginfo_message = LLVMGetDiagInfoDescription(di); + *diagnosticflag = 1; fprintf(stderr,LLVM triggered Diagnostic Handler: %s\n, diaginfo_message); + LLVMDisposeMessage(diaginfo_message); } } -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 2/2] r600g/compute: Add debug information to promote and demote functions
On Fri, Jul 11, 2014 at 10:20:54AM +0200, Bruno Jiménez wrote: v2: Add information about the item's starting point and size Reviewed-by: Tom Stellard thomas.stell...@amd.com --- src/gallium/drivers/r600/compute_memory_pool.c | 9 + 1 file changed, 9 insertions(+) diff --git a/src/gallium/drivers/r600/compute_memory_pool.c b/src/gallium/drivers/r600/compute_memory_pool.c index 1d0ec85..6a525cf 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.c +++ b/src/gallium/drivers/r600/compute_memory_pool.c @@ -339,6 +339,10 @@ int compute_memory_promote_item(struct compute_memory_pool *pool, int64_t start_in_dw; int err = 0; + COMPUTE_DBG(pool-screen, * compute_memory_promote_item()\n + + Promoting Item: %i , starting at: %u (%u bytes) + size: %u (%u bytes)\n, item-id, item-start_in_dw, + item-start_in_dw * 4, item-size_in_dw, item-size_in_dw * 4); /* Search for free space in the pool for this item. */ while ((start_in_dw=compute_memory_prealloc_chunk(pool, @@ -409,6 +413,11 @@ void compute_memory_demote_item(struct compute_memory_pool *pool, struct pipe_resource *dst; struct pipe_box box; + COMPUTE_DBG(pool-screen, * compute_memory_demote_item()\n + + Demoting Item: %i, starting at: %u (%u bytes) + size: %u (%u bytes)\n, item-id, item-start_in_dw, + item-start_in_dw * 4, item-size_in_dw, item-size_in_dw * 4); + /* First, we remove the item from the item_list */ list_del(item-link); -- 2.0.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] radeonsi: switch descriptors to i32 vectors
On Fri, Jul 11, 2014 at 01:00:34AM +0200, Marek Olšák wrote: I have just tested it and it works with LLVM 3.4.2. Ok, thanks. Both patches are: Reviewed-by: Tom Stellard thomas.stell...@amd.com Marek On Thu, Jul 10, 2014 at 5:11 PM, Tom Stellard t...@stellard.net wrote: On Tue, Jul 08, 2014 at 01:37:02AM +0200, Marek Olšák wrote: From: Marek Olšák marek.ol...@amd.com This is a follow-up to the commit which adds texture fetches with offsets. Will this still work with LLVM 3.4.2 ? -Tom --- src/gallium/drivers/radeonsi/si_shader.c | 29 - 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 3dd6ad1..a28d682 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1574,7 +1574,7 @@ static void tex_fetch_args( LLVMTypeRef i8 = LLVMInt8TypeInContext(gallivm-context); LLVMTypeRef v16i8 = LLVMVectorType(i8, 16); - /* Truncate v32i8 to v16i8. */ + /* Bitcast and truncate v8i32 to v16i8. 
*/ LLVMValueRef res = si_shader_ctx-resources[sampler_index]; res = LLVMBuildBitCast(gallivm-builder, res, v2i128, ); res = LLVMBuildExtractElement(gallivm-builder, res, bld_base-uint_bld.zero, ); @@ -2305,12 +2305,18 @@ static void create_meta_data(struct si_shader_context *si_shader_ctx) si_shader_ctx-const_md = LLVMMDNodeInContext(gallivm-context, args, 3); } +static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements) +{ + return LLVMPointerType(LLVMArrayType(elem_type, num_elements), +CONST_ADDR_SPACE); +} + static void create_function(struct si_shader_context *si_shader_ctx) { struct lp_build_tgsi_context *bld_base = si_shader_ctx-radeon_bld.soa.bld_base; struct gallivm_state *gallivm = bld_base-base.gallivm; struct si_pipe_shader *shader = si_shader_ctx-shader; - LLVMTypeRef params[SI_NUM_PARAMS], f32, i8, i32, v2i32, v3i32; + LLVMTypeRef params[SI_NUM_PARAMS], f32, i8, i32, v2i32, v3i32, v16i8, v4i32, v8i32; unsigned i, last_sgpr, num_params; i8 = LLVMInt8TypeInContext(gallivm-context); @@ -2318,21 +2324,18 @@ static void create_function(struct si_shader_context *si_shader_ctx) f32 = LLVMFloatTypeInContext(gallivm-context); v2i32 = LLVMVectorType(i32, 2); v3i32 = LLVMVectorType(i32, 3); + v4i32 = LLVMVectorType(i32, 4); + v8i32 = LLVMVectorType(i32, 8); + v16i8 = LLVMVectorType(i8, 16); - params[SI_PARAM_CONST] = LLVMPointerType( - LLVMArrayType(LLVMVectorType(i8, 16), NUM_CONST_BUFFERS), CONST_ADDR_SPACE); - params[SI_PARAM_RW_BUFFERS] = params[SI_PARAM_CONST]; - - /* We assume at most 16 textures per program at the moment. 
- * This need probably need to be changed to support bindless textures */ - params[SI_PARAM_SAMPLER] = LLVMPointerType( - LLVMArrayType(LLVMVectorType(i8, 16), NUM_SAMPLER_STATES), CONST_ADDR_SPACE); - params[SI_PARAM_RESOURCE] = LLVMPointerType( - LLVMArrayType(LLVMVectorType(i8, 32), NUM_SAMPLER_VIEWS), CONST_ADDR_SPACE); + params[SI_PARAM_CONST] = const_array(v16i8, NUM_CONST_BUFFERS); + params[SI_PARAM_RW_BUFFERS] = const_array(v16i8, 6); /* XXX hardcoded */ + params[SI_PARAM_SAMPLER] = const_array(v4i32, NUM_SAMPLER_STATES); + params[SI_PARAM_RESOURCE] = const_array(v8i32, NUM_SAMPLER_VIEWS); switch (si_shader_ctx-type) { case TGSI_PROCESSOR_VERTEX: - params[SI_PARAM_VERTEX_BUFFER] = params[SI_PARAM_CONST]; + params[SI_PARAM_VERTEX_BUFFER] = const_array(v16i8, 16); /* XXX hardcoded */ params[SI_PARAM_START_INSTANCE] = i32; num_params = SI_PARAM_START_INSTANCE+1; if (shader-key.vs.as_es) { -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/5] configure.ac: require LLVM 3.4.2 for radeon
On Tue, Jul 08, 2014 at 03:37:02AM +0200, Marek Olšák wrote: From: Marek Olšák marek.ol...@amd.com Needed by ARB_draw_indirect. I think we should come up with a rule for how long we should support older versions of LLVM. Do you have any thoughts about this? I was thinking we could have each Mesa release support current stable LLVM and also the development version from SVN. -Tom --- configure.ac | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 4646212..9d5cd89 100644 --- a/configure.ac +++ b/configure.ac @@ -1888,8 +1888,9 @@ radeon_llvm_check() { AC_MSG_ERROR([--enable-gallium-llvm is required when building $1]) fi LLVM_REQUIRED_VERSION_MAJOR=3 -LLVM_REQUIRED_VERSION_MINOR=3 -if test $LLVM_VERSION_INT -lt ${LLVM_REQUIRED_VERSION_MAJOR}0${LLVM_REQUIRED_VERSION_MINOR}; then +LLVM_REQUIRED_VERSION_MINOR=4 +LLVM_REQUIRED_VERSION_PATCH=2 +if test ${LLVM_VERSION_INT}${LLVM_VERSION_PATCH} -lt ${LLVM_REQUIRED_VERSION_MAJOR}0${LLVM_REQUIRED_VERSION_MINOR}${LLVM_REQUIRED_VERSION_PATCH}; then AC_MSG_ERROR([LLVM $LLVM_REQUIRED_VERSION_MAJOR.$LLVM_REQUIRED_VERSION_MINOR or newer is required for $1]) fi if test true $LLVM_CONFIG --targets-built | grep -qvw 'R600' ; then -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] radeonsi: switch descriptors to i32 vectors
On Tue, Jul 08, 2014 at 01:37:02AM +0200, Marek Olšák wrote: From: Marek Olšák marek.ol...@amd.com This is a follow-up to the commit which adds texture fetches with offsets. Will this still work with LLVM 3.4.2 ? -Tom --- src/gallium/drivers/radeonsi/si_shader.c | 29 - 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 3dd6ad1..a28d682 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1574,7 +1574,7 @@ static void tex_fetch_args( LLVMTypeRef i8 = LLVMInt8TypeInContext(gallivm-context); LLVMTypeRef v16i8 = LLVMVectorType(i8, 16); - /* Truncate v32i8 to v16i8. */ + /* Bitcast and truncate v8i32 to v16i8. */ LLVMValueRef res = si_shader_ctx-resources[sampler_index]; res = LLVMBuildBitCast(gallivm-builder, res, v2i128, ); res = LLVMBuildExtractElement(gallivm-builder, res, bld_base-uint_bld.zero, ); @@ -2305,12 +2305,18 @@ static void create_meta_data(struct si_shader_context *si_shader_ctx) si_shader_ctx-const_md = LLVMMDNodeInContext(gallivm-context, args, 3); } +static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements) +{ + return LLVMPointerType(LLVMArrayType(elem_type, num_elements), +CONST_ADDR_SPACE); +} + static void create_function(struct si_shader_context *si_shader_ctx) { struct lp_build_tgsi_context *bld_base = si_shader_ctx-radeon_bld.soa.bld_base; struct gallivm_state *gallivm = bld_base-base.gallivm; struct si_pipe_shader *shader = si_shader_ctx-shader; - LLVMTypeRef params[SI_NUM_PARAMS], f32, i8, i32, v2i32, v3i32; + LLVMTypeRef params[SI_NUM_PARAMS], f32, i8, i32, v2i32, v3i32, v16i8, v4i32, v8i32; unsigned i, last_sgpr, num_params; i8 = LLVMInt8TypeInContext(gallivm-context); @@ -2318,21 +2324,18 @@ static void create_function(struct si_shader_context *si_shader_ctx) f32 = LLVMFloatTypeInContext(gallivm-context); v2i32 = LLVMVectorType(i32, 2); v3i32 = LLVMVectorType(i32, 3); + 
v4i32 = LLVMVectorType(i32, 4); + v8i32 = LLVMVectorType(i32, 8); + v16i8 = LLVMVectorType(i8, 16); - params[SI_PARAM_CONST] = LLVMPointerType( - LLVMArrayType(LLVMVectorType(i8, 16), NUM_CONST_BUFFERS), CONST_ADDR_SPACE); - params[SI_PARAM_RW_BUFFERS] = params[SI_PARAM_CONST]; - - /* We assume at most 16 textures per program at the moment. - * This need probably need to be changed to support bindless textures */ - params[SI_PARAM_SAMPLER] = LLVMPointerType( - LLVMArrayType(LLVMVectorType(i8, 16), NUM_SAMPLER_STATES), CONST_ADDR_SPACE); - params[SI_PARAM_RESOURCE] = LLVMPointerType( - LLVMArrayType(LLVMVectorType(i8, 32), NUM_SAMPLER_VIEWS), CONST_ADDR_SPACE); + params[SI_PARAM_CONST] = const_array(v16i8, NUM_CONST_BUFFERS); + params[SI_PARAM_RW_BUFFERS] = const_array(v16i8, 6); /* XXX hardcoded */ + params[SI_PARAM_SAMPLER] = const_array(v4i32, NUM_SAMPLER_STATES); + params[SI_PARAM_RESOURCE] = const_array(v8i32, NUM_SAMPLER_VIEWS); switch (si_shader_ctx-type) { case TGSI_PROCESSOR_VERTEX: - params[SI_PARAM_VERTEX_BUFFER] = params[SI_PARAM_CONST]; + params[SI_PARAM_VERTEX_BUFFER] = const_array(v16i8, 16); /* XXX hardcoded */ params[SI_PARAM_START_INSTANCE] = i32; num_params = SI_PARAM_START_INSTANCE+1; if (shader-key.vs.as_es) { -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] radeonsi: rename definitions of shader limits
On Tue, Jul 08, 2014 at 01:37:03AM +0200, Marek Olšák wrote: From: Marek Olšák marek.ol...@amd.com Reviewed-by: Tom Stellard thomas.stell...@amd.com --- src/gallium/drivers/radeonsi/si_blit.c| 2 +- src/gallium/drivers/radeonsi/si_descriptors.c | 12 +- src/gallium/drivers/radeonsi/si_pipe.c| 6 ++--- src/gallium/drivers/radeonsi/si_pipe.h| 4 +--- src/gallium/drivers/radeonsi/si_shader.c | 34 +-- src/gallium/drivers/radeonsi/si_state.c | 12 +- src/gallium/drivers/radeonsi/si_state.h | 31 +--- 7 files changed, 57 insertions(+), 44 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 8c3e136..072024a 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -76,7 +76,7 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op) util_blitter_save_fragment_sampler_views(sctx-blitter, util_last_bit(sctx-samplers[PIPE_SHADER_FRAGMENT].views.desc.enabled_mask - ((1 NUM_TEX_UNITS) - 1)), + ((1 SI_NUM_USER_SAMPLERS) - 1)), sctx-samplers[PIPE_SHADER_FRAGMENT].views.views); } diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 38ad077..6ae9b82 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -289,7 +289,7 @@ static void si_init_sampler_views(struct si_context *sctx, si_init_descriptors(sctx, views-desc, si_get_shader_user_data_base(shader) + SI_SGPR_RESOURCE * 4, - 8, NUM_SAMPLER_VIEWS, si_emit_sampler_views); + 8, SI_NUM_SAMPLER_VIEWS, si_emit_sampler_views); } static void si_release_sampler_views(struct si_sampler_views *views) @@ -643,7 +643,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx, /* Set the shader resources.*/ for (i = 0; i num_targets; i++) { - bufidx = SI_RW_SO + i; + bufidx = SI_SO_BUF_OFFSET + i; if (targets[i]) { struct pipe_resource *buffer = targets[i]-buffer; @@ -677,7 +677,7 @@ static void 
si_set_streamout_targets(struct pipe_context *ctx, buffers-desc.dirty_mask |= 1 bufidx; } for (; i old_num_targets; i++) { - bufidx = SI_RW_SO + i; + bufidx = SI_SO_BUF_OFFSET + i; /* Clear the descriptor and unset the resource. */ memset(buffers-desc_data[bufidx], 0, sizeof(uint32_t) * 4); pipe_resource_reference(buffers-buffers[bufidx], NULL); @@ -755,7 +755,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource buffers-desc.dirty_mask |= 1 i; found = true; - if (i = SI_RW_SO shader == PIPE_SHADER_VERTEX) { + if (i = SI_SO_BUF_OFFSET shader == PIPE_SHADER_VERTEX) { /* Update the streamout state. */ if (sctx-b.streamout.begin_emitted) { r600_emit_streamout_end(sctx-b); @@ -977,11 +977,11 @@ void si_init_all_descriptors(struct si_context *sctx) for (i = 0; i SI_NUM_SHADERS; i++) { si_init_buffer_resources(sctx, sctx-const_buffers[i], - NUM_CONST_BUFFERS, i, SI_SGPR_CONST, + SI_NUM_CONST_BUFFERS, i, SI_SGPR_CONST, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO); si_init_buffer_resources(sctx, sctx-rw_buffers[i], i == PIPE_SHADER_VERTEX ? - SI_RW_SO + 4 : SI_RW_SO, + SI_NUM_RW_BUFFERS : SI_NUM_RING_BUFFERS, i, SI_SGPR_RW_BUFFERS, RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 184235d..0f99e44 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -146,7 +146,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void * sctx-null_const_buf.buffer_size = sctx-null_const_buf.buffer-width0; for (shader = 0; shader SI_NUM_SHADERS; shader++) { - for (i = 0; i NUM_CONST_BUFFERS; i
Re: [Mesa-dev] [PATCH] r600g/compute: Try to use a temporary resource when growing the pool
On Mon, Jul 07, 2014 at 05:50:05PM +0200, Bruno Jiménez wrote: Now, before moving everything to host memory, we try to create a new resource to use as a pool. If we succeed we just use this resource and delete the previous one. If we fail we fall back to using the shadow. This should make growing the pool faster, and we can also save 64KB of memory that were allocated for the 'shadow', even if they weren't used. Reviewed-by: Tom Stellard thomas.stell...@amd.com --- src/gallium/drivers/r600/compute_memory_pool.c | 61 ++ 1 file changed, 43 insertions(+), 18 deletions(-) diff --git a/src/gallium/drivers/r600/compute_memory_pool.c b/src/gallium/drivers/r600/compute_memory_pool.c index fe19d9e..db6d937 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.c +++ b/src/gallium/drivers/r600/compute_memory_pool.c @@ -73,10 +73,6 @@ static void compute_memory_pool_init(struct compute_memory_pool * pool, COMPUTE_DBG(pool-screen, * compute_memory_pool_init() initial_size_in_dw = %ld\n, initial_size_in_dw); - pool-shadow = (uint32_t*)CALLOC(initial_size_in_dw, 4); - if (pool-shadow == NULL) - return; - pool-size_in_dw = initial_size_in_dw; pool-bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(pool-screen, pool-size_in_dw * 4); @@ -184,27 +180,56 @@ int compute_memory_grow_pool(struct compute_memory_pool* pool, if (!pool-bo) { compute_memory_pool_init(pool, MAX2(new_size_in_dw, 1024 * 16)); - if (pool-shadow == NULL) - return -1; } else { + struct r600_resource *temp = NULL; + new_size_in_dw = align(new_size_in_dw, ITEM_ALIGNMENT); COMPUTE_DBG(pool-screen, Aligned size = %d (%d bytes)\n, new_size_in_dw, new_size_in_dw * 4); - compute_memory_shadow(pool, pipe, 1); - pool-shadow = realloc(pool-shadow, new_size_in_dw*4); - if (pool-shadow == NULL) - return -1; + temp = (struct r600_resource *)r600_compute_buffer_alloc_vram( + pool-screen, new_size_in_dw * 4); - pool-size_in_dw = new_size_in_dw; - pool-screen-b.b.resource_destroy( - (struct pipe_screen *)pool-screen, - 
(struct pipe_resource *)pool-bo); - pool-bo = (struct r600_resource*)r600_compute_buffer_alloc_vram( - pool-screen, - pool-size_in_dw * 4); - compute_memory_shadow(pool, pipe, 0); + if (temp != NULL) { + struct r600_context *rctx = (struct r600_context *)pipe; + struct pipe_resource *src = (struct pipe_resource *)pool-bo; + struct pipe_resource *dst = (struct pipe_resource *)temp; + struct pipe_box box; + + COMPUTE_DBG(pool-screen, Growing the pool using a temporary resource\n); + + u_box_1d(0, pool-size_in_dw * 4, box); + + rctx-b.b.resource_copy_region(pipe, + dst, 0, 0, 0 ,0, + src, 0, box); + + pool-screen-b.b.resource_destroy( + (struct pipe_screen *)pool-screen, + src); + + pool-bo = temp; + pool-size_in_dw = new_size_in_dw; + } + else { + COMPUTE_DBG(pool-screen, The creation of the temporary resource failed\n + Falling back to using 'shadow'\n); + + compute_memory_shadow(pool, pipe, 1); + pool-shadow = realloc(pool-shadow, new_size_in_dw * 4); + if (pool-shadow == NULL) + return -1; + + pool-size_in_dw = new_size_in_dw; + pool-screen-b.b.resource_destroy( + (struct pipe_screen *)pool-screen, + (struct pipe_resource *)pool-bo); + pool-bo = (struct r600_resource*)r600_compute_buffer_alloc_vram( + pool-screen, + pool-size_in_dw * 4); + compute_memory_shadow(pool, pipe, 0); + } } return 0; -- 2.0.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] What are some good beginner's tasks for Mesa?
On Thu, Jul 03, 2014 at 10:33:41PM -0500, Darius Goad wrote: Hello. I'm trying to get my feet wet with Mesa, and I was wondering what some good tasks for me would be. Thanks again. What hardware do you have? What are you interested in working on? -Tom - Darius Goad ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/5] clover: Enable cl_khr_fp64 for devices that support doubles v2
On Fri, Jul 04, 2014 at 12:28:05PM +0200, Francisco Jerez wrote: Tom Stellard t...@stellard.net writes: On Fri, Jul 04, 2014 at 12:28:20AM +0200, Francisco Jerez wrote: Tom Stellard t...@stellard.net writes: On Thu, Jul 03, 2014 at 01:12:07AM +0200, Francisco Jerez wrote: Tom Stellard t...@stellard.net writes: On Thu, Jun 26, 2014 at 04:15:39PM +0200, Francisco Jerez wrote: Tom Stellard thomas.stell...@amd.com writes: v2: - Report correct values for CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE and CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE. - Only define cl_khr_fp64 if the extension is supported. - Remove trailing space from extension string. - Rename device query function from cl_khr_fp86() to has_doubles(). --- src/gallium/state_trackers/clover/api/device.cpp | 6 +++--- src/gallium/state_trackers/clover/core/device.cpp | 6 ++ src/gallium/state_trackers/clover/core/device.hpp | 1 + src/gallium/state_trackers/clover/core/program.cpp| 5 - src/gallium/state_trackers/clover/llvm/invocation.cpp | 1 - 5 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/gallium/state_trackers/clover/api/device.cpp b/src/gallium/state_trackers/clover/api/device.cpp index 7006702..1176668 100644 --- a/src/gallium/state_trackers/clover/api/device.cpp +++ b/src/gallium/state_trackers/clover/api/device.cpp @@ -145,7 +145,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: - buf.as_scalarcl_uint() = 2; + buf.as_scalarcl_uint() = dev.has_doubles() ? 2 : 0; break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF: @@ -290,7 +290,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, break; case CL_DEVICE_EXTENSIONS: - buf.as_string() = ; + buf.as_string() = dev.has_doubles() ? cl_khr_fp64 : ; break; case CL_DEVICE_PLATFORM: @@ -322,7 +322,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE: - buf.as_scalarcl_uint() = 2; + buf.as_scalarcl_uint() = dev.has_doubles() ? 
2 : 0; break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF: diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp index bc6b761..6bf33e0 100644 --- a/src/gallium/state_trackers/clover/core/device.cpp +++ b/src/gallium/state_trackers/clover/core/device.cpp @@ -193,6 +193,12 @@ device::half_fp_config() const { return CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST; } +bool +device::has_doubles() const { + return pipe-get_shader_param(pipe, PIPE_SHADER_COMPUTE, + PIPE_SHADER_CAP_DOUBLES); +} + std::vectorsize_t device::max_block_size() const { auto v = get_compute_paramuint64_t(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE); diff --git a/src/gallium/state_trackers/clover/core/device.hpp b/src/gallium/state_trackers/clover/core/device.hpp index 16831ab..025c648 100644 --- a/src/gallium/state_trackers/clover/core/device.hpp +++ b/src/gallium/state_trackers/clover/core/device.hpp @@ -66,6 +66,7 @@ namespace clover { cl_device_fp_config single_fp_config() const; cl_device_fp_config double_fp_config() const; cl_device_fp_config half_fp_config() const; + bool has_doubles() const; std::vectorsize_t max_block_size() const; std::string device_name() const; diff --git a/src/gallium/state_trackers/clover/core/program.cpp b/src/gallium/state_trackers/clover/core/program.cpp index e09c3aa..f65f321 100644 --- a/src/gallium/state_trackers/clover/core/program.cpp +++ b/src/gallium/state_trackers/clover/core/program.cpp @@ -95,7 +95,10 @@ program::build_status(const device dev) const { std::string program::build_opts(const device dev) const { - return _opts.count(dev) ? _opts.find(dev)-second : ; + std::string opts = _opts.count(dev) ? _opts.find(dev)-second : ; + if (dev.has_doubles()) + opts.append( -Dcl_khr_fp64); + return opts; This define belongs in the target-specific part of libclc. 
With this hunk removed this patch is: The declarations for double functions in the libclc headers are wrapped in this macro, so we need to set it here in order to be able to use them from clover. This abuses the ::build_opts() accessor to that end, which is only supposed to return the compiler
Re: [Mesa-dev] [PATCH 5/5] clover: Enable cl_khr_fp64 for devices that support doubles v2
On Fri, Jul 04, 2014 at 05:25:42PM +0200, Francisco Jerez wrote: Tom Stellard t...@stellard.net writes: On Fri, Jul 04, 2014 at 12:28:05PM +0200, Francisco Jerez wrote: Tom Stellard t...@stellard.net writes: On Fri, Jul 04, 2014 at 12:28:20AM +0200, Francisco Jerez wrote: Tom Stellard t...@stellard.net writes: On Thu, Jul 03, 2014 at 01:12:07AM +0200, Francisco Jerez wrote: Tom Stellard t...@stellard.net writes: On Thu, Jun 26, 2014 at 04:15:39PM +0200, Francisco Jerez wrote: Tom Stellard thomas.stell...@amd.com writes: v2: - Report correct values for CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE and CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE. - Only define cl_khr_fp64 if the extension is supported. - Remove trailing space from extension string. - Rename device query function from cl_khr_fp86() to has_doubles(). --- src/gallium/state_trackers/clover/api/device.cpp | 6 +++--- src/gallium/state_trackers/clover/core/device.cpp | 6 ++ src/gallium/state_trackers/clover/core/device.hpp | 1 + src/gallium/state_trackers/clover/core/program.cpp| 5 - src/gallium/state_trackers/clover/llvm/invocation.cpp | 1 - 5 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/gallium/state_trackers/clover/api/device.cpp b/src/gallium/state_trackers/clover/api/device.cpp index 7006702..1176668 100644 --- a/src/gallium/state_trackers/clover/api/device.cpp +++ b/src/gallium/state_trackers/clover/api/device.cpp @@ -145,7 +145,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: - buf.as_scalarcl_uint() = 2; + buf.as_scalarcl_uint() = dev.has_doubles() ? 2 : 0; break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF: @@ -290,7 +290,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, break; case CL_DEVICE_EXTENSIONS: - buf.as_string() = ; + buf.as_string() = dev.has_doubles() ? 
cl_khr_fp64 : ; break; case CL_DEVICE_PLATFORM: @@ -322,7 +322,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE: - buf.as_scalarcl_uint() = 2; + buf.as_scalarcl_uint() = dev.has_doubles() ? 2 : 0; break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF: diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp index bc6b761..6bf33e0 100644 --- a/src/gallium/state_trackers/clover/core/device.cpp +++ b/src/gallium/state_trackers/clover/core/device.cpp @@ -193,6 +193,12 @@ device::half_fp_config() const { return CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST; } +bool +device::has_doubles() const { + return pipe-get_shader_param(pipe, PIPE_SHADER_COMPUTE, + PIPE_SHADER_CAP_DOUBLES); +} + std::vectorsize_t device::max_block_size() const { auto v = get_compute_paramuint64_t(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE); diff --git a/src/gallium/state_trackers/clover/core/device.hpp b/src/gallium/state_trackers/clover/core/device.hpp index 16831ab..025c648 100644 --- a/src/gallium/state_trackers/clover/core/device.hpp +++ b/src/gallium/state_trackers/clover/core/device.hpp @@ -66,6 +66,7 @@ namespace clover { cl_device_fp_config single_fp_config() const; cl_device_fp_config double_fp_config() const; cl_device_fp_config half_fp_config() const; + bool has_doubles() const; std::vectorsize_t max_block_size() const; std::string device_name() const; diff --git a/src/gallium/state_trackers/clover/core/program.cpp b/src/gallium/state_trackers/clover/core/program.cpp index e09c3aa..f65f321 100644 --- a/src/gallium/state_trackers/clover/core/program.cpp +++ b/src/gallium/state_trackers/clover/core/program.cpp @@ -95,7 +95,10 @@ program::build_status(const device dev) const { std::string program::build_opts(const device dev) const { - return _opts.count(dev) ? _opts.find(dev)-second : ; + std::string opts = _opts.count(dev) ? 
_opts.find(dev)-second : ; + if (dev.has_doubles()) + opts.append( -Dcl_khr_fp64); + return opts; This define belongs in the target-specific part of libclc. With this hunk removed this patch is: The declarations for double
Re: [Mesa-dev] [PATCH 1/2] R600/SI: fix shadow mapping for 1D and 2D array textures
On Thu, Jul 03, 2014 at 06:26:04PM +0200, Marek Olšák wrote: From: Marek Olšák marek.ol...@amd.com Reviewed-by: Tom Stellard thomas.stell...@amd.com It was conflicting with def TEX_SHADOW_ARRAY, which also handles them. --- lib/Target/R600/R600Instructions.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 73fa345..704507d 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -216,7 +216,7 @@ class R600_REDUCTION bits11 inst, dag ins, string asm, listdag pattern, def TEX_SHADOW : PatLeaf (imm), [{uint32_t TType = (uint32_t)N-getZExtValue(); -return (TType = 6 TType = 8) || (TType = 11 TType = 13); +return (TType = 6 TType = 8) || TType == 13; }] ; -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/5] clover: Enable cl_khr_fp64 for devices that support doubles v2
On Thu, Jul 03, 2014 at 01:12:07AM +0200, Francisco Jerez wrote: Tom Stellard t...@stellard.net writes: On Thu, Jun 26, 2014 at 04:15:39PM +0200, Francisco Jerez wrote: Tom Stellard thomas.stell...@amd.com writes: v2: - Report correct values for CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE and CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE. - Only define cl_khr_fp64 if the extension is supported. - Remove trailing space from extension string. - Rename device query function from cl_khr_fp86() to has_doubles(). --- src/gallium/state_trackers/clover/api/device.cpp | 6 +++--- src/gallium/state_trackers/clover/core/device.cpp | 6 ++ src/gallium/state_trackers/clover/core/device.hpp | 1 + src/gallium/state_trackers/clover/core/program.cpp| 5 - src/gallium/state_trackers/clover/llvm/invocation.cpp | 1 - 5 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/gallium/state_trackers/clover/api/device.cpp b/src/gallium/state_trackers/clover/api/device.cpp index 7006702..1176668 100644 --- a/src/gallium/state_trackers/clover/api/device.cpp +++ b/src/gallium/state_trackers/clover/api/device.cpp @@ -145,7 +145,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: - buf.as_scalarcl_uint() = 2; + buf.as_scalarcl_uint() = dev.has_doubles() ? 2 : 0; break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF: @@ -290,7 +290,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, break; case CL_DEVICE_EXTENSIONS: - buf.as_string() = ; + buf.as_string() = dev.has_doubles() ? cl_khr_fp64 : ; break; case CL_DEVICE_PLATFORM: @@ -322,7 +322,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE: - buf.as_scalarcl_uint() = 2; + buf.as_scalarcl_uint() = dev.has_doubles() ? 
2 : 0; break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF: diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp index bc6b761..6bf33e0 100644 --- a/src/gallium/state_trackers/clover/core/device.cpp +++ b/src/gallium/state_trackers/clover/core/device.cpp @@ -193,6 +193,12 @@ device::half_fp_config() const { return CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST; } +bool +device::has_doubles() const { + return pipe-get_shader_param(pipe, PIPE_SHADER_COMPUTE, + PIPE_SHADER_CAP_DOUBLES); +} + std::vectorsize_t device::max_block_size() const { auto v = get_compute_paramuint64_t(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE); diff --git a/src/gallium/state_trackers/clover/core/device.hpp b/src/gallium/state_trackers/clover/core/device.hpp index 16831ab..025c648 100644 --- a/src/gallium/state_trackers/clover/core/device.hpp +++ b/src/gallium/state_trackers/clover/core/device.hpp @@ -66,6 +66,7 @@ namespace clover { cl_device_fp_config single_fp_config() const; cl_device_fp_config double_fp_config() const; cl_device_fp_config half_fp_config() const; + bool has_doubles() const; std::vectorsize_t max_block_size() const; std::string device_name() const; diff --git a/src/gallium/state_trackers/clover/core/program.cpp b/src/gallium/state_trackers/clover/core/program.cpp index e09c3aa..f65f321 100644 --- a/src/gallium/state_trackers/clover/core/program.cpp +++ b/src/gallium/state_trackers/clover/core/program.cpp @@ -95,7 +95,10 @@ program::build_status(const device dev) const { std::string program::build_opts(const device dev) const { - return _opts.count(dev) ? _opts.find(dev)-second : ; + std::string opts = _opts.count(dev) ? _opts.find(dev)-second : ; + if (dev.has_doubles()) + opts.append( -Dcl_khr_fp64); + return opts; This define belongs in the target-specific part of libclc. 
With this hunk removed this patch is: The declarations for double functions in the libclc headers are wrapped in this macro, so we need to set it here in order to be able to use them from clover. This abuses the ::build_opts() accessor to that end, which is only supposed to return the compiler options that were specified by the user at build time, as required by the CL_PROGRAM_BUILD_OPTIONS build param. You are right, I can fix that. IMO preprocessor macros defined by the spec belong in the standard library. We probably need a specialization of libclc's header files for each triple (I hadn't noticed you didn't have one already -- it will probably be useful for other reasons too), as you have target-specific specializations of the LLVM bitcode
Re: [Mesa-dev] [PATCH] radeon/llvm: Allocate space for kernel metadata operands
On Wed, Jul 02, 2014 at 04:34:24PM -0500, Aaron Watry wrote: Previously, we were assuming that kernel metadata nodes only had 1 operand. Kernels which have attributes can have more than 1, e.g.: !0 = metadata !{void (i32 addrspace(1)*)* @testKernel, metadata !1} !1 = metadata !{metadata !work_group_size_hint, i32 4, i32 1, i32 1} Attempting to get the kernel without the correct number of attributes led to memory corruption and luxrays crashing out. Fixes the cl/program/execute/attributes.cl piglit test. Thanks for tracking this down. Reviewed-by: Tom Stellard thomas.stell...@amd.com Signed-off-by: Aaron Watry awa...@gmail.com CC: Tom Stellard thomas.stell...@amd.com Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76223 --- src/gallium/drivers/radeon/radeon_llvm_util.c | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/radeon/radeon_llvm_util.c b/src/gallium/drivers/radeon/radeon_llvm_util.c index 2ace91f..ec11559 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_util.c +++ b/src/gallium/drivers/radeon/radeon_llvm_util.c @@ -100,13 +100,17 @@ LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index, kernel_metadata = MALLOC(num_kernels * sizeof(LLVMValueRef)); LLVMGetNamedMetadataOperands(mod, opencl.kernels, kernel_metadata); for (i = 0; i num_kernels; i++) { - LLVMValueRef kernel_signature, kernel_function; + LLVMValueRef kernel_signature, *kernel_function; + unsigned num_kernel_md_operands; if (i == index) { continue; } kernel_signature = kernel_metadata[i]; - LLVMGetMDNodeOperands(kernel_signature, kernel_function); - LLVMDeleteFunction(kernel_function); + num_kernel_md_operands = LLVMGetMDNodeNumOperands(kernel_signature); + kernel_function = MALLOC(num_kernel_md_operands * sizeof (LLVMValueRef)); + LLVMGetMDNodeOperands(kernel_signature, kernel_function); + LLVMDeleteFunction(*kernel_function); + FREE(kernel_function); } FREE(kernel_metadata); radeon_llvm_optimize(mod); -- 1.9.1 ___ 
mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] radeon/llvm: Allocate space for kernel metadata operands
On Thu, Jul 03, 2014 at 10:56:24AM -0400, Tom Stellard wrote: On Wed, Jul 02, 2014 at 04:34:24PM -0500, Aaron Watry wrote: Previously, we were assuming that kernel metadata nodes only had 1 operand. Kernels which have attributes can have more than 1, e.g.: !0 = metadata !{void (i32 addrspace(1)*)* @testKernel, metadata !1} !1 = metadata !{metadata !work_group_size_hint, i32 4, i32 1, i32 1} Attempting to get the kernel without the correct number of attributes led to memory corruption and luxrays crashing out. Fixes the cl/program/execute/attributes.cl piglit test. Thanks for tracking this down. Reviewed-by: Tom Stellard thomas.stell...@amd.com I forgot to mention we should CC stable on this patch. Signed-off-by: Aaron Watry awa...@gmail.com CC: Tom Stellard thomas.stell...@amd.com Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76223 --- src/gallium/drivers/radeon/radeon_llvm_util.c | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/radeon/radeon_llvm_util.c b/src/gallium/drivers/radeon/radeon_llvm_util.c index 2ace91f..ec11559 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_util.c +++ b/src/gallium/drivers/radeon/radeon_llvm_util.c @@ -100,13 +100,17 @@ LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index, kernel_metadata = MALLOC(num_kernels * sizeof(LLVMValueRef)); LLVMGetNamedMetadataOperands(mod, opencl.kernels, kernel_metadata); for (i = 0; i num_kernels; i++) { - LLVMValueRef kernel_signature, kernel_function; + LLVMValueRef kernel_signature, *kernel_function; + unsigned num_kernel_md_operands; if (i == index) { continue; } kernel_signature = kernel_metadata[i]; - LLVMGetMDNodeOperands(kernel_signature, kernel_function); - LLVMDeleteFunction(kernel_function); + num_kernel_md_operands = LLVMGetMDNodeNumOperands(kernel_signature); + kernel_function = MALLOC(num_kernel_md_operands * sizeof (LLVMValueRef)); + LLVMGetMDNodeOperands(kernel_signature, kernel_function); + 
LLVMDeleteFunction(*kernel_function); + FREE(kernel_function); } FREE(kernel_metadata); radeon_llvm_optimize(mod); -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] radeon/llvm: Allocate space for kernel metadata operands
On Thu, Jul 03, 2014 at 11:59:00AM -0400, Alex Deucher wrote: On Thu, Jul 3, 2014 at 11:46 AM, Aaron Watry awa...@gmail.com wrote: On Thu, Jul 3, 2014 at 9:56 AM, Tom Stellard t...@stellard.net wrote: On Wed, Jul 02, 2014 at 04:34:24PM -0500, Aaron Watry wrote: Previously, we were assuming that kernel metadata nodes only had 1 operand. Kernels which have attributes can have more than 1, e.g.: !0 = metadata !{void (i32 addrspace(1)*)* @testKernel, metadata !1} !1 = metadata !{metadata !work_group_size_hint, i32 4, i32 1, i32 1} Attempting to get the kernel without the correct number of attributes led to memory corruption and luxrays crashing out. Fixes the cl/program/execute/attributes.cl piglit test. Thanks for tracking this down. no problem. It was driving me nuts. I've now got the luxmark kernels building successfully on evergreen (followed by a machine hang and loss of signal to the monitor, but that could be the kernel or the fact that CEDAR seems extra crashy compared to my other EG/NI cards) Someone mentioned stability issues with cedar with the golden register kernel patch. Can you see if skipping the golden register setup helps? If so can you narrow down which registers are problematic? Another possibility is that we aren't correctly implementing the workaround for the control flow stack hw bug on Cedar. Since it has a different wavefront size than other GPUs the bug is not handled the same way. You could try using FeatureWavefrontSize64 for cedar in Processors.td, which would force the backend to use the same work-around on cedar as it does for other GPUs. -Tom Alex and I'm getting an instruction selection error on radeonsi. Haven't managed to track that down yet, but at least it means that all required built-ins/defines for luxrays are now present (at least with my own libclc tree), at least with image support disabled in luxrays. 
If you enable image support, I believe that it is still going to fail due to mismatches/oddness with the number of supported pixel formats. --Aaron Reviewed-by: Tom Stellard thomas.stell...@amd.com Signed-off-by: Aaron Watry awa...@gmail.com CC: Tom Stellard thomas.stell...@amd.com Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76223 --- src/gallium/drivers/radeon/radeon_llvm_util.c | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/radeon/radeon_llvm_util.c b/src/gallium/drivers/radeon/radeon_llvm_util.c index 2ace91f..ec11559 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_util.c +++ b/src/gallium/drivers/radeon/radeon_llvm_util.c @@ -100,13 +100,17 @@ LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index, kernel_metadata = MALLOC(num_kernels * sizeof(LLVMValueRef)); LLVMGetNamedMetadataOperands(mod, opencl.kernels, kernel_metadata); for (i = 0; i num_kernels; i++) { - LLVMValueRef kernel_signature, kernel_function; + LLVMValueRef kernel_signature, *kernel_function; + unsigned num_kernel_md_operands; if (i == index) { continue; } kernel_signature = kernel_metadata[i]; - LLVMGetMDNodeOperands(kernel_signature, kernel_function); - LLVMDeleteFunction(kernel_function); + num_kernel_md_operands = LLVMGetMDNodeNumOperands(kernel_signature); + kernel_function = MALLOC(num_kernel_md_operands * sizeof (LLVMValueRef)); + LLVMGetMDNodeOperands(kernel_signature, kernel_function); + LLVMDeleteFunction(*kernel_function); + FREE(kernel_function); } FREE(kernel_metadata); radeon_llvm_optimize(mod); -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] radeon/llvm: Allocate space for kernel metadata operands
On Thu, Jul 03, 2014 at 11:55:25AM -0500, Aaron Watry wrote: On Thu, Jul 3, 2014 at 10:59 AM, Alex Deucher alexdeuc...@gmail.com wrote: Someone mentioned stability issues with cedar with the golden register kernel patch. Can you see if skipping the golden register setup helps? If so can you narrow down which registers are problematic? I'll give it a shot and see if it helps. I can reliably break the machine currently with luxrays' slg4 program, and the gpu doesn't recover after 10-sec as it should. Can you file a bug for this and post the output of R600_DEBUG=cs for this program? If I look at it I should be able to get an idea of what might be causing the lockup. -Tom I tried Tom's wavefront-size workaround with no apparent change. It's possible that we're messing something up in the instruction selection/lowering, but I'll give the kernel change a try first. --Aaron Alex and I'm getting an instruction selection error on radeonsi. Haven't managed to track that down yet, but at least it means that all required built-ins/defines for luxrays are now present (at least with my own libclc tree), at least with image support disabled in luxrays. If you enable image support, I believe that it is still going to fail due to mismatches/oddness with the number of supported pixel formats. 
--Aaron Reviewed-by: Tom Stellard thomas.stell...@amd.com Signed-off-by: Aaron Watry awa...@gmail.com CC: Tom Stellard thomas.stell...@amd.com Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76223 --- src/gallium/drivers/radeon/radeon_llvm_util.c | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/radeon/radeon_llvm_util.c b/src/gallium/drivers/radeon/radeon_llvm_util.c index 2ace91f..ec11559 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_util.c +++ b/src/gallium/drivers/radeon/radeon_llvm_util.c @@ -100,13 +100,17 @@ LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index, kernel_metadata = MALLOC(num_kernels * sizeof(LLVMValueRef)); LLVMGetNamedMetadataOperands(mod, opencl.kernels, kernel_metadata); for (i = 0; i num_kernels; i++) { - LLVMValueRef kernel_signature, kernel_function; + LLVMValueRef kernel_signature, *kernel_function; + unsigned num_kernel_md_operands; if (i == index) { continue; } kernel_signature = kernel_metadata[i]; - LLVMGetMDNodeOperands(kernel_signature, kernel_function); - LLVMDeleteFunction(kernel_function); + num_kernel_md_operands = LLVMGetMDNodeNumOperands(kernel_signature); + kernel_function = MALLOC(num_kernel_md_operands * sizeof (LLVMValueRef)); + LLVMGetMDNodeOperands(kernel_signature, kernel_function); + LLVMDeleteFunction(*kernel_function); + FREE(kernel_function); } FREE(kernel_metadata); radeon_llvm_optimize(mod); -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/5] clover: Enable cl_khr_fp64 for devices that support doubles v2
On Fri, Jul 04, 2014 at 12:28:20AM +0200, Francisco Jerez wrote: Tom Stellard t...@stellard.net writes: On Thu, Jul 03, 2014 at 01:12:07AM +0200, Francisco Jerez wrote: Tom Stellard t...@stellard.net writes: On Thu, Jun 26, 2014 at 04:15:39PM +0200, Francisco Jerez wrote: Tom Stellard thomas.stell...@amd.com writes: v2: - Report correct values for CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE and CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE. - Only define cl_khr_fp64 if the extension is supported. - Remove trailing space from extension string. - Rename device query function from cl_khr_fp86() to has_doubles(). --- src/gallium/state_trackers/clover/api/device.cpp | 6 +++--- src/gallium/state_trackers/clover/core/device.cpp | 6 ++ src/gallium/state_trackers/clover/core/device.hpp | 1 + src/gallium/state_trackers/clover/core/program.cpp| 5 - src/gallium/state_trackers/clover/llvm/invocation.cpp | 1 - 5 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/gallium/state_trackers/clover/api/device.cpp b/src/gallium/state_trackers/clover/api/device.cpp index 7006702..1176668 100644 --- a/src/gallium/state_trackers/clover/api/device.cpp +++ b/src/gallium/state_trackers/clover/api/device.cpp @@ -145,7 +145,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: - buf.as_scalarcl_uint() = 2; + buf.as_scalarcl_uint() = dev.has_doubles() ? 2 : 0; break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF: @@ -290,7 +290,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, break; case CL_DEVICE_EXTENSIONS: - buf.as_string() = ; + buf.as_string() = dev.has_doubles() ? cl_khr_fp64 : ; break; case CL_DEVICE_PLATFORM: @@ -322,7 +322,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE: - buf.as_scalarcl_uint() = 2; + buf.as_scalarcl_uint() = dev.has_doubles() ? 
2 : 0; break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF: diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp index bc6b761..6bf33e0 100644 --- a/src/gallium/state_trackers/clover/core/device.cpp +++ b/src/gallium/state_trackers/clover/core/device.cpp @@ -193,6 +193,12 @@ device::half_fp_config() const { return CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST; } +bool +device::has_doubles() const { + return pipe-get_shader_param(pipe, PIPE_SHADER_COMPUTE, + PIPE_SHADER_CAP_DOUBLES); +} + std::vectorsize_t device::max_block_size() const { auto v = get_compute_paramuint64_t(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE); diff --git a/src/gallium/state_trackers/clover/core/device.hpp b/src/gallium/state_trackers/clover/core/device.hpp index 16831ab..025c648 100644 --- a/src/gallium/state_trackers/clover/core/device.hpp +++ b/src/gallium/state_trackers/clover/core/device.hpp @@ -66,6 +66,7 @@ namespace clover { cl_device_fp_config single_fp_config() const; cl_device_fp_config double_fp_config() const; cl_device_fp_config half_fp_config() const; + bool has_doubles() const; std::vectorsize_t max_block_size() const; std::string device_name() const; diff --git a/src/gallium/state_trackers/clover/core/program.cpp b/src/gallium/state_trackers/clover/core/program.cpp index e09c3aa..f65f321 100644 --- a/src/gallium/state_trackers/clover/core/program.cpp +++ b/src/gallium/state_trackers/clover/core/program.cpp @@ -95,7 +95,10 @@ program::build_status(const device dev) const { std::string program::build_opts(const device dev) const { - return _opts.count(dev) ? _opts.find(dev)-second : ; + std::string opts = _opts.count(dev) ? _opts.find(dev)-second : ; + if (dev.has_doubles()) + opts.append( -Dcl_khr_fp64); + return opts; This define belongs in the target-specific part of libclc. 
With this hunk removed this patch is: The declarations for double functions in the libclc headers are wrapped in this macro, so we need to set it here in order to be able to use them from clover. This abuses the ::build_opts() accessor to that end, which is only supposed to return the compiler options that were specified by the user at build time, as required by the CL_PROGRAM_BUILD_OPTIONS build param. You are right, I can fix that. IMO preprocessor macros defined by the spec belong in the standard library. We
Re: [Mesa-dev] [PATCH 3/9] gallium: add PIPE_BIND_COMMAND_ARGS_BUFFER
On Sat, Jun 28, 2014 at 01:12:49PM +0200, Marek Olšák wrote: The one that increases the number of input SGPRs to 22 (16 user + 6 streamout) in the calling convention. I don't remember the name. Please git blame on the calling convention. I merged this patch into LLVM 3.4.2, which was released last week, so you don't have to wait until LLVM 3.5. -Tom Marek On Fri, Jun 27, 2014 at 5:26 PM, Tom Stellard t...@stellard.net wrote: On Tue, Jun 17, 2014 at 01:51:10AM +0200, Marek Olšák wrote: Since LLVM 3.5 will be released in August and my radeon patches adding Which LLVM patches are required for ARB_draw_indirect? -Tom ARB_draw_indirect depend on it, I will commit ARB_draw_indirect support for Gallium with softpipe and llvmpipe support earlier. My plan is for patches 3,4,5,6 to get committed in one week from now, or sooner if somebody reviews them. Marek On Sat, Apr 26, 2014 at 3:27 PM, Marek Olšák mar...@gmail.com wrote: From: Christoph Bumiller e0425...@student.tuwien.ac.at Intended for use with GL_ARB_draw_indirect's DRAW_INDIRECT_BUFFER target or for D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS. --- src/gallium/docs/source/screen.rst | 3 +++ src/gallium/include/pipe/p_defines.h | 1 + 2 files changed, 4 insertions(+) diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 89cbdbf..65885b9 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -354,6 +354,9 @@ resources might be created and handled quite differently. bound to the graphics pipeline as a shader resource. * ``PIPE_BIND_COMPUTE_RESOURCE``: A buffer or texture that can be bound to the compute program as a shader resource. +* ``PIPE_BIND_COMMAND_ARGS_BUFFER``: A buffer that may be sourced by the + GPU command processor. It can contain, for example, the arguments to + indirect draw calls. .. 
_pipe_usage: diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index a3a1ae1..4d5d833 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -350,6 +350,7 @@ enum pipe_flush_flags { #define PIPE_BIND_GLOBAL (1 << 18) /* set_global_binding */ #define PIPE_BIND_SHADER_RESOURCE (1 << 19) /* set_shader_resources */ #define PIPE_BIND_COMPUTE_RESOURCE (1 << 20) /* set_compute_resources */ +#define PIPE_BIND_COMMAND_ARGS_BUFFER (1 << 21) /* pipe_draw_info.indirect */ /* The first two flags above were previously part of the amorphous * TEXTURE_USAGE, most of which are now descriptions of the ways a -- 1.8.3.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] radeon/llvm: Use the llvm.rsq.clamped intrinsic for RSQ
On Wed, Jul 02, 2014 at 06:29:25PM +0200, Laurent Carlier wrote: Le mercredi 25 juin 2014, 11:58:47 Michel Dänzer a écrit : On 25.06.2014 09:15, Tom Stellard wrote: https://bugs.freedesktop.org/show_bug.cgi?id=80015 CC: 10.1 10.2 mesa-sta...@lists.freedesktop.org --- src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index 217fa32..119e613 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -1385,7 +1385,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) bld_base-rsq_action.emit = build_tgsi_intrinsic_nomem; #if HAVE_LLVM = 0x0305 - bld_base-rsq_action.intr_name = llvm.AMDGPU.rsq.; + bld_base-rsq_action.intr_name = llvm.AMDGPU.rsq.clamped.f32; #else bld_base-rsq_action.intr_name = llvm.AMDGPU.rsq; #endif Reviewed-and-Tested-by: Michel Dänzer michel.daen...@amd.com Tested-by: Laurent Carlier lordhea...@gmail.com Anyone to commit it? I've committed it. Thanks for testing. -Tom -- Laurent Carlier http://www.archlinux.org ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/5] clover: Enable cl_khr_fp64 for devices that support doubles v2
On Thu, Jun 26, 2014 at 04:15:39PM +0200, Francisco Jerez wrote: Tom Stellard thomas.stell...@amd.com writes: v2: - Report correct values for CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE and CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE. - Only define cl_khr_fp64 if the extension is supported. - Remove trailing space from extension string. - Rename device query function from cl_khr_fp86() to has_doubles(). --- src/gallium/state_trackers/clover/api/device.cpp | 6 +++--- src/gallium/state_trackers/clover/core/device.cpp | 6 ++ src/gallium/state_trackers/clover/core/device.hpp | 1 + src/gallium/state_trackers/clover/core/program.cpp| 5 - src/gallium/state_trackers/clover/llvm/invocation.cpp | 1 - 5 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/gallium/state_trackers/clover/api/device.cpp b/src/gallium/state_trackers/clover/api/device.cpp index 7006702..1176668 100644 --- a/src/gallium/state_trackers/clover/api/device.cpp +++ b/src/gallium/state_trackers/clover/api/device.cpp @@ -145,7 +145,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: - buf.as_scalarcl_uint() = 2; + buf.as_scalarcl_uint() = dev.has_doubles() ? 2 : 0; break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF: @@ -290,7 +290,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, break; case CL_DEVICE_EXTENSIONS: - buf.as_string() = ; + buf.as_string() = dev.has_doubles() ? cl_khr_fp64 : ; break; case CL_DEVICE_PLATFORM: @@ -322,7 +322,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE: - buf.as_scalarcl_uint() = 2; + buf.as_scalarcl_uint() = dev.has_doubles() ? 
2 : 0; break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF: diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp index bc6b761..6bf33e0 100644 --- a/src/gallium/state_trackers/clover/core/device.cpp +++ b/src/gallium/state_trackers/clover/core/device.cpp @@ -193,6 +193,12 @@ device::half_fp_config() const { return CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST; } +bool +device::has_doubles() const { + return pipe-get_shader_param(pipe, PIPE_SHADER_COMPUTE, + PIPE_SHADER_CAP_DOUBLES); +} + std::vectorsize_t device::max_block_size() const { auto v = get_compute_paramuint64_t(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE); diff --git a/src/gallium/state_trackers/clover/core/device.hpp b/src/gallium/state_trackers/clover/core/device.hpp index 16831ab..025c648 100644 --- a/src/gallium/state_trackers/clover/core/device.hpp +++ b/src/gallium/state_trackers/clover/core/device.hpp @@ -66,6 +66,7 @@ namespace clover { cl_device_fp_config single_fp_config() const; cl_device_fp_config double_fp_config() const; cl_device_fp_config half_fp_config() const; + bool has_doubles() const; std::vectorsize_t max_block_size() const; std::string device_name() const; diff --git a/src/gallium/state_trackers/clover/core/program.cpp b/src/gallium/state_trackers/clover/core/program.cpp index e09c3aa..f65f321 100644 --- a/src/gallium/state_trackers/clover/core/program.cpp +++ b/src/gallium/state_trackers/clover/core/program.cpp @@ -95,7 +95,10 @@ program::build_status(const device dev) const { std::string program::build_opts(const device dev) const { - return _opts.count(dev) ? _opts.find(dev)-second : ; + std::string opts = _opts.count(dev) ? _opts.find(dev)-second : ; + if (dev.has_doubles()) + opts.append( -Dcl_khr_fp64); + return opts; This define belongs in the target-specific part of libclc. 
With this hunk removed this patch is: The declarations for double functions in the libclc headers are wrapped in this macro, so we need to set it here in order to be able to use them from clover. -Tom Reviewed-by: Francisco Jerez curroje...@riseup.net } std::string diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp index 5d2efc4..f2b4fd9 100644 --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp @@ -183,7 +183,6 @@ namespace { // clc.h requires that this macro be defined: c.getPreprocessorOpts().addMacroDef(cl_clang_storage_class_specifiers); - c.getPreprocessorOpts().addMacroDef(cl_khr_fp64); c.getLangOpts().NoBuiltin = true; c.getTargetOpts().Triple = triple; -- 1.8.1.5 ___ mesa-dev mailing list mesa
[Mesa-dev] [PATCH 2/2] clover: Enable cl_khr_fp64 for devices that support doubles v3
v2: - Report correct values for CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE and CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE. - Only define cl_khr_fp64 if the extension is supported. - Remove trailing space from extension string. - Rename device query function from cl_khr_fp86() to has_doubles(). v3: - Return 0 for device::double_fp_config() when doubles aren't supported. --- src/gallium/state_trackers/clover/api/device.cpp| 10 +++--- src/gallium/state_trackers/clover/core/device.cpp | 21 + src/gallium/state_trackers/clover/core/device.hpp | 2 ++ src/gallium/state_trackers/clover/core/program.cpp | 5 - .../state_trackers/clover/llvm/invocation.cpp | 1 - 5 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/gallium/state_trackers/clover/api/device.cpp b/src/gallium/state_trackers/clover/api/device.cpp index 3b91e9e..5427492 100644 --- a/src/gallium/state_trackers/clover/api/device.cpp +++ b/src/gallium/state_trackers/clover/api/device.cpp @@ -145,7 +145,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: - buf.as_scalarcl_uint() = 2; + buf.as_scalarcl_uint() = dev.has_doubles() ? 2 : 0; break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF: @@ -204,6 +204,10 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, buf.as_scalarcl_device_fp_config() = dev.single_fp_config(); break; + case CL_DEVICE_DOUBLE_FP_CONFIG: + buf.as_scalarcl_device_fp_config() = dev.double_fp_config(); + break; + case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: buf.as_scalarcl_device_mem_cache_type() = CL_NONE; break; @@ -282,7 +286,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, break; case CL_DEVICE_EXTENSIONS: - buf.as_string() = ; + buf.as_string() = dev.has_doubles() ? cl_khr_fp64 : ; break; case CL_DEVICE_PLATFORM: @@ -314,7 +318,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE: - buf.as_scalarcl_uint() = 2; + buf.as_scalarcl_uint() = dev.has_doubles() ? 
2 : 0; break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF: diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp index 498e7d9..cfbe95a 100644 --- a/src/gallium/state_trackers/clover/core/device.cpp +++ b/src/gallium/state_trackers/clover/core/device.cpp @@ -175,6 +175,27 @@ device::single_fp_config() const { return CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST; } +cl_device_fp_config +device::double_fp_config() const { + if (!has_doubles()) + return 0; + + // TODO: Get these from somewhere. This is the mandated minimum double + // precision floating-point capability + return CL_FP_FMA + | CL_FP_ROUND_TO_NEAREST + | CL_FP_ROUND_TO_ZERO + | CL_FP_ROUND_TO_INF + | CL_FP_INF_NAN + | CL_FP_DENORM; +} + +bool +device::has_doubles() const { + return pipe-get_shader_param(pipe, PIPE_SHADER_COMPUTE, + PIPE_SHADER_CAP_DOUBLES); +} + std::vectorsize_t device::max_block_size() const { auto v = get_compute_paramuint64_t(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE); diff --git a/src/gallium/state_trackers/clover/core/device.hpp b/src/gallium/state_trackers/clover/core/device.hpp index 93f9091..a1899bc 100644 --- a/src/gallium/state_trackers/clover/core/device.hpp +++ b/src/gallium/state_trackers/clover/core/device.hpp @@ -64,6 +64,8 @@ namespace clover { cl_uint max_clock_frequency() const; cl_uint max_compute_units() const; cl_device_fp_config single_fp_config() const; + cl_device_fp_config double_fp_config() const; + bool has_doubles() const; std::vectorsize_t max_block_size() const; std::string device_name() const; diff --git a/src/gallium/state_trackers/clover/core/program.cpp b/src/gallium/state_trackers/clover/core/program.cpp index e09c3aa..f65f321 100644 --- a/src/gallium/state_trackers/clover/core/program.cpp +++ b/src/gallium/state_trackers/clover/core/program.cpp @@ -95,7 +95,10 @@ program::build_status(const device dev) const { std::string program::build_opts(const device dev) const { - return 
_opts.count(dev) ? _opts.find(dev)-second : ; + std::string opts = _opts.count(dev) ? _opts.find(dev)-second : ; + if (dev.has_doubles()) + opts.append( -Dcl_khr_fp64); + return opts; } std::string diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp index 5d2efc4..f2b4fd9 100644 --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp @@ -183,7 +183,6 @@ namespace { // clc.h requires that this macro be defined:
[Mesa-dev] [PATCH 1/2] clover: Report a default value for CL_DEVICE_SINGLE_FP_CONFIG
--- src/gallium/state_trackers/clover/api/device.cpp | 3 +-- src/gallium/state_trackers/clover/core/device.cpp | 6 ++ src/gallium/state_trackers/clover/core/device.hpp | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/gallium/state_trackers/clover/api/device.cpp b/src/gallium/state_trackers/clover/api/device.cpp index 97b2cf9..3b91e9e 100644 --- a/src/gallium/state_trackers/clover/api/device.cpp +++ b/src/gallium/state_trackers/clover/api/device.cpp @@ -201,8 +201,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, break; case CL_DEVICE_SINGLE_FP_CONFIG: - buf.as_scalarcl_device_fp_config() = - CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST; + buf.as_scalarcl_device_fp_config() = dev.single_fp_config(); break; case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp index b6078db..498e7d9 100644 --- a/src/gallium/state_trackers/clover/core/device.cpp +++ b/src/gallium/state_trackers/clover/core/device.cpp @@ -169,6 +169,12 @@ device::max_compute_units() const { PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS)[0]; } +cl_device_fp_config +device::single_fp_config() const { + // TODO: Get these from somewhere. 
+ return CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST; +} + std::vectorsize_t device::max_block_size() const { auto v = get_compute_paramuint64_t(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE); diff --git a/src/gallium/state_trackers/clover/core/device.hpp b/src/gallium/state_trackers/clover/core/device.hpp index 731c31e..93f9091 100644 --- a/src/gallium/state_trackers/clover/core/device.hpp +++ b/src/gallium/state_trackers/clover/core/device.hpp @@ -63,6 +63,7 @@ namespace clover { cl_ulong max_mem_alloc_size() const; cl_uint max_clock_frequency() const; cl_uint max_compute_units() const; + cl_device_fp_config single_fp_config() const; std::vectorsize_t max_block_size() const; std::string device_name() const; -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/9] gallium: add PIPE_BIND_COMMAND_ARGS_BUFFER
On Tue, Jun 17, 2014 at 01:51:10AM +0200, Marek Olšák wrote: Since LLVM 3.5 will be released in August and my radeon patches adding Which LLVM patches are required for ARB_draw_indirect? -Tom ARB_draw_indirect depend on it, I will commit ARB_draw_indirect support for Gallium with softpipe and llvmpipe support earlier. My plan is for patches 3,4,5,6 to get committed in one week from now, or sooner if somebody reviews them. Marek On Sat, Apr 26, 2014 at 3:27 PM, Marek Olšák mar...@gmail.com wrote: From: Christoph Bumiller e0425...@student.tuwien.ac.at Intended for use with GL_ARB_draw_indirect's DRAW_INDIRECT_BUFFER target or for D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS. --- src/gallium/docs/source/screen.rst | 3 +++ src/gallium/include/pipe/p_defines.h | 1 + 2 files changed, 4 insertions(+) diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 89cbdbf..65885b9 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -354,6 +354,9 @@ resources might be created and handled quite differently. bound to the graphics pipeline as a shader resource. * ``PIPE_BIND_COMPUTE_RESOURCE``: A buffer or texture that can be bound to the compute program as a shader resource. +* ``PIPE_BIND_COMMAND_ARGS_BUFFER``: A buffer that may be sourced by the + GPU command processor. It can contain, for example, the arguments to + indirect draw calls. .. 
_pipe_usage: diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index a3a1ae1..4d5d833 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -350,6 +350,7 @@ enum pipe_flush_flags { #define PIPE_BIND_GLOBAL (1 << 18) /* set_global_binding */ #define PIPE_BIND_SHADER_RESOURCE (1 << 19) /* set_shader_resources */ #define PIPE_BIND_COMPUTE_RESOURCE (1 << 20) /* set_compute_resources */ +#define PIPE_BIND_COMMAND_ARGS_BUFFER (1 << 21) /* pipe_draw_info.indirect */ /* The first two flags above were previously part of the amorphous * TEXTURE_USAGE, most of which are now descriptions of the ways a -- 1.8.3.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/5] clover: Enable cl_khr_fp64 for devices that support doubles v2
v2: - Report correct values for CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE and CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE. - Only define cl_khr_fp64 if the extension is supported. - Remove trailing space from extension string. - Rename device query function from cl_khr_fp86() to has_doubles(). --- src/gallium/state_trackers/clover/api/device.cpp | 6 +++--- src/gallium/state_trackers/clover/core/device.cpp | 6 ++ src/gallium/state_trackers/clover/core/device.hpp | 1 + src/gallium/state_trackers/clover/core/program.cpp| 5 - src/gallium/state_trackers/clover/llvm/invocation.cpp | 1 - 5 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/gallium/state_trackers/clover/api/device.cpp b/src/gallium/state_trackers/clover/api/device.cpp index 7006702..1176668 100644 --- a/src/gallium/state_trackers/clover/api/device.cpp +++ b/src/gallium/state_trackers/clover/api/device.cpp @@ -145,7 +145,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: - buf.as_scalarcl_uint() = 2; + buf.as_scalarcl_uint() = dev.has_doubles() ? 2 : 0; break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF: @@ -290,7 +290,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, break; case CL_DEVICE_EXTENSIONS: - buf.as_string() = ; + buf.as_string() = dev.has_doubles() ? cl_khr_fp64 : ; break; case CL_DEVICE_PLATFORM: @@ -322,7 +322,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE: - buf.as_scalarcl_uint() = 2; + buf.as_scalarcl_uint() = dev.has_doubles() ? 
2 : 0; break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF: diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp index bc6b761..6bf33e0 100644 --- a/src/gallium/state_trackers/clover/core/device.cpp +++ b/src/gallium/state_trackers/clover/core/device.cpp @@ -193,6 +193,12 @@ device::half_fp_config() const { return CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST; } +bool +device::has_doubles() const { + return pipe-get_shader_param(pipe, PIPE_SHADER_COMPUTE, + PIPE_SHADER_CAP_DOUBLES); +} + std::vectorsize_t device::max_block_size() const { auto v = get_compute_paramuint64_t(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE); diff --git a/src/gallium/state_trackers/clover/core/device.hpp b/src/gallium/state_trackers/clover/core/device.hpp index 16831ab..025c648 100644 --- a/src/gallium/state_trackers/clover/core/device.hpp +++ b/src/gallium/state_trackers/clover/core/device.hpp @@ -66,6 +66,7 @@ namespace clover { cl_device_fp_config single_fp_config() const; cl_device_fp_config double_fp_config() const; cl_device_fp_config half_fp_config() const; + bool has_doubles() const; std::vectorsize_t max_block_size() const; std::string device_name() const; diff --git a/src/gallium/state_trackers/clover/core/program.cpp b/src/gallium/state_trackers/clover/core/program.cpp index e09c3aa..f65f321 100644 --- a/src/gallium/state_trackers/clover/core/program.cpp +++ b/src/gallium/state_trackers/clover/core/program.cpp @@ -95,7 +95,10 @@ program::build_status(const device dev) const { std::string program::build_opts(const device dev) const { - return _opts.count(dev) ? _opts.find(dev)-second : ; + std::string opts = _opts.count(dev) ? 
_opts.find(dev)-second : ; + if (dev.has_doubles()) + opts.append( -Dcl_khr_fp64); + return opts; } std::string diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp index 5d2efc4..f2b4fd9 100644 --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp @@ -183,7 +183,6 @@ namespace { // clc.h requires that this macro be defined: c.getPreprocessorOpts().addMacroDef(cl_clang_storage_class_specifiers); - c.getPreprocessorOpts().addMacroDef(cl_khr_fp64); c.getLangOpts().NoBuiltin = true; c.getTargetOpts().Triple = triple; -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/5] clover: Have compat::string allocate its own memory.
From: Francisco Jerez curroje...@riseup.net --- src/gallium/state_trackers/clover/api/kernel.cpp | 4 +++- src/gallium/state_trackers/clover/util/compat.hpp | 8 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/gallium/state_trackers/clover/api/kernel.cpp b/src/gallium/state_trackers/clover/api/kernel.cpp index 96cf302..05cc392 100644 --- a/src/gallium/state_trackers/clover/api/kernel.cpp +++ b/src/gallium/state_trackers/clover/api/kernel.cpp @@ -58,7 +58,9 @@ clCreateKernelsInProgram(cl_program d_prog, cl_uint count, if (rd_kerns) copy(map([](const module::symbol sym) { - return desc(new kernel(prog, compat::string(sym.name), + return desc(new kernel(prog, + std::string(sym.name.begin(), + sym.name.end()), range(sym.args))); }, syms), rd_kerns); diff --git a/src/gallium/state_trackers/clover/util/compat.hpp b/src/gallium/state_trackers/clover/util/compat.hpp index e68d9df..28601e8 100644 --- a/src/gallium/state_trackers/clover/util/compat.hpp +++ b/src/gallium/state_trackers/clover/util/compat.hpp @@ -72,7 +72,7 @@ namespace clover { vector(const vector v) : p(alloc(v.n, v.p, v.n)), n(v.n) { } - vector(iterator p, size_type n) : p(alloc(n, p, n)), n(n) { + vector(const_iterator p, size_type n) : p(alloc(n, p, n)), n(n) { } templatetypename C @@ -263,13 +263,13 @@ namespace clover { size_t offset; }; - class string : public vector_refconst char { + class string : public vectorchar { public: - string(const char *p) : vector_ref(p, std::strlen(p)) { + string(const char *p) : vector(p, std::strlen(p)) { } templatetypename C - string(const C v) : vector_ref(v) { + string(const C v) : vector(v) { } operator std::string() const { -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/5] gallium: Add PIPE_SHADER_CAP_DOUBLES
This is for reporting whether or not double precision floating-point operations are supported. Reviewed-by: Francisco Jerez curroje...@riseup.net --- src/gallium/auxiliary/gallivm/lp_bld_limits.h | 2 ++ src/gallium/auxiliary/tgsi/tgsi_exec.h| 2 ++ src/gallium/docs/source/screen.rst| 2 ++ src/gallium/drivers/radeonsi/si_pipe.c| 5 + src/gallium/include/pipe/p_defines.h | 3 ++- 5 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h b/src/gallium/auxiliary/gallivm/lp_bld_limits.h index 6cb0949..9ccaf46 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h @@ -126,6 +126,8 @@ gallivm_get_shader_param(enum pipe_shader_cap param) return PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: return 1; + case PIPE_SHADER_CAP_DOUBLES: + return 0; } /* if we get here, we missed a shader cap above (and should have seen * a compiler warning.) diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index d53c4ba..56a7034 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -456,6 +456,8 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param) return PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: return 1; + case PIPE_SHADER_CAP_DOUBLES: + return 0; } /* if we get here, we missed a shader cap above (and should have seen * a compiler warning.) diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 1a80b04..9522090 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -282,6 +282,8 @@ to be 0. program. It should be one of the ``pipe_shader_ir`` enum values. * ``PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS``: The maximum number of texture sampler views. Must not be lower than PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS. 
+* ``PIPE_SHADER_CAP_DOUBLES``: Whether double precision floating-point + operations are supported. .. _pipe_compute_cap: diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 07f4291..6263c70 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -325,6 +325,9 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu switch (param) { case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_LLVM; + case PIPE_SHADER_CAP_DOUBLES: + return 0; /* XXX: Enable doubles once the compiler can +handle them. */ default: return 0; } @@ -376,6 +379,8 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu return 16; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_DOUBLES: + return 0; } return 0; } diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 90f6493..9a9963d 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -617,7 +617,8 @@ enum pipe_shader_cap PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS, PIPE_SHADER_CAP_PREFERRED_IR, PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED, - PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS + PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS, + PIPE_SHADER_CAP_DOUBLES }; /** -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/5] clover: Report default values for half and double fp configs v2
From: Matt Arsenault arse...@gmail.com v2: -Fix indentation --- src/gallium/state_trackers/clover/api/device.cpp | 11 +-- src/gallium/state_trackers/clover/core/device.cpp | 24 +++ src/gallium/state_trackers/clover/core/device.hpp | 3 +++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/src/gallium/state_trackers/clover/api/device.cpp b/src/gallium/state_trackers/clover/api/device.cpp index 97b2cf9..7006702 100644 --- a/src/gallium/state_trackers/clover/api/device.cpp +++ b/src/gallium/state_trackers/clover/api/device.cpp @@ -201,8 +201,15 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, break; case CL_DEVICE_SINGLE_FP_CONFIG: - buf.as_scalarcl_device_fp_config() = - CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST; + buf.as_scalarcl_device_fp_config() = dev.single_fp_config(); + break; + + case CL_DEVICE_DOUBLE_FP_CONFIG: + buf.as_scalarcl_device_fp_config() = dev.double_fp_config(); + break; + + case CL_DEVICE_HALF_FP_CONFIG: + buf.as_scalarcl_device_fp_config() = dev.half_fp_config(); break; case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp index b6078db..bc6b761 100644 --- a/src/gallium/state_trackers/clover/core/device.cpp +++ b/src/gallium/state_trackers/clover/core/device.cpp @@ -169,6 +169,30 @@ device::max_compute_units() const { PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS)[0]; } +cl_device_fp_config +device::single_fp_config() const { + // TODO: Get these from somewhere. + return CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST; +} + +cl_device_fp_config +device::double_fp_config() const { + // TODO: Get these from somewhere. This is the mandated minimum double + // precision floating-point capability + return CL_FP_FMA + | CL_FP_ROUND_TO_NEAREST + | CL_FP_ROUND_TO_ZERO + | CL_FP_ROUND_TO_INF + | CL_FP_INF_NAN + | CL_FP_DENORM; +} + +cl_device_fp_config +device::half_fp_config() const { + // TODO: Get these from somewhere. 
+ return CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST; +} + std::vectorsize_t device::max_block_size() const { auto v = get_compute_paramuint64_t(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE); diff --git a/src/gallium/state_trackers/clover/core/device.hpp b/src/gallium/state_trackers/clover/core/device.hpp index 731c31e..16831ab 100644 --- a/src/gallium/state_trackers/clover/core/device.hpp +++ b/src/gallium/state_trackers/clover/core/device.hpp @@ -63,6 +63,9 @@ namespace clover { cl_ulong max_mem_alloc_size() const; cl_uint max_clock_frequency() const; cl_uint max_compute_units() const; + cl_device_fp_config single_fp_config() const; + cl_device_fp_config double_fp_config() const; + cl_device_fp_config half_fp_config() const; std::vectorsize_t max_block_size() const; std::string device_name() const; -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/5] clover: Fix not setting build log if the build succeeds v2
From: Matt Arsenault arse...@gmail.com If there were only warnings, they would not be added to the log. Also fixes valgrind use after free errors. v2: - Use compat::string. --- src/gallium/state_trackers/clover/core/compiler.hpp | 3 ++- src/gallium/state_trackers/clover/core/error.hpp | 4 ++-- src/gallium/state_trackers/clover/core/program.cpp| 11 +++ src/gallium/state_trackers/clover/llvm/invocation.cpp | 16 ++-- src/gallium/state_trackers/clover/util/compat.hpp | 3 +++ 5 files changed, 24 insertions(+), 13 deletions(-) diff --git a/src/gallium/state_trackers/clover/core/compiler.hpp b/src/gallium/state_trackers/clover/core/compiler.hpp index 49cd022..6ef84d1 100644 --- a/src/gallium/state_trackers/clover/core/compiler.hpp +++ b/src/gallium/state_trackers/clover/core/compiler.hpp @@ -32,7 +32,8 @@ namespace clover { module compile_program_llvm(const compat::string source, pipe_shader_ir ir, const compat::string target, - const compat::string opts); + const compat::string opts, + compat::string r_log); module compile_program_tgsi(const compat::string source); } diff --git a/src/gallium/state_trackers/clover/core/error.hpp b/src/gallium/state_trackers/clover/core/error.hpp index 28459f3..cecbe9b 100644 --- a/src/gallium/state_trackers/clover/core/error.hpp +++ b/src/gallium/state_trackers/clover/core/error.hpp @@ -66,8 +66,8 @@ namespace clover { class build_error : public error { public: - build_error(const compat::string log) : - error(CL_BUILD_PROGRAM_FAILURE, log) { + build_error(const compat::string what = ) : + error(CL_BUILD_PROGRAM_FAILURE, what) { } }; diff --git a/src/gallium/state_trackers/clover/core/program.cpp b/src/gallium/state_trackers/clover/core/program.cpp index 3aaa652..e09c3aa 100644 --- a/src/gallium/state_trackers/clover/core/program.cpp +++ b/src/gallium/state_trackers/clover/core/program.cpp @@ -52,15 +52,18 @@ program::build(const ref_vectordevice devs, const char *opts) { _opts.insert({ dev, opts }); + compat::string log; + try { auto 
module = (dev.ir_format() == PIPE_SHADER_IR_TGSI ? compile_program_tgsi(_source) : compile_program_llvm(_source, dev.ir_format(), -dev.ir_target(), build_opts(dev))); +dev.ir_target(), build_opts(dev), +log)); _binaries.insert({ dev, module }); - - } catch (build_error e) { -_logs.insert({ dev, e.what() }); +_logs.insert({ dev, std::string(log.c_str()) }); + } catch (const build_error ) { +_logs.insert({ dev, std::string(log.c_str()) }); throw; } } diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp index c3daa81..5d2efc4 100644 --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp @@ -120,7 +120,7 @@ namespace { compile(llvm::LLVMContext llvm_ctx, const std::string source, const std::string name, const std::string triple, const std::string processor, const std::string opts, - clang::LangAS::Map address_spaces) { + clang::LangAS::Map address_spaces, compat::string r_log) { clang::CompilerInstance c; clang::EmitLLVMOnlyAction act(llvm_ctx); @@ -224,11 +224,14 @@ namespace { c.getCodeGenOpts().LinkBitcodeFile = libclc_path; // Compile the code - if (!c.ExecuteAction(act)) - throw build_error(log); + bool ExecSuccess = c.ExecuteAction(act); + r_log = log; + + if (!ExecSuccess) + throw build_error(); // Get address spaces map to be able to find kernel argument address space - memcpy(address_spaces, c.getTarget().getAddressSpaceMap(), + memcpy(address_spaces, c.getTarget().getAddressSpaceMap(), sizeof(address_spaces)); return act.takeModule(); @@ -391,7 +394,8 @@ module clover::compile_program_llvm(const compat::string source, enum pipe_shader_ir ir, const compat::string target, - const compat::string opts) { + const compat::string opts, + compat::string r_log) { std::vectorllvm::Function * kernels; size_t processor_str_len = std::string(target.begin()).find_first_of(-); @@ -405,7 +409,7 @@ clover::compile_program_llvm(const 
compat::string source, // The input file
Re: [Mesa-dev] [PATCH 1/1] r600: Fix use after free in compute_memory_promote_item.
On Tue, Jun 24, 2014 at 08:39:28AM -0400, Jan Vesely wrote: On Mon, 2014-06-23 at 17:15 +0200, Bruno Jimenez wrote: On Mon, 2014-06-23 at 10:39 -0400, Jan Vesely wrote: The dst pointer needs to be initialized after any calls to compute_memory_grow_pool, as the function might change the pool-vbo pointer. You are completely right. Good catch. Reviewed-by: Bruno Jiménez brunoji...@gmail.com Thanks for review, now we just need a volunteer to push this... I just pushed this patch, thanks! -Tom Sorry for any inconvenience this may have caused. no worries, none caused This fixes crashes and assertion failures in two gegl tests. Signed-off-by: Jan Vesely jan.ves...@rutgers.edu CC: Bruno Jimenez brunoji...@gmail.com CC: Tom Stellard thomas.stell...@amd.com --- src/gallium/drivers/r600/compute_memory_pool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/r600/compute_memory_pool.c b/src/gallium/drivers/r600/compute_memory_pool.c index a78ff1e..9cb16f8 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.c +++ b/src/gallium/drivers/r600/compute_memory_pool.c @@ -308,8 +308,8 @@ int compute_memory_promote_item(struct compute_memory_pool *pool, { struct pipe_screen *screen = (struct pipe_screen *)pool-screen; struct r600_context *rctx = (struct r600_context *)pipe; - struct pipe_resource *dst = (struct pipe_resource *)pool-bo; struct pipe_resource *src = (struct pipe_resource *)item-real_buffer; + struct pipe_resource *dst = NULL; struct pipe_box box; struct list_head *pos; @@ -339,6 +339,7 @@ int compute_memory_promote_item(struct compute_memory_pool *pool, if (err == -1) return -1; } + dst = (struct pipe_resource *)pool-bo; COMPUTE_DBG(pool-screen, + Found space for Item %p id = %u start_in_dw = %u (%u bytes) size_in_dw = %u (%u bytes)\n, item, item-id, start_in_dw, start_in_dw * 4, -- Jan Vesely jan.ves...@rutgers.edu ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev 
___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] radeon/llvm: Use the llvm.rsq.clamped intrinsic for RSQ
https://bugs.freedesktop.org/show_bug.cgi?id=80015 CC: 10.1 10.2 mesa-sta...@lists.freedesktop.org --- src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index 217fa32..119e613 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -1385,7 +1385,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) bld_base-rsq_action.emit = build_tgsi_intrinsic_nomem; #if HAVE_LLVM = 0x0305 - bld_base-rsq_action.intr_name = llvm.AMDGPU.rsq.; + bld_base-rsq_action.intr_name = llvm.AMDGPU.rsq.clamped.f32; #else bld_base-rsq_action.intr_name = llvm.AMDGPU.rsq; #endif -- 1.9.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/5] clover: Fix not setting build log if the build succeeds.
On Sat, Jun 21, 2014 at 06:33:17PM +0200, Francisco Jerez wrote: Tom Stellard thomas.stell...@amd.com writes: From: Matt Arsenault arse...@gmail.com If there were only warnings, they would not be added to the log. Also fixes valgrind use after free errors. --- src/gallium/state_trackers/clover/core/compiler.hpp | 3 ++- src/gallium/state_trackers/clover/core/error.hpp | 2 +- src/gallium/state_trackers/clover/core/program.cpp| 11 +++ src/gallium/state_trackers/clover/llvm/invocation.cpp | 14 +++--- 4 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/gallium/state_trackers/clover/core/compiler.hpp b/src/gallium/state_trackers/clover/core/compiler.hpp index 49cd022..3ce132f 100644 --- a/src/gallium/state_trackers/clover/core/compiler.hpp +++ b/src/gallium/state_trackers/clover/core/compiler.hpp @@ -32,7 +32,8 @@ namespace clover { module compile_program_llvm(const compat::string source, pipe_shader_ir ir, const compat::string target, - const compat::string opts); + const compat::string opts, + std::string log_out); This doesn't work. I'm afraid you need to use compat::string on the compiler interface because the C++98 and C++11 versions of std::string are not guaranteed to be binary compatible. This mess will go away once we can drop support for the non-C++11 versions of LLVM. Have a look at the attached patch for the memory management issues with compat::string. Even with your patch, I'm still having trouble getting this to work. What is the correct pattern here? I know I need to use compat::string in the function signature, but what type should I pass to the compile_program_llvm() function from program::build()? A std::string a compat::string, something else? -Tom And maybe rename the output argument to r_log as is usual everywhere else in clover? 
module compile_program_tgsi(const compat::string source); } diff --git a/src/gallium/state_trackers/clover/core/error.hpp b/src/gallium/state_trackers/clover/core/error.hpp index 28459f3..9802195 100644 --- a/src/gallium/state_trackers/clover/core/error.hpp +++ b/src/gallium/state_trackers/clover/core/error.hpp @@ -66,7 +66,7 @@ namespace clover { class build_error : public error { public: - build_error(const compat::string log) : + build_error(const compat::string log = ) : Can you rename the argument to what as it's no longer going to hold the compilation log? error(CL_BUILD_PROGRAM_FAILURE, log) { } }; diff --git a/src/gallium/state_trackers/clover/core/program.cpp b/src/gallium/state_trackers/clover/core/program.cpp index 3aaa652..91ee553 100644 --- a/src/gallium/state_trackers/clover/core/program.cpp +++ b/src/gallium/state_trackers/clover/core/program.cpp @@ -52,15 +52,18 @@ program::build(const ref_vectordevice devs, const char *opts) { _opts.insert({ dev, opts }); + std::string build_log; + try { auto module = (dev.ir_format() == PIPE_SHADER_IR_TGSI ? 
compile_program_tgsi(_source) : compile_program_llvm(_source, dev.ir_format(), -dev.ir_target(), build_opts(dev))); +dev.ir_target(), build_opts(dev), +build_log)); _binaries.insert({ dev, module }); - - } catch (build_error e) { -_logs.insert({ dev, e.what() }); +_logs.insert({ dev, build_log }); + } catch (const build_error ) { +_logs.insert({ dev, build_log }); throw; } } diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp index 48810bd..0dc1f50 100644 --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp @@ -120,12 +120,11 @@ namespace { compile(llvm::LLVMContext llvm_ctx, const std::string source, const std::string name, const std::string triple, const std::string processor, const std::string opts, - clang::LangAS::Map address_spaces) { + clang::LangAS::Map address_spaces, std::string log_out) { clang::CompilerInstance c; clang::EmitLLVMOnlyAction act(llvm_ctx); - std::string log; - llvm::raw_string_ostream s_log(log); + llvm::raw_string_ostream s_log(log_out); std::string libclc_path = LIBCLC_LIBEXECDIR + processor + - + triple + .bc
Re: [Mesa-dev] [PATCH 00/11] [RFC v2] Solve the mapping bug
On Sun, Jun 22, 2014 at 04:05:49PM +0200, Francisco Jerez wrote: Bruno Jimenez brunoji...@gmail.com writes: On Sat, 2014-06-21 at 17:39 +0200, Francisco Jerez wrote: [...] The implementation of PIPE_TRANSFER_MAP_DIRECTLY introduced in PATCH 10 has somewhat worrying semantics: A mapping with this flag might become stale unpredictably if a kernel is run, maybe from a different command queue. Clover's transfer functions don't hit that path right now on single-threaded applications, but they might in the future as we start accelerating the APIs we currently implement with soft_copy_op(). This is a bug IMHO: even direct mappings should last until the corresponding unmap call. I think I'm not fully understanding you here. I tried to use PIPE_TRANSFER_MAP_DIRECTLY only with clEnqueue{Write,Read} functions, which map the resource, copy it and unmap it when finished. Is it possible for another kernel to access the memory of a buffer that is being read/written? AFAICT, yes. All command queues created on the same device share the same memory pool, so a kernel being queued for execution in one could invalidate a concurrent mapping done with PIPE_TRANSFER_MAP_DIRECTLY by one of the transfer functions. On top of that the transfer functions might start queuing kernels themselves in the future to accelerate certain operations we currently do on the CPU, which would make this scenario more likely. I had no intention of having user mappings made with that flag. [Although a possible solution, with a lot of warnings of course, for the above problem could be to allow a user to use this flag] I'm not advocating a revert of the series because it fixes a serious bug, but please don't push patches 10-11, we should probably start looking for a different solution. Some suggestions are: I also asked for them not to be pushed. And with your reasons, until we find a better way or we change how buffers are handled, I won't propose them again. - Why do you even need a pool? 
Wouldn't it be possible to create a huge RAT, e.g. covering a 4GB portion of the GPU memory and then use a special memory domain or some sort of flag to tell the kernel to allocate a buffer from that region (or relocate if it's already been allocated elsewhere)? This is especially easy on hardware with virtual memory, as you could simply reserve an arbitrarily large block of virtual memory, bind it as e.g. RAT0, and then map other buffer objects into the block on-demand as they're bound to the compute pipeline -- There would be no need to move the actual bits around. This is similar to the approach I used in my original proof-of-concept implementation of the compute API on nv50. This is one of the things I have been wondering recently, given that radeonsi doesn't use a pool, why r600 needs one? I still have to understand AMD docs and how *exactly* everything works. Probably because on SI compute kernels can access random locations of memory without going through an RAT? I have little actual experience with radeons, Tom should know the low-level details. The reason there is no memory pool in radeonsi is because SI and newer support virtual memory, so there is already one contiguous address space and also because there is no limit to the number of resources that can be accessed by a shader. -Tom 4GB seems like a big amount of memory for me, my little cedar has only 512MB :) - If you insist on using a pool, you could (mostly) avoid the storage duplication and the mapping copies by allocating buffer objects directly from the pool as it was before this series, and then keep some sort of reference count specific to the pool storage that would be incremented on map and decremented on unmap. Once you need to grow the pool you'd keep the old storage around temporarily and migrate buffers to the new storage lazily as they are required or unmapped. Once the reference count drops to zero you'd be free to release the backing BO to the system. 
The fact that you'd keep both storage buffers around for a bit means that you'd be able to use DMA to migrate the pool contents instead of the CPU copies you're doing now, which is likely to be substantially more efficient. I see how this would solve the slow mappings problem, but I think that it could mean a higher memory usage. In the case of a user creating some buffers, mapping one of them and then adding more so that the pool has to grow, we would have to keep the full size of the old pool just for a buffer, plus the new pool. That's a fair point, this solution would only get rid of the extra copying but it wouldn't solve the memory usage problem in some situations (long-lived mappings). IMHO the former is more worrying because it has an impact on every map operation no matter what, while the increased
Re: [Mesa-dev] [PATCH 00/11] [RFC v2] Solve the mapping bug
On Wed, Jun 18, 2014 at 05:01:50PM +0200, Bruno Jiménez wrote: Hi, This is my second attempt to fix the mapping bug adding all the suggestions that Tom Stellard sent, and, so far, it seems that it is resolved. This series changes completely how OpenCL buffers are handled by the r600g driver. Before this, we would add them directly to a pool, and this pool would grow whenever we needed more space. But this process implied destroying the pool and creating a new one. There could be cases where a buffer would be mapped and the pool would grow, leaving one side of the mapping pointed to where the item was. This is the 'mapping bug' Now, Items will have an intermediate resource, where all mappings can be done, and when a buffer is going to be used with a kernel it is promoted to the pool. In the case where a promoted item is going to be mapped, it is previously demoted, so even if the pool changes its location due to growing, the map remains valid. In the case of a buffer mapped for reading, and used by a kernel to read from it, we will duplicate this buffer, having the intermediate buffer, where the user has its map, and an item in the pool, which is the one that the kernel is going to use. I've just pushed patches 1-9. Nice work! -Tom As a summary for v2: Patches 1-8: These are the main part of the series, and solve the mapping bug. Patches 1 and 7 now use less explicit castings Patch 2 is new and introduces the 'is_item_in_pool' function, which is used in patches 3 and 8 Patch 9: Is a complete rewrite of v1 patch 8 using gallium utils for double lists Patches 10 and 11: These are just a proof of concept for avoiding transfers GPU - GPU when using all CL Read/Write functions. They are v1 patch 9 split in two to separate r600g changes from clover changes. Now, on clover's side it introduces and uses 'CLOVER_TRANSFER_MAP_DIRECTLY' so it doesn't collide with any other OpenCL flag. 
Please review and Thanks :) Bruno Jiménez (11): r600g/compute: Add an intermediate resource for OpenCL buffers r600g/compute: Add an util function to know if an item is in the pool r600g/compute: Add statuses to the compute_memory_items r600g/compute: divide the item list in two r600g/compute: Only move to the pool the buffers marked for promoting r600g/compute: Avoid problems when promoting items mapped for reading r600g/compute: Implement compute_memory_demote_item r600g/compute: Map only against intermediate buffers r600g/compute: Use gallium util functions for double lists r600g/compute: Map directly the pool in some cases clover: Use PIPE_TRANSFER_MAP_DIRECTLY when writing/reading buffers src/gallium/drivers/r600/compute_memory_pool.c | 294 - src/gallium/drivers/r600/compute_memory_pool.h | 31 ++- src/gallium/drivers/r600/evergreen_compute.c | 38 ++- src/gallium/state_trackers/clover/api/transfer.cpp | 4 +- src/gallium/state_trackers/clover/core/object.hpp | 4 + .../state_trackers/clover/core/resource.cpp| 2 + 6 files changed, 233 insertions(+), 140 deletions(-) -- 2.0.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallivm: Fix build after LLVM commit 211259
On Fri, Jun 20, 2014 at 07:14:43PM -0500, Aaron Watry wrote: Signed-off-by: Aaron Watry awa...@gmail.com Reviewed-by: Tom Stellard thomas.stell...@amd.com --- src/gallium/auxiliary/gallivm/lp_bld_debug.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp index df26883..413a0c2 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp @@ -51,7 +51,9 @@ #include llvm/MC/MCInstPrinter.h #include llvm/MC/MCRegisterInfo.h -#if HAVE_LLVM = 0x0303 +#if HAVE_LLVM = 0x0305 +#define OwningPtr std::unique_ptr +#elif HAVE_LLVM = 0x0303 #include llvm/ADT/OwningPtr.h #endif -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] radeon/llvm: Adapt to AMDGPU.rsq intrinsic change in LLVM 3.5
On Thu, Jun 19, 2014 at 03:53:42PM +0900, Michel Dänzer wrote: From: Michel Dänzer michel.daen...@amd.com I just pushed this patch to fix the regressions. We can update the other intrinsics in a follow on patch. We also need to change RSQ to use llvm.AMDGPU.rsq.clamped once http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20140616/80.html is committed in order to fix https://bugs.freedesktop.org/show_bug.cgi?id=80015 Also, I forgot to do this before I committed it, but I think this patch should go to stable. -Tom Signed-off-by: Michel Dänzer michel.daen...@amd.com --- src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 4 1 file changed, 4 insertions(+) diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index f8be0df..217fa32 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -1384,7 +1384,11 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) bld_base-op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp; bld_base-rsq_action.emit = build_tgsi_intrinsic_nomem; +#if HAVE_LLVM = 0x0305 + bld_base-rsq_action.intr_name = llvm.AMDGPU.rsq.; +#else bld_base-rsq_action.intr_name = llvm.AMDGPU.rsq; +#endif } void radeon_llvm_create_func(struct radeon_llvm_context * ctx, -- 2.0.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] r600: Fix possible endless loop in compute_memory_pool allocations.
On Thu, Jun 19, 2014 at 10:21:32AM -0400, Jan Vesely wrote: The important part is the change of the condition to = 0. Otherwise the loop gets stuck never actually growing the pool. The change in the aux-need calculation guarantees max 2 iterations, and avoids wasting memory in case a smaller item can't fit into a relatively larger pool. Does this patch obsolete the XXX comment around line 292 of this file? If so, we should remove it. Also have tried this with patches 1-9 of this series: http://lists.freedesktop.org/archives/mesa-dev/2014-June/061742.html -Tom Signed-off-by: Jan Vesely jan.ves...@rutgers.edu CC: Bruno Jimenez brunoji...@gmail.com --- This fixes hang in gegl colors.xml test src/gallium/drivers/r600/compute_memory_pool.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/r600/compute_memory_pool.c b/src/gallium/drivers/r600/compute_memory_pool.c index ec8c470..0b6d2da6 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.c +++ b/src/gallium/drivers/r600/compute_memory_pool.c @@ -320,8 +320,11 @@ int compute_memory_finalize_pending(struct compute_memory_pool* pool, int64_t need = item-size_in_dw+2048 - (pool-size_in_dw - allocated); - if (need 0) { - need = pool-size_in_dw / 10; + if (need = 0) { + /* There's enough free space, but it's too + * fragmented. Assume half of the item can fit + * int the last chunk */ + need = (item-size_in_dw / 2) + ITEM_ALIGNMENT; } need = align(need, ITEM_ALIGNMENT); -- 1.9.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] r600: Handle failures in compute_memory_pool_finalize
On Thu, Jun 19, 2014 at 11:22:28AM -0400, Jan Vesely wrote: On Thu, 2014-06-19 at 17:12 +0200, Bruno Jimenez wrote: Hi, To which failure are you referring? Could you please send me a test/program that I can try to track this down? well, the compute_memory_finalize_pending() function can possibly return -1 so it's prudent to check for it. as for the testcase, I replaced the inside of 'if (need = 0)' in the previous patch with return -1 (to simulate failure). Then I used GEGL test op colors.xml to trigger the situation. but gegl needs some extra patches to get working on current mesa/clover. I can send you log with R600_DEBUG=compute if it helps. Have you ever looked into integrating the gegl tests with piglit, like we've done for opencv? This would make it much easier for other devs to execute these tests. -Tom regards, Jan Thanks! Bruno On Thu, 2014-06-19 at 10:21 -0400, Jan Vesely wrote: Signed-off-by: Jan Vesely jan.ves...@rutgers.edu CC: Bruno Jimenez brunoji...@gmail.com --- The failure now hits assertion compute_memory_pool.c:408, instead of u_inlines.h:275:pipe_buffer_map_range: Assertion `offset buffer-width0' src/gallium/drivers/r600/evergreen_compute.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index a2abf15..bd6e720 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -659,7 +659,10 @@ static void evergreen_set_global_binding( return; } - compute_memory_finalize_pending(pool, ctx_); + if (compute_memory_finalize_pending(pool, ctx_) == -1) { + /* XXX: Unset */ + return; + } for (int i = 0; i n; i++) { @@ -967,7 +970,9 @@ void *r600_compute_global_transfer_map( %u (box.x)\n, buffer-chunk-id, box-x); - compute_memory_finalize_pending(pool, ctx_); + if (compute_memory_finalize_pending(pool, ctx_) == -1) { + return NULL; + } assert(resource-target == PIPE_BUFFER); assert(resource-bind 
PIPE_BIND_GLOBAL); -- Jan Vesely jan.ves...@rutgers.edu ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 00/23] Megadrivers galore
On Tue, Jun 17, 2014 at 07:38:16PM +0100, Emil Velikov wrote: Hi all, As a follow up to the static/shared pipe-drivers series here is the final series (if anyone is interested I may take a look at egl + opencl) of refactoring the gallium dri targets into a single library/provider. Hi Emil, One common problem I run into when using the pipe drivers is if there is an undefined symbol in the pipe_*.so then it will silently fail to load, and with OpenCL for example it won't report any devices which can be confusing for users. I would recommend adding some error handling to util_dlopen, so that it prints an error message when it fails to load a shared object. Other than that, it's hard to review a series like this, but I'll give it an: Acked-by: Tom Stellard thomas.stell...@amd.com Since I like the general approach. -Tom In a nutshell: - Convert one target per patch. - Merge the drm and sw backends of our dri state-tracker. - Adds __driDriverGetExtensions_$drivername symbol for each driver. - Megadrivers. - *** - Profit. Things works like a charm for nouveau and swrast, and testing on other platforms is greatly appreciated. The complete series can be found in the static-or-shared-pipe-drivers-v2 branch at my github repo. I would like to get this reviewed/pushed over the next month, although that depends on the number of bugs that come up with the previous batch. As always comments, suggestions and flame is welcome. 
Cheers, Emil Emil Velikov (23): targets/dri-swrast: use drm aware dricommon when building more than swrast st/dri: Allow separate dri-targets st/dri/drm: Add a second libdridrm library targets/dri-nouveau: Convert to static/shared pipe-drivers targets/(r300|r600|radeonsi)/dri: Convert to static/shared pipe-drivers targets/dri-freedreno: Convert to static/shared pipe-drivers targets/dri-i915: Convert to static/shared pipe-drivers targets/dri-ilo: Convert to static/shared pipe-driver targets/dri-vmwgfx: Convert to static/shared pipe-drivers st/dri: Remove the old libdridrm library targets/dri: Add __driDriverGetExtensions_nouveau symbol targets/dri: Add __driDriverGetExtensions_(r300|r600|radeonsi) symbols targets/dri: Add __driDriverGetExtensions_freedreno symbol targets/dri: Add __driDriverGetExtensions_i915 symbol targets/dri: Add __driDriverGetExtensions_i965 symbol targets/dri: Add __driDriverGetExtensions_vmwgfx targets/dri: update scons build to handle __driDriverGetExtensions_vmwgfx targets/dri: cleanup conversion leftovers st/dri/drm: remove __driDriverExtensions and driDriverAPI scons: build and use a single dri_common library targets/dri-swrast: convert to gallium megadrivers :) st/dri: merge dri/drm and dri/sw backends targets/dri-swrast: Convert to static/shared pipe-driver 61 files changed, 536 insertions(+), 1375 deletions(-) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/5] gallium: Add PIPE_SHADER_CAP_DOUBLES
On Wed, Jun 18, 2014 at 09:54:28AM +1000, Dave Airlie wrote: On 18 June 2014 05:08, Roland Scheidegger srol...@vmware.com wrote: This looks ok to me though since tgsi currently doesn't have any double opcodes (well the docs have them...) it doesn't really apply to most drivers (at least I assume you don't want to add support for it for tgsi). I've mostly forward ported the old gallium double code, and have written most of ARB_gpu_shader_fp64 on top, Though the question I did want to ask Tom is if he is just going to expose hw that has doubles, or does he plan on emulating doubles. My intention was that this CAP would be enabled for drivers with either native double support or with double emulation implemented in the driver. I'm not currently planning to implement double emulation, so I would only enable this for hardware with native double support. For a lot of GLSL4.0 GPUs from AMD fglrx emulates doubles using massive magic shaders, I'm unsure if we should have a lowering pass above/below the TGSI line for these types of situations and what that would mean for this CAP. If someone implemented double lowering above the TGSI line, then it should only be done with drivers that report 0 for this CAP. -Tom Dave. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] gallium/radeon: Only print a message for LLVM diagnostic errors
We were printing messages for all diagnostic types, which was spamming the console for some OpenCL programs. --- src/gallium/drivers/radeon/radeon_llvm_emit.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c b/src/gallium/drivers/radeon/radeon_llvm_emit.c index 891781f..6a394b2 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_emit.c +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c @@ -87,11 +87,13 @@ static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context) { char *diaginfo_message; diaginfo_message = LLVMGetDiagInfoDescription(di); - fprintf(stderr,LLVM triggered Diagnostic Handler: %s\n, diaginfo_message); LLVMDisposeMessage(diaginfo_message); diagnosticflag = (unsigned int *)context; - *diagnosticflag = ((LLVMDSError == LLVMGetDiagInfoSeverity(di)) ? 1 : 0); + if (LLVMGetDiagInfoSeverity(di) == LLVMDSError) { + *diagnosticflag = 1; + fprintf(stderr,LLVM triggered Diagnostic Handler: %s\n, diaginfo_message); + } } #endif -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] R600/SI: add Gather4 intrinsics (v2)
On Mon, Jun 16, 2014 at 09:19:59PM +0200, Marek Olšák wrote: Why are there SDNodes for the other sample intrinsics then? The reason there are SDnodes for sample intrinsics is because at one point he had to modify the type of the resource description and the easiest way to do that was by replacing the intrinsic with an SDNode. -Tom Marek On Mon, Jun 16, 2014 at 5:45 PM, Tom Stellard t...@stellard.net wrote: On Thu, Jun 12, 2014 at 02:11:10AM +0200, Marek Olšák wrote: From: Marek Olšák marek.ol...@amd.com This adds a new type of intrinsic and SDNode: SampleRaw. All fields of the MIMG opcodes are exposed and can be set by Mesa, even DMASK. All GATHER4 variants are added and there are a lot of them. v2: document DMASK behavior --- lib/Target/R600/AMDGPUISelLowering.cpp | 24 + lib/Target/R600/AMDGPUISelLowering.h | 31 +++ lib/Target/R600/SIISelLowering.cpp | 72 + lib/Target/R600/SIISelLowering.h | 2 + lib/Target/R600/SIInstrInfo.td | 91 lib/Target/R600/SIInstructions.td | 96 +- lib/Target/R600/SIIntrinsics.td| 48 + 7 files changed, 340 insertions(+), 24 deletions(-) diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 849f169..359161c 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -1542,6 +1542,30 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(SAMPLEB) NODE_NAME_CASE(SAMPLED) NODE_NAME_CASE(SAMPLEL) + NODE_NAME_CASE(GATHER4) + NODE_NAME_CASE(GATHER4_CL) + NODE_NAME_CASE(GATHER4_L) + NODE_NAME_CASE(GATHER4_B) + NODE_NAME_CASE(GATHER4_B_CL) + NODE_NAME_CASE(GATHER4_LZ) + NODE_NAME_CASE(GATHER4_C) + NODE_NAME_CASE(GATHER4_C_CL) + NODE_NAME_CASE(GATHER4_C_L) + NODE_NAME_CASE(GATHER4_C_B) + NODE_NAME_CASE(GATHER4_C_B_CL) + NODE_NAME_CASE(GATHER4_C_LZ) + NODE_NAME_CASE(GATHER4_O) + NODE_NAME_CASE(GATHER4_CL_O) + NODE_NAME_CASE(GATHER4_L_O) + NODE_NAME_CASE(GATHER4_B_O) + NODE_NAME_CASE(GATHER4_B_CL_O) + NODE_NAME_CASE(GATHER4_LZ_O) 
+ NODE_NAME_CASE(GATHER4_C_O) + NODE_NAME_CASE(GATHER4_C_CL_O) + NODE_NAME_CASE(GATHER4_C_L_O) + NODE_NAME_CASE(GATHER4_C_B_O) + NODE_NAME_CASE(GATHER4_C_B_CL_O) + NODE_NAME_CASE(GATHER4_C_LZ_O) You don't need to add new SDNodes for all these instructions, you can just use the intrinsic directly in the pattern. The only reason to add SDNodes, is if there are optimizations / special lowering we can do for these instructions. NODE_NAME_CASE(STORE_MSKOR) NODE_NAME_CASE(TBUFFER_STORE_FORMAT) } diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index d5d821d..a9af195 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -203,6 +203,37 @@ enum { SAMPLEB, SAMPLED, SAMPLEL, + + // Gather4 opcodes + GATHER4, + GATHER4_CL, + GATHER4_L, + GATHER4_B, + GATHER4_B_CL, + GATHER4_LZ, + + GATHER4_C, + GATHER4_C_CL, + GATHER4_C_L, + GATHER4_C_B, + GATHER4_C_B_CL, + GATHER4_C_LZ, + + GATHER4_O, + GATHER4_CL_O, + GATHER4_L_O, + GATHER4_B_O, + GATHER4_B_CL_O, + GATHER4_LZ_O, + + GATHER4_C_O, + GATHER4_C_CL_O, + GATHER4_C_L_O, + GATHER4_C_B_O, + GATHER4_C_B_CL_O, + GATHER4_C_LZ_O, + + // Nemory opcodes FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, STORE_MSKOR, LOAD_CONSTANT, diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 1a861d4..909255d 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -688,6 +688,59 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG DAG) const { Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + +// Gather4 intrinsics +case AMDGPUIntrinsic::SI_gather4: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4, Op, DAG); +case AMDGPUIntrinsic::SI_gather4_cl: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_CL, Op, DAG); +case AMDGPUIntrinsic::SI_gather4_l: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_L, Op, DAG); +case AMDGPUIntrinsic::SI_gather4_b: + return 
LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B, Op, DAG); +case AMDGPUIntrinsic::SI_gather4_b_cl: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B_CL, Op, DAG); +case AMDGPUIntrinsic::SI_gather4_lz: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_LZ, Op, DAG); + +case AMDGPUIntrinsic::SI_gather4_c: + return
[Mesa-dev] [PATCH 4/5] gallium: Add PIPE_SHADER_CAP_DOUBLES
This is for reporting whether or not double precision floating-point operations are supported. --- src/gallium/auxiliary/gallivm/lp_bld_limits.h | 2 ++ src/gallium/auxiliary/tgsi/tgsi_exec.h| 2 ++ src/gallium/docs/source/screen.rst| 2 ++ src/gallium/drivers/r300/r300_screen.c| 4 src/gallium/drivers/r600/r600_pipe.c | 2 ++ src/gallium/drivers/radeonsi/si_pipe.c| 5 + src/gallium/drivers/svga/svga_screen.c| 4 src/gallium/include/pipe/p_defines.h | 3 ++- 8 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h b/src/gallium/auxiliary/gallivm/lp_bld_limits.h index 6cb0949..9ccaf46 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h @@ -126,6 +126,8 @@ gallivm_get_shader_param(enum pipe_shader_cap param) return PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: return 1; + case PIPE_SHADER_CAP_DOUBLES: + return 0; } /* if we get here, we missed a shader cap above (and should have seen * a compiler warning.) diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index d53c4ba..56a7034 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -456,6 +456,8 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param) return PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: return 1; + case PIPE_SHADER_CAP_DOUBLES: + return 0; } /* if we get here, we missed a shader cap above (and should have seen * a compiler warning.) diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index b8e356f..2867bfc 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -282,6 +282,8 @@ to be 0. program. It should be one of the ``pipe_shader_ir`` enum values. * ``PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS``: The maximum number of texture sampler views. Must not be lower than PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS. 
+* ``PIPE_SHADER_CAP_DOUBLES``: Whether double precision floating-point + operations are supported. .. _pipe_compute_cap: diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 82d30e7..e5ed59a 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -262,6 +262,8 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e return 0; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; +case PIPE_SHADER_CAP_DOUBLES: +return 0; } break; case PIPE_SHADER_VERTEX: @@ -313,6 +315,8 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e return 0; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; +case PIPE_SHADER_CAP_DOUBLES: +return 0; } break; } diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 2b65056..24e3c1a 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -444,6 +444,8 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e } else { return PIPE_SHADER_IR_TGSI; } +case PIPE_SHADER_CAP_DOUBLES: + return 0; } return 0; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 4b96f20..9eab1fe 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -321,6 +321,9 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu switch (param) { case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_LLVM; + case PIPE_SHADER_CAP_DOUBLES: + return 0; /* TODO: Report doubles as supported once + * the compiler is ready. 
*/ default: return 0; } @@ -372,6 +375,8 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu return 16; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_DOUBLES: + return 0; } return 0; } diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index 4e1e331..89d3c49 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -347,6 +347,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en return 16;
[Mesa-dev] [PATCH 3/5] clover: Report default values for half and double fp configs
From: Matt Arsenault arse...@gmail.com --- src/gallium/state_trackers/clover/api/device.cpp | 11 +-- src/gallium/state_trackers/clover/core/device.cpp | 24 +++ src/gallium/state_trackers/clover/core/device.hpp | 3 +++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/src/gallium/state_trackers/clover/api/device.cpp b/src/gallium/state_trackers/clover/api/device.cpp index 1bc2692..dc8e22c 100644 --- a/src/gallium/state_trackers/clover/api/device.cpp +++ b/src/gallium/state_trackers/clover/api/device.cpp @@ -201,8 +201,15 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, break; case CL_DEVICE_SINGLE_FP_CONFIG: - buf.as_scalarcl_device_fp_config() = - CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST; + buf.as_scalarcl_device_fp_config() = dev.single_fp_config(); + break; + + case CL_DEVICE_DOUBLE_FP_CONFIG: + buf.as_scalarcl_device_fp_config() = dev.double_fp_config(); + break; + + case CL_DEVICE_HALF_FP_CONFIG: + buf.as_scalarcl_device_fp_config() = dev.half_fp_config(); break; case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp index bc3e3e6..6d52dd4 100644 --- a/src/gallium/state_trackers/clover/core/device.cpp +++ b/src/gallium/state_trackers/clover/core/device.cpp @@ -163,6 +163,30 @@ device::max_clock_frequency() const { PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY)[0]; } +cl_device_fp_config +device::single_fp_config() const { + // TODO: Get these from somewhere. + return CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST; +} + +cl_device_fp_config +device::double_fp_config() const { + // TODO: Get these from somewhere. This is the mandated minimum double + // precision floating-point capability +return CL_FP_FMA + | CL_FP_ROUND_TO_NEAREST + | CL_FP_ROUND_TO_ZERO + | CL_FP_ROUND_TO_INF + | CL_FP_INF_NAN + | CL_FP_DENORM; +} + +cl_device_fp_config +device::half_fp_config() const { + // TODO: Get these from somewhere. 
+ return CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST; +} + std::vectorsize_t device::max_block_size() const { auto v = get_compute_paramuint64_t(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE); diff --git a/src/gallium/state_trackers/clover/core/device.hpp b/src/gallium/state_trackers/clover/core/device.hpp index 3662c6b..380029e 100644 --- a/src/gallium/state_trackers/clover/core/device.hpp +++ b/src/gallium/state_trackers/clover/core/device.hpp @@ -62,6 +62,9 @@ namespace clover { size_t max_threads_per_block() const; cl_ulong max_mem_alloc_size() const; cl_uint max_clock_frequency() const; + cl_device_fp_config single_fp_config() const; + cl_device_fp_config double_fp_config() const; + cl_device_fp_config half_fp_config() const; std::vectorsize_t max_block_size() const; std::string device_name() const; -- 1.8.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/5] clover: Enable cl_khr_fp64 for devices that support doubles
--- src/gallium/state_trackers/clover/api/device.cpp | 4 +++- src/gallium/state_trackers/clover/core/device.cpp | 6 ++ src/gallium/state_trackers/clover/core/device.hpp | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/clover/api/device.cpp b/src/gallium/state_trackers/clover/api/device.cpp index dc8e22c..275542d 100644 --- a/src/gallium/state_trackers/clover/api/device.cpp +++ b/src/gallium/state_trackers/clover/api/device.cpp @@ -290,7 +290,9 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, break; case CL_DEVICE_EXTENSIONS: - buf.as_string() = ; + // The trailing space is intentional. It is a spec-ism that there is a + // trailing space at the end of the list of extensions. + buf.as_string() = dev.cl_khr_fp64() ? cl_khr_fp64 : ; break; case CL_DEVICE_PLATFORM: diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp index 6d52dd4..51b54fa 100644 --- a/src/gallium/state_trackers/clover/core/device.cpp +++ b/src/gallium/state_trackers/clover/core/device.cpp @@ -187,6 +187,12 @@ device::half_fp_config() const { return CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST; } +bool +device::cl_khr_fp64() const { + return pipe-get_shader_param(pipe, PIPE_SHADER_COMPUTE, + PIPE_SHADER_CAP_DOUBLES); +} + std::vectorsize_t device::max_block_size() const { auto v = get_compute_paramuint64_t(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE); diff --git a/src/gallium/state_trackers/clover/core/device.hpp b/src/gallium/state_trackers/clover/core/device.hpp index 380029e..38bea54 100644 --- a/src/gallium/state_trackers/clover/core/device.hpp +++ b/src/gallium/state_trackers/clover/core/device.hpp @@ -65,6 +65,7 @@ namespace clover { cl_device_fp_config single_fp_config() const; cl_device_fp_config double_fp_config() const; cl_device_fp_config half_fp_config() const; + bool cl_khr_fp64() const; std::vectorsize_t max_block_size() const; std::string device_name() const; -- 
1.8.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/5] clover: Don't use llvm's global context
An LLVMContext should only be accessed by a single thread, and using the global context was causing crashes in multi-threaded environments. Now we use a separate context for each compile. --- src/gallium/state_trackers/clover/llvm/invocation.cpp | 15 +-- 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp index 82e262f..48810bd 100644 --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp @@ -117,12 +117,13 @@ namespace { #endif llvm::Module * - compile(const std::string source, const std::string name, - const std::string triple, const std::string processor, - const std::string opts, clang::LangAS::Map address_spaces) { + compile(llvm::LLVMContext llvm_ctx, const std::string source, + const std::string name, const std::string triple, + const std::string processor, const std::string opts, + clang::LangAS::Map address_spaces) { clang::CompilerInstance c; - clang::EmitLLVMOnlyAction act(llvm::getGlobalContext()); + clang::EmitLLVMOnlyAction act(llvm_ctx); std::string log; llvm::raw_string_ostream s_log(log); std::string libclc_path = LIBCLC_LIBEXECDIR + processor + - @@ -394,10 +395,12 @@ clover::compile_program_llvm(const compat::string source, target.size() - processor_str_len - 1); clang::LangAS::Map address_spaces; + llvm::LLVMContext llvm_ctx; + // The input file name must have the .cl extension in order for the // CompilerInvocation class to recognize it as an OpenCL source file. - llvm::Module *mod = compile(source, input.cl, triple, processor, opts, - address_spaces); + llvm::Module *mod = compile(llvm_ctx, source, input.cl, triple, processor, + opts, address_spaces); find_kernels(mod, kernels); -- 1.8.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/5] clover: Fix not setting build log if the build succeeds.
From: Matt Arsenault arse...@gmail.com If there were only warnings, they would not be added to the log. Also fixes valgrind use after free errors. --- src/gallium/state_trackers/clover/core/compiler.hpp | 3 ++- src/gallium/state_trackers/clover/core/error.hpp | 2 +- src/gallium/state_trackers/clover/core/program.cpp| 11 +++ src/gallium/state_trackers/clover/llvm/invocation.cpp | 14 +++--- 4 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/gallium/state_trackers/clover/core/compiler.hpp b/src/gallium/state_trackers/clover/core/compiler.hpp index 49cd022..3ce132f 100644 --- a/src/gallium/state_trackers/clover/core/compiler.hpp +++ b/src/gallium/state_trackers/clover/core/compiler.hpp @@ -32,7 +32,8 @@ namespace clover { module compile_program_llvm(const compat::string source, pipe_shader_ir ir, const compat::string target, - const compat::string opts); + const compat::string opts, + std::string log_out); module compile_program_tgsi(const compat::string source); } diff --git a/src/gallium/state_trackers/clover/core/error.hpp b/src/gallium/state_trackers/clover/core/error.hpp index 28459f3..9802195 100644 --- a/src/gallium/state_trackers/clover/core/error.hpp +++ b/src/gallium/state_trackers/clover/core/error.hpp @@ -66,7 +66,7 @@ namespace clover { class build_error : public error { public: - build_error(const compat::string log) : + build_error(const compat::string log = ) : error(CL_BUILD_PROGRAM_FAILURE, log) { } }; diff --git a/src/gallium/state_trackers/clover/core/program.cpp b/src/gallium/state_trackers/clover/core/program.cpp index 3aaa652..91ee553 100644 --- a/src/gallium/state_trackers/clover/core/program.cpp +++ b/src/gallium/state_trackers/clover/core/program.cpp @@ -52,15 +52,18 @@ program::build(const ref_vectordevice devs, const char *opts) { _opts.insert({ dev, opts }); + std::string build_log; + try { auto module = (dev.ir_format() == PIPE_SHADER_IR_TGSI ? 
compile_program_tgsi(_source) : compile_program_llvm(_source, dev.ir_format(), -dev.ir_target(), build_opts(dev))); +dev.ir_target(), build_opts(dev), +build_log)); _binaries.insert({ dev, module }); - - } catch (build_error e) { -_logs.insert({ dev, e.what() }); +_logs.insert({ dev, build_log }); + } catch (const build_error ) { +_logs.insert({ dev, build_log }); throw; } } diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp index 48810bd..0dc1f50 100644 --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp @@ -120,12 +120,11 @@ namespace { compile(llvm::LLVMContext llvm_ctx, const std::string source, const std::string name, const std::string triple, const std::string processor, const std::string opts, - clang::LangAS::Map address_spaces) { + clang::LangAS::Map address_spaces, std::string log_out) { clang::CompilerInstance c; clang::EmitLLVMOnlyAction act(llvm_ctx); - std::string log; - llvm::raw_string_ostream s_log(log); + llvm::raw_string_ostream s_log(log_out); std::string libclc_path = LIBCLC_LIBEXECDIR + processor + - + triple + .bc; @@ -220,10 +219,10 @@ namespace { // Compile the code if (!c.ExecuteAction(act)) - throw build_error(log); + throw build_error(); // Get address spaces map to be able to find kernel argument address space - memcpy(address_spaces, c.getTarget().getAddressSpaceMap(), + memcpy(address_spaces, c.getTarget().getAddressSpaceMap(), sizeof(address_spaces)); return act.takeModule(); @@ -386,7 +385,8 @@ module clover::compile_program_llvm(const compat::string source, enum pipe_shader_ir ir, const compat::string target, - const compat::string opts) { + const compat::string opts, + std::string log_out) { std::vectorllvm::Function * kernels; size_t processor_str_len = std::string(target.begin()).find_first_of(-); @@ -400,7 +400,7 @@ clover::compile_program_llvm(const compat::string source, // The input file 
name must have the .cl extension in order for the //
Re: [Mesa-dev] [PATCH 1/9] r600g/compute: Add an intermediate resource for OpenCL buffers
On Fri, Jun 13, 2014 at 10:35:30PM +0200, Bruno Jiménez wrote: This patch changes completely the way buffers are added to the compute_memory_pool. Before this, whenever we were going to map a buffer or write to or read from it, it would get placed into the pool. Now, every unallocated buffer has its own r600_resource until it is allocated in the pool. NOTE: This patch also increases the GPU memory usage at the moment of putting every buffer in its place. More or less, the memory usage is ~2x(sum of every buffer size) v2: Cleanup --- src/gallium/drivers/r600/compute_memory_pool.c | 21 - src/gallium/drivers/r600/compute_memory_pool.h | 2 ++ src/gallium/drivers/r600/evergreen_compute.c | 18 +- 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/r600/compute_memory_pool.c b/src/gallium/drivers/r600/compute_memory_pool.c index ec8c470..94ddcde 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.c +++ b/src/gallium/drivers/r600/compute_memory_pool.c @@ -71,7 +71,6 @@ static void compute_memory_pool_init(struct compute_memory_pool * pool, if (pool-shadow == NULL) return; - pool-next_id = 1; pool-size_in_dw = initial_size_in_dw; pool-bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(pool-screen, pool-size_in_dw * 4); @@ -365,6 +364,18 @@ int compute_memory_finalize_pending(struct compute_memory_pool* pool, pool-item_list = item; } + ((struct r600_context *)pipe)-b.b.resource_copy_region(pipe, + (struct pipe_resource *)pool-bo, + 0, item-start_in_dw * 4, 0 ,0, + (struct pipe_resource *)item-real_buffer, + 0, (struct pipe_box) {.width = item-size_in_dw * 4, + .height = 1, .depth = 1}); + + pool-screen-b.b.resource_destroy( + (struct pipe_screen *)pool-screen, + (struct pipe_resource *)item-real_buffer); You should use temporary variables rather than inlining the casts everywhere. It will make the code easier to read. Make sure to declare them at the beginning of the function or basic block. 
+ item-real_buffer = NULL; + allocated += item-size_in_dw; } @@ -393,6 +404,12 @@ void compute_memory_free(struct compute_memory_pool* pool, int64_t id) item-next-prev = item-prev; } + if (item-real_buffer) { + pool-screen-b.b.resource_destroy( + (struct pipe_screen *)pool-screen, + (struct pipe_resource *)item-real_buffer); + } + Same thing here with the casts. free(item); return; @@ -426,6 +443,8 @@ struct compute_memory_item* compute_memory_alloc( new_item-start_in_dw = -1; /* mark pending */ new_item-id = pool-next_id++; new_item-pool = pool; + new_item-real_buffer = (struct r600_resource*)r600_compute_buffer_alloc_vram( + pool-screen, size_in_dw * 4); if (pool-item_list) { for (last_item = pool-item_list; last_item-next; diff --git a/src/gallium/drivers/r600/compute_memory_pool.h b/src/gallium/drivers/r600/compute_memory_pool.h index c711c59..e94159c 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.h +++ b/src/gallium/drivers/r600/compute_memory_pool.h @@ -38,6 +38,8 @@ struct compute_memory_item int64_t start_in_dw; ///Start pointer in dwords relative in the pool bo int64_t size_in_dw; ///Size of the chunk in dwords + struct r600_resource *real_buffer; + struct compute_memory_pool* pool; struct compute_memory_item* prev; diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index a2abf15..c152e54 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -958,6 +958,17 @@ void *r600_compute_global_transfer_map( struct r600_resource_global* buffer = (struct r600_resource_global*)resource; + struct pipe_resource *dst; + unsigned offset = box-x; + + if (buffer-chunk-real_buffer) { + dst = (struct pipe_resource*)buffer-chunk-real_buffer; + } + else { + dst = (struct pipe_resource*)buffer-chunk-pool-bo; + offset += (buffer-chunk-start_in_dw * 4); + } + COMPUTE_DBG(rctx-screen, * r600_compute_global_transfer_map()\n level = %u, usage = %u, box(x = %u, y
Re: [Mesa-dev] [PATCH 2/9] r600g/compute: Add statuses to the compute_memory_items
On Fri, Jun 13, 2014 at 10:35:31PM +0200, Bruno Jiménez wrote: These statuses will help track whether the items are mapped or if they should be promoted to or demoted from the pool --- src/gallium/drivers/r600/compute_memory_pool.h | 7 ++- src/gallium/drivers/r600/evergreen_compute.c | 12 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/r600/compute_memory_pool.h b/src/gallium/drivers/r600/compute_memory_pool.h index e94159c..166093d 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.h +++ b/src/gallium/drivers/r600/compute_memory_pool.h @@ -27,13 +27,18 @@ #include stdlib.h +#define ITEM_MAPPED_FOR_READING (10) +#define ITEM_MAPPED_FOR_WRITING (11) +#define ITEM_FOR_PROMOTING (12) +#define ITEM_FOR_DEMOTING (13) + struct compute_memory_pool; struct compute_memory_item { int64_t id; ///ID of the memory chunk - int untouched; ///True if the memory contains only junk, no need to save it for defrag + uint32_t status; ///Will track the status of the item int64_t start_in_dw; ///Start pointer in dwords relative in the pool bo int64_t size_in_dw; ///Size of the chunk in dwords This whole structure should use c style comments /* */, but that should be fixed in a follow up patch. diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index c152e54..9123a40 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -659,6 +659,15 @@ static void evergreen_set_global_binding( return; } + /* We mark these items for promotion to the pool if they + * aren't already there */ + for (int i = 0; i n; i++) { + struct compute_memory_item *item = buffers[i]-chunk; + + if (item-start_in_dw == -1) I would prefer to encapsulate this in an is_item_in_pool() helper function. 
+ buffers[i]->chunk->status |= ITEM_FOR_PROMOTING; + } + compute_memory_finalize_pending(pool, ctx_); for (int i = 0; i < n; i++) @@ -969,6 +978,9 @@ void *r600_compute_global_transfer_map( offset += (buffer->chunk->start_in_dw * 4); } + if (usage & PIPE_TRANSFER_READ) + buffer->chunk->status |= ITEM_MAPPED_FOR_READING; + COMPUTE_DBG(rctx->screen, "* r600_compute_global_transfer_map()\n" "level = %u, usage = %u, box(x = %u, y = %u, z = %u " "width = %u, height = %u, depth = %u)\n", level, usage, -- 2.0.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/9] r600g/compute: divide the item list in two
On Fri, Jun 13, 2014 at 10:35:32PM +0200, Bruno Jiménez wrote: Now we will have a list with the items that are in the pool (item_list) and the items that are outside it (unallocated_list) Reviewed-by: Tom Stellard thomas.stell...@amd.com --- src/gallium/drivers/r600/compute_memory_pool.c | 99 +- src/gallium/drivers/r600/compute_memory_pool.h | 1 + 2 files changed, 49 insertions(+), 51 deletions(-) diff --git a/src/gallium/drivers/r600/compute_memory_pool.c b/src/gallium/drivers/r600/compute_memory_pool.c index 94ddcde..5a5ef12 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.c +++ b/src/gallium/drivers/r600/compute_memory_pool.c @@ -108,13 +108,11 @@ int64_t compute_memory_prealloc_chunk( size_in_dw); for (item = pool->item_list; item; item = item->next) { - if (item->start_in_dw > -1) { - if (last_end + size_in_dw <= item->start_in_dw) { - return last_end; - } - - last_end = item->start_in_dw + align(item->size_in_dw, ITEM_ALIGNMENT); + if (last_end + size_in_dw <= item->start_in_dw) { + return last_end; } + + last_end = item->start_in_dw + align(item->size_in_dw, ITEM_ALIGNMENT); } if (pool->size_in_dw - last_end < size_in_dw) { @@ -226,7 +224,6 @@ void compute_memory_shadow(struct compute_memory_pool* pool, int compute_memory_finalize_pending(struct compute_memory_pool* pool, struct pipe_context * pipe) { - struct compute_memory_item *pending_list = NULL, *end_p = NULL; struct compute_memory_item *item, *next; int64_t allocated = 0; @@ -244,45 +241,16 @@ int compute_memory_finalize_pending(struct compute_memory_pool* pool, item->size_in_dw, item->size_in_dw * 4); } - /* Search through the list of memory items in the pool */ + /* Calculate the total allocated size */ for (item = pool->item_list; item; item = next) { next = item->next; + allocated += align(item->size_in_dw, ITEM_ALIGNMENT); + } - /* Check if the item is pending. */ - if (item->start_in_dw == -1) { - /* It is pending, so add it to the pending_list... 
*/ - if (end_p) { - end_p->next = item; - } - else { - pending_list = item; - } - - /* ... and then remove it from the item list. */ - if (item->prev) { - item->prev->next = next; - } - else { - pool->item_list = next; - } - - if (next) { - next->prev = item->prev; - } - - /* This sequence makes the item be at the end of the list */ - item->prev = end_p; - item->next = NULL; - end_p = item; - - /* Update the amount of space we will need to allocate. */ - unallocated += item->size_in_dw+1024; - } - else { - /* The item is not pending, so update the amount of space - * that has already been allocated. */ - allocated += item->size_in_dw; - } + /* Calculate the total unallocated size */ + for (item = pool->unallocated_list; item; item = next) { + next = item->next; + unallocated += align(item->size_in_dw, ITEM_ALIGNMENT); } /* If we require more space than the size of the pool, then grow the @@ -302,15 +270,15 @@ int compute_memory_finalize_pending(struct compute_memory_pool* pool, * In this case, there are 300 units of free space in the pool, but * they aren't contiguous, so it will be impossible to allocate Item D. */ - if (pool->size_in_dw < allocated+unallocated) { - err = compute_memory_grow_pool(pool, pipe, allocated+unallocated); + if (pool->size_in_dw < allocated + unallocated) { + err = compute_memory_grow_pool(pool, pipe, allocated + unallocated); if (err == -1) return -1; } - /* Loop through all the pending items, allocate space for them and - * add them back to the item_list. */ - for (item = pending_list; item; item = next) { + /* Loop through all the unallocated items, allocate space for them + * and add them to the item_list. */ + for (item = pool->unallocated_list; item; item = next) { next = item->next; /* Search for free space in the pool for this item
Re: [Mesa-dev] [PATCH] clover: Cache serialized binaries
On Sun, Jun 15, 2014 at 01:08:14PM +0200, Francisco Jerez wrote: Tom Stellard thomas.stell...@amd.com writes: We were serializing the binaries once when clGetProgramInfo was called with CL_PROGRAM_BINARY_SIZES and then again when it was called with CL_PROGRAM_BINARIES. This was slowing down some OpenCV tests which were building binary kernel caches. This improves the run-time of OpenCV's OCL_ImgProc/CvtColor8u.* test from 7 minutes to 1 minute. --- Can you give the attached two patches a try? I'm curious to see if they have a comparable effect -- If they do I'd prefer to fix the underlying object rather than caching binaries in serialized form. Thanks. [...] These patches improve performance even more. Now it only takes 10 seconds to run the tests instead of 7 minutes. For both patches: Tested-by: Tom Stellard thomas.stell...@amd.com From a500126213b073793184b0b6f170a58229340778 Mon Sep 17 00:00:00 2001 From: Francisco Jerez curroje...@riseup.net Date: Sat, 14 Jun 2014 20:53:35 +0200 Subject: [PATCH 1/2] clover: Optimize module serialization for vectors of fundamental types. --- src/gallium/state_trackers/clover/core/module.cpp | 23 ++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/clover/core/module.cpp b/src/gallium/state_trackers/clover/core/module.cpp index 3e3ad99..41de734 100644 --- a/src/gallium/state_trackers/clover/core/module.cpp +++ b/src/gallium/state_trackers/clover/core/module.cpp @@ -69,7 +69,9 @@ namespace { /// (De)serialize a vector. 
template<typename T> - struct _serializer<compat::vector<T>> { + struct _serializer<compat::vector<T>, + typename std::enable_if< + !std::is_scalar<T>::value>::type> { static void proc(compat::ostream &os, const compat::vector<T> &v) { _proc<uint32_t>(os, v.size()); @@ -87,6 +89,25 @@ namespace { } }; + template<typename T> + struct _serializer<compat::vector<T>, + typename std::enable_if< + std::is_scalar<T>::value>::type> { + static void + proc(compat::ostream &os, const compat::vector<T> &v) { + _proc<uint32_t>(os, v.size()); + os.write(reinterpret_cast<const char *>(v.begin()), + v.size() * sizeof(T)); + } + + static void + proc(compat::istream &is, compat::vector<T> &v) { + v.reserve(_proc<uint32_t>(is)); + is.read(reinterpret_cast<char *>(v.begin()), + v.size() * sizeof(T)); + } + }; + /// (De)serialize a module::section. template<> struct _serializer<module::section> { -- 1.9.2 From 1267038c2b0621dddc3d5c7718eed7ef2beb111b Mon Sep 17 00:00:00 2001 From: Francisco Jerez curroje...@riseup.net Date: Sat, 14 Jun 2014 21:03:02 +0200 Subject: [PATCH 2/2] clover: Calculate the serialized size of a module efficiently. 
--- src/gallium/state_trackers/clover/api/program.cpp | 5 +--- src/gallium/state_trackers/clover/core/module.cpp | 32 +++ src/gallium/state_trackers/clover/core/module.hpp | 1 + 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/src/gallium/state_trackers/clover/api/program.cpp b/src/gallium/state_trackers/clover/api/program.cpp index fedc91d..a14baa3 100644 --- a/src/gallium/state_trackers/clover/api/program.cpp +++ b/src/gallium/state_trackers/clover/api/program.cpp @@ -190,10 +190,7 @@ clGetProgramInfo(cl_program d_prog, cl_program_info param, case CL_PROGRAM_BINARY_SIZES: buf.as_vectorsize_t() = map([](const device dev) { -compat::ostream::buffer_t bin; -compat::ostream s(bin); -prog.binary(dev).serialize(s); -return bin.size(); +return prog.binary(dev).size(); }, prog.devices()); break; diff --git a/src/gallium/state_trackers/clover/core/module.cpp b/src/gallium/state_trackers/clover/core/module.cpp index 41de734..55ed91a 100644 --- a/src/gallium/state_trackers/clover/core/module.cpp +++ b/src/gallium/state_trackers/clover/core/module.cpp @@ -52,6 +52,13 @@ namespace { return x; } + /// Calculate the size of the specified object. + templatetypename T + void + _proc(module::size_t sz, const T x) { + _serializerT::proc(sz, x); + } + /// (De)serialize a scalar value. templatetypename T struct _serializerT, typename std::enable_if @@ -65,6 +72,11 @@ namespace { proc(compat::istream is, T x) { is.read(reinterpret_castchar *(x), sizeof(x)); } + + static void + proc(module::size_t sz, const T x) { + sz += sizeof(x); + } }; /// (De)serialize a vector. @@ -87,6 +99,14 @@ namespace { for (size_t i = 0; i v.size(); i++) new(v[i]) T(_procT