Re: [Mesa-dev] [PATCH 2/2] r600g/compute: Decrement map_count when unmapping items

2014-08-13 Thread Tom Stellard
On Thu, Aug 07, 2014 at 12:14:24PM +0200, Bruno Jiménez wrote:
 This patch adds a new struct: r600_transfer_global. It will
 act as a wrapper around an r600_resource_global and an r600_transfer.
 
 It will be used for calling r600_compute_global_transfer_unmap when
 transfer_unmap is called. And at the same time, keep all the transfer
 information, so we can call r600_buffer_transfer_unmap with the
 'real' transfer.
 ---
  src/gallium/drivers/r600/evergreen_compute.c | 46 
 +---
  src/gallium/drivers/r600/evergreen_compute.h |  5 +++
  2 files changed, 40 insertions(+), 11 deletions(-)
 
 diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
 b/src/gallium/drivers/r600/evergreen_compute.c
 index f50f94a..ac72256 100644
 --- a/src/gallium/drivers/r600/evergreen_compute.c
 +++ b/src/gallium/drivers/r600/evergreen_compute.c
 @@ -970,10 +970,16 @@ void *r600_compute_global_transfer_map(
   struct r600_resource_global* buffer =
   (struct r600_resource_global*)resource;
  
 + struct r600_transfer_global *trans = NULL;
 + uint8_t *data;
 +
   struct compute_memory_item *item = buffer-chunk;
   struct pipe_resource *dst = NULL;
   unsigned offset = box-x;
  
 + trans = CALLOC(1, sizeof(struct r600_transfer_global));
 + trans-resource = resource;
 +
   if (is_item_in_pool(item)) {
   compute_memory_demote_item(pool, item, ctx_);
   }
 @@ -1004,8 +1010,11 @@ void *r600_compute_global_transfer_map(
   assert(box-z == 0);
  
   ///TODO: do it better, mapping is not possible if the pool is too big
 - return pipe_buffer_map_range(ctx_, dst,
 - offset, box-width, usage, ptransfer);
 + data = pipe_buffer_map_range(ctx_, dst,
 + offset, box-width, usage, trans-ptransfer);
 +
 + *ptransfer = (struct pipe_transfer *)trans;
 + return data;
  }
  
  void r600_compute_global_transfer_unmap(
 @@ -1013,16 +1022,31 @@ void r600_compute_global_transfer_unmap(
   struct pipe_transfer* transfer)
  {
   /* struct r600_resource_global are not real resources, they just map
 -  * to an offset within the compute memory pool.  The function
 -  * r600_compute_global_transfer_map() maps the memory pool
 -  * resource rather than the struct r600_resource_global passed to
 -  * it as an argument and then initalizes ptransfer-resource with
 -  * the memory pool resource (via pipe_buffer_map_range).
 -  * When transfer_unmap is called it uses the memory pool's
 -  * vtable which calls r600_buffer_transfer_map() rather than
 -  * this function.
 +  * to an offset within the compute memory pool. The function
 +  * r600_compute_global_transfer_map() creates a struct
 +  * r600_transfer_global, which has as resource an r600_global_resource
 +  * and an r600_transfer which will act as the 'real' pipe_transfer
 +  * that will be passed to pipe_buffer_map_range.
 +  *
 +  * This allows us to use an r600_resource_global vtable when 
 transfer_unmap
 +  * is called, and still have the full information about the transfer,
 +  * which will be used to actually unmap the resource.
*/
 - assert (!This function should not be called);
 +
 + struct r600_context *rctx = (struct r600_context *)ctx_;
 + struct r600_transfer_global *trans =
 + (struct r600_transfer_global *)transfer;
 + struct r600_resource_global *buffer =
 + (struct r600_resource_global *)trans-resource;
 + struct compute_memory_item *item = buffer-chunk;
 +
 + COMPUTE_DBG(rctx-screen, * r600_compute_global_transfer_unmap()\n
 + Unmaping Buffer: %u\n, item-id);
 +
 + ctx_-transfer_unmap(ctx_, trans-ptransfer);
 + item-map_count--;
 +
 + FREE(trans);
  }
  
  void r600_compute_global_transfer_flush_region(
 diff --git a/src/gallium/drivers/r600/evergreen_compute.h 
 b/src/gallium/drivers/r600/evergreen_compute.h
 index 4fb53a1..842e5e4 100644
 --- a/src/gallium/drivers/r600/evergreen_compute.h
 +++ b/src/gallium/drivers/r600/evergreen_compute.h
 @@ -38,6 +38,11 @@ struct r600_resource_global {
   struct compute_memory_item *chunk;
  };
  
 +struct r600_transfer_global {
 + struct pipe_resource *resource;
 + struct pipe_transfer *ptransfer;

This still looks wrong. ptransfer should be the first member, and it
should not be a pointer.

-Tom

 +};
 +
  void *evergreen_create_compute_state(struct pipe_context *ctx, const struct 
 pipe_compute_state *cso);
  void evergreen_delete_compute_state(struct pipe_context *ctx, void *state);
  void evergreen_compute_upload_input(struct pipe_context *context, const uint 
 *block_layout, const uint *grid_layout, const void *input);
 -- 
 2.0.4
 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium/r300: Fix a link error in the tests

2014-08-12 Thread Tom Stellard
On Tue, Aug 12, 2014 at 11:14:06AM -0700, Jason Ekstrand wrote:
 The link error occurs because the static libraries are linked in the wrong
 order.  This fixes it.
 

Reviewed-by: Tom Stellard thomas.stell...@amd.com

 Signed-off-by: Jason Ekstrand jason.ekstr...@intel.com
 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82483
 ---
  src/gallium/drivers/r300/Makefile.am | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/src/gallium/drivers/r300/Makefile.am 
 b/src/gallium/drivers/r300/Makefile.am
 index ae6e8d2..e74e445 100644
 --- a/src/gallium/drivers/r300/Makefile.am
 +++ b/src/gallium/drivers/r300/Makefile.am
 @@ -18,8 +18,8 @@ check_PROGRAMS = r300_compiler_tests
  TESTS = r300_compiler_tests
  
  r300_compiler_tests_LDADD = libr300.la libr300-helper.la \
 - $(top_builddir)/src/util/libmesautil.la \
   $(top_builddir)/src/gallium/auxiliary/libgallium.la \
 + $(top_builddir)/src/util/libmesautil.la \
   $(GALLIUM_COMMON_LIB_DEPS)
  r300_compiler_tests_CPPFLAGS = \
   -I$(top_srcdir)/src/gallium/drivers/r300/compiler
 -- 
 2.0.4
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/7] radeonsi/compute: Call si_pm4_free_state() after emitting compute state

2014-08-08 Thread Tom Stellard
This will decrement the reference count for buffers referenced in the
command stream, which will prevent us from leaking them.

CC: 10.2 mesa-sta...@lists.freedesktop.org
---
 src/gallium/drivers/radeonsi/si_compute.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index 482d475..e8fc8eb 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -374,8 +374,8 @@ static void si_launch_grid(
}
 #endif
 
-   FREE(pm4);
FREE(kernel_args);
+   si_pm4_free_state(sctx, pm4, ~0);
 }
 
 
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 7/7] clover: Flush the command queue in clReleaseCommandQueue()

2014-08-08 Thread Tom Stellard
This is required by the spec.

CC: 10.2 mesa-sta...@lists.freedesktop.org
---
 src/gallium/state_trackers/clover/api/queue.cpp | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/clover/api/queue.cpp 
b/src/gallium/state_trackers/clover/api/queue.cpp
index a136018..06a2863 100644
--- a/src/gallium/state_trackers/clover/api/queue.cpp
+++ b/src/gallium/state_trackers/clover/api/queue.cpp
@@ -58,7 +58,11 @@ clRetainCommandQueue(cl_command_queue d_q) try {
 
 CLOVER_API cl_int
 clReleaseCommandQueue(cl_command_queue d_q) try {
-   if (obj(d_q).release())
+   auto q = obj(d_q);
+
+   q.flush();
+
+   if (q.release())
   delete pobj(d_q);
 
return CL_SUCCESS;
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/7] radeonsi/compute: Stop leaking the input buffer

2014-08-08 Thread Tom Stellard
We were leaking the input buffer used for kernel arguments and since
we were allocating it using si_upload_const_buffer() we were leaking
1 MB per kernel invocation.

CC: 10.2 mesa-sta...@lists.freedesktop.org
---
 src/gallium/drivers/radeonsi/si_compute.c | 22 ++
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index dff5ddd..01aa0c6 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -48,6 +48,7 @@ struct si_pipe_compute {
struct si_pipe_shader *kernels;
unsigned num_user_sgprs;
 
+   struct r600_resource *input_buffer;
struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS];
 
LLVMContextRef llvm_ctx;
@@ -85,6 +86,9 @@ static void *si_create_compute_state(
LLVMDisposeModule(mod);
}
 
+   program-input_buffer = si_resource_create_custom(sctx-b.b.screen,
+   PIPE_USAGE_IMMUTABLE, program-input_size);
+
return program;
 }
 
@@ -167,7 +171,7 @@ static void si_launch_grid(
struct si_context *sctx = (struct si_context*)ctx;
struct si_pipe_compute *program = sctx-cs_shader_state.program;
struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
-   struct r600_resource *kernel_args_buffer = NULL;
+   struct r600_resource *input_buffer = program-input_buffer;
unsigned kernel_args_size;
unsigned num_work_size_bytes = 36;
uint32_t kernel_args_offset = 0;
@@ -199,7 +203,8 @@ static void si_launch_grid(
/* The extra num_work_size_bytes are for work group / work item size 
information */
kernel_args_size = program-input_size + num_work_size_bytes + 8 /* For 
scratch va */;
 
-   kernel_args = MALLOC(kernel_args_size);
+   kernel_args = sctx-b.ws-buffer_map(input_buffer-cs_buf,
+   sctx-b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
for (i = 0; i  3; i++) {
kernel_args[i] = grid_layout[i];
kernel_args[i + 3] = grid_layout[i] * block_layout[i];
@@ -236,13 +241,13 @@ static void si_launch_grid(
kernel_args[i]);
}
 
-   si_upload_const_buffer(sctx, kernel_args_buffer, (uint8_t*)kernel_args,
-   kernel_args_size, kernel_args_offset);
-   kernel_args_va = r600_resource_va(ctx-screen,
-   (struct pipe_resource*)kernel_args_buffer);
+   sctx-b.ws-buffer_unmap(input_buffer-cs_buf);
+
+   kernel_args_va = r600_resource_va(ctx-screen, input_buffer-b.b);
kernel_args_va += kernel_args_offset;
 
-   si_pm4_add_bo(pm4, kernel_args_buffer, RADEON_USAGE_READ, 
RADEON_PRIO_SHADER_DATA);
+   si_pm4_add_bo(pm4, input_buffer, RADEON_USAGE_READ,
+   RADEON_PRIO_SHADER_DATA);
 
si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va);
si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4, 
S_008F04_BASE_ADDRESS_HI (kernel_args_va  32) | S_008F04_STRIDE(0));
@@ -374,7 +379,6 @@ static void si_launch_grid(
}
 #endif
 
-   FREE(kernel_args);
si_pm4_free_state(sctx, pm4, ~0);
 }
 
@@ -398,6 +402,8 @@ static void si_delete_compute_state(struct pipe_context 
*ctx, void* state){
if (program-llvm_ctx){
LLVMContextDispose(program-llvm_ctx);
}
+   pipe_resource_reference(
+   (struct pipe_resource **)program-input_buffer, NULL);
 
//And then free the program itself.
FREE(program);
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/7] radeon/compute: Fix reported values for MAX_GLOBAL_SIZE and MAX_MEM_ALLOC_SIZE

2014-08-08 Thread Tom Stellard
There is a hard limit in older kernels of 256 MB for buffer allocations,
so report this value as MAX_MEM_ALLOC_SIZE and adjust MAX_GLOBAL_SIZE
to satisfy the requirements of OpenCL.

CC: 10.2 mesa-sta...@lists.freedesktop.org
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 32 ---
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 3476021..0886b02 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -474,13 +474,21 @@ static int r600_get_compute_param(struct pipe_screen 
*screen,
case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
if (ret) {
uint64_t *max_global_size = ret;
-   /* XXX: This is what the proprietary driver reports, we
-* may want to use a different value. */
-   /* XXX: Not sure what to put here for SI. */
-   if (rscreen-chip_class = SI)
-   *max_global_size = 20;
-   else
-   *max_global_size = 201326592;
+   uint64_t max_mem_alloc_size;
+
+   r600_get_compute_param(screen,
+   PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
+   max_mem_alloc_size);
+
+   /* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least
+* 1/4 of the MAX_GLOBAL_SIZE.  Since the
+* MAX_MEM_ALLOC_SIZE is fixed for older kernels,
+* make sure we never report more than
+* 4 * MAX_MEM_ALLOC_SIZE.
+*/
+   *max_global_size = MIN2(4 * max_mem_alloc_size,
+   rscreen-info.gart_size +
+   rscreen-info.vram_size);
}
return sizeof(uint64_t);
 
@@ -504,13 +512,11 @@ static int r600_get_compute_param(struct pipe_screen 
*screen,
if (ret) {
uint64_t max_global_size;
uint64_t *max_mem_alloc_size = ret;
-   r600_get_compute_param(screen, 
PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE, max_global_size);
-   /* OpenCL requres this value be at least
-* max(MAX_GLOBAL_SIZE / 4, 128 * 1024 *1024)
-* I'm really not sure what value to report here, but
-* MAX_GLOBAL_SIZE / 4 seems resonable.
+
+   /* XXX: The limit in older kernels is 256 MB.  We
+* should add a query here for newer kernels.
 */
-   *max_mem_alloc_size = max_global_size / 4;
+   *max_mem_alloc_size = 256 * 1024 * 1024;
}
return sizeof(uint64_t);
 
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/7] radeonsi/compute: Whitespace fixes

2014-08-08 Thread Tom Stellard
CC: 10.2 mesa-sta...@lists.freedesktop.org
---
 src/gallium/drivers/radeonsi/si_compute.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index e8fc8eb..dff5ddd 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -48,7 +48,7 @@ struct si_pipe_compute {
struct si_pipe_shader *kernels;
unsigned num_user_sgprs;
 
-struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS];
+   struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS];
 
LLVMContextRef llvm_ctx;
 };
@@ -392,7 +392,6 @@ static void si_delete_compute_state(struct pipe_context 
*ctx, void* state){
si_pipe_shader_destroy(ctx, 
program-kernels[i]);
}
}
-   
FREE(program-kernels);
}
 
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/7] radeon/compute: Report a value for PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE

2014-08-08 Thread Tom Stellard
CC: 10.2 mesa-sta...@lists.freedesktop.org
---
 src/gallium/drivers/r600/r600_pipe.c   | 11 ++-
 src/gallium/drivers/radeonsi/si_pipe.c |  7 +++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index a08e70e..7ace671 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -421,7 +421,16 @@ static int r600_get_shader_param(struct pipe_screen* 
pscreen, unsigned shader, e
/* XXX Isn't this equal to TEMPS? */
return 1; /* Max native address registers */
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
-   return R600_MAX_CONST_BUFFER_SIZE;
+   if (shader == PIPE_SHADER_COMPUTE) {
+   uint64_t max_const_buffer_size;
+   pscreen-get_compute_param(pscreen,
+   PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
+   max_const_buffer_size);
+   return max_const_buffer_size;
+
+   } else {
+   return R600_MAX_CONST_BUFFER_SIZE;
+   }
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
return R600_MAX_USER_CONST_BUFFERS;
case PIPE_SHADER_CAP_MAX_PREDS:
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 635b37d..791838f 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -327,6 +327,13 @@ static int si_get_shader_param(struct pipe_screen* 
pscreen, unsigned shader, enu
case PIPE_SHADER_CAP_DOUBLES:
return 0; /* XXX: Enable doubles once the compiler can
 handle them. */
+   case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: {
+   uint64_t max_const_buffer_size;
+   pscreen-get_compute_param(pscreen,
+   PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
+   max_const_buffer_size);
+   return max_const_buffer_size;
+   }
default:
return 0;
}
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/7] radeonsi/compute: Update reference counts for buffers in si_set_global_binding()

2014-08-08 Thread Tom Stellard
CC: 10.2 mesa-sta...@lists.freedesktop.org
---
 src/gallium/drivers/radeonsi/si_compute.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index 42e4fec..482d475 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -105,7 +105,7 @@ static void si_set_global_binding(
 
if (!resources) {
for (i = first; i  first + n; i++) {
-   program-global_buffers[i] = NULL;
+   pipe_resource_reference(program-global_buffers[i], 
NULL);
}
return;
}
@@ -113,7 +113,7 @@ static void si_set_global_binding(
for (i = first; i  first + n; i++) {
uint64_t va;
uint32_t offset;
-   program-global_buffers[i] = resources[i];
+   pipe_resource_reference(program-global_buffers[i], 
resources[i]);
va = r600_resource_va(ctx-screen, resources[i]);
offset = util_le32_to_cpu(*handles[i]);
va += offset;
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] radeonsi/compute: Memory usage fixes

2014-08-08 Thread Tom Stellard
Hi,

This series contains fixes for applications which allocate large amounts
of memory.

The first two patches fix the values reported for
PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE, PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
and PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE so that applications don't
allocate more memory than is available.

The next five patches eliminate some GPU buffer leaks which should fix
long running applications that launch a lot of kernels.

Please Review.

Thanks,
Tom

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/3] cl workdim v2

2014-08-07 Thread Tom Stellard
On Thu, Aug 07, 2014 at 04:02:40PM +0300, Francisco Jerez wrote:
 Jan Vesely jan.ves...@rutgers.edu writes:
 
  This respin includes Francisco's approach of providing implicit args
  in the arg vector passed from clover, and Tom's idea of appending
  implicit args after the kernel args.
 
 
 Hmmm...  Maybe it would make sense to add some sort of versioning
 (e.g. as part of the target triple) to the binary interface between
 clover and the kernel instead, so we can handle this sort of
 non-backwards-compatible change, and the compiler back-end and libclc
 have some way to find out whether some specific feature is available and
 e.g. some specific extension should be enabled.
 

I was thinking the way to do this would be to use calling conventions
on the kernel functions to specify which binary interface to use.
However, I don't want to change the binary interface right now, because
it is still missing a lot of things, and I don't want to have to change
it every time we add something new.

I think we should keep the current interface of:
Offset   | Data
-|--
0: Kernel Arguments
sizeof(Kernel Inputs): work_dim
sizeof(Kernel Inputs) + 4: 
...

We can always revisit this once clover is more mature and we think
we have a binary interface that won't change. Although, personally I
prefer adding implicit inputs to the end of the kernel arguments rather
than having them somewhere else.

-Tom

  I assumed it's not safe to modify exec.input, so the input vector is copied
  before appending work dim.
 
 
 Why wouldn't it be safe?  You just need to make sure they're appended
 before the compute state is created.
 
  Passes get-work-dim piglit on turks without any regression,
  I have not tested SI as I don't have the hw.
 
  jan
 
 
 
 
  Jan Vesely (3):
gallium: Pass input data size to launch_grid
clover: Add work dimension implicit param to input
r600,radeonsi: Copy implicit args provided by clover
 
   src/gallium/drivers/ilo/ilo_gpgpu.c   |   2 +-
   src/gallium/drivers/nouveau/nvc0/nvc0_compute.c   |   2 +-
   src/gallium/drivers/nouveau/nvc0/nvc0_context.h   |   4 +-
   src/gallium/drivers/nouveau/nvc0/nve4_compute.c   |   2 +-
   src/gallium/drivers/r600/evergreen_compute.c  |  14 +-
   src/gallium/drivers/r600/evergreen_compute.h  |   1 -
   src/gallium/drivers/radeonsi/si_compute.c |   6 +-
   src/gallium/include/pipe/p_context.h  |   2 +-
   src/gallium/state_trackers/clover/core/kernel.cpp | 162 
  --
   src/gallium/tests/trivial/compute.c   |  40 +++---
   10 files changed, 122 insertions(+), 113 deletions(-)
 
  -- 
  1.9.3



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Is the r600 llvm backend supposed to behave exactly the same as the radeonsi llvm?

2014-08-07 Thread Tom Stellard
On Fri, Aug 08, 2014 at 04:53:46AM +0300, Kertesz Laszlo wrote:
 Is the r600 llvm backend supposed to behave exactly the same as the
 radeonsi llvm?
 

Support for r600 in LLVM is experimental, but for radeonsi it should
work well.

-Tom

 
 -- 
 O zi buna,
 
 Kertesz Laszlo
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r300/compiler: recursive look for RC_OPCODE_S**

2014-08-01 Thread Tom Stellard
On Fri, Aug 01, 2014 at 03:55:56PM +0200, Marek Olšák wrote:
 From: David Heidelberger david.heidelber...@ixit.cz
 
 Get rid of the error "Failed to build loop info" by fixing the failure in
 cases like
 4:   SGE temp[2].x, temp[0]., const[0].;
 5:   CMP temp[1].x, -temp[2]., const[0]., temp[1].;
 6:   IF temp[1].;
 
 On RS690
  - fixes piglit glean do-loop with continue and break
  - changes error from "Failed to build loop info" to
    "Not a native swizzle: 0e89"
r300_fragprog_emit.c::begin_tex(): Too many texture indirections
for discard statement in for loop
  - hide Failed to build loop info for
precision log2, while-loop with continue,
    for-loop with continue and return 1 1 1 1 instead of 0 0 0 1
 
 Signed-off-by: David Heidelberger david.heidelber...@ixit.cz
 ---
 
 I'm sending this on behalf of David. I'll commit it soon if there is no 
 review.
 
  .../drivers/r300/compiler/radeon_emulate_loops.c   | 27 
 +-
  1 file changed, 16 insertions(+), 11 deletions(-)
 
 diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c 
 b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c
 index 91ed9d2..d1fae9a 100644
 --- a/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c
 +++ b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c
 @@ -376,18 +376,23 @@ static int build_loop_info(struct radeon_compiler * c, 
 struct loop_info * loop,
   loop-Brk = ptr;
   loop-If = ptr-Prev;
   loop-EndIf = ptr-Next;
 - switch(loop-If-Prev-U.I.Opcode){
 - case RC_OPCODE_SLT:
 - case RC_OPCODE_SGE:
 - case RC_OPCODE_SGT:
 - case RC_OPCODE_SLE:
 - case RC_OPCODE_SEQ:
 - case RC_OPCODE_SNE:
 - break;
 - default:
 - return 0;
 - }
 +
   loop-Cond = loop-If-Prev;
 + while (loop-Cond-U.I.Opcode != RC_OPCODE_BGNLOOP) {
 + switch(loop-Cond-U.I.Opcode){
 + case RC_OPCODE_SLT:
 + case RC_OPCODE_SGE:
 + case RC_OPCODE_SGT:
 + case RC_OPCODE_SLE:
 + case RC_OPCODE_SEQ:
 + case RC_OPCODE_SNE:
 + goto found;
 + default:

You can't just look for any S* instruction here, you need to look for
the one that is defining the register used by IF.  I know I posted a patch
that does this, but I forgot what the problem was with it.

-Tom

 + loop-Cond = loop-Cond-Prev;
 + break;
 + }
 + }
 + found:
   break;
  
   case RC_OPCODE_ENDLOOP:
 -- 
 1.9.1
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] r600g: Pass dimension parameter to compute shader.

2014-07-31 Thread Tom Stellard
On Wed, Jul 30, 2014 at 07:11:35PM -0400, Jan Vesely wrote:
 Make the function static.
 

No need to cc llvm-commits on these mesa patches.  Reviewers follow both
lists.

 This needs corresponding change in LLVM otherwise it breaks parameter passing
 
 CC: Tom Stellard t...@stellard.net
 CC: Matt Arsenault matthew.arsena...@amd.com
 
 Signed-off-by: Jan Vesely jan.ves...@rutgers.edu
 ---
  src/gallium/drivers/r600/evergreen_compute.c | 26 +-
  src/gallium/drivers/r600/evergreen_compute.h |  1 -
  2 files changed, 21 insertions(+), 6 deletions(-)
 
 diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
 b/src/gallium/drivers/r600/evergreen_compute.c
 index 3928676..150bc5c 100644
 --- a/src/gallium/drivers/r600/evergreen_compute.c
 +++ b/src/gallium/drivers/r600/evergreen_compute.c
 @@ -266,24 +266,31 @@ static void evergreen_bind_compute_state(struct 
 pipe_context *ctx_, void *state)
   * DWORDS 3-5: Number of global work items in each dimension (x,y,z)
   * DWORDS 6-8: Number of work items within each work group in each dimension
   * (x,y,z)
 - * DWORDS 9+ : Kernel parameters
 + * DWORD  9  : work dimension (needs new enough llvm)
 + * DWORDS 10+: Kernel parameters

I would prefer to add new parameters after the kernel arguments, so we
don't need to break compatibility with LLVM every time we add a new
parameter.


-Tom

   */
 -void evergreen_compute_upload_input(
 +static void evergreen_compute_upload_input(
   struct pipe_context *ctx_,
   const uint *block_layout,
   const uint *grid_layout,
 - const void *input)
 + const void *input,
 + unsigned dimensions)
  {
   struct r600_context *ctx = (struct r600_context *)ctx_;
   struct r600_pipe_compute *shader = ctx-cs_shader_state.shader;
   unsigned i;
 - /* We need to reserve 9 dwords (36 bytes) for implicit kernel
 + /* We need to reserve 10 dwords (40 bytes) for implicit kernel
* parameters.
*/
 +#if HAVE_LLVM = 0x0306
 + unsigned input_size = shader-input_size + 40;
 +#else
   unsigned input_size = shader-input_size + 36;
 +#endif
   uint32_t * num_work_groups_start;
   uint32_t * global_size_start;
   uint32_t * local_size_start;
 + uint32_t * work_dim;
   uint32_t * kernel_parameters_start;
   struct pipe_box box;
   struct pipe_transfer *transfer = NULL;
 @@ -306,7 +313,14 @@ void evergreen_compute_upload_input(
   box, transfer);
   global_size_start = num_work_groups_start + (3 * (sizeof(uint) /4));
   local_size_start = global_size_start + (3 * (sizeof(uint)) / 4);
 +#if HAVE_LLVM = 0x0306
 + work_dim = local_size_start + (3 * (sizeof(uint)) / 4);
 + work_dim[0] = dimensions;
 +
 + kernel_parameters_start = work_dim + (1 * (sizeof(uint)) / 4);
 +#else
   kernel_parameters_start = local_size_start + (3 * (sizeof(uint)) / 4);
 +#endif
  
   /* Copy the work group size */
   memcpy(num_work_groups_start, grid_layout, 3 * sizeof(uint));
 @@ -319,6 +333,7 @@ void evergreen_compute_upload_input(
   /* Copy the local dimensions */
   memcpy(local_size_start, block_layout, 3 * sizeof(uint));
  
 +
   /* Copy the kernel inputs */
   memcpy(kernel_parameters_start, input, shader-input_size);
  
 @@ -584,7 +599,8 @@ static void evergreen_launch_grid(
  #endif
   shader-active_kernel = kernel;
   ctx-cs_shader_state.kernel_index = pc;
 - evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input);
 + evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input,
 +dimensions);
   compute_emit_cs(ctx, block_layout, grid_layout);
  }
  
 diff --git a/src/gallium/drivers/r600/evergreen_compute.h 
 b/src/gallium/drivers/r600/evergreen_compute.h
 index 4fb53a1..570ab2a 100644
 --- a/src/gallium/drivers/r600/evergreen_compute.h
 +++ b/src/gallium/drivers/r600/evergreen_compute.h
 @@ -40,7 +40,6 @@ struct r600_resource_global {
  
  void *evergreen_create_compute_state(struct pipe_context *ctx, const struct 
 pipe_compute_state *cso);
  void evergreen_delete_compute_state(struct pipe_context *ctx, void *state);
 -void evergreen_compute_upload_input(struct pipe_context *context, const uint 
 *block_layout, const uint *grid_layout, const void *input);
  void evergreen_init_atom_start_compute_cs(struct r600_context *rctx);
  void evergreen_init_compute_state_functions(struct r600_context *rctx);
  void evergreen_emit_cs_shader(struct r600_context *rctx, struct r600_atom * 
 atom);
 -- 
 1.9.3
 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] r600g: Pass dimension parameter to compute shader.

2014-07-31 Thread Tom Stellard
On Thu, Jul 31, 2014 at 01:28:45PM -0400, Jan Vesely wrote:
 On Thu, 2014-07-31 at 11:02 -0400, Tom Stellard wrote:
  On Wed, Jul 30, 2014 at 07:11:35PM -0400, Jan Vesely wrote:
   Make the function static.
   
  
  No need to cc llvm-commits on these mesa patches.  Reviewers follow both
  lists.
 
 sorry about that
 
  
   This needs corresponding change in LLVM otherwise it breaks parameter 
   passing
   
   CC: Tom Stellard t...@stellard.net
   CC: Matt Arsenault matthew.arsena...@amd.com
   
   Signed-off-by: Jan Vesely jan.ves...@rutgers.edu
   ---
src/gallium/drivers/r600/evergreen_compute.c | 26 
   +-
src/gallium/drivers/r600/evergreen_compute.h |  1 -
2 files changed, 21 insertions(+), 6 deletions(-)
   
   diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
   b/src/gallium/drivers/r600/evergreen_compute.c
   index 3928676..150bc5c 100644
   --- a/src/gallium/drivers/r600/evergreen_compute.c
   +++ b/src/gallium/drivers/r600/evergreen_compute.c
   @@ -266,24 +266,31 @@ static void evergreen_bind_compute_state(struct 
   pipe_context *ctx_, void *state)
 * DWORDS 3-5: Number of global work items in each dimension (x,y,z)
 * DWORDS 6-8: Number of work items within each work group in each 
   dimension
 * (x,y,z)
   - * DWORDS 9+ : Kernel parameters
   + * DWORD  9  : work dimension (needs new enough llvm)
   + * DWORDS 10+: Kernel parameters
  
  I would prefer to add new parameters after the kernel arguments, so we
  don't need to break compatibility with LLVM every time we add a new
  parameter.
 
 How would this work with multiple kernel in one module? Is it possible
 to know what kernel is the lowered code going to end up in? or did you
 have a static offset in mind?
 

I was thinking of using a static offset from the end of the kernel
arguments.  So KernelArgSize + 0 would be number of dimensions
and it would be available to all kernels.

Then the compiler could lower any use of get_dims to a load
from the input buffer at the correct offset.

-Tom

 jan
 
 
 
  
  
  -Tom
  
 */
   -void evergreen_compute_upload_input(
   +static void evergreen_compute_upload_input(
 struct pipe_context *ctx_,
 const uint *block_layout,
 const uint *grid_layout,
   - const void *input)
   + const void *input,
   + unsigned dimensions)
{
 struct r600_context *ctx = (struct r600_context *)ctx_;
 struct r600_pipe_compute *shader = ctx-cs_shader_state.shader;
 unsigned i;
   - /* We need to reserve 9 dwords (36 bytes) for implicit kernel
   + /* We need to reserve 10 dwords (40 bytes) for implicit kernel
  * parameters.
  */
   +#if HAVE_LLVM = 0x0306
   + unsigned input_size = shader-input_size + 40;
   +#else
 unsigned input_size = shader-input_size + 36;
   +#endif
 uint32_t * num_work_groups_start;
 uint32_t * global_size_start;
 uint32_t * local_size_start;
   + uint32_t * work_dim;
 uint32_t * kernel_parameters_start;
 struct pipe_box box;
 struct pipe_transfer *transfer = NULL;
   @@ -306,7 +313,14 @@ void evergreen_compute_upload_input(
 box, transfer);
 global_size_start = num_work_groups_start + (3 * (sizeof(uint) /4));
 local_size_start = global_size_start + (3 * (sizeof(uint)) / 4);
   +#if HAVE_LLVM = 0x0306
   + work_dim = local_size_start + (3 * (sizeof(uint)) / 4);
   + work_dim[0] = dimensions;
   +
   + kernel_parameters_start = work_dim + (1 * (sizeof(uint)) / 4);
   +#else
 kernel_parameters_start = local_size_start + (3 * (sizeof(uint)) / 4);
   +#endif

 /* Copy the work group size */
 memcpy(num_work_groups_start, grid_layout, 3 * sizeof(uint));
   @@ -319,6 +333,7 @@ void evergreen_compute_upload_input(
 /* Copy the local dimensions */
 memcpy(local_size_start, block_layout, 3 * sizeof(uint));

   +
 /* Copy the kernel inputs */
 memcpy(kernel_parameters_start, input, shader-input_size);

   @@ -584,7 +599,8 @@ static void evergreen_launch_grid(
#endif
 shader-active_kernel = kernel;
 ctx-cs_shader_state.kernel_index = pc;
   - evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input);
   + evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input,
   +dimensions);
 compute_emit_cs(ctx, block_layout, grid_layout);
}

   diff --git a/src/gallium/drivers/r600/evergreen_compute.h 
   b/src/gallium/drivers/r600/evergreen_compute.h
   index 4fb53a1..570ab2a 100644
   --- a/src/gallium/drivers/r600/evergreen_compute.h
   +++ b/src/gallium/drivers/r600/evergreen_compute.h
   @@ -40,7 +40,6 @@ struct r600_resource_global {

void *evergreen_create_compute_state(struct pipe_context *ctx, const 
   struct pipe_compute_state *cso);
void evergreen_delete_compute_state(struct pipe_context *ctx, void 
   *state);
   -void evergreen_compute_upload_input(struct pipe_context *context, const 
   uint *block_layout

Re: [Mesa-dev] Clover Platform Naming

2014-07-30 Thread Tom Stellard
On Tue, Jul 29, 2014 at 09:50:23AM -0500, Aaron Watry wrote:
 Hi list,
 
 I was starting to look into getting cppamp-driver-ng working with
 mesa/clover, and I quickly ran into a question...
 
 cppamp-driver-ng explicitly lists which platforms are supported in its
 OpenCL back-end's source code. I went to add in the Mesa/Clover
 entries, and discovered that we're actually reporting ourselves as:
 CL_PLATFORM_VENDOR=Mesa
 CL_PLATFORM_NAME=Default
 

What do other implementations return for CL_PLATFORM_NAME?

-Tom

 The way that cppamp-driver-ng is written, they don't really care about
 the vendor so much as the platform name, and Default is about as
 generic as you can get.
 
 At this point in the game, are we stuck with this platform name, or
 would we be able to change this to Clover (or something more
 appropriate)?
 
 Just wanted to ask around and see what people thought.
 
 --Aaron
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 1/2] r600g/compute: Add documentation to compute_memory_pool

2014-07-28 Thread Tom Stellard
On Sun, Jul 27, 2014 at 01:56:15PM +0200, Bruno Jiménez wrote:
 v2: Rebased on top of master

I've pushed both of these patches, thanks!

-Tom

 ---
  src/gallium/drivers/r600/compute_memory_pool.c | 59 
 +-
  src/gallium/drivers/r600/compute_memory_pool.h | 58 -
  2 files changed, 86 insertions(+), 31 deletions(-)
 
 diff --git a/src/gallium/drivers/r600/compute_memory_pool.c 
 b/src/gallium/drivers/r600/compute_memory_pool.c
 index d53587f..928618c 100644
 --- a/src/gallium/drivers/r600/compute_memory_pool.c
 +++ b/src/gallium/drivers/r600/compute_memory_pool.c
 @@ -44,7 +44,7 @@
  
  #define ITEM_ALIGNMENT 1024
  /**
 - * Creates a new pool
 + * Creates a new pool.
   */
  struct compute_memory_pool* compute_memory_pool_new(
   struct r600_screen * rscreen)
 @@ -66,6 +66,12 @@ struct compute_memory_pool* compute_memory_pool_new(
   return pool;
  }
  
 +/**
 + * Initializes the pool with a size of \a initial_size_in_dw.
 + * \param pool   The pool to be initialized.
 + * \param initial_size_in_dw The initial size.
 + * \see compute_memory_grow_defrag_pool
 + */
  static void compute_memory_pool_init(struct compute_memory_pool * pool,
   unsigned initial_size_in_dw)
  {
 @@ -79,7 +85,7 @@ static void compute_memory_pool_init(struct 
 compute_memory_pool * pool,
  }
  
  /**
 - * Frees all stuff in the pool and the pool struct itself too
 + * Frees all stuff in the pool and the pool struct itself too.
   */
  void compute_memory_pool_delete(struct compute_memory_pool* pool)
  {
 @@ -94,7 +100,9 @@ void compute_memory_pool_delete(struct 
 compute_memory_pool* pool)
  
  /**
   * Searches for an empty space in the pool, return with the pointer to the
 - * allocatable space in the pool, returns -1 on failure.
 + * allocatable space in the pool.
 + * \param size_in_dw The size of the space we are looking for.
 + * \return -1 on failure
   */
  int64_t compute_memory_prealloc_chunk(
   struct compute_memory_pool* pool,
 @@ -126,6 +134,8 @@ int64_t compute_memory_prealloc_chunk(
  
  /**
   *  Search for the chunk where we can link our new chunk after it.
 + *  \param start_in_dw   The position of the item we want to add to the 
 pool.
 + *  \return The item that is just before the passed position
   */
  struct list_head *compute_memory_postalloc_chunk(
   struct compute_memory_pool* pool,
 @@ -166,8 +176,9 @@ struct list_head *compute_memory_postalloc_chunk(
  }
  
  /**
 - * Reallocates pool, conserves data.
 - * @returns -1 if it fails, 0 otherwise
 + * Reallocates and defragments the pool, conserves data.
 + * \returns -1 if it fails, 0 otherwise
 + * \see compute_memory_finalize_pending
   */
  int compute_memory_grow_defrag_pool(struct compute_memory_pool *pool,
   struct pipe_context *pipe, int new_size_in_dw)
 @@ -234,6 +245,8 @@ int compute_memory_grow_defrag_pool(struct 
 compute_memory_pool *pool,
  
  /**
   * Copy pool from device to host, or host to device.
 + * \param device_to_host 1 for device->host, 0 for host->device
 + * \see compute_memory_grow_defrag_pool
   */
  void compute_memory_shadow(struct compute_memory_pool* pool,
   struct pipe_context * pipe, int device_to_host)
 @@ -251,8 +264,10 @@ void compute_memory_shadow(struct compute_memory_pool* 
 pool,
  }
  
  /**
 - * Allocates pending allocations in the pool
 - * @returns -1 if it fails, 0 otherwise
 + * Moves all the items marked for promotion from the \a unallocated_list
 + * to the \a item_list.
 + * \return -1 if it fails, 0 otherwise
 + * \see evergreen_set_global_binding
   */
  int compute_memory_finalize_pending(struct compute_memory_pool* pool,
   struct pipe_context * pipe)
 @@ -323,6 +338,9 @@ int compute_memory_finalize_pending(struct 
 compute_memory_pool* pool,
  /**
   * Defragments the pool, so that there's no gap between items.
   * \param pool   The pool to be defragmented
 + * \param src    The origin resource
 + * \param dst    The destination resource
 + * \see compute_memory_grow_defrag_pool and compute_memory_finalize_pending
   */
  void compute_memory_defrag(struct compute_memory_pool *pool,
   struct pipe_resource *src, struct pipe_resource *dst,
 @@ -348,6 +366,12 @@ void compute_memory_defrag(struct compute_memory_pool 
 *pool,
   pool-status = ~POOL_FRAGMENTED;
  }
  
 +/**
 + * Moves an item from the \a unallocated_list to the \a item_list.
 + * \param item   The item that will be promoted.
 + * \return -1 if it fails, 0 otherwise
 + * \see compute_memory_finalize_pending
 + */
  int compute_memory_promote_item(struct compute_memory_pool *pool,
   struct compute_memory_item *item, struct pipe_context *pipe,
   int64_t start_in_dw)
 @@ -390,6 +414,11 @@ int compute_memory_promote_item(struct 
 compute_memory_pool *pool,
   return 0;
  }
  
 +/**
 + * Moves an item from the \a item_list to the \a unallocated_list.
 + * \param item   The item that 

Re: [Mesa-dev] [PATCH 3/3] r600g/compute: Defrag the pool at the same time as we grow it

2014-07-25 Thread Tom Stellard
On Sat, Jul 19, 2014 at 07:35:51PM +0200, Bruno Jiménez wrote:
 This allows us to do two things: we now need fewer item copies when we have
 to defrag+grow the pool (to just one copy per item) and, even in the
 case where we don't need to defrag the pool, we reduce the data copied
 to just the useful data that the items use.
 
 Note: The fallback path is a bit ugly now, but hopefully we won't need
 it much.

Hi,

I pushed the first two patches, but I couldn't get this one to apply.
Could you send an updated version rebased on master?

Thanks,
Tom

 ---
  src/gallium/drivers/r600/compute_memory_pool.c | 40 
 --
  src/gallium/drivers/r600/compute_memory_pool.h |  2 +-
  2 files changed, 19 insertions(+), 23 deletions(-)
 
 diff --git a/src/gallium/drivers/r600/compute_memory_pool.c 
 b/src/gallium/drivers/r600/compute_memory_pool.c
 index ca36240..32f5892 100644
 --- a/src/gallium/drivers/r600/compute_memory_pool.c
 +++ b/src/gallium/drivers/r600/compute_memory_pool.c
 @@ -169,10 +169,12 @@ struct list_head *compute_memory_postalloc_chunk(
   * Reallocates pool, conserves data.
   * @returns -1 if it fails, 0 otherwise
   */
 -int compute_memory_grow_pool(struct compute_memory_pool* pool,
 - struct pipe_context * pipe, int new_size_in_dw)
 +int compute_memory_grow_defrag_pool(struct compute_memory_pool *pool,
 + struct pipe_context *pipe, int new_size_in_dw)
  {
 - COMPUTE_DBG(pool-screen, * compute_memory_grow_pool() 
 + new_size_in_dw = align(new_size_in_dw, ITEM_ALIGNMENT);
 +
 + COMPUTE_DBG(pool-screen, * compute_memory_grow_defrag_pool() 
   new_size_in_dw = %d (%d bytes)\n,
   new_size_in_dw, new_size_in_dw * 4);
  
 @@ -183,27 +185,17 @@ int compute_memory_grow_pool(struct 
 compute_memory_pool* pool,
   } else {
   struct r600_resource *temp = NULL;
  
 - new_size_in_dw = align(new_size_in_dw, ITEM_ALIGNMENT);
 -
 - COMPUTE_DBG(pool-screen,   Aligned size = %d (%d bytes)\n,
 - new_size_in_dw, new_size_in_dw * 4);
 -
   temp = (struct r600_resource *)r600_compute_buffer_alloc_vram(
   pool-screen, 
 new_size_in_dw * 4);
  
   if (temp != NULL) {
 - struct r600_context *rctx = (struct r600_context *)pipe;
   struct pipe_resource *src = (struct pipe_resource 
 *)pool-bo;
   struct pipe_resource *dst = (struct pipe_resource 
 *)temp;
 - struct pipe_box box;
  
 - COMPUTE_DBG(pool-screen,   Growing the pool using a 
 temporary resource\n);
 + COMPUTE_DBG(pool-screen,   Growing and defragmenting 
 the pool 
 + using a temporary resource\n);
  
 - u_box_1d(0, pool-size_in_dw * 4, box);
 -
 - rctx-b.b.resource_copy_region(pipe,
 - dst, 0, 0, 0 ,0,
 - src, 0, box);
 + compute_memory_defrag(pool, src, dst, pipe);
  
   pool-screen-b.b.resource_destroy(
   (struct pipe_screen *)pool-screen,
 @@ -229,6 +221,11 @@ int compute_memory_grow_pool(struct compute_memory_pool* 
 pool,
   pool-screen,
   pool-size_in_dw * 4);
   compute_memory_shadow(pool, pipe, 0);
 +
 + if (pool-status  POOL_FRAGMENTED) {
 + struct pipe_resource *src = (struct 
 pipe_resource *)pool-bo;
 + compute_memory_defrag(pool, src, src, pipe);
 + }
   }
   }
  
 @@ -292,16 +289,15 @@ int compute_memory_finalize_pending(struct 
 compute_memory_pool* pool,
   return 0;
   }
  
 - if (pool-status  POOL_FRAGMENTED) {
 - struct pipe_resource *src = (struct pipe_resource *)pool-bo;
 - compute_memory_defrag(pool, src, src, pipe);
 - }
 -
   if (pool-size_in_dw  allocated + unallocated) {
 - err = compute_memory_grow_pool(pool, pipe, allocated + 
 unallocated);
 + err = compute_memory_grow_defrag_pool(pool, pipe, allocated + 
 unallocated);
   if (err == -1)
   return -1;
   }
 + else if (pool-status  POOL_FRAGMENTED) {
 + struct pipe_resource *src = (struct pipe_resource *)pool-bo;
 + compute_memory_defrag(pool, src, src, pipe);
 + }
  
   /* After defragmenting the pool, allocated is equal to the first 
 available
* position for new items in the pool */
 diff --git a/src/gallium/drivers/r600/compute_memory_pool.h 
 b/src/gallium/drivers/r600/compute_memory_pool.h
 index 5f1d72b..c7eb237 100644
 --- a/src/gallium/drivers/r600/compute_memory_pool.h
 +++ 

Re: [Mesa-dev] [PATCH 1/3] r600g/compute: Fix singed/unsigned comparison compiler warnings.

2014-07-25 Thread Tom Stellard
On Fri, Jul 25, 2014 at 10:33:42AM -0400, Jan Vesely wrote:
 The iteration variables go from 0 anyway.
 
 Signed-off-by: Jan Vesely jan.ves...@rutgers.edu

Thanks, I pushed patch #1.

-Tom
 ---
 
 A collection of fixes for gcc warnings I ran across.
 
  src/gallium/drivers/r600/evergreen_compute.c | 14 +++---
  1 file changed, 7 insertions(+), 7 deletions(-)
 
 diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
 b/src/gallium/drivers/r600/evergreen_compute.c
 index 12e9c85..1970414 100644
 --- a/src/gallium/drivers/r600/evergreen_compute.c
 +++ b/src/gallium/drivers/r600/evergreen_compute.c
 @@ -100,7 +100,7 @@ struct r600_resource* r600_compute_buffer_alloc_vram(
  
  static void evergreen_set_rat(
   struct r600_pipe_compute *pipe,
 - int id,
 + unsigned id,
   struct r600_resource* bo,
   int start,
   int size)
 @@ -276,7 +276,7 @@ void evergreen_compute_upload_input(
  {
   struct r600_context *ctx = (struct r600_context *)ctx_;
   struct r600_pipe_compute *shader = ctx-cs_shader_state.shader;
 - int i;
 + unsigned i;
   /* We need to reserve 9 dwords (36 bytes) for implicit kernel
* parameters.
*/
 @@ -405,7 +405,7 @@ static void compute_emit_cs(struct r600_context *ctx, 
 const uint *block_layout,
   const uint *grid_layout)
  {
   struct radeon_winsys_cs *cs = ctx-b.rings.gfx.cs;
 - int i;
 + unsigned i;
  
   /* make sure that the gfx ring is only one active */
   if (ctx-b.rings.dma.cs  ctx-b.rings.dma.cs-cdw) {
 @@ -598,7 +598,7 @@ static void evergreen_set_compute_resources(struct 
 pipe_context * ctx_,
   COMPUTE_DBG(ctx-screen, *** evergreen_set_compute_resources: start = 
 %u count = %u\n,
   start, count);
  
 - for (int i = 0; i  count; i++) {
 + for (unsigned i = 0; i  count; i++) {
   /* The First two vertex buffers are reserved for parameters and
* global buffers. */
   unsigned vtx_id = 2 + i;
 @@ -629,7 +629,7 @@ void evergreen_set_cs_sampler_view(struct pipe_context 
 *ctx_,
   struct r600_pipe_sampler_view **resource =
   (struct r600_pipe_sampler_view **)views;
  
 - for (int i = 0; i  count; i++) {
 + for (unsigned i = 0; i  count; i++){
   if (resource[i]) {
   assert(i+1  12);
   /* XXX: Implement */
 @@ -661,7 +661,7 @@ static void evergreen_set_global_binding(
  
   /* We mark these items for promotion to the pool if they
* aren't already there */
 - for (int i = 0; i  n; i++) {
 + for (unsigned i = 0; i  n; i++) {
   struct compute_memory_item *item = buffers[i]-chunk;
  
   if (!is_item_in_pool(item))
 @@ -673,7 +673,7 @@ static void evergreen_set_global_binding(
   return;
   }
  
 - for (int i = 0; i  n; i++)
 + for (unsigned i = 0; i  n; i++)
   {
   uint32_t buffer_offset;
   uint32_t handle;
 -- 
 1.9.3
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] r600g/compute: Defrag the pool at the same time as we grow it

2014-07-25 Thread Tom Stellard
On Fri, Jul 25, 2014 at 11:28:19PM +0200, Bruno Jimenez wrote:
 On Fri, 2014-07-25 at 12:46 -0400, Tom Stellard wrote:
  On Sat, Jul 19, 2014 at 07:35:51PM +0200, Bruno Jiménez wrote:
   This allows us to do two things: we now need fewer item copies when we have
   to defrag+grow the pool (to just one copy per item) and, even in the
   case where we don't need to defrag the pool, we reduce the data copied
   to just the useful data that the items use.
   
   Note: The fallback path is a bit ugly now, but hopefully we won't need
   it much.
  
  Hi,
  
  I pushed the first two patches, but I couldn't get this one to apply.
  Could you send an updated version rebased on master?
 
 Hi,
 
 This patch needs this other patch first:
 http://lists.freedesktop.org/archives/mesa-dev/2014-July/062923.html
 
 Since I mentioned it in the cover letter and you reviewed it when I sent it,
 I thought that you would push it too. I forgot to say that it hadn't been
 pushed yet, though, sorry.

Sorry, I missed that.  I've pushed both patches.

-Tom

 
 I can squash them and send it if needed.
 
 Sorry for any inconvenience.
 Bruno
 
  
  Thanks,
  Tom
  
   ---
src/gallium/drivers/r600/compute_memory_pool.c | 40 
   --
src/gallium/drivers/r600/compute_memory_pool.h |  2 +-
2 files changed, 19 insertions(+), 23 deletions(-)
   
   diff --git a/src/gallium/drivers/r600/compute_memory_pool.c 
   b/src/gallium/drivers/r600/compute_memory_pool.c
   index ca36240..32f5892 100644
   --- a/src/gallium/drivers/r600/compute_memory_pool.c
   +++ b/src/gallium/drivers/r600/compute_memory_pool.c
   @@ -169,10 +169,12 @@ struct list_head *compute_memory_postalloc_chunk(
 * Reallocates pool, conserves data.
 * @returns -1 if it fails, 0 otherwise
 */
   -int compute_memory_grow_pool(struct compute_memory_pool* pool,
   - struct pipe_context * pipe, int new_size_in_dw)
   +int compute_memory_grow_defrag_pool(struct compute_memory_pool *pool,
   + struct pipe_context *pipe, int new_size_in_dw)
{
   - COMPUTE_DBG(pool-screen, * compute_memory_grow_pool() 
   + new_size_in_dw = align(new_size_in_dw, ITEM_ALIGNMENT);
   +
   + COMPUTE_DBG(pool-screen, * compute_memory_grow_defrag_pool() 
 new_size_in_dw = %d (%d bytes)\n,
 new_size_in_dw, new_size_in_dw * 4);

   @@ -183,27 +185,17 @@ int compute_memory_grow_pool(struct 
   compute_memory_pool* pool,
 } else {
 struct r600_resource *temp = NULL;

   - new_size_in_dw = align(new_size_in_dw, ITEM_ALIGNMENT);
   -
   - COMPUTE_DBG(pool-screen,   Aligned size = %d (%d bytes)\n,
   - new_size_in_dw, new_size_in_dw * 4);
   -
 temp = (struct r600_resource *)r600_compute_buffer_alloc_vram(
 pool-screen, 
   new_size_in_dw * 4);

 if (temp != NULL) {
   - struct r600_context *rctx = (struct r600_context *)pipe;
 struct pipe_resource *src = (struct pipe_resource 
   *)pool-bo;
 struct pipe_resource *dst = (struct pipe_resource 
   *)temp;
   - struct pipe_box box;

   - COMPUTE_DBG(pool-screen,   Growing the pool using a 
   temporary resource\n);
   + COMPUTE_DBG(pool-screen,   Growing and defragmenting 
   the pool 
   + using a temporary resource\n);

   - u_box_1d(0, pool-size_in_dw * 4, box);
   -
   - rctx-b.b.resource_copy_region(pipe,
   - dst, 0, 0, 0 ,0,
   - src, 0, box);
   + compute_memory_defrag(pool, src, dst, pipe);

 pool-screen-b.b.resource_destroy(
 (struct pipe_screen *)pool-screen,
   @@ -229,6 +221,11 @@ int compute_memory_grow_pool(struct 
   compute_memory_pool* pool,
 pool-screen,
 pool-size_in_dw * 4);
 compute_memory_shadow(pool, pipe, 0);
   +
   + if (pool-status  POOL_FRAGMENTED) {
   + struct pipe_resource *src = (struct 
   pipe_resource *)pool-bo;
   + compute_memory_defrag(pool, src, src, pipe);
   + }
 }
 }

   @@ -292,16 +289,15 @@ int compute_memory_finalize_pending(struct 
   compute_memory_pool* pool,
 return 0;
 }

   - if (pool-status  POOL_FRAGMENTED) {
   - struct pipe_resource *src = (struct pipe_resource *)pool-bo;
   - compute_memory_defrag(pool, src, src, pipe);
   - }
   -
 if (pool-size_in_dw  allocated + unallocated) {
   - err = compute_memory_grow_pool(pool, pipe, allocated + 
   unallocated);
   + err = compute_memory_grow_defrag_pool(pool, pipe, allocated + 
   unallocated);
 if (err == -1

[Mesa-dev] [PATCH] clover: Add checks for image support to the image functions v2

2014-07-25 Thread Tom Stellard
Most image functions are required to return a CL_INVALID_OPERATION
error when used on devices without image support.

v2:
  - Simplified the code
---
 src/gallium/state_trackers/clover/api/memory.cpp   | 6 ++
 src/gallium/state_trackers/clover/api/sampler.cpp  | 3 +++
 src/gallium/state_trackers/clover/api/transfer.cpp | 3 +++
 3 files changed, 12 insertions(+)

diff --git a/src/gallium/state_trackers/clover/api/memory.cpp 
b/src/gallium/state_trackers/clover/api/memory.cpp
index d26b1c6..a094e74 100644
--- a/src/gallium/state_trackers/clover/api/memory.cpp
+++ b/src/gallium/state_trackers/clover/api/memory.cpp
@@ -106,6 +106,9 @@ clCreateImage2D(cl_context d_ctx, cl_mem_flags flags,
 void *host_ptr, cl_int *r_errcode) try {
auto ctx = obj(d_ctx);
 
+   if (!any_of(std::mem_fn(device::image_support), ctx.devices()))
+  throw error(CL_INVALID_OPERATION);
+
if (flags  ~(CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY |
  CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR |
  CL_MEM_COPY_HOST_PTR))
@@ -141,6 +144,9 @@ clCreateImage3D(cl_context d_ctx, cl_mem_flags flags,
 void *host_ptr, cl_int *r_errcode) try {
auto ctx = obj(d_ctx);
 
+   if (!any_of(std::mem_fn(device::image_support), ctx.devices()))
+  throw error(CL_INVALID_OPERATION);
+
if (flags  ~(CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY |
  CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR |
  CL_MEM_COPY_HOST_PTR))
diff --git a/src/gallium/state_trackers/clover/api/sampler.cpp 
b/src/gallium/state_trackers/clover/api/sampler.cpp
index 403892b..482e55a 100644
--- a/src/gallium/state_trackers/clover/api/sampler.cpp
+++ b/src/gallium/state_trackers/clover/api/sampler.cpp
@@ -31,6 +31,9 @@ clCreateSampler(cl_context d_ctx, cl_bool norm_mode,
 cl_int *r_errcode) try {
auto ctx = obj(d_ctx);
 
+   if (!any_of(std::mem_fn(device::image_support), ctx.devices()))
+  throw error(CL_INVALID_OPERATION);
+
ret_error(r_errcode, CL_SUCCESS);
return new sampler(ctx, norm_mode, addr_mode, filter_mode);
 
diff --git a/src/gallium/state_trackers/clover/api/transfer.cpp 
b/src/gallium/state_trackers/clover/api/transfer.cpp
index 404ceb0..07d8a73 100644
--- a/src/gallium/state_trackers/clover/api/transfer.cpp
+++ b/src/gallium/state_trackers/clover/api/transfer.cpp
@@ -101,6 +101,9 @@ namespace {
const vector_t orig, const vector_t region) {
   vector_t size = { img.width(), img.height(), img.depth() };
 
+  if (!q.device().image_support())
+ throw error(CL_INVALID_OPERATION);
+
   if (img.context() != q.context())
  throw error(CL_INVALID_CONTEXT);
 
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] gallium: Add PIPE_COMPUTE_CAP_MAX_CONSTANT_BUFFER_SIZE

2014-07-24 Thread Tom Stellard
On Thu, Jul 24, 2014 at 01:09:49PM +0200, Marek Olšák wrote:
 Isn't this redundant with get_shader_param(PIPE_SHADER_COMPUTE,
 PIPE_SHADER_CAP_MAX_CONSTS) * 16?
 

This is what clover was using, but I was confused about what the value
was supposed to represent.  Now, I think I understand (number of 4 x 32-bit
constants).  I can use this instead.

-Tom
 Marek
 
 On Thu, Jul 24, 2014 at 3:05 AM, Tom Stellard thomas.stell...@amd.com wrote:
  ---
   src/gallium/docs/source/screen.rst   | 2 ++
   src/gallium/include/pipe/p_defines.h | 3 ++-
   2 files changed, 4 insertions(+), 1 deletion(-)
 
  diff --git a/src/gallium/docs/source/screen.rst 
  b/src/gallium/docs/source/screen.rst
  index 830a1a5..219c9f9 100644
  --- a/src/gallium/docs/source/screen.rst
  +++ b/src/gallium/docs/source/screen.rst
  @@ -334,6 +334,8 @@ pipe_screen::get_compute_param.
 Value type: ``uint32_t``
   * ``PIPE_COMPUTE_CAP_IMAGES_SUPPORTED``: Whether images are supported
 non-zero means yes, zero means no. Value type: ``uint32_t``
  +* ``PIPE_COMPUTE_CAP_MAX_CONSTANT_BUFFER_SIZE``: The maximum size in bytes
  +  of a constant buffer.  Value type: ``uint64_t``
 
   .. _pipe_bind:
 
  diff --git a/src/gallium/include/pipe/p_defines.h 
  b/src/gallium/include/pipe/p_defines.h
  index 43bb1f5..78709b9 100644
  --- a/src/gallium/include/pipe/p_defines.h
  +++ b/src/gallium/include/pipe/p_defines.h
  @@ -651,7 +651,8 @@ enum pipe_compute_cap
  PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
  PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY,
  PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS,
  -   PIPE_COMPUTE_CAP_IMAGES_SUPPORTED
  +   PIPE_COMPUTE_CAP_IMAGES_SUPPORTED,
  +   PIPE_COMPUTE_CAP_MAX_CONSTANT_BUFFER_SIZE
   };
 
   /**
  --
  1.8.1.5
 
  ___
  mesa-dev mailing list
  mesa-dev@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/mesa-dev
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600g: Use hardware sqrt instruction

2014-07-23 Thread Tom Stellard
On Fri, Jul 18, 2014 at 12:35:24PM -0400, Alex Deucher wrote:
 On Fri, Jul 18, 2014 at 3:54 AM, Glenn Kennard glenn.kenn...@gmail.com 
 wrote:
  Piglit quick tests including sqrt pass, no other regressions,
  tested on radeon 6670.
  ---
  Should be slightly more precise than the invsqrt/recip/mul combination
  used previously, I reckon up to about 2 bits of mantissa, and saves
  two instructions per sqrt emitted.
 
  It would be good if someone could test this on Cayman since it uses
  a slightly different codepath.
 
 Reviewed-by: Alex Deucher alexander.deuc...@amd.com
 

I've pushed this patch, thanks!

-Tom

 
   src/gallium/drivers/r600/r600_pipe.c   | 2 +-
   src/gallium/drivers/r600/r600_shader.c | 9 +++--
   2 files changed, 4 insertions(+), 7 deletions(-)
 
  diff --git a/src/gallium/drivers/r600/r600_pipe.c 
  b/src/gallium/drivers/r600/r600_pipe.c
  index 5bf9c00..ee6a416 100644
  --- a/src/gallium/drivers/r600/r600_pipe.c
  +++ b/src/gallium/drivers/r600/r600_pipe.c
  @@ -428,7 +428,7 @@ static int r600_get_shader_param(struct pipe_screen* 
  pscreen, unsigned shader, e
  case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
  return 1;
  case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
  -   return 0;
  +   return 1;
  case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
  case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
  case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
  diff --git a/src/gallium/drivers/r600/r600_shader.c 
  b/src/gallium/drivers/r600/r600_shader.c
  index db928f3..907547d 100644
  --- a/src/gallium/drivers/r600/r600_shader.c
  +++ b/src/gallium/drivers/r600/r600_shader.c
  @@ -6498,8 +6498,7 @@ static struct r600_shader_tgsi_instruction 
  r600_shader_tgsi_instruction[] = {
  {TGSI_OPCODE_SUB,   0, ALU_OP2_ADD, tgsi_op2},
  {TGSI_OPCODE_LRP,   0, ALU_OP0_NOP, tgsi_lrp},
  {TGSI_OPCODE_CND,   0, ALU_OP0_NOP, tgsi_unsupported},
  -   /* gap */
  -   {20,0, ALU_OP0_NOP, tgsi_unsupported},
  +   {TGSI_OPCODE_SQRT,  0, ALU_OP1_SQRT_IEEE, 
  tgsi_trans_srcx_replicate},
  {TGSI_OPCODE_DP2A,  0, ALU_OP0_NOP, tgsi_unsupported},
  /* gap */
  {22,0, ALU_OP0_NOP, tgsi_unsupported},
  @@ -6693,8 +6692,7 @@ static struct r600_shader_tgsi_instruction 
  eg_shader_tgsi_instruction[] = {
  {TGSI_OPCODE_SUB,   0, ALU_OP2_ADD, tgsi_op2},
  {TGSI_OPCODE_LRP,   0, ALU_OP0_NOP, tgsi_lrp},
  {TGSI_OPCODE_CND,   0, ALU_OP0_NOP, tgsi_unsupported},
  -   /* gap */
  -   {20,0, ALU_OP0_NOP, tgsi_unsupported},
  +   {TGSI_OPCODE_SQRT,  0, ALU_OP1_SQRT_IEEE, 
  tgsi_trans_srcx_replicate},
  {TGSI_OPCODE_DP2A,  0, ALU_OP0_NOP, tgsi_unsupported},
  /* gap */
  {22,0, ALU_OP0_NOP, tgsi_unsupported},
  @@ -6888,8 +6886,7 @@ static struct r600_shader_tgsi_instruction 
  cm_shader_tgsi_instruction[] = {
  {TGSI_OPCODE_SUB,   0, ALU_OP2_ADD, tgsi_op2},
  {TGSI_OPCODE_LRP,   0, ALU_OP0_NOP, tgsi_lrp},
  {TGSI_OPCODE_CND,   0, ALU_OP0_NOP, tgsi_unsupported},
  -   /* gap */
  -   {20,0, ALU_OP0_NOP, tgsi_unsupported},
  +   {TGSI_OPCODE_SQRT,  0, ALU_OP1_SQRT_IEEE, 
  cayman_emit_float_instr},
  {TGSI_OPCODE_DP2A,  0, ALU_OP0_NOP, tgsi_unsupported},
  /* gap */
  {22,0, ALU_OP0_NOP, tgsi_unsupported},
  --
  1.8.3.2
 
  ___
  mesa-dev mailing list
  mesa-dev@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/mesa-dev
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/5] [RFC] r600g/compute: Adding support for defragmenting compute_memory_pool

2014-07-23 Thread Tom Stellard
On Fri, Jul 18, 2014 at 01:09:03PM +0200, Bruno Jimenez wrote:
 On Thu, 2014-07-17 at 22:56 -0400, Tom Stellard wrote:
  On Wed, Jul 16, 2014 at 11:12:42PM +0200, Bruno Jiménez wrote:
   Hi,
   
   This series finally adds support for defragmenting the pool for
   OpenCL buffers in the r600g driver. It is mostly a rewrite of
   the series that I wrote some months ago.
   
   For defragmenting the pool I have thought of two different
   possibilities:
   
   - Creating a new pool and moving every item here in the correct
   position. This has the advantage of being very simple to
   implement and that it allows the pool to be grown at the
   same time. But it has a couple of problems, namely that it
   has a high memory peak usage (sum of current pool + new pool)
   and that in the case of having a pool not very fragmented you
   have to copy every item to its new place.
   - Using the same pool by moving the items in it. This has the
   advantage of using less memory (sum of current pool + biggest
   item in it) and that it is easier to handle the case of
   only having few elements out of place. The disadvantages
   are that it doesn't allow growing the pool at the same time
   and that it may involve twice the number of item-copies in 
   the worst case.
   
   I have chosen to implement the second option, but if you think
   that the first one is better, I can rewrite the series for it.
   (^_^)
   
   The worst case I have mentioned is this: Imagine that you have
   a series of items in which the first is, at least, 1 'unit'
   smaller than the rest. You now free this item and create a new
   one with the same size [why would anyone do this? I don't know]
   For now, the defragmenter code is so dumb that it will move
   every item to the front of the pool without trying first to
   put this new item in the available space.
   
   Hopefully situations like this won't be very common.
   
   If you want me to explain any detail about any of the patches
   just ask. And as said, if you prefer the first version of the
   defragmenter, just ask. [In fact, after having written this,
   I may add it for the case grow+defrag]
   
   Also, no regressions found in piglit.
   
   Thanks in advance!
   Bruno
   
   Bruno Jiménez (5):
 r600g/compute: Add a function for moving items in the pool
 r600g/compute: Add a function for defragmenting the pool
 r600g/compute: Defrag the pool if it's necesary
 r600g/compute: Quick exit if there's nothing to add to the pool
 r600g/compute: Remove unneeded code from compute_memory_promote_item
   
src/gallium/drivers/r600/compute_memory_pool.c | 196 
   ++---
src/gallium/drivers/r600/compute_memory_pool.h |  13 +-
2 files changed, 156 insertions(+), 53 deletions(-)
  
  Hi,
  
  I took a brief look at these patches and they look pretty good.  I will
  look at them again tomorrow and then commit if I don't see any issues.
 

I've pushed these patches, thanks!

-Tom

 Hi,
 
 Thanks, if you have any doubt about any of the patches just ask.
 
 I have just finished writing a follow-up series for doing grow + defrag at
 the same time. I still have to test it, but if no problems arise I'll
 send it to the list as soon as possible.
 
 This new series is based on the patch that I sent here:
 http://lists.freedesktop.org/archives/mesa-dev/2014-July/062923.html 
 If you think it's good, could you push it to master?
 
 Thanks in advance!
 Bruno
 
  -Tom
  
   
   -- 
   2.0.1
   
   ___
   mesa-dev mailing list
   mesa-dev@lists.freedesktop.org
   http://lists.freedesktop.org/mailman/listinfo/mesa-dev
 
 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] radeon/llvm: enable unsafe math for graphics shaders

2014-07-23 Thread Tom Stellard
On Tue, Jul 22, 2014 at 12:36:33AM +0200, Grigori Goronzy wrote:
 On 17.07.2014 21:24, Tom Stellard wrote:
  On Thu, Jul 17, 2014 at 06:44:25PM +0200, Grigori Goronzy wrote:
  Accuracy of some operations was recently improved in the R600 backend,
  at the cost of slower code. This is required for compute shaders,
  but not for graphics shaders. Add unsafe-fp-math hint to make LLVM
  generate faster but possibly less accurate code.
 
  Piglit didn't indicate any regressions.
  
  Both patches are:
  Reviewed-by: Tom Stellard thomas.stell...@amd.com
 
 
 Can you please commit the patches for me? My account request is still
 pending.
 

I just pushed these, thanks!

-Tom

 Grigori
 
  ---
   src/gallium/drivers/radeon/radeon_llvm_emit.c | 5 +
   1 file changed, 5 insertions(+)
 
  diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c 
  b/src/gallium/drivers/radeon/radeon_llvm_emit.c
  index 1b17dd4..171ccaa 100644
  --- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
  +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
  @@ -26,6 +26,7 @@
   #include radeon_llvm_emit.h
   #include radeon_elf_util.h
   #include util/u_memory.h
  +#include pipe/p_shader_tokens.h
   
   #include llvm-c/Target.h
   #include llvm-c/TargetMachine.h
  @@ -50,6 +51,10 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned 
  type)
 sprintf(Str, %1d, type);
   
 LLVMAddTargetDependentFunctionAttr(F, ShaderType, Str);
  +
  +  if (type != TGSI_PROCESSOR_COMPUTE) {
  +LLVMAddTargetDependentFunctionAttr(F, unsafe-fp-math, true);
  +  }
   }
   
   static void init_r600_target() {
  -- 
  1.8.3.2
 
  ___
  mesa-dev mailing list
  mesa-dev@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/mesa-dev
  ___
  mesa-dev mailing list
  mesa-dev@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/mesa-dev
  
 
 


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] r600g/compute: Allow compute_memory_move_item to move items between resources

2014-07-23 Thread Tom Stellard
On Sat, Jul 19, 2014 at 07:35:49PM +0200, Bruno Jiménez wrote:
 ---
  src/gallium/drivers/r600/compute_memory_pool.c | 43 
 ++
  src/gallium/drivers/r600/compute_memory_pool.h |  1 +
  2 files changed, 25 insertions(+), 19 deletions(-)
 
 diff --git a/src/gallium/drivers/r600/compute_memory_pool.c 
 b/src/gallium/drivers/r600/compute_memory_pool.c
 index 254c1d7..1ad77ad 100644
 --- a/src/gallium/drivers/r600/compute_memory_pool.c
 +++ b/src/gallium/drivers/r600/compute_memory_pool.c
 @@ -331,6 +331,7 @@ void compute_memory_defrag(struct compute_memory_pool 
 *pool,
   struct pipe_context *pipe)
  {
   struct compute_memory_item *item;
 + struct pipe_resource *src = (struct pipe_resource *)pool-bo;
   int64_t last_pos;
  
   COMPUTE_DBG(pool-screen, * compute_memory_defrag()\n);
 @@ -340,7 +341,8 @@ void compute_memory_defrag(struct compute_memory_pool 
 *pool,
   if (item-start_in_dw != last_pos) {
   assert(last_pos  item-start_in_dw);
  
 - compute_memory_move_item(pool, item, last_pos, pipe);
 + compute_memory_move_item(pool, src, src,
 + item, last_pos, pipe);
   }
  
   last_pos += align(item-size_in_dw, ITEM_ALIGNMENT);
 @@ -431,7 +433,8 @@ void compute_memory_demote_item(struct 
 compute_memory_pool *pool,
  }
  
  /**
 - * Moves the item \a item forward in the pool to \a new_start_in_dw
 + * Moves the item \a item forward from the resource \a src to the
 + * resource \a dst at \a new_start_in_dw
   *
   * This function assumes two things:
   * 1) The item is \b only moved forward
 @@ -442,13 +445,14 @@ void compute_memory_demote_item(struct 
 compute_memory_pool *pool,
   * \see compute_memory_defrag
   */
  void compute_memory_move_item(struct compute_memory_pool *pool,
 + struct pipe_resource *src, struct pipe_resource *dst,
   struct compute_memory_item *item, uint64_t new_start_in_dw,
   struct pipe_context *pipe)
  {
   struct pipe_screen *screen = (struct pipe_screen *)pool-screen;
   struct r600_context *rctx = (struct r600_context *)pipe;
 - struct pipe_resource *src = (struct pipe_resource *)pool-bo;
 - struct pipe_resource *dst;
 + struct pipe_resource *src_ = src;
 + struct pipe_resource *dst_;

I think it is confusing to have variables named src_ and src.  Could you
rename one of them to something more descriptive?

   struct pipe_box box;
  
   struct compute_memory_item *prev;
 @@ -465,34 +469,35 @@ void compute_memory_move_item(struct 
 compute_memory_pool *pool,
  
   u_box_1d(item-start_in_dw * 4, item-size_in_dw * 4, box);
  
 - /* If the ranges don't overlap, we can just copy the item directly */
 - if (new_start_in_dw + item-size_in_dw = item-start_in_dw) {
 - dst = (struct pipe_resource *)pool-bo;
 + /* If the ranges don't overlap, or we are copying from one resource
 +  * to another, we can just copy the item directly */
 + if (src != dst || new_start_in_dw + item-size_in_dw = 
 item-start_in_dw) {
 + dst_ = dst;
  
   rctx-b.b.resource_copy_region(pipe,
 - dst, 0, new_start_in_dw * 4, 0, 0,
 - src, 0, box);
 + dst_, 0, new_start_in_dw * 4, 0, 0,
 + src_, 0, box);
   } else {
   /* The ranges overlap, we will try first to use an intermediate
* resource to move the item */
 - dst = (struct pipe_resource *)r600_compute_buffer_alloc_vram(
 + dst_ = (struct pipe_resource *)r600_compute_buffer_alloc_vram(
   pool-screen, item-size_in_dw * 4);
  
 - if (dst != NULL) {
 + if (dst_ != NULL) {
   rctx-b.b.resource_copy_region(pipe,
 - dst, 0, 0, 0, 0,
 - src, 0, box);
 + dst_, 0, 0, 0, 0,
 + src_, 0, box);
  
 - src = dst;
 - dst = (struct pipe_resource *)pool-bo;
 + src_ = dst_;
 + dst_ = dst;
  
   box.x = 0;
  
   rctx-b.b.resource_copy_region(pipe,
 - dst, 0, new_start_in_dw * 4, 0, 0,
 - src, 0, box);
 + dst_, 0, new_start_in_dw * 4, 0, 0,
 + src_, 0, box);
  
 - pool-screen-b.b.resource_destroy(screen, src);
 + pool-screen-b.b.resource_destroy(screen, src_);
  
   } else {
   /* The allocation of the temporary resource failed,
 @@ -505,7 +510,7 @@ void compute_memory_move_item(struct compute_memory_pool 
 *pool,
  
   u_box_1d(new_start_in_dw * 4, (offset + 
 

Re: [Mesa-dev] [PATCH 2/3] r600g/compute: Allow compute_memory_defrag to defragment between resources

2014-07-23 Thread Tom Stellard
On Sat, Jul 19, 2014 at 07:35:50PM +0200, Bruno Jiménez wrote:
 This will be used in the following patch to avoid duplicated code
 ---

Reviewed-by: Tom Stellard thomas.stell...@amd.com

  src/gallium/drivers/r600/compute_memory_pool.c | 11 ++-
  src/gallium/drivers/r600/compute_memory_pool.h |  1 +
  2 files changed, 7 insertions(+), 5 deletions(-)
 
 diff --git a/src/gallium/drivers/r600/compute_memory_pool.c 
 b/src/gallium/drivers/r600/compute_memory_pool.c
 index 1ad77ad..ca36240 100644
 --- a/src/gallium/drivers/r600/compute_memory_pool.c
 +++ b/src/gallium/drivers/r600/compute_memory_pool.c
 @@ -293,7 +293,8 @@ int compute_memory_finalize_pending(struct 
 compute_memory_pool* pool,
   }
  
   if (pool-status  POOL_FRAGMENTED) {
 - compute_memory_defrag(pool, pipe);
 + struct pipe_resource *src = (struct pipe_resource *)pool-bo;
 + compute_memory_defrag(pool, src, src, pipe);
   }
  
   if (pool-size_in_dw  allocated + unallocated) {
 @@ -328,20 +329,20 @@ int compute_memory_finalize_pending(struct 
 compute_memory_pool* pool,
   * \param pool   The pool to be defragmented
   */
  void compute_memory_defrag(struct compute_memory_pool *pool,
 + struct pipe_resource *src, struct pipe_resource *dst,
   struct pipe_context *pipe)
  {
   struct compute_memory_item *item;
 - struct pipe_resource *src = (struct pipe_resource *)pool-bo;
   int64_t last_pos;
  
   COMPUTE_DBG(pool-screen, * compute_memory_defrag()\n);
  
   last_pos = 0;
   LIST_FOR_EACH_ENTRY(item, pool-item_list, link) {
 - if (item-start_in_dw != last_pos) {
 - assert(last_pos  item-start_in_dw);
 + if (src != dst || item-start_in_dw != last_pos) {
 + assert(last_pos = item-start_in_dw);
  
 - compute_memory_move_item(pool, src, src,
 + compute_memory_move_item(pool, src, dst,
   item, last_pos, pipe);
   }
  
 diff --git a/src/gallium/drivers/r600/compute_memory_pool.h 
 b/src/gallium/drivers/r600/compute_memory_pool.h
 index 822bfbe..5f1d72b 100644
 --- a/src/gallium/drivers/r600/compute_memory_pool.h
 +++ b/src/gallium/drivers/r600/compute_memory_pool.h
 @@ -91,6 +91,7 @@ int compute_memory_finalize_pending(struct 
 compute_memory_pool* pool,
   struct pipe_context * pipe);
  
  void compute_memory_defrag(struct compute_memory_pool *pool,
 + struct pipe_resource *src, struct pipe_resource *dst,
   struct pipe_context *pipe);
  
  int compute_memory_promote_item(struct compute_memory_pool *pool,
 -- 
 2.0.2
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] r600g/compute: Defrag the pool at the same time as we grow it

2014-07-23 Thread Tom Stellard
On Sat, Jul 19, 2014 at 07:35:51PM +0200, Bruno Jiménez wrote:
 This allows us two things: we now need fewer item copies when we have
 to defrag+grow the pool (to just one copy per item) and, even in the
 case where we don't need to defrag the pool, we reduce the data copied
 to just the useful data that the items use.
 
 Note: The fallback path is a bit ugly now, but hopefully we won't need
 it much.

Reviewed-by: Tom Stellard thomas.stell...@amd.com

 ---
  src/gallium/drivers/r600/compute_memory_pool.c | 40 
 --
  src/gallium/drivers/r600/compute_memory_pool.h |  2 +-
  2 files changed, 19 insertions(+), 23 deletions(-)
 
 diff --git a/src/gallium/drivers/r600/compute_memory_pool.c 
 b/src/gallium/drivers/r600/compute_memory_pool.c
 index ca36240..32f5892 100644
 --- a/src/gallium/drivers/r600/compute_memory_pool.c
 +++ b/src/gallium/drivers/r600/compute_memory_pool.c
 @@ -169,10 +169,12 @@ struct list_head *compute_memory_postalloc_chunk(
   * Reallocates pool, conserves data.
   * @returns -1 if it fails, 0 otherwise
   */
 -int compute_memory_grow_pool(struct compute_memory_pool* pool,
 - struct pipe_context * pipe, int new_size_in_dw)
 +int compute_memory_grow_defrag_pool(struct compute_memory_pool *pool,
 + struct pipe_context *pipe, int new_size_in_dw)
  {
 - COMPUTE_DBG(pool-screen, * compute_memory_grow_pool() 
 + new_size_in_dw = align(new_size_in_dw, ITEM_ALIGNMENT);
 +
 + COMPUTE_DBG(pool-screen, * compute_memory_grow_defrag_pool() 
   new_size_in_dw = %d (%d bytes)\n,
   new_size_in_dw, new_size_in_dw * 4);
  
 @@ -183,27 +185,17 @@ int compute_memory_grow_pool(struct 
 compute_memory_pool* pool,
   } else {
   struct r600_resource *temp = NULL;
  
 - new_size_in_dw = align(new_size_in_dw, ITEM_ALIGNMENT);
 -
 - COMPUTE_DBG(pool-screen,   Aligned size = %d (%d bytes)\n,
 - new_size_in_dw, new_size_in_dw * 4);
 -
   temp = (struct r600_resource *)r600_compute_buffer_alloc_vram(
   pool-screen, 
 new_size_in_dw * 4);
  
   if (temp != NULL) {
 - struct r600_context *rctx = (struct r600_context *)pipe;
   struct pipe_resource *src = (struct pipe_resource 
 *)pool-bo;
   struct pipe_resource *dst = (struct pipe_resource 
 *)temp;
 - struct pipe_box box;
  
 - COMPUTE_DBG(pool-screen,   Growing the pool using a 
 temporary resource\n);
 + COMPUTE_DBG(pool-screen,   Growing and defragmenting 
 the pool 
 + using a temporary resource\n);
  
 - u_box_1d(0, pool-size_in_dw * 4, box);
 -
 - rctx-b.b.resource_copy_region(pipe,
 - dst, 0, 0, 0 ,0,
 - src, 0, box);
 + compute_memory_defrag(pool, src, dst, pipe);
  
   pool-screen-b.b.resource_destroy(
   (struct pipe_screen *)pool-screen,
 @@ -229,6 +221,11 @@ int compute_memory_grow_pool(struct compute_memory_pool* 
 pool,
   pool-screen,
   pool-size_in_dw * 4);
   compute_memory_shadow(pool, pipe, 0);
 +
 + if (pool-status  POOL_FRAGMENTED) {
 + struct pipe_resource *src = (struct 
 pipe_resource *)pool-bo;
 + compute_memory_defrag(pool, src, src, pipe);
 + }
   }
   }
  
 @@ -292,16 +289,15 @@ int compute_memory_finalize_pending(struct 
 compute_memory_pool* pool,
   return 0;
   }
  
 - if (pool-status  POOL_FRAGMENTED) {
 - struct pipe_resource *src = (struct pipe_resource *)pool-bo;
 - compute_memory_defrag(pool, src, src, pipe);
 - }
 -
   if (pool-size_in_dw  allocated + unallocated) {
 - err = compute_memory_grow_pool(pool, pipe, allocated + 
 unallocated);
 + err = compute_memory_grow_defrag_pool(pool, pipe, allocated + 
 unallocated);
   if (err == -1)
   return -1;
   }
 + else if (pool-status  POOL_FRAGMENTED) {
 + struct pipe_resource *src = (struct pipe_resource *)pool-bo;
 + compute_memory_defrag(pool, src, src, pipe);
 + }
  
   /* After defragmenting the pool, allocated is equal to the first 
 available
* position for new items in the pool */
 diff --git a/src/gallium/drivers/r600/compute_memory_pool.h 
 b/src/gallium/drivers/r600/compute_memory_pool.h
 index 5f1d72b..c7eb237 100644
 --- a/src/gallium/drivers/r600/compute_memory_pool.h
 +++ b/src/gallium/drivers/r600/compute_memory_pool.h
 @@ -81,7 +81,7 @@ int64_t compute_memory_prealloc_chunk

[Mesa-dev] [PATCH 3/3] clover: Add checks for image support to the image functions

2014-07-23 Thread Tom Stellard
Most image functions are required to return a CL_INVALID_OPERATION
error when used on devices without image support.
---
 src/gallium/state_trackers/clover/api/memory.cpp   |  6 ++
 src/gallium/state_trackers/clover/api/sampler.cpp  |  3 +++
 src/gallium/state_trackers/clover/api/transfer.cpp | 17 +
 src/gallium/state_trackers/clover/core/context.cpp |  9 +
 src/gallium/state_trackers/clover/core/context.hpp |  2 ++
 5 files changed, 37 insertions(+)

diff --git a/src/gallium/state_trackers/clover/api/memory.cpp 
b/src/gallium/state_trackers/clover/api/memory.cpp
index d26b1c6..77f8b96 100644
--- a/src/gallium/state_trackers/clover/api/memory.cpp
+++ b/src/gallium/state_trackers/clover/api/memory.cpp
@@ -106,6 +106,9 @@ clCreateImage2D(cl_context d_ctx, cl_mem_flags flags,
 void *host_ptr, cl_int *r_errcode) try {
auto ctx = obj(d_ctx);
 
+   if (!ctx.image_support())
+  throw error(CL_INVALID_OPERATION);
+
if (flags  ~(CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY |
  CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR |
  CL_MEM_COPY_HOST_PTR))
@@ -141,6 +144,9 @@ clCreateImage3D(cl_context d_ctx, cl_mem_flags flags,
 void *host_ptr, cl_int *r_errcode) try {
auto ctx = obj(d_ctx);
 
+   if (!ctx.image_support())
+  throw error(CL_INVALID_OPERATION);
+
if (flags  ~(CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY |
  CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR |
  CL_MEM_COPY_HOST_PTR))
diff --git a/src/gallium/state_trackers/clover/api/sampler.cpp 
b/src/gallium/state_trackers/clover/api/sampler.cpp
index 403892b..7f2e04d 100644
--- a/src/gallium/state_trackers/clover/api/sampler.cpp
+++ b/src/gallium/state_trackers/clover/api/sampler.cpp
@@ -31,6 +31,9 @@ clCreateSampler(cl_context d_ctx, cl_bool norm_mode,
 cl_int *r_errcode) try {
auto ctx = obj(d_ctx);
 
+   if (!ctx.image_support())
+  throw error(CL_INVALID_OPERATION);
+
ret_error(r_errcode, CL_SUCCESS);
return new sampler(ctx, norm_mode, addr_mode, filter_mode);
 
diff --git a/src/gallium/state_trackers/clover/api/transfer.cpp 
b/src/gallium/state_trackers/clover/api/transfer.cpp
index 404ceb0..da12d2b 100644
--- a/src/gallium/state_trackers/clover/api/transfer.cpp
+++ b/src/gallium/state_trackers/clover/api/transfer.cpp
@@ -457,6 +457,8 @@ clEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, 
cl_bool blocking,
auto src_origin = vector(p_origin);
auto src_pitch = pitch(region, {{ img.pixel_size(),
  img.row_pitch(), img.slice_pitch() }});
+   if (!q.device().image_support())
+  throw error(CL_INVALID_OPERATION);
 
validate_common(q, deps);
validate_object(q, ptr, {}, dst_pitch, region);
@@ -491,6 +493,9 @@ clEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, 
cl_bool blocking,
auto src_pitch = pitch(region, {{ img.pixel_size(),
  row_pitch, slice_pitch }});
 
+   if (!q.device().image_support())
+  throw error(CL_INVALID_OPERATION);
+
validate_common(q, deps);
validate_object(q, img, dst_origin, region);
validate_object(q, ptr, {}, src_pitch, region);
@@ -522,6 +527,9 @@ clEnqueueCopyImage(cl_command_queue d_q, cl_mem d_src_mem, 
cl_mem d_dst_mem,
auto dst_origin = vector(p_dst_origin);
auto src_origin = vector(p_src_origin);
 
+   if (!q.device().image_support())
+  throw error(CL_INVALID_OPERATION);
+
validate_common(q, deps);
validate_object(q, dst_img, dst_origin, region);
validate_object(q, src_img, src_origin, region);
@@ -559,6 +567,9 @@ clEnqueueCopyImageToBuffer(cl_command_queue d_q,
  src_img.row_pitch(),
  src_img.slice_pitch() }});
 
+   if (!q.device().image_support())
+  throw error(CL_INVALID_OPERATION);
+
validate_common(q, deps);
validate_object(q, dst_mem, dst_origin, dst_pitch, region);
validate_object(q, src_img, src_origin, region);
@@ -595,6 +606,9 @@ clEnqueueCopyBufferToImage(cl_command_queue d_q,
vector_t src_origin = { src_offset };
auto src_pitch = pitch(region, {{ dst_img.pixel_size() }});
 
+   if (!q.device().image_support())
+  throw error(CL_INVALID_OPERATION);
+
validate_common(q, deps);
validate_object(q, dst_img, dst_origin, region);
validate_object(q, src_mem, src_origin, src_pitch, region);
@@ -651,6 +665,9 @@ clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, 
cl_bool blocking,
auto region = vector(p_region);
auto origin = vector(p_origin);
 
+   if (!q.device().image_support())
+  throw error(CL_INVALID_OPERATION);
+
validate_common(q, deps);
validate_object(q, img, origin, region);
 
diff --git a/src/gallium/state_trackers/clover/core/context.cpp 
b/src/gallium/state_trackers/clover/core/context.cpp
index bf4df39..722c97d 100644
--- 

[Mesa-dev] [PATCH 1/3] gallium: Add PIPE_COMPUTE_CAP_IMAGES_SUPPORTED

2014-07-23 Thread Tom Stellard
---
 src/gallium/docs/source/screen.rst| 2 ++
 src/gallium/drivers/radeon/r600_pipe_common.c | 7 +++
 src/gallium/include/pipe/p_defines.h  | 3 ++-
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/gallium/docs/source/screen.rst 
b/src/gallium/docs/source/screen.rst
index ba583fe..830a1a5 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -332,6 +332,8 @@ pipe_screen::get_compute_param.
   clock in MHz. Value type: ``uint32_t``
 * ``PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS``: Maximum number of compute units
   Value type: ``uint32_t``
+* ``PIPE_COMPUTE_CAP_IMAGES_SUPPORTED``: Whether images are supported
+  non-zero means yes, zero means no. Value type: ``uint32_t``
 
 .. _pipe_bind:
 
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 6535992..bf0585d 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -519,6 +519,13 @@ static int r600_get_compute_param(struct pipe_screen 
*screen,
*max_compute_units = 
MAX2(rscreen-info.max_compute_units, 1);
}
return sizeof(uint32_t);
+
+   case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
+   if (ret) {
+   uint32_t *images_supported = ret;
+   *images_supported = 0;
+   }
+   return sizeof(uint32_t);
}
 
 fprintf(stderr, unknown PIPE_COMPUTE_CAP %d\n, param);
diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index d9b6e5a..43bb1f5 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -650,7 +650,8 @@ enum pipe_compute_cap
PIPE_COMPUTE_CAP_MAX_INPUT_SIZE,
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY,
-   PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS
+   PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS,
+   PIPE_COMPUTE_CAP_IMAGES_SUPPORTED
 };
 
 /**
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] clover: Query the device to see if images are supported

2014-07-23 Thread Tom Stellard
---
 src/gallium/state_trackers/clover/api/device.cpp  | 2 +-
 src/gallium/state_trackers/clover/core/device.cpp | 6 ++
 src/gallium/state_trackers/clover/core/device.hpp | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/clover/api/device.cpp 
b/src/gallium/state_trackers/clover/api/device.cpp
index 97b2cf9..e825468 100644
--- a/src/gallium/state_trackers/clover/api/device.cpp
+++ b/src/gallium/state_trackers/clover/api/device.cpp
@@ -184,7 +184,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
   break;
 
case CL_DEVICE_IMAGE_SUPPORT:
-  buf.as_scalarcl_bool() = CL_TRUE;
+  buf.as_scalarcl_bool() = dev.image_support();
   break;
 
case CL_DEVICE_MAX_PARAMETER_SIZE:
diff --git a/src/gallium/state_trackers/clover/core/device.cpp 
b/src/gallium/state_trackers/clover/core/device.cpp
index b6078db..63aa193 100644
--- a/src/gallium/state_trackers/clover/core/device.cpp
+++ b/src/gallium/state_trackers/clover/core/device.cpp
@@ -169,6 +169,12 @@ device::max_compute_units() const {
   PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS)[0];
 }
 
+bool
+device::image_support() const {
+   return get_compute_paramuint32_t(pipe,
+  PIPE_COMPUTE_CAP_IMAGES_SUPPORTED)[0];
+}
+
 std::vectorsize_t
 device::max_block_size() const {
auto v = get_compute_paramuint64_t(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
diff --git a/src/gallium/state_trackers/clover/core/device.hpp 
b/src/gallium/state_trackers/clover/core/device.hpp
index 731c31e..2201700 100644
--- a/src/gallium/state_trackers/clover/core/device.hpp
+++ b/src/gallium/state_trackers/clover/core/device.hpp
@@ -63,6 +63,7 @@ namespace clover {
   cl_ulong max_mem_alloc_size() const;
   cl_uint max_clock_frequency() const;
   cl_uint max_compute_units() const;
+  bool image_support() const;
 
   std::vectorsize_t max_block_size() const;
   std::string device_name() const;
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] gallium: Add PIPE_COMPUTE_CAP_MAX_CONSTANT_BUFFER_SIZE

2014-07-23 Thread Tom Stellard
---
 src/gallium/docs/source/screen.rst   | 2 ++
 src/gallium/include/pipe/p_defines.h | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/gallium/docs/source/screen.rst 
b/src/gallium/docs/source/screen.rst
index 830a1a5..219c9f9 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -334,6 +334,8 @@ pipe_screen::get_compute_param.
   Value type: ``uint32_t``
 * ``PIPE_COMPUTE_CAP_IMAGES_SUPPORTED``: Whether images are supported
   non-zero means yes, zero means no. Value type: ``uint32_t``
+* ``PIPE_COMPUTE_CAP_MAX_CONSTANT_BUFFER_SIZE``: The maximum size in bytes
+  of a constant buffer.  Value type: ``uint64_t``
 
 .. _pipe_bind:
 
diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 43bb1f5..78709b9 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -651,7 +651,8 @@ enum pipe_compute_cap
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY,
PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS,
-   PIPE_COMPUTE_CAP_IMAGES_SUPPORTED
+   PIPE_COMPUTE_CAP_IMAGES_SUPPORTED,
+   PIPE_COMPUTE_CAP_MAX_CONSTANT_BUFFER_SIZE
 };
 
 /**
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] radeon/compute: Return a value for PIPE_COMPUTE_CAP_MAX_CONSTANT_BUFFER_SIZE

2014-07-23 Thread Tom Stellard
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index bf0585d..2ea8f3d 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -492,6 +492,7 @@ static int r600_get_compute_param(struct pipe_screen 
*screen,
}
return sizeof(uint64_t);
 
+   case PIPE_COMPUTE_CAP_MAX_CONSTANT_BUFFER_SIZE:
case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
if (ret) {
uint64_t max_global_size;
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] clover: Use correct query for CL_MAX_CONSTANT_BUFFER_SIZE

2014-07-23 Thread Tom Stellard
---
 src/gallium/state_trackers/clover/core/device.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/clover/core/device.cpp 
b/src/gallium/state_trackers/clover/core/device.cpp
index 63aa193..ada5267 100644
--- a/src/gallium/state_trackers/clover/core/device.cpp
+++ b/src/gallium/state_trackers/clover/core/device.cpp
@@ -135,8 +135,8 @@ device::max_mem_input() const {
 
 cl_ulong
 device::max_const_buffer_size() const {
-   return pipe-get_shader_param(pipe, PIPE_SHADER_COMPUTE,
- PIPE_SHADER_CAP_MAX_CONSTS) * 16;
+   return get_compute_paramuint64_t(pipe,
+  
PIPE_COMPUTE_CAP_MAX_CONSTANT_BUFFER_SIZE)[0];
 }
 
 cl_uint
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] radeonsi/compute: Bump number of user sgprs for LLVM 3.5

2014-07-18 Thread Tom Stellard
---
 src/gallium/drivers/radeonsi/si_compute.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index 3a9f00f..a7d61e7 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -32,6 +32,11 @@
 #include radeon_llvm_util.h
 
 #define MAX_GLOBAL_BUFFERS 20
+#if HAVE_LLVM  0x0305
+#define NUM_USER_SGPRS 2
+#else
+#define NUM_USER_SGPRS 4
+#endif
 
 struct si_pipe_compute {
struct si_context *ctx;
@@ -132,7 +137,7 @@ static void si_launch_grid(
uint32_t *kernel_args;
uint64_t kernel_args_va;
uint64_t shader_va;
-   unsigned arg_user_sgpr_count = 2;
+   unsigned arg_user_sgpr_count = NUM_USER_SGPRS;
unsigned i;
struct si_pipe_shader *shader = program-kernels[pc];
unsigned lds_blocks;
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] winsys/radeon: Query the kernel for the number of SEs and SHs per SE

2014-07-18 Thread Tom Stellard
---
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 6 ++
 src/gallium/winsys/radeon/drm/radeon_winsys.h | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 576fea5..7cda70a 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -389,6 +389,12 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
 radeon_get_drm_value(ws-fd, RADEON_INFO_ACTIVE_CU_COUNT, NULL,
  ws-info.max_compute_units);
 
+radeon_get_drm_value(ws-fd, RADEON_INFO_MAX_SE, NULL,
+ ws-info.max_se);
+
+radeon_get_drm_value(ws-fd, RADEON_INFO_MAX_SH_PER_SE, NULL,
+ ws-info.max_sh_per_se);
+
 if (radeon_get_drm_value(ws-fd, RADEON_INFO_SI_TILE_MODE_ARRAY, NULL,
  ws-info.si_tile_mode_array)) {
 ws-info.si_tile_mode_array_valid = TRUE;
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h 
b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index 6df1987..a63a50b 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -199,6 +199,8 @@ struct radeon_info {
 uint32_tvram_size;
 uint32_tmax_sclk;
 uint32_tmax_compute_units;
+uint32_tmax_se;
+uint32_tmax_sh_per_se;
 
 uint32_tdrm_major; /* version */
 uint32_tdrm_minor;
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] radeonsi/compute: Add scratch buffer support

2014-07-18 Thread Tom Stellard
The scratch buffer will be used for private memory and also register
spilling.
---
 src/gallium/drivers/radeonsi/si_compute.c | 85 ++-
 src/gallium/drivers/radeonsi/si_shader.c  |  5 ++
 src/gallium/drivers/radeonsi/si_shader.h  |  2 +
 3 files changed, 90 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index a7d61e7..d6cbbf4 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -122,6 +122,43 @@ static void si_set_global_binding(
}
 }
 
+/**
+ * This function computes the value for R_00B860_COMPUTE_TMPRING_SIZE.WAVES
+ * /p block_layout is the number of threads in each work group.
+ * /p grid layout is the number of work groups.
+ */
+static unsigned compute_num_waves_for_scratch(
+   const struct radeon_info *info;
+   const uint *block_layout,
+   const uint *grid_layout)
+{
+   unsigned num_sh = MAX2(info-max_sh_per_se, 1);
+   unsigned num_se = MAX2(info-max_se, 1);
+   unsigned num_blocks = 1;
+   unsigned threads_per_block = 1;
+   unsigned waves_per_block;
+   unsigned waves_per_sh;
+   unsigned waves;
+   unsigned scratch_waves;
+   unsigned i;
+
+   for (i = 0; i  3; i++) {
+   threads_per_block *= block_layout[i];
+   num_blocks *= grid_layout[i];
+   }
+
+   waves_per_block = align(threads_per_block, 64) / 64;
+   waves = waves_per_block * num_blocks;
+   waves_per_sh = align(waves, num_sh * num_se) / (num_sh * num_se);
+   scratch_waves = waves_per_sh * num_sh * num_se;
+
+   if (waves_per_block  waves_per_sh) {
+   scratch_waves = waves_per_block * num_sh * num_se;
+   }
+
+   return scratch_waves;
+}
+
 static void si_launch_grid(
struct pipe_context *ctx,
const uint *block_layout, const uint *grid_layout,
@@ -134,13 +171,16 @@ static void si_launch_grid(
unsigned kernel_args_size;
unsigned num_work_size_bytes = 36;
uint32_t kernel_args_offset = 0;
+   uint32_t scratch_offset = 0;
uint32_t *kernel_args;
uint64_t kernel_args_va;
+   uint64_t scratch_buffer_va = 0;
uint64_t shader_va;
unsigned arg_user_sgpr_count = NUM_USER_SGPRS;
unsigned i;
struct si_pipe_shader *shader = program-kernels[pc];
unsigned lds_blocks;
+   unsigned num_waves_for_scratch;
 
pm4-compute_pkt = true;
si_cmd_context_control(pm4);
@@ -158,7 +198,9 @@ static void si_launch_grid(
/* Upload the kernel arguments */
 
/* The extra num_work_size_bytes are for work group / work item size 
information */
-   kernel_args_size = program-input_size + num_work_size_bytes;
+   kernel_args_size = program-input_size + num_work_size_bytes + 8 /* For 
scratch va */;
+   scratch_offset = program-input_size + num_work_size_bytes;
+
kernel_args = MALLOC(kernel_args_size);
for (i = 0; i  3; i++) {
kernel_args[i] = grid_layout[i];
@@ -166,8 +208,34 @@ static void si_launch_grid(
kernel_args[i + 6] = block_layout[i];
}
 
+   num_waves_for_scratch = compute_num_waves_for_scratch(
+   stcx-screen.info, block_layout, grid_layout);
+
memcpy(kernel_args + (num_work_size_bytes / 4), input, 
program-input_size);
 
+   if (shader-scratch_bytes_per_wave  0) {
+   float *ptr;
+
+   COMPUTE_DBG(sctx-screen, Waves: %u; Scratch per wave: %u 
bytes; 
+   Total Scratch: %u bytes\n, num_waves_for_scratch,
+   shader-scratch_bytes_per_wave, info.width0);
+   if (!shader-scratch_bo) {
+   shader-scratch_bo = (struct r600_resource*)
+   si_resource_create_custom(sctx-b.b.screen,
+   PIPE_USAGE_DEFAULT, info.width0);
+   }
+   ptr = sctx-b.ws-buffer_map(shader-scratch_bo-cs_buf, 
sctx-b.rings.gfx.cs,
+   PIPE_TRANSFER_READ);
+   scratch_buffer_va = r600_resource_va(ctx-screen,
+   (struct pipe_resource*)shader-scratch_bo);
+   si_pm4_add_bo(pm4, shader-scratch_bo,
+   RADEON_USAGE_READWRITE,
+   RADEON_PRIO_SHADER_RESOURCE_RW);
+
+   }
+   memcpy(kernel_args + (scratch_offset / 4), scratch_buffer_va,
+  sizeof(scratch_buffer_va));
+
for (i = 0; i  (kernel_args_size / 4); i++) {
COMPUTE_DBG(sctx-screen, input %u : %u\n, i,
kernel_args[i]);
@@ -183,6 +251,10 @@ static void si_launch_grid(
 
si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va);
si_pm4_set_reg(pm4, 

[Mesa-dev] [PATCH 3/3] radeonsi: Read rodata from ELF and append it to the end of shaders

2014-07-18 Thread Tom Stellard
This is used for programs that have arrays of constants that
are accessed using dynamic indices.  The shader will compute
the base address of the constants and then access them using
SMRD instructions.
---
 src/gallium/drivers/radeon/r600_pipe_common.h |  5 +
 src/gallium/drivers/radeon/radeon_elf_util.c  |  5 +
 src/gallium/drivers/radeonsi/si_shader.c  | 16 +---
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index d82adf5..8f1a0a5 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -108,6 +108,11 @@ struct radeon_shader_binary {
unsigned char *config;
unsigned config_size;
 
+   /** Constant data accessed by the shader.  This will be uploaded
+* into a constant buffer. */
+   unsigned char *rodata;
+   unsigned rodata_size;
+
/** Set to 1 if the disassembly for this binary has been dumped to
 *  stderr. */
int disassembled;
diff --git a/src/gallium/drivers/radeon/radeon_elf_util.c 
b/src/gallium/drivers/radeon/radeon_elf_util.c
index 7d92962..7c5f93e 100644
--- a/src/gallium/drivers/radeon/radeon_elf_util.c
+++ b/src/gallium/drivers/radeon/radeon_elf_util.c
@@ -80,6 +80,11 @@ void radeon_elf_read(const char *elf_data, unsigned elf_size,
fprintf(stderr, \nShader Disassembly:\n\n);
fprintf(stderr, %.*s\n, (int)section_data-d_size,
  (char *)section_data-d_buf);
+   } else if (!strncmp(name, .rodata, 7)) {
+   section_data = elf_getdata(section, section_data);
+   binary-rodata_size = section_data-d_size;
+   binary-rodata = MALLOC(binary-rodata_size * 
sizeof(unsigned char));
+   memcpy(binary-rodata, section_data-d_buf, 
binary-rodata_size);
}
}
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 8593bca..641e563 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2499,11 +2499,12 @@ int si_compile_llvm(struct si_context *sctx, struct 
si_pipe_shader *shader,
 {
unsigned r; /* llvm_compile result */
unsigned i;
-   uint32_t *ptr;
+   unsigned char *ptr;
struct radeon_shader_binary binary;
bool dump = r600_can_dump_shader(sctx-screen-b,
shader-selector ? shader-selector-tokens : NULL);
const char * gpu_family = 
r600_get_llvm_processor_name(sctx-screen-b.family);
+   unsigned code_size;
 
/* Use LLVM to compile shader */
memset(binary, 0, sizeof(binary));
@@ -2551,19 +2552,28 @@ int si_compile_llvm(struct si_context *sctx, struct 
si_pipe_shader *shader,
}
 
/* copy new shader */
+   code_size = binary.code_size + binary.rodata_size;
r600_resource_reference(shader-bo, NULL);
shader-bo = si_resource_create_custom(sctx-b.b.screen, 
PIPE_USAGE_IMMUTABLE,
-  binary.code_size);
+  code_size);
if (shader-bo == NULL) {
return -ENOMEM;
}
 
-   ptr = (uint32_t*)sctx-b.ws-buffer_map(shader-bo-cs_buf, 
sctx-b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
+   ptr = sctx-b.ws-buffer_map(shader-bo-cs_buf, sctx-b.rings.gfx.cs,
+   PIPE_TRANSFER_WRITE);
util_memcpy_cpu_to_le32(ptr, binary.code, binary.code_size);
+   /* Copy read only data if any. */
+   if (binary.rodata_size  0) {
+   ptr += binary.code_size;
+   util_memcpy_cpu_to_le32(ptr, binary.rodata, binary.rodata_size);
+   }
+
sctx-b.ws-buffer_unmap(shader-bo-cs_buf);
 
free(binary.code);
free(binary.config);
+   free(binary.rodata);
 
return r;
 }
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] util: Add util_memcpy_cpu_to_le32() v2

2014-07-18 Thread Tom Stellard
v2:
  - Preserve word boundaries.
---
 src/gallium/auxiliary/util/u_math.h | 17 +
 1 file changed, 17 insertions(+)

diff --git a/src/gallium/auxiliary/util/u_math.h 
b/src/gallium/auxiliary/util/u_math.h
index b9ed197..5de181a 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -812,6 +812,23 @@ util_bswap16(uint16_t n)
   (n  8);
 }
 
+static INLINE void*
+util_memcpy_cpu_to_le32(void *dest, void *src, size_t n)
+{
+#ifdef PIPE_ARCH_BIG_ENDIAN
+   size_t i, e;
+   asset(n % 4 == 0);
+
+   for (i = 0, e = n / 4; i  e; i++) {
+   uint32_t *d = (uint32_t*)dest;
+   uint32_t *s = (uint32_t*)src;
+   d[i] = util_bswap32(s[i]);
+   }
+   return dest;
+#else
+   return memcpy(dest, src, n);
+#endif
+}
 
 /**
  * Clamp X to [MIN, MAX].
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] radeonsi: Use util_memcpy_cpu_to_le32()

2014-07-18 Thread Tom Stellard
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 4 +---
 src/gallium/drivers/radeonsi/si_shader.c  | 8 +---
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 38ad077..41c1b67 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -451,9 +451,7 @@ void si_upload_const_buffer(struct si_context *sctx, struct 
r600_resource **rbuf
return;
}
 
-   for (i = 0; i  size / 4; ++i) {
-   tmpPtr[i] = util_cpu_to_le32(((uint32_t *)ptr)[i]);
-   }
+   util_memcpy_cpu_to_le32(tmpPtr, ptr, size);
 
u_upload_data(sctx-b.uploader, 0, size, tmpPtr, const_offset,
(struct pipe_resource**)rbuffer);
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index f0650f4..8593bca 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2559,13 +2559,7 @@ int si_compile_llvm(struct si_context *sctx, struct 
si_pipe_shader *shader,
}
 
ptr = (uint32_t*)sctx-b.ws-buffer_map(shader-bo-cs_buf, 
sctx-b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
-   if (SI_BIG_ENDIAN) {
-   for (i = 0; i  binary.code_size / 4; ++i) {
-   ptr[i] = util_cpu_to_le32((*(uint32_t*)(binary.code + 
i*4)));
-   }
-   } else {
-   memcpy(ptr, binary.code, binary.code_size);
-   }
+   util_memcpy_cpu_to_le32(ptr, binary.code, binary.code_size);
sctx-b.ws-buffer_unmap(shader-bo-cs_buf);
 
free(binary.code);
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeonsi/compute: Share COMPUTE_DBG macro with r600g

2014-07-18 Thread Tom Stellard
---
 src/gallium/drivers/r600/evergreen_compute.h  | 13 -
 src/gallium/drivers/radeon/r600_pipe_common.h |  5 +
 src/gallium/drivers/radeonsi/si_compute.c |  5 +
 3 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.h 
b/src/gallium/drivers/r600/evergreen_compute.h
index e760790..4fb53a1 100644
--- a/src/gallium/drivers/r600/evergreen_compute.h
+++ b/src/gallium/drivers/r600/evergreen_compute.h
@@ -59,17 +59,4 @@ void r600_compute_global_transfer_flush_region( struct 
pipe_context *, struct pi
 void r600_compute_global_transfer_inline_write( struct pipe_context *, struct 
pipe_resource *, unsigned level,
 unsigned usage, const struct 
pipe_box *, const void *data, unsigned stride, unsigned layer_stride);
 
-
-static inline void COMPUTE_DBG(struct r600_screen *rscreen, const char *fmt, 
...)
-{
-   if (!(rscreen-b.debug_flags  DBG_COMPUTE)) {
-   return;
-   }
-
-   va_list ap;
-   va_start(ap, fmt);
-   _debug_vprintf(fmt, ap);
-   va_end(ap);
-}
-
 #endif
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 8f1a0a5..cee9622 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -515,6 +515,11 @@ static inline unsigned r600_tex_aniso_filter(unsigned 
filter)
 /* else */return 4;
 }
 
+#define COMPUTE_DBG(rscreen, fmt, args...) \
+   do { \
+   if ((rscreen-b.debug_flags  DBG_COMPUTE)) fprintf(stderr, 
fmt, ##args); \
+   } while (0);
+
 #define R600_ERR(fmt, args...) \
fprintf(stderr, EE %s:%d %s - fmt, __FILE__, __LINE__, __func__, 
##args)
 
diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index c0637f6..3a9f00f 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -163,6 +163,11 @@ static void si_launch_grid(
 
memcpy(kernel_args + (num_work_size_bytes / 4), input, 
program-input_size);
 
+   for (i = 0; i  (kernel_args_size / 4); i++) {
+   COMPUTE_DBG(sctx-screen, input %u : %u\n, i,
+   kernel_args[i]);
+   }
+
si_upload_const_buffer(sctx, kernel_args_buffer, (uint8_t*)kernel_args,
kernel_args_size, kernel_args_offset);
kernel_args_va = r600_resource_va(ctx-screen,
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] util: Add util_memcpy_cpu_to_le32() v3

2014-07-18 Thread Tom Stellard
v2:
  - Preserve word boundaries.

v3:
  - Use const and restrict.
  - Fix indentation.
---
 src/gallium/auxiliary/util/u_math.h | 17 +
 1 file changed, 17 insertions(+)

diff --git a/src/gallium/auxiliary/util/u_math.h 
b/src/gallium/auxiliary/util/u_math.h
index b9ed197..f6dcb22 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -812,6 +812,23 @@ util_bswap16(uint16_t n)
   (n  8);
 }
 
+static INLINE void*
+util_memcpy_cpu_to_le32(void * restrict dest, const void * restrict src, 
size_t n)
+{
+#ifdef PIPE_ARCH_BIG_ENDIAN
+   size_t i, e;
+   asset(n % 4 == 0);
+
+   for (i = 0, e = n / 4; i  e; i++) {
+  uint32_t * restrict d = (uint32_t* restrict)dest;
+  const uint32_t * restrict s = (const uint32_t* restrict)src;
+  d[i] = util_bswap32(s[i]);
+   }
+   return dest;
+#else
+   return memcpy(dest, src, n);
+#endif
+}
 
 /**
  * Clamp X to [MIN, MAX].
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeonsi/compute: Add support scratch buffer support v2

2014-07-18 Thread Tom Stellard
The scratch buffer will be used for private memory and also register
spilling.

v2:
  - Code cleanups
---

I had some uncommitted changes left in my tree when I generated v1 of this 
patch.

 src/gallium/drivers/radeonsi/si_compute.c | 80 ++-
 src/gallium/drivers/radeonsi/si_shader.c  |  5 ++
 src/gallium/drivers/radeonsi/si_shader.h  |  2 +
 3 files changed, 85 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index a7d61e7..42e4fec 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -122,6 +122,43 @@ static void si_set_global_binding(
}
 }
 
+/**
+ * This function computes the value for R_00B860_COMPUTE_TMPRING_SIZE.WAVES
+ * /p block_layout is the number of threads in each work group.
+ * /p grid layout is the number of work groups.
+ */
+static unsigned compute_num_waves_for_scratch(
+   const struct radeon_info *info,
+   const uint *block_layout,
+   const uint *grid_layout)
+{
+   unsigned num_sh = MAX2(info-max_sh_per_se, 1);
+   unsigned num_se = MAX2(info-max_se, 1);
+   unsigned num_blocks = 1;
+   unsigned threads_per_block = 1;
+   unsigned waves_per_block;
+   unsigned waves_per_sh;
+   unsigned waves;
+   unsigned scratch_waves;
+   unsigned i;
+
+   for (i = 0; i  3; i++) {
+   threads_per_block *= block_layout[i];
+   num_blocks *= grid_layout[i];
+   }
+
+   waves_per_block = align(threads_per_block, 64) / 64;
+   waves = waves_per_block * num_blocks;
+   waves_per_sh = align(waves, num_sh * num_se) / (num_sh * num_se);
+   scratch_waves = waves_per_sh * num_sh * num_se;
+
+   if (waves_per_block  waves_per_sh) {
+   scratch_waves = waves_per_block * num_sh * num_se;
+   }
+
+   return scratch_waves;
+}
+
 static void si_launch_grid(
struct pipe_context *ctx,
const uint *block_layout, const uint *grid_layout,
@@ -136,11 +173,13 @@ static void si_launch_grid(
uint32_t kernel_args_offset = 0;
uint32_t *kernel_args;
uint64_t kernel_args_va;
+   uint64_t scratch_buffer_va = 0;
uint64_t shader_va;
unsigned arg_user_sgpr_count = NUM_USER_SGPRS;
unsigned i;
struct si_pipe_shader *shader = program-kernels[pc];
unsigned lds_blocks;
+   unsigned num_waves_for_scratch;
 
pm4-compute_pkt = true;
si_cmd_context_control(pm4);
@@ -158,7 +197,8 @@ static void si_launch_grid(
/* Upload the kernel arguments */
 
/* The extra num_work_size_bytes are for work group / work item size 
information */
-   kernel_args_size = program-input_size + num_work_size_bytes;
+   kernel_args_size = program-input_size + num_work_size_bytes + 8 /* For 
scratch va */;
+
kernel_args = MALLOC(kernel_args_size);
for (i = 0; i  3; i++) {
kernel_args[i] = grid_layout[i];
@@ -166,8 +206,31 @@ static void si_launch_grid(
kernel_args[i + 6] = block_layout[i];
}
 
+   num_waves_for_scratch = compute_num_waves_for_scratch(
+   sctx-screen-b.info, block_layout, grid_layout);
+
memcpy(kernel_args + (num_work_size_bytes / 4), input, 
program-input_size);
 
+   if (shader-scratch_bytes_per_wave  0) {
+   unsigned scratch_bytes = shader-scratch_bytes_per_wave *
+   num_waves_for_scratch;
+
+   COMPUTE_DBG(sctx-screen, Waves: %u; Scratch per wave: %u 
bytes; 
+   Total Scratch: %u bytes\n, num_waves_for_scratch,
+   shader-scratch_bytes_per_wave, scratch_bytes);
+   if (!shader-scratch_bo) {
+   shader-scratch_bo = (struct r600_resource*)
+   si_resource_create_custom(sctx-b.b.screen,
+   PIPE_USAGE_DEFAULT, scratch_bytes);
+   }
+   scratch_buffer_va = r600_resource_va(ctx-screen,
+   (struct pipe_resource*)shader-scratch_bo);
+   si_pm4_add_bo(pm4, shader-scratch_bo,
+   RADEON_USAGE_READWRITE,
+   RADEON_PRIO_SHADER_RESOURCE_RW);
+
+   }
+
for (i = 0; i  (kernel_args_size / 4); i++) {
COMPUTE_DBG(sctx-screen, input %u : %u\n, i,
kernel_args[i]);
@@ -183,6 +246,10 @@ static void si_launch_grid(
 
si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va);
si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4, 
S_008F04_BASE_ADDRESS_HI (kernel_args_va  32) | S_008F04_STRIDE(0));
+   si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 8, 
scratch_buffer_va);
+   si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 

Re: [Mesa-dev] [PATCH 1/2] radeon/llvm: enable unsafe math for graphics shaders

2014-07-17 Thread Tom Stellard
On Thu, Jul 17, 2014 at 06:44:25PM +0200, Grigori Goronzy wrote:
 Accuracy of some operations was recently improved in the R600 backend,
 at the cost of slower code. This is required for compute shaders,
 but not for graphics shaders. Add unsafe-fp-math hint to make LLVM
 generate faster but possibly less accurate code.
 
 Piglit didn't indicate any regressions.

Both patches are:
Reviewed-by: Tom Stellard thomas.stell...@amd.com

 ---
  src/gallium/drivers/radeon/radeon_llvm_emit.c | 5 +
  1 file changed, 5 insertions(+)
 
 diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c 
 b/src/gallium/drivers/radeon/radeon_llvm_emit.c
 index 1b17dd4..171ccaa 100644
 --- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
 +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
 @@ -26,6 +26,7 @@
  #include radeon_llvm_emit.h
  #include radeon_elf_util.h
  #include util/u_memory.h
 +#include pipe/p_shader_tokens.h
  
  #include llvm-c/Target.h
  #include llvm-c/TargetMachine.h
 @@ -50,6 +51,10 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type)
sprintf(Str, %1d, type);
  
LLVMAddTargetDependentFunctionAttr(F, ShaderType, Str);
 +
 +  if (type != TGSI_PROCESSOR_COMPUTE) {
 +LLVMAddTargetDependentFunctionAttr(F, unsafe-fp-math, true);
 +  }
  }
  
  static void init_r600_target() {
 -- 
 1.8.3.2
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/5] [RFC] r600g/compute: Adding support for defragmenting compute_memory_pool

2014-07-17 Thread Tom Stellard
On Wed, Jul 16, 2014 at 11:12:42PM +0200, Bruno Jiménez wrote:
 Hi,
 
 This series finally adds support for defragmenting the pool for
 OpenCL buffers in the r600g driver. It is mostly a rewrite of
 the series that I wrote some months ago.
 
 For defragmenting the pool I have thought of two different
 possibilities:
 
 - Creating a new pool and moving every item here in the correct
 position. This has the advantage of being very simple to
 implement and that it allows the pool to be grown at the
 same time. But it has a couple of problems, namely that it
 has a high memory peak usage (sum of current pool + new pool)
 and that in the case of having a pool not very fragmented you
 have to copy every item to its new place.
 - Using the same pool by moving the items in it. This has the
 advantage of using less memory (sum of current pool + biggest
 item in it) and that it is easier to handle the case of
 only having few elements out of place. The disadvantages
 are that it doesn't allow growing the pool at the same time
 and that it may involve twice the number of item-copies in 
 the worst case.
 
 I have chosen to implement the second option, but if you think
 that the first one is better I can rewrite the series for it.
 (^_^)
 
 The worst case I have mentioned is this: Imagine that you have
 a series of items in which the first is, at least, 1 'unit'
 smaller than the rest. You now free this item and create a new
 one with the same size [why would anyone do this? I don't know]
 For now, the defragmenter code is so dumb that it will move
 every item to the front of the pool without trying first to
 put this new item in the available space.
 
 Hopefully situations like this won't be very common.
 
 If you want me to explain any detail about any of the patches
 just ask. And as said, if you prefer the first version of the
 defragmenter, just ask. [In fact, after having written this,
 I may add it for the case grow+defrag]
 
 Also, no regressions found in piglit.
 
 Thanks in advance!
 Bruno
 
 Bruno Jiménez (5):
   r600g/compute: Add a function for moving items in the pool
   r600g/compute: Add a function for defragmenting the pool
   r600g/compute: Defrag the pool if it's necesary
   r600g/compute: Quick exit if there's nothing to add to the pool
   r600g/compute: Remove unneeded code from compute_memory_promote_item
 
  src/gallium/drivers/r600/compute_memory_pool.c | 196 
 ++---
  src/gallium/drivers/r600/compute_memory_pool.h |  13 +-
  2 files changed, 156 insertions(+), 53 deletions(-)

Hi,

I took a brief look at these patches and they look pretty good.  I will
look at them again tomorrow and then commit if I don't see any issues.

-Tom

 
 -- 
 2.0.1
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] clover: Call end_query before getting timestamp result

2014-07-16 Thread Tom Stellard
Also change the wait parameter from false to true.
---

I'm really not sure what is correct here, but this patch fixes event profiling 
on SI.

 src/gallium/state_trackers/clover/core/timestamp.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/clover/core/timestamp.cpp 
b/src/gallium/state_trackers/clover/core/timestamp.cpp
index 481c4f9..a6edaf6 100644
--- a/src/gallium/state_trackers/clover/core/timestamp.cpp
+++ b/src/gallium/state_trackers/clover/core/timestamp.cpp
@@ -47,7 +47,8 @@ cl_ulong
 timestamp::query::operator()() const {
pipe_query_result result;
 
-   if (!q().pipe-get_query_result(q().pipe, _query, false, result))
+   q().pipe-end_query(q().pipe, _query);
+   if (!q().pipe-get_query_result(q().pipe, _query, true, result))
   throw error(CL_PROFILING_INFO_NOT_AVAILABLE);
 
return result.u64;
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] clover: Use 1 as default value for CL_DEVICE_PROFILING_TIMER_RESOLUTION

2014-07-16 Thread Tom Stellard
We use PIPE_QUERY_TIMESTAMP for profiling events, and gallium specifies
that the timestamp be in nanoseconds.
---
 src/gallium/state_trackers/clover/api/device.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/clover/api/device.cpp 
b/src/gallium/state_trackers/clover/api/device.cpp
index 1176668..25d29f5 100644
--- a/src/gallium/state_trackers/clover/api/device.cpp
+++ b/src/gallium/state_trackers/clover/api/device.cpp
@@ -249,7 +249,9 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
   break;
 
case CL_DEVICE_PROFILING_TIMER_RESOLUTION:
-  buf.as_scalarsize_t() = 0;
+  // PIPE_QUERY_TIMESTAMP returns a timestamp in units of nanoseconds,
+  // so we default to 1 here.
+  buf.as_scalarsize_t() = 1;
   break;
 
case CL_DEVICE_ENDIAN_LITTLE:
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] clover: Call end_query before getting timestamp result v2

2014-07-16 Thread Tom Stellard
v2:
  - Move the end_query() call into the timestamp constructor.
  - Still pass false as the wait parameter to get_query_result().
---
 src/gallium/state_trackers/clover/core/timestamp.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/state_trackers/clover/core/timestamp.cpp 
b/src/gallium/state_trackers/clover/core/timestamp.cpp
index 481c4f9..3fd341f 100644
--- a/src/gallium/state_trackers/clover/core/timestamp.cpp
+++ b/src/gallium/state_trackers/clover/core/timestamp.cpp
@@ -30,6 +30,7 @@ using namespace clover;
 timestamp::query::query(command_queue q) :
q(q),
_query(q.pipe-create_query(q.pipe, PIPE_QUERY_TIMESTAMP, 0)) {
+   q.pipe-end_query(q.pipe, _query);
 }
 
 timestamp::query::query(query other) :
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] util: Add util_memcpy_cpu_to_le()

2014-07-15 Thread Tom Stellard
---
 src/gallium/auxiliary/util/u_math.h  | 22 ++
 src/gallium/drivers/radeonsi/si_shader.c |  8 +---
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_math.h 
b/src/gallium/auxiliary/util/u_math.h
index b9ed197..cd3cf04 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -812,6 +812,28 @@ util_bswap16(uint16_t n)
   (n  8);
 }
 
+static INLINE void*
+util_memcpy_cpu_to_le(void *dest, void *src, size_t n)
+{
+#ifdef PIPE_ARCH_BIG_ENDIAN
+   size_t i, e;
+   for (i = 0, e = n % 8; i  e; i++) {
+   char *d = (char*)dest;
+   char *s = (char*)src;
+   d[i] = s[e - i - 1];
+   }
+   dest += i;
+   n -= i;
+   for (i = 0, e = n / 8; i  e; i++) {
+   uint64_t *d = (uint64_t*)dest;
+   uint64_t *s = (uint64_t*)src;
+   d[i] = util_bswap64(s[e - i - 1]);
+   }
+   return dest;
+#else
+   return memcpy(dest, src, n);
+#endif
+}
 
 /**
  * Clamp X to [MIN, MAX].
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index f0650f4..6f0504b 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2559,13 +2559,7 @@ int si_compile_llvm(struct si_context *sctx, struct 
si_pipe_shader *shader,
}
 
ptr = (uint32_t*)sctx-b.ws-buffer_map(shader-bo-cs_buf, 
sctx-b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
-   if (SI_BIG_ENDIAN) {
-   for (i = 0; i  binary.code_size / 4; ++i) {
-   ptr[i] = util_cpu_to_le32((*(uint32_t*)(binary.code + 
i*4)));
-   }
-   } else {
-   memcpy(ptr, binary.code, binary.code_size);
-   }
+   util_memcpy_cpu_to_le(ptr, binary.code, binary.code_size);
sctx-b.ws-buffer_unmap(shader-bo-cs_buf);
 
free(binary.code);
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] radeonsi: Read rodata from ELF and append it to the end of shaders

2014-07-15 Thread Tom Stellard
This is used for programs that have arrays of constants that
are accessed using dynamic indices.  The shader will compute
the base address of the constants and then access them using
SMRD instructions.
---
 src/gallium/drivers/radeon/r600_pipe_common.h |  5 +
 src/gallium/drivers/radeon/radeon_elf_util.c  |  5 +
 src/gallium/drivers/radeonsi/si_shader.c  | 16 +---
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index d82adf5..8f1a0a5 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -108,6 +108,11 @@ struct radeon_shader_binary {
unsigned char *config;
unsigned config_size;
 
+   /** Constant data accessed by the shader.  This will be uploaded
+* into a constant buffer. */
+   unsigned char *rodata;
+   unsigned rodata_size;
+
/** Set to 1 if the disassembly for this binary has been dumped to
 *  stderr. */
int disassembled;
diff --git a/src/gallium/drivers/radeon/radeon_elf_util.c 
b/src/gallium/drivers/radeon/radeon_elf_util.c
index 7d92962..7c5f93e 100644
--- a/src/gallium/drivers/radeon/radeon_elf_util.c
+++ b/src/gallium/drivers/radeon/radeon_elf_util.c
@@ -80,6 +80,11 @@ void radeon_elf_read(const char *elf_data, unsigned elf_size,
fprintf(stderr, \nShader Disassembly:\n\n);
fprintf(stderr, %.*s\n, (int)section_data-d_size,
  (char *)section_data-d_buf);
+   } else if (!strncmp(name, .rodata, 7)) {
+   section_data = elf_getdata(section, section_data);
+   binary-rodata_size = section_data-d_size;
+   binary-rodata = MALLOC(binary-rodata_size * 
sizeof(unsigned char));
+   memcpy(binary-rodata, section_data-d_buf, 
binary-rodata_size);
}
}
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 6f0504b..f07dbab 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2499,11 +2499,12 @@ int si_compile_llvm(struct si_context *sctx, struct 
si_pipe_shader *shader,
 {
unsigned r; /* llvm_compile result */
unsigned i;
-   uint32_t *ptr;
+   unsigned char *ptr;
struct radeon_shader_binary binary;
bool dump = r600_can_dump_shader(sctx-screen-b,
shader-selector ? shader-selector-tokens : NULL);
const char * gpu_family = 
r600_get_llvm_processor_name(sctx-screen-b.family);
+   unsigned code_size;
 
/* Use LLVM to compile shader */
memset(binary, 0, sizeof(binary));
@@ -2551,19 +2552,28 @@ int si_compile_llvm(struct si_context *sctx, struct 
si_pipe_shader *shader,
}
 
/* copy new shader */
+   code_size = binary.code_size + binary.rodata_size;
r600_resource_reference(shader-bo, NULL);
shader-bo = si_resource_create_custom(sctx-b.b.screen, 
PIPE_USAGE_IMMUTABLE,
-  binary.code_size);
+  code_size);
if (shader-bo == NULL) {
return -ENOMEM;
}
 
-   ptr = (uint32_t*)sctx-b.ws-buffer_map(shader-bo-cs_buf, 
sctx-b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
+   ptr = sctx-b.ws-buffer_map(shader-bo-cs_buf, sctx-b.rings.gfx.cs,
+   PIPE_TRANSFER_WRITE);
util_memcpy_cpu_to_le(ptr, binary.code, binary.code_size);
+   /* Copy read only data if any. */
+   if (binary.rodata_size  0) {
+   ptr += binary.code_size;
+   util_memcpy_cpu_to_le(ptr, binary.rodata, binary.rodata_size);
+   }
+
sctx-b.ws-buffer_unmap(shader-bo-cs_buf);
 
free(binary.code);
free(binary.config);
+   free(binary.rodata);
 
return r;
 }
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeon/llvm: Fix LLVM diagnostic error reporting

2014-07-14 Thread Tom Stellard
We were trying to print the error message after disposing the
message object.
---
 src/gallium/drivers/radeon/radeon_llvm_emit.c | 11 ---
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c 
b/src/gallium/drivers/radeon/radeon_llvm_emit.c
index 6a394b2..1b17dd4 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
@@ -83,16 +83,13 @@ static LLVMTargetRef get_r600_target() {
 #if HAVE_LLVM = 0x0305
 
 static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context) {
-   unsigned int *diagnosticflag;
-   char *diaginfo_message;
-
-   diaginfo_message = LLVMGetDiagInfoDescription(di);
-   LLVMDisposeMessage(diaginfo_message);
-
-   diagnosticflag = (unsigned int *)context;
if (LLVMGetDiagInfoSeverity(di) == LLVMDSError) {
+   unsigned int *diagnosticflag = (unsigned int *)context;
+   char *diaginfo_message = LLVMGetDiagInfoDescription(di);
+
*diagnosticflag = 1;
fprintf(stderr,LLVM triggered Diagnostic Handler: %s\n, 
diaginfo_message);
+   LLVMDisposeMessage(diaginfo_message);
}
 }
 
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 2/2] r600g/compute: Add debug information to promote and demote functions

2014-07-11 Thread Tom Stellard
On Fri, Jul 11, 2014 at 10:20:54AM +0200, Bruno Jiménez wrote:
 v2: Add information about the item's starting point and size

Reviewed-by: Tom Stellard thomas.stell...@amd.com

 ---
  src/gallium/drivers/r600/compute_memory_pool.c | 9 +
  1 file changed, 9 insertions(+)
 
 diff --git a/src/gallium/drivers/r600/compute_memory_pool.c 
 b/src/gallium/drivers/r600/compute_memory_pool.c
 index 1d0ec85..6a525cf 100644
 --- a/src/gallium/drivers/r600/compute_memory_pool.c
 +++ b/src/gallium/drivers/r600/compute_memory_pool.c
 @@ -339,6 +339,10 @@ int compute_memory_promote_item(struct 
 compute_memory_pool *pool,
   int64_t start_in_dw;
   int err = 0;
  
 + COMPUTE_DBG(pool-screen, * compute_memory_promote_item()\n
 +   + Promoting Item: %i , starting at: %u (%u bytes) 
 + size: %u (%u bytes)\n, item-id, item-start_in_dw,
 + item-start_in_dw * 4, item-size_in_dw, 
 item-size_in_dw * 4);
  
   /* Search for free space in the pool for this item. */
   while ((start_in_dw=compute_memory_prealloc_chunk(pool,
 @@ -409,6 +413,11 @@ void compute_memory_demote_item(struct 
 compute_memory_pool *pool,
   struct pipe_resource *dst;
   struct pipe_box box;
  
 + COMPUTE_DBG(pool-screen, * compute_memory_demote_item()\n
 +   + Demoting Item: %i, starting at: %u (%u bytes) 
 + size: %u (%u bytes)\n, item-id, item-start_in_dw,
 + item-start_in_dw * 4, item-size_in_dw, 
 item-size_in_dw * 4);
 +
   /* First, we remove the item from the item_list */
   list_del(item-link);
  
 -- 
 2.0.1
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] radeonsi: switch descriptors to i32 vectors

2014-07-11 Thread Tom Stellard
On Fri, Jul 11, 2014 at 01:00:34AM +0200, Marek Olšák wrote:
 I have just tested it and it works with LLVM 3.4.2.
 

Ok, thanks.

Both patches are:

Reviewed-by: Tom Stellard thomas.stell...@amd.com

 Marek
 
 On Thu, Jul 10, 2014 at 5:11 PM, Tom Stellard t...@stellard.net wrote:
  On Tue, Jul 08, 2014 at 01:37:02AM +0200, Marek Olšák wrote:
  From: Marek Olšák marek.ol...@amd.com
 
  This is a follow-up to the commit which adds texture fetches with offsets.
 
  Will this still work with LLVM 3.4.2 ?
 
  -Tom
 
  ---
   src/gallium/drivers/radeonsi/si_shader.c | 29 
  -
   1 file changed, 16 insertions(+), 13 deletions(-)
 
  diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
  b/src/gallium/drivers/radeonsi/si_shader.c
  index 3dd6ad1..a28d682 100644
  --- a/src/gallium/drivers/radeonsi/si_shader.c
  +++ b/src/gallium/drivers/radeonsi/si_shader.c
  @@ -1574,7 +1574,7 @@ static void tex_fetch_args(
LLVMTypeRef i8 = LLVMInt8TypeInContext(gallivm-context);
LLVMTypeRef v16i8 = LLVMVectorType(i8, 16);
 
  - /* Truncate v32i8 to v16i8. */
  + /* Bitcast and truncate v8i32 to v16i8. */
LLVMValueRef res = si_shader_ctx-resources[sampler_index];
res = LLVMBuildBitCast(gallivm-builder, res, v2i128, );
res = LLVMBuildExtractElement(gallivm-builder, res, 
  bld_base-uint_bld.zero, );
  @@ -2305,12 +2305,18 @@ static void create_meta_data(struct 
  si_shader_context *si_shader_ctx)
si_shader_ctx-const_md = LLVMMDNodeInContext(gallivm-context, 
  args, 3);
   }
 
  +static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
  +{
  + return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
  +CONST_ADDR_SPACE);
  +}
  +
   static void create_function(struct si_shader_context *si_shader_ctx)
   {
struct lp_build_tgsi_context *bld_base = 
  si_shader_ctx-radeon_bld.soa.bld_base;
struct gallivm_state *gallivm = bld_base-base.gallivm;
struct si_pipe_shader *shader = si_shader_ctx-shader;
  - LLVMTypeRef params[SI_NUM_PARAMS], f32, i8, i32, v2i32, v3i32;
  + LLVMTypeRef params[SI_NUM_PARAMS], f32, i8, i32, v2i32, v3i32, 
  v16i8, v4i32, v8i32;
unsigned i, last_sgpr, num_params;
 
i8 = LLVMInt8TypeInContext(gallivm-context);
  @@ -2318,21 +2324,18 @@ static void create_function(struct 
  si_shader_context *si_shader_ctx)
f32 = LLVMFloatTypeInContext(gallivm-context);
v2i32 = LLVMVectorType(i32, 2);
v3i32 = LLVMVectorType(i32, 3);
  + v4i32 = LLVMVectorType(i32, 4);
  + v8i32 = LLVMVectorType(i32, 8);
  + v16i8 = LLVMVectorType(i8, 16);
 
  - params[SI_PARAM_CONST] = LLVMPointerType(
  - LLVMArrayType(LLVMVectorType(i8, 16), NUM_CONST_BUFFERS), 
  CONST_ADDR_SPACE);
  - params[SI_PARAM_RW_BUFFERS] = params[SI_PARAM_CONST];
  -
  - /* We assume at most 16 textures per program at the moment.
  -  * This need probably need to be changed to support bindless 
  textures */
  - params[SI_PARAM_SAMPLER] = LLVMPointerType(
  - LLVMArrayType(LLVMVectorType(i8, 16), NUM_SAMPLER_STATES), 
  CONST_ADDR_SPACE);
  - params[SI_PARAM_RESOURCE] = LLVMPointerType(
  - LLVMArrayType(LLVMVectorType(i8, 32), NUM_SAMPLER_VIEWS), 
  CONST_ADDR_SPACE);
  + params[SI_PARAM_CONST] = const_array(v16i8, NUM_CONST_BUFFERS);
  + params[SI_PARAM_RW_BUFFERS] = const_array(v16i8, 6); /* XXX 
  hardcoded */
  + params[SI_PARAM_SAMPLER] = const_array(v4i32, NUM_SAMPLER_STATES);
  + params[SI_PARAM_RESOURCE] = const_array(v8i32, NUM_SAMPLER_VIEWS);
 
switch (si_shader_ctx-type) {
case TGSI_PROCESSOR_VERTEX:
  - params[SI_PARAM_VERTEX_BUFFER] = params[SI_PARAM_CONST];
  + params[SI_PARAM_VERTEX_BUFFER] = const_array(v16i8, 16); /* 
  XXX hardcoded */
params[SI_PARAM_START_INSTANCE] = i32;
num_params = SI_PARAM_START_INSTANCE+1;
if (shader-key.vs.as_es) {
  --
  1.9.1
 
  ___
  mesa-dev mailing list
  mesa-dev@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/5] configure.ac: require LLVM 3.4.2 for radeon

2014-07-10 Thread Tom Stellard
On Tue, Jul 08, 2014 at 03:37:02AM +0200, Marek Olšák wrote:
 From: Marek Olšák marek.ol...@amd.com
 
 Needed by ARB_draw_indirect.

I think we should come up with a rule for how long we should support
older versions of LLVM.  Do you have any thoughts about this?  I was
thinking we could have each Mesa release support current stable LLVM
and also the development version from SVN.

-Tom

 ---
  configure.ac | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)
 
 diff --git a/configure.ac b/configure.ac
 index 4646212..9d5cd89 100644
 --- a/configure.ac
 +++ b/configure.ac
 @@ -1888,8 +1888,9 @@ radeon_llvm_check() {
  AC_MSG_ERROR([--enable-gallium-llvm is required when building $1])
  fi
  LLVM_REQUIRED_VERSION_MAJOR=3
 -LLVM_REQUIRED_VERSION_MINOR=3
 -if test $LLVM_VERSION_INT -lt 
 ${LLVM_REQUIRED_VERSION_MAJOR}0${LLVM_REQUIRED_VERSION_MINOR}; then
 +LLVM_REQUIRED_VERSION_MINOR=4
 +LLVM_REQUIRED_VERSION_PATCH=2
 +if test ${LLVM_VERSION_INT}${LLVM_VERSION_PATCH} -lt 
 ${LLVM_REQUIRED_VERSION_MAJOR}0${LLVM_REQUIRED_VERSION_MINOR}${LLVM_REQUIRED_VERSION_PATCH};
  then
  AC_MSG_ERROR([LLVM 
 $LLVM_REQUIRED_VERSION_MAJOR.$LLVM_REQUIRED_VERSION_MINOR or newer is 
 required for $1])
  fi
  if test true  $LLVM_CONFIG --targets-built | grep -qvw 'R600' ; then
 -- 
 1.9.1
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] radeonsi: switch descriptors to i32 vectors

2014-07-10 Thread Tom Stellard
On Tue, Jul 08, 2014 at 01:37:02AM +0200, Marek Olšák wrote:
 From: Marek Olšák marek.ol...@amd.com
 
 This is a follow-up to the commit which adds texture fetches with offsets.

Will this still work with LLVM 3.4.2?

-Tom

 ---
  src/gallium/drivers/radeonsi/si_shader.c | 29 -
  1 file changed, 16 insertions(+), 13 deletions(-)
 
 diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
 b/src/gallium/drivers/radeonsi/si_shader.c
 index 3dd6ad1..a28d682 100644
 --- a/src/gallium/drivers/radeonsi/si_shader.c
 +++ b/src/gallium/drivers/radeonsi/si_shader.c
 @@ -1574,7 +1574,7 @@ static void tex_fetch_args(
   LLVMTypeRef i8 = LLVMInt8TypeInContext(gallivm-context);
   LLVMTypeRef v16i8 = LLVMVectorType(i8, 16);
  
 - /* Truncate v32i8 to v16i8. */
 + /* Bitcast and truncate v8i32 to v16i8. */
   LLVMValueRef res = si_shader_ctx-resources[sampler_index];
   res = LLVMBuildBitCast(gallivm-builder, res, v2i128, );
   res = LLVMBuildExtractElement(gallivm-builder, res, 
 bld_base-uint_bld.zero, );
 @@ -2305,12 +2305,18 @@ static void create_meta_data(struct si_shader_context 
 *si_shader_ctx)
   si_shader_ctx-const_md = LLVMMDNodeInContext(gallivm-context, args, 
 3);
  }
  
 +static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
 +{
 + return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
 +CONST_ADDR_SPACE);
 +}
 +
  static void create_function(struct si_shader_context *si_shader_ctx)
  {
   struct lp_build_tgsi_context *bld_base = 
 si_shader_ctx-radeon_bld.soa.bld_base;
   struct gallivm_state *gallivm = bld_base-base.gallivm;
   struct si_pipe_shader *shader = si_shader_ctx-shader;
 - LLVMTypeRef params[SI_NUM_PARAMS], f32, i8, i32, v2i32, v3i32;
 + LLVMTypeRef params[SI_NUM_PARAMS], f32, i8, i32, v2i32, v3i32, v16i8, 
 v4i32, v8i32;
   unsigned i, last_sgpr, num_params;
  
   i8 = LLVMInt8TypeInContext(gallivm-context);
 @@ -2318,21 +2324,18 @@ static void create_function(struct si_shader_context 
 *si_shader_ctx)
   f32 = LLVMFloatTypeInContext(gallivm-context);
   v2i32 = LLVMVectorType(i32, 2);
   v3i32 = LLVMVectorType(i32, 3);
 + v4i32 = LLVMVectorType(i32, 4);
 + v8i32 = LLVMVectorType(i32, 8);
 + v16i8 = LLVMVectorType(i8, 16);
  
 - params[SI_PARAM_CONST] = LLVMPointerType(
 - LLVMArrayType(LLVMVectorType(i8, 16), NUM_CONST_BUFFERS), 
 CONST_ADDR_SPACE);
 - params[SI_PARAM_RW_BUFFERS] = params[SI_PARAM_CONST];
 -
 - /* We assume at most 16 textures per program at the moment.
 -  * This need probably need to be changed to support bindless textures */
 - params[SI_PARAM_SAMPLER] = LLVMPointerType(
 - LLVMArrayType(LLVMVectorType(i8, 16), NUM_SAMPLER_STATES), 
 CONST_ADDR_SPACE);
 - params[SI_PARAM_RESOURCE] = LLVMPointerType(
 - LLVMArrayType(LLVMVectorType(i8, 32), NUM_SAMPLER_VIEWS), 
 CONST_ADDR_SPACE);
 + params[SI_PARAM_CONST] = const_array(v16i8, NUM_CONST_BUFFERS);
 + params[SI_PARAM_RW_BUFFERS] = const_array(v16i8, 6); /* XXX hardcoded */
 + params[SI_PARAM_SAMPLER] = const_array(v4i32, NUM_SAMPLER_STATES);
 + params[SI_PARAM_RESOURCE] = const_array(v8i32, NUM_SAMPLER_VIEWS);
  
   switch (si_shader_ctx-type) {
   case TGSI_PROCESSOR_VERTEX:
 - params[SI_PARAM_VERTEX_BUFFER] = params[SI_PARAM_CONST];
 + params[SI_PARAM_VERTEX_BUFFER] = const_array(v16i8, 16); /* XXX 
 hardcoded */
   params[SI_PARAM_START_INSTANCE] = i32;
   num_params = SI_PARAM_START_INSTANCE+1;
   if (shader-key.vs.as_es) {
 -- 
 1.9.1
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] radeonsi: rename definitions of shader limits

2014-07-10 Thread Tom Stellard
On Tue, Jul 08, 2014 at 01:37:03AM +0200, Marek Olšák wrote:
 From: Marek Olšák marek.ol...@amd.com

Reviewed-by: Tom Stellard thomas.stell...@amd.com

 
 ---
  src/gallium/drivers/radeonsi/si_blit.c|  2 +-
  src/gallium/drivers/radeonsi/si_descriptors.c | 12 +-
  src/gallium/drivers/radeonsi/si_pipe.c|  6 ++---
  src/gallium/drivers/radeonsi/si_pipe.h|  4 +---
  src/gallium/drivers/radeonsi/si_shader.c  | 34 
 +--
  src/gallium/drivers/radeonsi/si_state.c   | 12 +-
  src/gallium/drivers/radeonsi/si_state.h   | 31 +---
  7 files changed, 57 insertions(+), 44 deletions(-)
 
 diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
 b/src/gallium/drivers/radeonsi/si_blit.c
 index 8c3e136..072024a 100644
 --- a/src/gallium/drivers/radeonsi/si_blit.c
 +++ b/src/gallium/drivers/radeonsi/si_blit.c
 @@ -76,7 +76,7 @@ static void si_blitter_begin(struct pipe_context *ctx, enum 
 si_blitter_op op)
  
   util_blitter_save_fragment_sampler_views(sctx-blitter,
   
 util_last_bit(sctx-samplers[PIPE_SHADER_FRAGMENT].views.desc.enabled_mask 
 -   ((1  NUM_TEX_UNITS) - 1)),
 +   ((1  SI_NUM_USER_SAMPLERS) - 1)),
   sctx-samplers[PIPE_SHADER_FRAGMENT].views.views);
   }
  
 diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
 b/src/gallium/drivers/radeonsi/si_descriptors.c
 index 38ad077..6ae9b82 100644
 --- a/src/gallium/drivers/radeonsi/si_descriptors.c
 +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
 @@ -289,7 +289,7 @@ static void si_init_sampler_views(struct si_context *sctx,
   si_init_descriptors(sctx, views-desc,
   si_get_shader_user_data_base(shader) +
   SI_SGPR_RESOURCE * 4,
 - 8, NUM_SAMPLER_VIEWS, si_emit_sampler_views);
 + 8, SI_NUM_SAMPLER_VIEWS, si_emit_sampler_views);
  }
  
  static void si_release_sampler_views(struct si_sampler_views *views)
 @@ -643,7 +643,7 @@ static void si_set_streamout_targets(struct pipe_context 
 *ctx,
  
   /* Set the shader resources.*/
   for (i = 0; i  num_targets; i++) {
 - bufidx = SI_RW_SO + i;
 + bufidx = SI_SO_BUF_OFFSET + i;
  
   if (targets[i]) {
   struct pipe_resource *buffer = targets[i]-buffer;
 @@ -677,7 +677,7 @@ static void si_set_streamout_targets(struct pipe_context 
 *ctx,
   buffers-desc.dirty_mask |= 1  bufidx;
   }
   for (; i  old_num_targets; i++) {
 - bufidx = SI_RW_SO + i;
 + bufidx = SI_SO_BUF_OFFSET + i;
   /* Clear the descriptor and unset the resource. */
   memset(buffers-desc_data[bufidx], 0, sizeof(uint32_t) * 4);
   pipe_resource_reference(buffers-buffers[bufidx], NULL);
 @@ -755,7 +755,7 @@ static void si_invalidate_buffer(struct pipe_context 
 *ctx, struct pipe_resource
   buffers-desc.dirty_mask |= 1  i;
   found = true;
  
 - if (i = SI_RW_SO  shader == 
 PIPE_SHADER_VERTEX) {
 + if (i = SI_SO_BUF_OFFSET  shader == 
 PIPE_SHADER_VERTEX) {
   /* Update the streamout state. */
   if (sctx-b.streamout.begin_emitted) {
   
 r600_emit_streamout_end(sctx-b);
 @@ -977,11 +977,11 @@ void si_init_all_descriptors(struct si_context *sctx)
  
   for (i = 0; i  SI_NUM_SHADERS; i++) {
   si_init_buffer_resources(sctx, sctx-const_buffers[i],
 -  NUM_CONST_BUFFERS, i, SI_SGPR_CONST,
 +  SI_NUM_CONST_BUFFERS, i, SI_SGPR_CONST,
RADEON_USAGE_READ, 
 RADEON_PRIO_SHADER_BUFFER_RO);
   si_init_buffer_resources(sctx, sctx-rw_buffers[i],
i == PIPE_SHADER_VERTEX ?
 -  SI_RW_SO + 4 : SI_RW_SO,
 +  SI_NUM_RW_BUFFERS : 
 SI_NUM_RING_BUFFERS,
i, SI_SGPR_RW_BUFFERS,
RADEON_USAGE_READWRITE, 
 RADEON_PRIO_SHADER_RESOURCE_RW);
  
 diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
 b/src/gallium/drivers/radeonsi/si_pipe.c
 index 184235d..0f99e44 100644
 --- a/src/gallium/drivers/radeonsi/si_pipe.c
 +++ b/src/gallium/drivers/radeonsi/si_pipe.c
 @@ -146,7 +146,7 @@ static struct pipe_context *si_create_context(struct 
 pipe_screen *screen, void *
   sctx-null_const_buf.buffer_size = 
 sctx-null_const_buf.buffer-width0;
  
   for (shader = 0; shader  SI_NUM_SHADERS; shader++) {
 - for (i = 0; i  NUM_CONST_BUFFERS; i

Re: [Mesa-dev] [PATCH] r600g/compute: Try to use a temporary resource when growing the pool

2014-07-10 Thread Tom Stellard
On Mon, Jul 07, 2014 at 05:50:05PM +0200, Bruno Jiménez wrote:
 Now, before moving everything to host memory, we try to create a
 new resource to use as a pool. If we succeed we just use this resource
 and delete the previous one. If we fail we fall back to using the
 shadow.
 
 This should make growing the pool faster, and we can also save
 64KB of memory that was allocated for the 'shadow', even if it
 wasn't used.

Reviewed-by: Tom Stellard thomas.stell...@amd.com

 ---
  src/gallium/drivers/r600/compute_memory_pool.c | 61 
 ++
  1 file changed, 43 insertions(+), 18 deletions(-)
 
 diff --git a/src/gallium/drivers/r600/compute_memory_pool.c 
 b/src/gallium/drivers/r600/compute_memory_pool.c
 index fe19d9e..db6d937 100644
 --- a/src/gallium/drivers/r600/compute_memory_pool.c
 +++ b/src/gallium/drivers/r600/compute_memory_pool.c
 @@ -73,10 +73,6 @@ static void compute_memory_pool_init(struct 
 compute_memory_pool * pool,
   COMPUTE_DBG(pool-screen, * compute_memory_pool_init() 
 initial_size_in_dw = %ld\n,
   initial_size_in_dw);
  
 - pool-shadow = (uint32_t*)CALLOC(initial_size_in_dw, 4);
 - if (pool-shadow == NULL)
 - return;
 -
   pool-size_in_dw = initial_size_in_dw;
   pool-bo = (struct 
 r600_resource*)r600_compute_buffer_alloc_vram(pool-screen,
   pool-size_in_dw * 4);
 @@ -184,27 +180,56 @@ int compute_memory_grow_pool(struct 
 compute_memory_pool* pool,
  
   if (!pool-bo) {
   compute_memory_pool_init(pool, MAX2(new_size_in_dw, 1024 * 16));
 - if (pool-shadow == NULL)
 - return -1;
   } else {
 + struct r600_resource *temp = NULL;
 +
   new_size_in_dw = align(new_size_in_dw, ITEM_ALIGNMENT);
  
   COMPUTE_DBG(pool-screen,   Aligned size = %d (%d bytes)\n,
   new_size_in_dw, new_size_in_dw * 4);
  
 - compute_memory_shadow(pool, pipe, 1);
 - pool-shadow = realloc(pool-shadow, new_size_in_dw*4);
 - if (pool-shadow == NULL)
 - return -1;
 + temp = (struct r600_resource *)r600_compute_buffer_alloc_vram(
 + pool-screen, 
 new_size_in_dw * 4);
  
 - pool-size_in_dw = new_size_in_dw;
 - pool-screen-b.b.resource_destroy(
 - (struct pipe_screen *)pool-screen,
 - (struct pipe_resource *)pool-bo);
 - pool-bo = (struct 
 r600_resource*)r600_compute_buffer_alloc_vram(
 - pool-screen,
 - pool-size_in_dw * 4);
 - compute_memory_shadow(pool, pipe, 0);
 + if (temp != NULL) {
 + struct r600_context *rctx = (struct r600_context *)pipe;
 + struct pipe_resource *src = (struct pipe_resource 
 *)pool-bo;
 + struct pipe_resource *dst = (struct pipe_resource 
 *)temp;
 + struct pipe_box box;
 +
 + COMPUTE_DBG(pool-screen,   Growing the pool using a 
 temporary resource\n);
 +
 + u_box_1d(0, pool-size_in_dw * 4, box);
 +
 + rctx-b.b.resource_copy_region(pipe,
 + dst, 0, 0, 0 ,0,
 + src, 0, box);
 +
 + pool-screen-b.b.resource_destroy(
 + (struct pipe_screen *)pool-screen,
 + src);
 +
 + pool-bo = temp;
 + pool-size_in_dw = new_size_in_dw;
 + }
 + else {
 + COMPUTE_DBG(pool-screen,   The creation of the 
 temporary resource failed\n
 +   Falling back to using 'shadow'\n);
 +
 + compute_memory_shadow(pool, pipe, 1);
 + pool-shadow = realloc(pool-shadow, new_size_in_dw * 
 4);
 + if (pool-shadow == NULL)
 + return -1;
 +
 + pool-size_in_dw = new_size_in_dw;
 + pool-screen-b.b.resource_destroy(
 + (struct pipe_screen *)pool-screen,
 + (struct pipe_resource *)pool-bo);
 + pool-bo = (struct 
 r600_resource*)r600_compute_buffer_alloc_vram(
 + pool-screen,
 + pool-size_in_dw * 4);
 + compute_memory_shadow(pool, pipe, 0);
 + }
   }
  
   return 0;
 -- 
 2.0.1
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] What are some good beginner's tasks for Mesa?

2014-07-04 Thread Tom Stellard
On Thu, Jul 03, 2014 at 10:33:41PM -0500, Darius Goad wrote:
 Hello. I'm trying to get my feet wet with Mesa, and I was wondering
 what some good tasks for me would be. Thanks again.
 

What hardware do you have?  What are you interested in working on?

-Tom

 - Darius Goad
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/5] clover: Enable cl_khr_fp64 for devices that support doubles v2

2014-07-04 Thread Tom Stellard
On Fri, Jul 04, 2014 at 12:28:05PM +0200, Francisco Jerez wrote:
 Tom Stellard t...@stellard.net writes:
 
  On Fri, Jul 04, 2014 at 12:28:20AM +0200, Francisco Jerez wrote:
  Tom Stellard t...@stellard.net writes:
  
   On Thu, Jul 03, 2014 at 01:12:07AM +0200, Francisco Jerez wrote:
   Tom Stellard t...@stellard.net writes:
   
On Thu, Jun 26, 2014 at 04:15:39PM +0200, Francisco Jerez wrote:
Tom Stellard thomas.stell...@amd.com writes:

 v2:
   - Report correct values for CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 
 and
 CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE.
   - Only define cl_khr_fp64 if the extension is supported.
   - Remove trailing space from extension string.
   - Rename device query function from cl_khr_fp64() to 
 has_doubles().
 ---
  src/gallium/state_trackers/clover/api/device.cpp  | 6 +++---
  src/gallium/state_trackers/clover/core/device.cpp | 6 ++
  src/gallium/state_trackers/clover/core/device.hpp | 1 +
  src/gallium/state_trackers/clover/core/program.cpp| 5 -
  src/gallium/state_trackers/clover/llvm/invocation.cpp | 1 -
  5 files changed, 14 insertions(+), 5 deletions(-)

 diff --git a/src/gallium/state_trackers/clover/api/device.cpp 
 b/src/gallium/state_trackers/clover/api/device.cpp
 index 7006702..1176668 100644
 --- a/src/gallium/state_trackers/clover/api/device.cpp
 +++ b/src/gallium/state_trackers/clover/api/device.cpp
 @@ -145,7 +145,7 @@ clGetDeviceInfo(cl_device_id d_dev, 
 cl_device_info param,
break;
  
 case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE:
 -  buf.as_scalarcl_uint() = 2;
 +  buf.as_scalarcl_uint() = dev.has_doubles() ? 2 : 0;
break;
  
 case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF:
 @@ -290,7 +290,7 @@ clGetDeviceInfo(cl_device_id d_dev, 
 cl_device_info param,
break;
  
 case CL_DEVICE_EXTENSIONS:
 -  buf.as_string() = ;
 +  buf.as_string() = dev.has_doubles() ? cl_khr_fp64 : ;
break;
  
 case CL_DEVICE_PLATFORM:
 @@ -322,7 +322,7 @@ clGetDeviceInfo(cl_device_id d_dev, 
 cl_device_info param,
break;
  
 case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE:
 -  buf.as_scalarcl_uint() = 2;
 +  buf.as_scalarcl_uint() = dev.has_doubles() ? 2 : 0;
break;
  
 case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF:
 diff --git a/src/gallium/state_trackers/clover/core/device.cpp 
 b/src/gallium/state_trackers/clover/core/device.cpp
 index bc6b761..6bf33e0 100644
 --- a/src/gallium/state_trackers/clover/core/device.cpp
 +++ b/src/gallium/state_trackers/clover/core/device.cpp
 @@ -193,6 +193,12 @@ device::half_fp_config() const {
 return CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST;
  }
  
 +bool
 +device::has_doubles() const {
 +   return pipe-get_shader_param(pipe, PIPE_SHADER_COMPUTE,
 + PIPE_SHADER_CAP_DOUBLES);
 +}
 +
  std::vectorsize_t
  device::max_block_size() const {
 auto v = get_compute_paramuint64_t(pipe, 
 PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
 diff --git a/src/gallium/state_trackers/clover/core/device.hpp 
 b/src/gallium/state_trackers/clover/core/device.hpp
 index 16831ab..025c648 100644
 --- a/src/gallium/state_trackers/clover/core/device.hpp
 +++ b/src/gallium/state_trackers/clover/core/device.hpp
 @@ -66,6 +66,7 @@ namespace clover {
cl_device_fp_config single_fp_config() const;
cl_device_fp_config double_fp_config() const;
cl_device_fp_config half_fp_config() const;
 +  bool has_doubles() const;
  
std::vectorsize_t max_block_size() const;
std::string device_name() const;
 diff --git a/src/gallium/state_trackers/clover/core/program.cpp 
 b/src/gallium/state_trackers/clover/core/program.cpp
 index e09c3aa..f65f321 100644
 --- a/src/gallium/state_trackers/clover/core/program.cpp
 +++ b/src/gallium/state_trackers/clover/core/program.cpp
 @@ -95,7 +95,10 @@ program::build_status(const device dev) const {
  
  std::string
  program::build_opts(const device dev) const {
 -   return _opts.count(dev) ? _opts.find(dev)-second : ;
 +   std::string opts = _opts.count(dev) ? 
 _opts.find(dev)-second : ;
 +   if (dev.has_doubles())
 +  opts.append( -Dcl_khr_fp64);
 +   return opts;

This define belongs in the target-specific part of libclc.  With this
hunk removed this patch is:

   
The declarations for double functions in the libclc headers are 
wrapped in this
macro, so we need to set it here in order to be able to use them from 
clover.
   
   
   This abuses the ::build_opts() accessor to that end, which is only
   supposed to return the compiler

Re: [Mesa-dev] [PATCH 5/5] clover: Enable cl_khr_fp64 for devices that support doubles v2

2014-07-04 Thread Tom Stellard
On Fri, Jul 04, 2014 at 05:25:42PM +0200, Francisco Jerez wrote:
 Tom Stellard t...@stellard.net writes:
 
  On Fri, Jul 04, 2014 at 12:28:05PM +0200, Francisco Jerez wrote:
  Tom Stellard t...@stellard.net writes:
  
   On Fri, Jul 04, 2014 at 12:28:20AM +0200, Francisco Jerez wrote:
   Tom Stellard t...@stellard.net writes:
   
On Thu, Jul 03, 2014 at 01:12:07AM +0200, Francisco Jerez wrote:
Tom Stellard t...@stellard.net writes:

 On Thu, Jun 26, 2014 at 04:15:39PM +0200, Francisco Jerez wrote:
 Tom Stellard thomas.stell...@amd.com writes:
 
  v2:
- Report correct values for 
  CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE and
  CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE.
- Only define cl_khr_fp64 if the extension is supported.
- Remove trailing space from extension string.
 - Rename device query function from cl_khr_fp64() to 
  has_doubles().
  ---
   src/gallium/state_trackers/clover/api/device.cpp  | 6 
  +++---
   src/gallium/state_trackers/clover/core/device.cpp | 6 
  ++
   src/gallium/state_trackers/clover/core/device.hpp | 1 +
   src/gallium/state_trackers/clover/core/program.cpp| 5 -
   src/gallium/state_trackers/clover/llvm/invocation.cpp | 1 -
   5 files changed, 14 insertions(+), 5 deletions(-)
 
  diff --git a/src/gallium/state_trackers/clover/api/device.cpp 
  b/src/gallium/state_trackers/clover/api/device.cpp
  index 7006702..1176668 100644
  --- a/src/gallium/state_trackers/clover/api/device.cpp
  +++ b/src/gallium/state_trackers/clover/api/device.cpp
  @@ -145,7 +145,7 @@ clGetDeviceInfo(cl_device_id d_dev, 
  cl_device_info param,
 break;
   
  case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE:
  -  buf.as_scalarcl_uint() = 2;
  +  buf.as_scalarcl_uint() = dev.has_doubles() ? 2 : 0;
 break;
   
  case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF:
  @@ -290,7 +290,7 @@ clGetDeviceInfo(cl_device_id d_dev, 
  cl_device_info param,
 break;
   
  case CL_DEVICE_EXTENSIONS:
  -  buf.as_string() = ;
  +  buf.as_string() = dev.has_doubles() ? cl_khr_fp64 : ;
 break;
   
  case CL_DEVICE_PLATFORM:
  @@ -322,7 +322,7 @@ clGetDeviceInfo(cl_device_id d_dev, 
  cl_device_info param,
 break;
   
  case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE:
  -  buf.as_scalarcl_uint() = 2;
  +  buf.as_scalarcl_uint() = dev.has_doubles() ? 2 : 0;
 break;
   
  case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF:
  diff --git a/src/gallium/state_trackers/clover/core/device.cpp 
  b/src/gallium/state_trackers/clover/core/device.cpp
  index bc6b761..6bf33e0 100644
  --- a/src/gallium/state_trackers/clover/core/device.cpp
  +++ b/src/gallium/state_trackers/clover/core/device.cpp
  @@ -193,6 +193,12 @@ device::half_fp_config() const {
  return CL_FP_DENORM | CL_FP_INF_NAN | 
  CL_FP_ROUND_TO_NEAREST;
   }
   
  +bool
  +device::has_doubles() const {
  +   return pipe-get_shader_param(pipe, PIPE_SHADER_COMPUTE,
  + PIPE_SHADER_CAP_DOUBLES);
  +}
  +
   std::vectorsize_t
   device::max_block_size() const {
  auto v = get_compute_paramuint64_t(pipe, 
  PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
  diff --git a/src/gallium/state_trackers/clover/core/device.hpp 
  b/src/gallium/state_trackers/clover/core/device.hpp
  index 16831ab..025c648 100644
  --- a/src/gallium/state_trackers/clover/core/device.hpp
  +++ b/src/gallium/state_trackers/clover/core/device.hpp
  @@ -66,6 +66,7 @@ namespace clover {
 cl_device_fp_config single_fp_config() const;
 cl_device_fp_config double_fp_config() const;
 cl_device_fp_config half_fp_config() const;
  +  bool has_doubles() const;
   
 std::vectorsize_t max_block_size() const;
 std::string device_name() const;
  diff --git a/src/gallium/state_trackers/clover/core/program.cpp 
  b/src/gallium/state_trackers/clover/core/program.cpp
  index e09c3aa..f65f321 100644
  --- a/src/gallium/state_trackers/clover/core/program.cpp
  +++ b/src/gallium/state_trackers/clover/core/program.cpp
  @@ -95,7 +95,10 @@ program::build_status(const device dev) 
  const {
   
   std::string
   program::build_opts(const device dev) const {
  -   return _opts.count(dev) ? _opts.find(dev)-second : ;
  +   std::string opts = _opts.count(dev) ? 
  _opts.find(dev)-second : ;
  +   if (dev.has_doubles())
  +  opts.append( -Dcl_khr_fp64);
  +   return opts;
 
 This define belongs in the target-specific part of libclc.  With 
 this
 hunk removed this patch is:
 

 The declarations for double

Re: [Mesa-dev] [PATCH 1/2] R600/SI: fix shadow mapping for 1D and 2D array textures

2014-07-04 Thread Tom Stellard
On Thu, Jul 03, 2014 at 06:26:04PM +0200, Marek Olšák wrote:
 From: Marek Olšák marek.ol...@amd.com

Reviewed-by: Tom Stellard thomas.stell...@amd.com

 
 It was conflicting with def TEX_SHADOW_ARRAY, which also handles them.
 ---
  lib/Target/R600/R600Instructions.td | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/lib/Target/R600/R600Instructions.td 
 b/lib/Target/R600/R600Instructions.td
 index 73fa345..704507d 100644
 --- a/lib/Target/R600/R600Instructions.td
 +++ b/lib/Target/R600/R600Instructions.td
 @@ -216,7 +216,7 @@ class R600_REDUCTION bits11 inst, dag ins, string asm, 
 listdag pattern,
  def TEX_SHADOW : PatLeaf
(imm),
[{uint32_t TType = (uint32_t)N-getZExtValue();
 -return (TType = 6  TType = 8) || (TType = 11  TType = 13);
 +return (TType = 6  TType = 8) || TType == 13;
}]
  ;
  
 -- 
 1.9.1
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/5] clover: Enable cl_khr_fp64 for devices that support doubles v2

2014-07-03 Thread Tom Stellard
On Thu, Jul 03, 2014 at 01:12:07AM +0200, Francisco Jerez wrote:
 Tom Stellard t...@stellard.net writes:
 
  On Thu, Jun 26, 2014 at 04:15:39PM +0200, Francisco Jerez wrote:
  Tom Stellard thomas.stell...@amd.com writes:
  
   v2:
 - Report correct values for CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE and
   CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE.
 - Only define cl_khr_fp64 if the extension is supported.
 - Remove trailing space from extension string.
 - Rename device query function from cl_khr_fp64() to has_doubles().
   ---
src/gallium/state_trackers/clover/api/device.cpp  | 6 +++---
src/gallium/state_trackers/clover/core/device.cpp | 6 ++
src/gallium/state_trackers/clover/core/device.hpp | 1 +
src/gallium/state_trackers/clover/core/program.cpp| 5 -
src/gallium/state_trackers/clover/llvm/invocation.cpp | 1 -
5 files changed, 14 insertions(+), 5 deletions(-)
  
   diff --git a/src/gallium/state_trackers/clover/api/device.cpp 
   b/src/gallium/state_trackers/clover/api/device.cpp
   index 7006702..1176668 100644
   --- a/src/gallium/state_trackers/clover/api/device.cpp
   +++ b/src/gallium/state_trackers/clover/api/device.cpp
   @@ -145,7 +145,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info 
   param,
  break;

   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE:
   -  buf.as_scalarcl_uint() = 2;
   +  buf.as_scalarcl_uint() = dev.has_doubles() ? 2 : 0;
  break;

   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF:
   @@ -290,7 +290,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info 
   param,
  break;

   case CL_DEVICE_EXTENSIONS:
   -  buf.as_string() = ;
   +  buf.as_string() = dev.has_doubles() ? cl_khr_fp64 : ;
  break;

   case CL_DEVICE_PLATFORM:
   @@ -322,7 +322,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info 
   param,
  break;

   case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE:
   -  buf.as_scalarcl_uint() = 2;
   +  buf.as_scalarcl_uint() = dev.has_doubles() ? 2 : 0;
  break;

   case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF:
   diff --git a/src/gallium/state_trackers/clover/core/device.cpp 
   b/src/gallium/state_trackers/clover/core/device.cpp
   index bc6b761..6bf33e0 100644
   --- a/src/gallium/state_trackers/clover/core/device.cpp
   +++ b/src/gallium/state_trackers/clover/core/device.cpp
   @@ -193,6 +193,12 @@ device::half_fp_config() const {
   return CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST;
}

   +bool
   +device::has_doubles() const {
   +   return pipe-get_shader_param(pipe, PIPE_SHADER_COMPUTE,
   + PIPE_SHADER_CAP_DOUBLES);
   +}
   +
std::vectorsize_t
device::max_block_size() const {
   auto v = get_compute_paramuint64_t(pipe, 
   PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
   diff --git a/src/gallium/state_trackers/clover/core/device.hpp 
   b/src/gallium/state_trackers/clover/core/device.hpp
   index 16831ab..025c648 100644
   --- a/src/gallium/state_trackers/clover/core/device.hpp
   +++ b/src/gallium/state_trackers/clover/core/device.hpp
   @@ -66,6 +66,7 @@ namespace clover {
  cl_device_fp_config single_fp_config() const;
  cl_device_fp_config double_fp_config() const;
  cl_device_fp_config half_fp_config() const;
   +  bool has_doubles() const;

  std::vectorsize_t max_block_size() const;
  std::string device_name() const;
   diff --git a/src/gallium/state_trackers/clover/core/program.cpp 
   b/src/gallium/state_trackers/clover/core/program.cpp
   index e09c3aa..f65f321 100644
   --- a/src/gallium/state_trackers/clover/core/program.cpp
   +++ b/src/gallium/state_trackers/clover/core/program.cpp
   @@ -95,7 +95,10 @@ program::build_status(const device dev) const {

std::string
program::build_opts(const device dev) const {
   -   return _opts.count(dev) ? _opts.find(dev)-second : ;
   +   std::string opts = _opts.count(dev) ? _opts.find(dev)-second : ;
   +   if (dev.has_doubles())
   +  opts.append( -Dcl_khr_fp64);
   +   return opts;
  
  This define belongs in the target-specific part of libclc.  With this
  hunk removed this patch is:
  
 
  The declarations for double functions in the libclc headers are wrapped in 
  this
  macro, so we need to set it here in order to be able to use them from 
  clover.
 
 
 This abuses the ::build_opts() accessor to that end, which is only
 supposed to return the compiler options that were specified by the user
 at build time, as required by the CL_PROGRAM_BUILD_OPTIONS build param.
 

You are right, I can fix that.

 IMO preprocessor macros defined by the spec belong in the standard
 library.  We probably need a specialization of libclc's header files for
 each triple (I hadn't noticed you didn't have one already -- it will
 probably be useful for other reasons too), as you have target-specific
 specializations of the LLVM bitcode

Re: [Mesa-dev] [PATCH] radeon/llvm: Allocate space for kernel metadata operands

2014-07-03 Thread Tom Stellard
On Wed, Jul 02, 2014 at 04:34:24PM -0500, Aaron Watry wrote:
 Previously, we were assuming that kernel metadata nodes only had 1 operand.
 
 Kernels which have attributes can have more than 1, e.g.:
 !0 = metadata !{void (i32 addrspace(1)*)* @testKernel, metadata !1}
 !1 = metadata !{metadata !work_group_size_hint, i32 4, i32 1, i32 1}
 
 Attempting to get the kernel without the correct number of attributes led
 to memory corruption and luxrays crashing out.
 
 Fixes the cl/program/execute/attributes.cl piglit test.


Thanks for tracking this down.

Reviewed-by: Tom Stellard thomas.stell...@amd.com

 Signed-off-by: Aaron Watry awa...@gmail.com
 CC: Tom Stellard thomas.stell...@amd.com
 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76223
 ---
  src/gallium/drivers/radeon/radeon_llvm_util.c | 10 +++---
  1 file changed, 7 insertions(+), 3 deletions(-)
 
 diff --git a/src/gallium/drivers/radeon/radeon_llvm_util.c 
 b/src/gallium/drivers/radeon/radeon_llvm_util.c
 index 2ace91f..ec11559 100644
 --- a/src/gallium/drivers/radeon/radeon_llvm_util.c
 +++ b/src/gallium/drivers/radeon/radeon_llvm_util.c
 @@ -100,13 +100,17 @@ LLVMModuleRef 
 radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
   kernel_metadata = MALLOC(num_kernels * sizeof(LLVMValueRef));
   LLVMGetNamedMetadataOperands(mod, opencl.kernels, kernel_metadata);
   for (i = 0; i < num_kernels; i++) {
 - LLVMValueRef kernel_signature, kernel_function;
 + LLVMValueRef kernel_signature, *kernel_function;
 + unsigned num_kernel_md_operands;
   if (i == index) {
   continue;
   }
   kernel_signature = kernel_metadata[i];
 - LLVMGetMDNodeOperands(kernel_signature, kernel_function);
 - LLVMDeleteFunction(kernel_function);
 + num_kernel_md_operands = 
 LLVMGetMDNodeNumOperands(kernel_signature);
 + kernel_function = MALLOC(num_kernel_md_operands * sizeof 
 (LLVMValueRef));
 + LLVMGetMDNodeOperands(kernel_signature, kernel_function);
 + LLVMDeleteFunction(*kernel_function);
 + FREE(kernel_function);
   }
   FREE(kernel_metadata);
   radeon_llvm_optimize(mod);
 -- 
 1.9.1
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeon/llvm: Allocate space for kernel metadata operands

2014-07-03 Thread Tom Stellard
On Thu, Jul 03, 2014 at 10:56:24AM -0400, Tom Stellard wrote:
 On Wed, Jul 02, 2014 at 04:34:24PM -0500, Aaron Watry wrote:
  Previously, we were assuming that kernel metadata nodes only had 1 operand.
  
  Kernels which have attributes can have more than 1, e.g.:
  !0 = metadata !{void (i32 addrspace(1)*)* @testKernel, metadata !1}
  !1 = metadata !{metadata !work_group_size_hint, i32 4, i32 1, i32 1}
  
  Attempting to get the kernel without the correct number of attributes led
  to memory corruption and luxrays crashing out.
  
  Fixes the cl/program/execute/attributes.cl piglit test.
 
 
 Thanks for tracking this down.
 
 Reviewed-by: Tom Stellard thomas.stell...@amd.com

I forgot to mention we should CC stable on this patch.
 
  Signed-off-by: Aaron Watry awa...@gmail.com
  CC: Tom Stellard thomas.stell...@amd.com
  Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76223
  ---
   src/gallium/drivers/radeon/radeon_llvm_util.c | 10 +++---
   1 file changed, 7 insertions(+), 3 deletions(-)
  
  diff --git a/src/gallium/drivers/radeon/radeon_llvm_util.c 
  b/src/gallium/drivers/radeon/radeon_llvm_util.c
  index 2ace91f..ec11559 100644
  --- a/src/gallium/drivers/radeon/radeon_llvm_util.c
  +++ b/src/gallium/drivers/radeon/radeon_llvm_util.c
  @@ -100,13 +100,17 @@ LLVMModuleRef 
  radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
  kernel_metadata = MALLOC(num_kernels * sizeof(LLVMValueRef));
  LLVMGetNamedMetadataOperands(mod, opencl.kernels, kernel_metadata);
  for (i = 0; i  num_kernels; i++) {
  -   LLVMValueRef kernel_signature, kernel_function;
  +   LLVMValueRef kernel_signature, *kernel_function;
  +   unsigned num_kernel_md_operands;
  if (i == index) {
  continue;
  }
  kernel_signature = kernel_metadata[i];
  -   LLVMGetMDNodeOperands(kernel_signature, kernel_function);
  -   LLVMDeleteFunction(kernel_function);
  +   num_kernel_md_operands = 
  LLVMGetMDNodeNumOperands(kernel_signature);
  +   kernel_function = MALLOC(num_kernel_md_operands * sizeof 
  (LLVMValueRef));
  +   LLVMGetMDNodeOperands(kernel_signature, kernel_function);
  +   LLVMDeleteFunction(*kernel_function);
  +   FREE(kernel_function);
  }
  FREE(kernel_metadata);
  radeon_llvm_optimize(mod);
  -- 
  1.9.1
  
  ___
  mesa-dev mailing list
  mesa-dev@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/mesa-dev
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeon/llvm: Allocate space for kernel metadata operands

2014-07-03 Thread Tom Stellard
On Thu, Jul 03, 2014 at 11:59:00AM -0400, Alex Deucher wrote:
 On Thu, Jul 3, 2014 at 11:46 AM, Aaron Watry awa...@gmail.com wrote:
  On Thu, Jul 3, 2014 at 9:56 AM, Tom Stellard t...@stellard.net wrote:
  On Wed, Jul 02, 2014 at 04:34:24PM -0500, Aaron Watry wrote:
  Previously, we were assuming that kernel metadata nodes only had 1 
  operand.
 
  Kernels which have attributes can have more than 1, e.g.:
  !0 = metadata !{void (i32 addrspace(1)*)* @testKernel, metadata !1}
  !1 = metadata !{metadata !work_group_size_hint, i32 4, i32 1, i32 1}
 
  Attempting to get the kernel without the correct number of attributes led
  to memory corruption and luxrays crashing out.
 
  Fixes the cl/program/execute/attributes.cl piglit test.
 
 
  Thanks for tracking this down.
 
 
  no problem.  It was driving me nuts.  I've now got the luxmark kernels
  building successfully on evergreen (followed by a machine hang and
  loss of signal to the monitor, but that could be the kernel or the
  fact that CEDAR seems extra crashy compared to my other EG/NI cards)
 
 Someone mentioned stability issues with cedar with the golden register
 kernel patch.  Can you see if skipping the golden register setup
 helps?  If so can you narrow down which registers are problematic?
 

Another possibility is that we aren't correctly implementing the
workaround for the control flow stack hw bug on Cedar.  Since
it has a different wavefront size than other GPUs the bug is not handled
the same way.

You could try using FeatureWavefrontSize64 for cedar in Processors.td,
which would force the backend to use the same work-around on cedar as
it does for other GPUs.

-Tom

 Alex
 
  and I'm getting an instruction selection error on radeonsi.  Haven't
  managed to track that down yet, but at least it means that all
  required built-ins/defines for luxrays are now present (at least with
  my own libclc tree), at least with image support disabled in luxrays.
 
  If you enable image support, I believe that it is still going to fail
  due to mismatches/oddness with the number of supported pixel formats.
 
  --Aaron
 
  Reviewed-by: Tom Stellard thomas.stell...@amd.com
 
  Signed-off-by: Aaron Watry awa...@gmail.com
  CC: Tom Stellard thomas.stell...@amd.com
  Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76223
  ---
   src/gallium/drivers/radeon/radeon_llvm_util.c | 10 +++---
   1 file changed, 7 insertions(+), 3 deletions(-)
 
  diff --git a/src/gallium/drivers/radeon/radeon_llvm_util.c 
  b/src/gallium/drivers/radeon/radeon_llvm_util.c
  index 2ace91f..ec11559 100644
  --- a/src/gallium/drivers/radeon/radeon_llvm_util.c
  +++ b/src/gallium/drivers/radeon/radeon_llvm_util.c
  @@ -100,13 +100,17 @@ LLVMModuleRef 
  radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
kernel_metadata = MALLOC(num_kernels * sizeof(LLVMValueRef));
LLVMGetNamedMetadataOperands(mod, opencl.kernels, 
  kernel_metadata);
for (i = 0; i  num_kernels; i++) {
  - LLVMValueRef kernel_signature, kernel_function;
  + LLVMValueRef kernel_signature, *kernel_function;
  + unsigned num_kernel_md_operands;
if (i == index) {
continue;
}
kernel_signature = kernel_metadata[i];
  - LLVMGetMDNodeOperands(kernel_signature, kernel_function);
  - LLVMDeleteFunction(kernel_function);
  + num_kernel_md_operands = 
  LLVMGetMDNodeNumOperands(kernel_signature);
  + kernel_function = MALLOC(num_kernel_md_operands * sizeof 
  (LLVMValueRef));
  + LLVMGetMDNodeOperands(kernel_signature, kernel_function);
  + LLVMDeleteFunction(*kernel_function);
  + FREE(kernel_function);
}
FREE(kernel_metadata);
radeon_llvm_optimize(mod);
  --
  1.9.1
 
  ___
  mesa-dev mailing list
  mesa-dev@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/mesa-dev
  ___
  mesa-dev mailing list
  mesa-dev@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeon/llvm: Allocate space for kernel metadata operands

2014-07-03 Thread Tom Stellard
On Thu, Jul 03, 2014 at 11:55:25AM -0500, Aaron Watry wrote:
 On Thu, Jul 3, 2014 at 10:59 AM, Alex Deucher alexdeuc...@gmail.com wrote:
  Someone mentioned stability issues with cedar with the golden register
  kernel patch.  Can you see if skipping the golden register setup
  helps?  If so can you narrow down which registers are problematic?
 
 I'll give it a shot and see if it helps.  I can reliably break the
 machine currently with luxrays' slg4 program, and the gpu doesn't
 recover after 10-sec as it should.


Can you file a bug for this and post the output of R600_DEBUG=cs for this
program?  If I look at it I should be able to get an idea of what might
be causing the lockup.

-Tom

 I tried Tom's wavefront-size workaround with no apparent change.  It's
 possible that we're messing something up in the instruction
 selection/lowering, but I'll give the kernel change a try first.
 
 --Aaron
 
 
  Alex
 
  and I'm getting an instruction selection error on radeonsi.  Haven't
  managed to track that down yet, but at least it means that all
  required built-ins/defines for luxrays are now present (at least with
  my own libclc tree), at least with image support disabled in luxrays.
 
  If you enable image support, I believe that it is still going to fail
  due to mismatches/oddness with the number of supported pixel formats.
 
  --Aaron
 
  Reviewed-by: Tom Stellard thomas.stell...@amd.com
 
  Signed-off-by: Aaron Watry awa...@gmail.com
  CC: Tom Stellard thomas.stell...@amd.com
  Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76223
  ---
   src/gallium/drivers/radeon/radeon_llvm_util.c | 10 +++---
   1 file changed, 7 insertions(+), 3 deletions(-)
 
  diff --git a/src/gallium/drivers/radeon/radeon_llvm_util.c 
  b/src/gallium/drivers/radeon/radeon_llvm_util.c
  index 2ace91f..ec11559 100644
  --- a/src/gallium/drivers/radeon/radeon_llvm_util.c
  +++ b/src/gallium/drivers/radeon/radeon_llvm_util.c
  @@ -100,13 +100,17 @@ LLVMModuleRef 
  radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
kernel_metadata = MALLOC(num_kernels * sizeof(LLVMValueRef));
LLVMGetNamedMetadataOperands(mod, opencl.kernels, 
  kernel_metadata);
for (i = 0; i  num_kernels; i++) {
  - LLVMValueRef kernel_signature, kernel_function;
  + LLVMValueRef kernel_signature, *kernel_function;
  + unsigned num_kernel_md_operands;
if (i == index) {
continue;
}
kernel_signature = kernel_metadata[i];
  - LLVMGetMDNodeOperands(kernel_signature, kernel_function);
  - LLVMDeleteFunction(kernel_function);
  + num_kernel_md_operands = 
  LLVMGetMDNodeNumOperands(kernel_signature);
  + kernel_function = MALLOC(num_kernel_md_operands * sizeof 
  (LLVMValueRef));
  + LLVMGetMDNodeOperands(kernel_signature, kernel_function);
  + LLVMDeleteFunction(*kernel_function);
  + FREE(kernel_function);
}
FREE(kernel_metadata);
radeon_llvm_optimize(mod);
  --
  1.9.1
 
  ___
  mesa-dev mailing list
  mesa-dev@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/mesa-dev
  ___
  mesa-dev mailing list
  mesa-dev@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/5] clover: Enable cl_khr_fp64 for devices that support doubles v2

2014-07-03 Thread Tom Stellard
On Fri, Jul 04, 2014 at 12:28:20AM +0200, Francisco Jerez wrote:
 Tom Stellard t...@stellard.net writes:
 
  On Thu, Jul 03, 2014 at 01:12:07AM +0200, Francisco Jerez wrote:
  Tom Stellard t...@stellard.net writes:
  
   On Thu, Jun 26, 2014 at 04:15:39PM +0200, Francisco Jerez wrote:
   Tom Stellard thomas.stell...@amd.com writes:
   
v2:
  - Report correct values for CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE and
CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE.
  - Only define cl_khr_fp64 if the extension is supported.
  - Remove trailing space from extension string.
  - Rename device query function from cl_khr_fp86() to has_doubles().
---
 src/gallium/state_trackers/clover/api/device.cpp  | 6 +++---
 src/gallium/state_trackers/clover/core/device.cpp | 6 ++
 src/gallium/state_trackers/clover/core/device.hpp | 1 +
 src/gallium/state_trackers/clover/core/program.cpp| 5 -
 src/gallium/state_trackers/clover/llvm/invocation.cpp | 1 -
 5 files changed, 14 insertions(+), 5 deletions(-)
   
diff --git a/src/gallium/state_trackers/clover/api/device.cpp 
b/src/gallium/state_trackers/clover/api/device.cpp
index 7006702..1176668 100644
--- a/src/gallium/state_trackers/clover/api/device.cpp
+++ b/src/gallium/state_trackers/clover/api/device.cpp
@@ -145,7 +145,7 @@ clGetDeviceInfo(cl_device_id d_dev, 
cl_device_info param,
   break;
 
case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE:
-  buf.as_scalarcl_uint() = 2;
+  buf.as_scalarcl_uint() = dev.has_doubles() ? 2 : 0;
   break;
 
case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF:
@@ -290,7 +290,7 @@ clGetDeviceInfo(cl_device_id d_dev, 
cl_device_info param,
   break;
 
case CL_DEVICE_EXTENSIONS:
-  buf.as_string() = ;
+  buf.as_string() = dev.has_doubles() ? cl_khr_fp64 : ;
   break;
 
case CL_DEVICE_PLATFORM:
@@ -322,7 +322,7 @@ clGetDeviceInfo(cl_device_id d_dev, 
cl_device_info param,
   break;
 
case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE:
-  buf.as_scalarcl_uint() = 2;
+  buf.as_scalarcl_uint() = dev.has_doubles() ? 2 : 0;
   break;
 
case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF:
diff --git a/src/gallium/state_trackers/clover/core/device.cpp 
b/src/gallium/state_trackers/clover/core/device.cpp
index bc6b761..6bf33e0 100644
--- a/src/gallium/state_trackers/clover/core/device.cpp
+++ b/src/gallium/state_trackers/clover/core/device.cpp
@@ -193,6 +193,12 @@ device::half_fp_config() const {
return CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST;
 }
 
+bool
+device::has_doubles() const {
+   return pipe-get_shader_param(pipe, PIPE_SHADER_COMPUTE,
+ PIPE_SHADER_CAP_DOUBLES);
+}
+
 std::vectorsize_t
 device::max_block_size() const {
auto v = get_compute_paramuint64_t(pipe, 
PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
diff --git a/src/gallium/state_trackers/clover/core/device.hpp 
b/src/gallium/state_trackers/clover/core/device.hpp
index 16831ab..025c648 100644
--- a/src/gallium/state_trackers/clover/core/device.hpp
+++ b/src/gallium/state_trackers/clover/core/device.hpp
@@ -66,6 +66,7 @@ namespace clover {
   cl_device_fp_config single_fp_config() const;
   cl_device_fp_config double_fp_config() const;
   cl_device_fp_config half_fp_config() const;
+  bool has_doubles() const;
 
   std::vectorsize_t max_block_size() const;
   std::string device_name() const;
diff --git a/src/gallium/state_trackers/clover/core/program.cpp 
b/src/gallium/state_trackers/clover/core/program.cpp
index e09c3aa..f65f321 100644
--- a/src/gallium/state_trackers/clover/core/program.cpp
+++ b/src/gallium/state_trackers/clover/core/program.cpp
@@ -95,7 +95,10 @@ program::build_status(const device dev) const {
 
 std::string
 program::build_opts(const device dev) const {
-   return _opts.count(dev) ? _opts.find(dev)-second : ;
+   std::string opts = _opts.count(dev) ? _opts.find(dev)-second : 
;
+   if (dev.has_doubles())
+  opts.append( -Dcl_khr_fp64);
+   return opts;
   
   This define belongs in the target-specific part of libclc.  With this
   hunk removed this patch is:
   
  
   The declarations for double functions in the libclc headers are wrapped 
   in this
   macro, so we need to set it here in order to be able to use them from 
   clover.
  
  
  This abuses the ::build_opts() accessor to that end, which is only
  supposed to return the compiler options that were specified by the user
  at build time, as required by the CL_PROGRAM_BUILD_OPTIONS build param.
  
 
  You are right, I can fix that.
 
  IMO preprocessor macros defined by the spec belong in the standard
  library.  We

Re: [Mesa-dev] [PATCH 3/9] gallium: add PIPE_BIND_COMMAND_ARGS_BUFFER

2014-07-02 Thread Tom Stellard
On Sat, Jun 28, 2014 at 01:12:49PM +0200, Marek Olšák wrote:
 The one that increases the number of input SGPRs to 22 (16 user + 6
 streamout) in the calling convention. I don't remember the name.
 Please git blame on the calling convention.
 

I merged this patch into LLVM 3.4.2, which was released last week, so
you don't have to wait until LLVM 3.5.

-Tom

 Marek
 
 On Fri, Jun 27, 2014 at 5:26 PM, Tom Stellard t...@stellard.net wrote:
  On Tue, Jun 17, 2014 at 01:51:10AM +0200, Marek Olšák wrote:
  Since LLVM 3.5 will be released in August and my radeon patches adding
 
  Which LLVM patches are required for ARB_draw_indirect?
 
  -Tom
 
  ARB_draw_indirect depend on it, I will commit ARB_draw_indirect
  support for Gallium with softpipe and llvmpipe support earlier. My
  plan is for patches 3,4,5,6 to get committed in one week from now, or
  sooner if somebody reviews them.
 
  Marek
 
  On Sat, Apr 26, 2014 at 3:27 PM, Marek Olšák mar...@gmail.com wrote:
   From: Christoph Bumiller e0425...@student.tuwien.ac.at
  
   Intended for use with GL_ARB_draw_indirect's DRAW_INDIRECT_BUFFER
   target or for D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS.
   ---
src/gallium/docs/source/screen.rst   | 3 +++
src/gallium/include/pipe/p_defines.h | 1 +
2 files changed, 4 insertions(+)
  
   diff --git a/src/gallium/docs/source/screen.rst 
   b/src/gallium/docs/source/screen.rst
   index 89cbdbf..65885b9 100644
   --- a/src/gallium/docs/source/screen.rst
   +++ b/src/gallium/docs/source/screen.rst
   @@ -354,6 +354,9 @@ resources might be created and handled quite 
   differently.
  bound to the graphics pipeline as a shader resource.
* ``PIPE_BIND_COMPUTE_RESOURCE``: A buffer or texture that can be
  bound to the compute program as a shader resource.
   +* ``PIPE_BIND_COMMAND_ARGS_BUFFER``: A buffer that may be sourced by the
   +  GPU command processor. It can contain, for example, the arguments to
   +  indirect draw calls.
  
.. _pipe_usage:
  
   diff --git a/src/gallium/include/pipe/p_defines.h 
   b/src/gallium/include/pipe/p_defines.h
   index a3a1ae1..4d5d833 100644
   --- a/src/gallium/include/pipe/p_defines.h
   +++ b/src/gallium/include/pipe/p_defines.h
   @@ -350,6 +350,7 @@ enum pipe_flush_flags {
#define PIPE_BIND_GLOBAL   (1  18) /* set_global_binding 
   */
#define PIPE_BIND_SHADER_RESOURCE  (1  19) /* 
   set_shader_resources */
#define PIPE_BIND_COMPUTE_RESOURCE (1  20) /* 
   set_compute_resources */
   +#define PIPE_BIND_COMMAND_ARGS_BUFFER  (1  21) /* 
   pipe_draw_info.indirect */
  
/* The first two flags above were previously part of the amorphous
 * TEXTURE_USAGE, most of which are now descriptions of the ways a
   --
   1.8.3.2
  
  ___
  mesa-dev mailing list
  mesa-dev@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeon/llvm: Use the llvm.rsq.clamped intrinsic for RSQ

2014-07-02 Thread Tom Stellard
On Wed, Jul 02, 2014 at 06:29:25PM +0200, Laurent Carlier wrote:
 Le mercredi 25 juin 2014, 11:58:47 Michel Dänzer a écrit :
  On 25.06.2014 09:15, Tom Stellard wrote:
   https://bugs.freedesktop.org/show_bug.cgi?id=80015
   
   CC: 10.1 10.2 mesa-sta...@lists.freedesktop.org
   ---
   
src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
   
   diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
   b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index
   217fa32..119e613 100644
   --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
   +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
   @@ -1385,7 +1385,7 @@ void radeon_llvm_context_init(struct
   radeon_llvm_context * ctx) 
 bld_base-rsq_action.emit = build_tgsi_intrinsic_nomem;

#if HAVE_LLVM >= 0x0305
   
   - bld_base-rsq_action.intr_name = llvm.AMDGPU.rsq.;
   + bld_base-rsq_action.intr_name = llvm.AMDGPU.rsq.clamped.f32;
   
#else

 bld_base-rsq_action.intr_name = llvm.AMDGPU.rsq;

#endif
  
  Reviewed-and-Tested-by: Michel Dänzer michel.daen...@amd.com
 
 Tested-by: Laurent Carlier lordhea...@gmail.com
 
 Anyone to commit it?

I've committed it.  Thanks for testing.

-Tom

 -- 
 Laurent Carlier
 http://www.archlinux.org



 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/5] clover: Enable cl_khr_fp64 for devices that support doubles v2

2014-07-02 Thread Tom Stellard
On Thu, Jun 26, 2014 at 04:15:39PM +0200, Francisco Jerez wrote:
 Tom Stellard thomas.stell...@amd.com writes:
 
  v2:
- Report correct values for CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE and
  CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE.
- Only define cl_khr_fp64 if the extension is supported.
- Remove trailing space from extension string.
- Rename device query function from cl_khr_fp86() to has_doubles().
  ---
   src/gallium/state_trackers/clover/api/device.cpp  | 6 +++---
   src/gallium/state_trackers/clover/core/device.cpp | 6 ++
   src/gallium/state_trackers/clover/core/device.hpp | 1 +
   src/gallium/state_trackers/clover/core/program.cpp| 5 -
   src/gallium/state_trackers/clover/llvm/invocation.cpp | 1 -
   5 files changed, 14 insertions(+), 5 deletions(-)
 
  diff --git a/src/gallium/state_trackers/clover/api/device.cpp 
  b/src/gallium/state_trackers/clover/api/device.cpp
  index 7006702..1176668 100644
  --- a/src/gallium/state_trackers/clover/api/device.cpp
  +++ b/src/gallium/state_trackers/clover/api/device.cpp
  @@ -145,7 +145,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info 
  param,
 break;
   
  case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE:
  -  buf.as_scalarcl_uint() = 2;
  +  buf.as_scalarcl_uint() = dev.has_doubles() ? 2 : 0;
 break;
   
  case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF:
  @@ -290,7 +290,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info 
  param,
 break;
   
  case CL_DEVICE_EXTENSIONS:
  -  buf.as_string() = ;
  +  buf.as_string() = dev.has_doubles() ? cl_khr_fp64 : ;
 break;
   
  case CL_DEVICE_PLATFORM:
  @@ -322,7 +322,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info 
  param,
 break;
   
  case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE:
  -  buf.as_scalarcl_uint() = 2;
  +  buf.as_scalarcl_uint() = dev.has_doubles() ? 2 : 0;
 break;
   
  case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF:
  diff --git a/src/gallium/state_trackers/clover/core/device.cpp 
  b/src/gallium/state_trackers/clover/core/device.cpp
  index bc6b761..6bf33e0 100644
  --- a/src/gallium/state_trackers/clover/core/device.cpp
  +++ b/src/gallium/state_trackers/clover/core/device.cpp
  @@ -193,6 +193,12 @@ device::half_fp_config() const {
  return CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST;
   }
   
  +bool
  +device::has_doubles() const {
  +   return pipe-get_shader_param(pipe, PIPE_SHADER_COMPUTE,
  + PIPE_SHADER_CAP_DOUBLES);
  +}
  +
   std::vectorsize_t
   device::max_block_size() const {
  auto v = get_compute_paramuint64_t(pipe, 
  PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
  diff --git a/src/gallium/state_trackers/clover/core/device.hpp 
  b/src/gallium/state_trackers/clover/core/device.hpp
  index 16831ab..025c648 100644
  --- a/src/gallium/state_trackers/clover/core/device.hpp
  +++ b/src/gallium/state_trackers/clover/core/device.hpp
  @@ -66,6 +66,7 @@ namespace clover {
 cl_device_fp_config single_fp_config() const;
 cl_device_fp_config double_fp_config() const;
 cl_device_fp_config half_fp_config() const;
  +  bool has_doubles() const;
   
 std::vectorsize_t max_block_size() const;
 std::string device_name() const;
  diff --git a/src/gallium/state_trackers/clover/core/program.cpp 
  b/src/gallium/state_trackers/clover/core/program.cpp
  index e09c3aa..f65f321 100644
  --- a/src/gallium/state_trackers/clover/core/program.cpp
  +++ b/src/gallium/state_trackers/clover/core/program.cpp
  @@ -95,7 +95,10 @@ program::build_status(const device dev) const {
   
   std::string
   program::build_opts(const device dev) const {
  -   return _opts.count(dev) ? _opts.find(dev)-second : ;
  +   std::string opts = _opts.count(dev) ? _opts.find(dev)-second : ;
  +   if (dev.has_doubles())
  +  opts.append( -Dcl_khr_fp64);
  +   return opts;
 
 This define belongs in the target-specific part of libclc.  With this
 hunk removed this patch is:
 

The declarations for double functions in the libclc headers are wrapped in this
macro, so we need to set it here in order to be able to use them from clover.

-Tom

 Reviewed-by: Francisco Jerez curroje...@riseup.net
 
   }
   
   std::string
  diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp 
  b/src/gallium/state_trackers/clover/llvm/invocation.cpp
  index 5d2efc4..f2b4fd9 100644
  --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
  +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
  @@ -183,7 +183,6 @@ namespace {
   
 // clc.h requires that this macro be defined:
 
  c.getPreprocessorOpts().addMacroDef(cl_clang_storage_class_specifiers);
  -  c.getPreprocessorOpts().addMacroDef(cl_khr_fp64);
   
 c.getLangOpts().NoBuiltin = true;
 c.getTargetOpts().Triple = triple;
  -- 
  1.8.1.5
 
  ___
  mesa-dev mailing list
  mesa

[Mesa-dev] [PATCH 2/2] clover: Enable cl_khr_fp64 for devices that support doubles v3

2014-07-02 Thread Tom Stellard
v2:
  - Report correct values for CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE
and CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE.
  - Only define cl_khr_fp64 if the extension is supported.
  - Remove trailing space from extension string.
  - Rename device query function from cl_khr_fp64() to
has_doubles().

v3:
  - Return 0 for device::double_fp_config() when doubles aren't
supported.
---
 src/gallium/state_trackers/clover/api/device.cpp| 10 +++---
 src/gallium/state_trackers/clover/core/device.cpp   | 21 +
 src/gallium/state_trackers/clover/core/device.hpp   |  2 ++
 src/gallium/state_trackers/clover/core/program.cpp  |  5 -
 .../state_trackers/clover/llvm/invocation.cpp   |  1 -
 5 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/src/gallium/state_trackers/clover/api/device.cpp 
b/src/gallium/state_trackers/clover/api/device.cpp
index 3b91e9e..5427492 100644
--- a/src/gallium/state_trackers/clover/api/device.cpp
+++ b/src/gallium/state_trackers/clover/api/device.cpp
@@ -145,7 +145,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
   break;
 
case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE:
-  buf.as_scalarcl_uint() = 2;
+  buf.as_scalarcl_uint() = dev.has_doubles() ? 2 : 0;
   break;
 
case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF:
@@ -204,6 +204,10 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
   buf.as_scalarcl_device_fp_config() = dev.single_fp_config();
   break;
 
+   case CL_DEVICE_DOUBLE_FP_CONFIG:
+  buf.as_scalarcl_device_fp_config() = dev.double_fp_config();
+  break;
+
case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE:
   buf.as_scalarcl_device_mem_cache_type() = CL_NONE;
   break;
@@ -282,7 +286,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
   break;
 
case CL_DEVICE_EXTENSIONS:
-  buf.as_string() = ;
+  buf.as_string() = dev.has_doubles() ? cl_khr_fp64 : ;
   break;
 
case CL_DEVICE_PLATFORM:
@@ -314,7 +318,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
   break;
 
case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE:
-  buf.as_scalarcl_uint() = 2;
+  buf.as_scalarcl_uint() = dev.has_doubles() ? 2 : 0;
   break;
 
case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF:
diff --git a/src/gallium/state_trackers/clover/core/device.cpp 
b/src/gallium/state_trackers/clover/core/device.cpp
index 498e7d9..cfbe95a 100644
--- a/src/gallium/state_trackers/clover/core/device.cpp
+++ b/src/gallium/state_trackers/clover/core/device.cpp
@@ -175,6 +175,27 @@ device::single_fp_config() const {
return CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST;
 }
 
+cl_device_fp_config
+device::double_fp_config() const {
+   if (!has_doubles())
+  return 0;
+
+   // TODO: Get these from somewhere. This is the mandated minimum double
+   // precision floating-point capability
+   return CL_FP_FMA
+   | CL_FP_ROUND_TO_NEAREST
+   | CL_FP_ROUND_TO_ZERO
+   | CL_FP_ROUND_TO_INF
+   | CL_FP_INF_NAN
+   | CL_FP_DENORM;
+}
+
+bool
+device::has_doubles() const {
+   return pipe-get_shader_param(pipe, PIPE_SHADER_COMPUTE,
+ PIPE_SHADER_CAP_DOUBLES);
+}
+
 std::vectorsize_t
 device::max_block_size() const {
auto v = get_compute_paramuint64_t(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
diff --git a/src/gallium/state_trackers/clover/core/device.hpp 
b/src/gallium/state_trackers/clover/core/device.hpp
index 93f9091..a1899bc 100644
--- a/src/gallium/state_trackers/clover/core/device.hpp
+++ b/src/gallium/state_trackers/clover/core/device.hpp
@@ -64,6 +64,8 @@ namespace clover {
   cl_uint max_clock_frequency() const;
   cl_uint max_compute_units() const;
   cl_device_fp_config single_fp_config() const;
+  cl_device_fp_config double_fp_config() const;
+  bool has_doubles() const;
 
   std::vectorsize_t max_block_size() const;
   std::string device_name() const;
diff --git a/src/gallium/state_trackers/clover/core/program.cpp 
b/src/gallium/state_trackers/clover/core/program.cpp
index e09c3aa..f65f321 100644
--- a/src/gallium/state_trackers/clover/core/program.cpp
+++ b/src/gallium/state_trackers/clover/core/program.cpp
@@ -95,7 +95,10 @@ program::build_status(const device dev) const {
 
 std::string
 program::build_opts(const device dev) const {
-   return _opts.count(dev) ? _opts.find(dev)-second : ;
+   std::string opts = _opts.count(dev) ? _opts.find(dev)-second : ;
+   if (dev.has_doubles())
+  opts.append( -Dcl_khr_fp64);
+   return opts;
 }
 
 std::string
diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp 
b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index 5d2efc4..f2b4fd9 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -183,7 +183,6 @@ namespace {
 
   // clc.h requires that this macro be defined:
   

[Mesa-dev] [PATCH 1/2] clover: Report a default value for CL_DEVICE_SINGLE_FP_CONFIG

2014-07-02 Thread Tom Stellard
---
 src/gallium/state_trackers/clover/api/device.cpp  | 3 +--
 src/gallium/state_trackers/clover/core/device.cpp | 6 ++
 src/gallium/state_trackers/clover/core/device.hpp | 1 +
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/clover/api/device.cpp 
b/src/gallium/state_trackers/clover/api/device.cpp
index 97b2cf9..3b91e9e 100644
--- a/src/gallium/state_trackers/clover/api/device.cpp
+++ b/src/gallium/state_trackers/clover/api/device.cpp
@@ -201,8 +201,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
   break;
 
case CL_DEVICE_SINGLE_FP_CONFIG:
-  buf.as_scalarcl_device_fp_config() =
- CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST;
+  buf.as_scalarcl_device_fp_config() = dev.single_fp_config();
   break;
 
case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE:
diff --git a/src/gallium/state_trackers/clover/core/device.cpp 
b/src/gallium/state_trackers/clover/core/device.cpp
index b6078db..498e7d9 100644
--- a/src/gallium/state_trackers/clover/core/device.cpp
+++ b/src/gallium/state_trackers/clover/core/device.cpp
@@ -169,6 +169,12 @@ device::max_compute_units() const {
   PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS)[0];
 }
 
+cl_device_fp_config
+device::single_fp_config() const {
+   // TODO: Get these from somewhere.
+   return CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST;
+}
+
 std::vectorsize_t
 device::max_block_size() const {
auto v = get_compute_paramuint64_t(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
diff --git a/src/gallium/state_trackers/clover/core/device.hpp 
b/src/gallium/state_trackers/clover/core/device.hpp
index 731c31e..93f9091 100644
--- a/src/gallium/state_trackers/clover/core/device.hpp
+++ b/src/gallium/state_trackers/clover/core/device.hpp
@@ -63,6 +63,7 @@ namespace clover {
   cl_ulong max_mem_alloc_size() const;
   cl_uint max_clock_frequency() const;
   cl_uint max_compute_units() const;
+  cl_device_fp_config single_fp_config() const;
 
   std::vectorsize_t max_block_size() const;
   std::string device_name() const;
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/9] gallium: add PIPE_BIND_COMMAND_ARGS_BUFFER

2014-06-27 Thread Tom Stellard
On Tue, Jun 17, 2014 at 01:51:10AM +0200, Marek Olšák wrote:
 Since LLVM 3.5 will be released in August and my radeon patches adding

Which LLVM patches are required for ARB_draw_indirect?

-Tom

 ARB_draw_indirect depend on it, I will commit ARB_draw_indirect
 support for Gallium with softpipe and llvmpipe support earlier. My
 plan is for patches 3,4,5,6 to get committed in one week from now, or
 sooner if somebody reviews them.
 
 Marek
 
 On Sat, Apr 26, 2014 at 3:27 PM, Marek Olšák mar...@gmail.com wrote:
  From: Christoph Bumiller e0425...@student.tuwien.ac.at
 
  Intended for use with GL_ARB_draw_indirect's DRAW_INDIRECT_BUFFER
  target or for D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS.
  ---
   src/gallium/docs/source/screen.rst   | 3 +++
   src/gallium/include/pipe/p_defines.h | 1 +
   2 files changed, 4 insertions(+)
 
  diff --git a/src/gallium/docs/source/screen.rst 
  b/src/gallium/docs/source/screen.rst
  index 89cbdbf..65885b9 100644
  --- a/src/gallium/docs/source/screen.rst
  +++ b/src/gallium/docs/source/screen.rst
  @@ -354,6 +354,9 @@ resources might be created and handled quite 
  differently.
 bound to the graphics pipeline as a shader resource.
   * ``PIPE_BIND_COMPUTE_RESOURCE``: A buffer or texture that can be
 bound to the compute program as a shader resource.
  +* ``PIPE_BIND_COMMAND_ARGS_BUFFER``: A buffer that may be sourced by the
  +  GPU command processor. It can contain, for example, the arguments to
  +  indirect draw calls.
 
   .. _pipe_usage:
 
  diff --git a/src/gallium/include/pipe/p_defines.h 
  b/src/gallium/include/pipe/p_defines.h
  index a3a1ae1..4d5d833 100644
  --- a/src/gallium/include/pipe/p_defines.h
  +++ b/src/gallium/include/pipe/p_defines.h
  @@ -350,6 +350,7 @@ enum pipe_flush_flags {
   #define PIPE_BIND_GLOBAL   (1  18) /* set_global_binding */
   #define PIPE_BIND_SHADER_RESOURCE  (1  19) /* set_shader_resources */
   #define PIPE_BIND_COMPUTE_RESOURCE (1  20) /* set_compute_resources 
  */
  +#define PIPE_BIND_COMMAND_ARGS_BUFFER  (1  21) /* 
  pipe_draw_info.indirect */
 
   /* The first two flags above were previously part of the amorphous
* TEXTURE_USAGE, most of which are now descriptions of the ways a
  --
  1.8.3.2
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/5] clover: Enable cl_khr_fp64 for devices that support doubles v2

2014-06-25 Thread Tom Stellard
v2:
  - Report correct values for CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE and
CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE.
  - Only define cl_khr_fp64 if the extension is supported.
  - Remove trailing space from extension string.
  - Rename device query function from cl_khr_fp64() to has_doubles().
---
 src/gallium/state_trackers/clover/api/device.cpp  | 6 +++---
 src/gallium/state_trackers/clover/core/device.cpp | 6 ++
 src/gallium/state_trackers/clover/core/device.hpp | 1 +
 src/gallium/state_trackers/clover/core/program.cpp| 5 -
 src/gallium/state_trackers/clover/llvm/invocation.cpp | 1 -
 5 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/gallium/state_trackers/clover/api/device.cpp 
b/src/gallium/state_trackers/clover/api/device.cpp
index 7006702..1176668 100644
--- a/src/gallium/state_trackers/clover/api/device.cpp
+++ b/src/gallium/state_trackers/clover/api/device.cpp
@@ -145,7 +145,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
   break;
 
case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE:
-  buf.as_scalarcl_uint() = 2;
+  buf.as_scalarcl_uint() = dev.has_doubles() ? 2 : 0;
   break;
 
case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF:
@@ -290,7 +290,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
   break;
 
case CL_DEVICE_EXTENSIONS:
-  buf.as_string() = ;
+  buf.as_string() = dev.has_doubles() ? cl_khr_fp64 : ;
   break;
 
case CL_DEVICE_PLATFORM:
@@ -322,7 +322,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
   break;
 
case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE:
-  buf.as_scalarcl_uint() = 2;
+  buf.as_scalarcl_uint() = dev.has_doubles() ? 2 : 0;
   break;
 
case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF:
diff --git a/src/gallium/state_trackers/clover/core/device.cpp 
b/src/gallium/state_trackers/clover/core/device.cpp
index bc6b761..6bf33e0 100644
--- a/src/gallium/state_trackers/clover/core/device.cpp
+++ b/src/gallium/state_trackers/clover/core/device.cpp
@@ -193,6 +193,12 @@ device::half_fp_config() const {
return CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST;
 }
 
+bool
+device::has_doubles() const {
+   return pipe-get_shader_param(pipe, PIPE_SHADER_COMPUTE,
+ PIPE_SHADER_CAP_DOUBLES);
+}
+
 std::vectorsize_t
 device::max_block_size() const {
auto v = get_compute_paramuint64_t(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
diff --git a/src/gallium/state_trackers/clover/core/device.hpp 
b/src/gallium/state_trackers/clover/core/device.hpp
index 16831ab..025c648 100644
--- a/src/gallium/state_trackers/clover/core/device.hpp
+++ b/src/gallium/state_trackers/clover/core/device.hpp
@@ -66,6 +66,7 @@ namespace clover {
   cl_device_fp_config single_fp_config() const;
   cl_device_fp_config double_fp_config() const;
   cl_device_fp_config half_fp_config() const;
+  bool has_doubles() const;
 
   std::vectorsize_t max_block_size() const;
   std::string device_name() const;
diff --git a/src/gallium/state_trackers/clover/core/program.cpp 
b/src/gallium/state_trackers/clover/core/program.cpp
index e09c3aa..f65f321 100644
--- a/src/gallium/state_trackers/clover/core/program.cpp
+++ b/src/gallium/state_trackers/clover/core/program.cpp
@@ -95,7 +95,10 @@ program::build_status(const device dev) const {
 
 std::string
 program::build_opts(const device dev) const {
-   return _opts.count(dev) ? _opts.find(dev)-second : ;
+   std::string opts = _opts.count(dev) ? _opts.find(dev)-second : ;
+   if (dev.has_doubles())
+  opts.append( -Dcl_khr_fp64);
+   return opts;
 }
 
 std::string
diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp 
b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index 5d2efc4..f2b4fd9 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -183,7 +183,6 @@ namespace {
 
   // clc.h requires that this macro be defined:
   c.getPreprocessorOpts().addMacroDef(cl_clang_storage_class_specifiers);
-  c.getPreprocessorOpts().addMacroDef(cl_khr_fp64);
 
   c.getLangOpts().NoBuiltin = true;
   c.getTargetOpts().Triple = triple;
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/5] clover: Have compat::string allocate its own memory.

2014-06-25 Thread Tom Stellard
From: Francisco Jerez curroje...@riseup.net

---
 src/gallium/state_trackers/clover/api/kernel.cpp  | 4 +++-
 src/gallium/state_trackers/clover/util/compat.hpp | 8 
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/gallium/state_trackers/clover/api/kernel.cpp 
b/src/gallium/state_trackers/clover/api/kernel.cpp
index 96cf302..05cc392 100644
--- a/src/gallium/state_trackers/clover/api/kernel.cpp
+++ b/src/gallium/state_trackers/clover/api/kernel.cpp
@@ -58,7 +58,9 @@ clCreateKernelsInProgram(cl_program d_prog, cl_uint count,
 
if (rd_kerns)
   copy(map([](const module::symbol sym) {
-   return desc(new kernel(prog, compat::string(sym.name),
+   return desc(new kernel(prog,
+  std::string(sym.name.begin(),
+  sym.name.end()),
   range(sym.args)));
 }, syms),
  rd_kerns);
diff --git a/src/gallium/state_trackers/clover/util/compat.hpp 
b/src/gallium/state_trackers/clover/util/compat.hpp
index e68d9df..28601e8 100644
--- a/src/gallium/state_trackers/clover/util/compat.hpp
+++ b/src/gallium/state_trackers/clover/util/compat.hpp
@@ -72,7 +72,7 @@ namespace clover {
  vector(const vector v) : p(alloc(v.n, v.p, v.n)), n(v.n) {
  }
 
- vector(iterator p, size_type n) : p(alloc(n, p, n)), n(n) {
+ vector(const_iterator p, size_type n) : p(alloc(n, p, n)), n(n) {
  }
 
  templatetypename C
@@ -263,13 +263,13 @@ namespace clover {
  size_t offset;
   };
 
-  class string : public vector_refconst char {
+  class string : public vectorchar {
   public:
- string(const char *p) : vector_ref(p, std::strlen(p)) {
+ string(const char *p) : vector(p, std::strlen(p)) {
  }
 
  templatetypename C
- string(const C v) : vector_ref(v) {
+ string(const C v) : vector(v) {
  }
 
  operator std::string() const {
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/5] gallium: Add PIPE_SHADER_CAP_DOUBLES

2014-06-25 Thread Tom Stellard
This is for reporting whether or not double precision floating-point
operations are supported.

Reviewed-by: Francisco Jerez curroje...@riseup.net
---
 src/gallium/auxiliary/gallivm/lp_bld_limits.h | 2 ++
 src/gallium/auxiliary/tgsi/tgsi_exec.h| 2 ++
 src/gallium/docs/source/screen.rst| 2 ++
 src/gallium/drivers/radeonsi/si_pipe.c| 5 +
 src/gallium/include/pipe/p_defines.h  | 3 ++-
 5 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h 
b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
index 6cb0949..9ccaf46 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
@@ -126,6 +126,8 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
   return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
   return 1;
+   case PIPE_SHADER_CAP_DOUBLES:
+  return 0;
}
/* if we get here, we missed a shader cap above (and should have seen
 * a compiler warning.)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h 
b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index d53c4ba..56a7034 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -456,6 +456,8 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
   return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
   return 1;
+   case PIPE_SHADER_CAP_DOUBLES:
+  return 0;
}
/* if we get here, we missed a shader cap above (and should have seen
 * a compiler warning.)
diff --git a/src/gallium/docs/source/screen.rst 
b/src/gallium/docs/source/screen.rst
index 1a80b04..9522090 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -282,6 +282,8 @@ to be 0.
   program.  It should be one of the ``pipe_shader_ir`` enum values.
 * ``PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS``: The maximum number of texture
   sampler views. Must not be lower than PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS.
+* ``PIPE_SHADER_CAP_DOUBLES``: Whether double precision floating-point
+  operations are supported.
 
 
 .. _pipe_compute_cap:
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 07f4291..6263c70 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -325,6 +325,9 @@ static int si_get_shader_param(struct pipe_screen* pscreen, 
unsigned shader, enu
switch (param) {
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_LLVM;
+   case PIPE_SHADER_CAP_DOUBLES:
+   return 0; /* XXX: Enable doubles once the compiler can
+handle them. */
default:
return 0;
}
@@ -376,6 +379,8 @@ static int si_get_shader_param(struct pipe_screen* pscreen, 
unsigned shader, enu
return 16;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+   case PIPE_SHADER_CAP_DOUBLES:
+   return 0;
}
return 0;
 }
diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 90f6493..9a9963d 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -617,7 +617,8 @@ enum pipe_shader_cap
PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS,
PIPE_SHADER_CAP_PREFERRED_IR,
PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED,
-   PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS
+   PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS,
+   PIPE_SHADER_CAP_DOUBLES
 };
 
 /**
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/5] clover: Report default values for half and double fp configs v2

2014-06-25 Thread Tom Stellard
From: Matt Arsenault arse...@gmail.com

v2:
  -Fix indentation
---
 src/gallium/state_trackers/clover/api/device.cpp  | 11 +--
 src/gallium/state_trackers/clover/core/device.cpp | 24 +++
 src/gallium/state_trackers/clover/core/device.hpp |  3 +++
 3 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/clover/api/device.cpp 
b/src/gallium/state_trackers/clover/api/device.cpp
index 97b2cf9..7006702 100644
--- a/src/gallium/state_trackers/clover/api/device.cpp
+++ b/src/gallium/state_trackers/clover/api/device.cpp
@@ -201,8 +201,15 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
   break;
 
case CL_DEVICE_SINGLE_FP_CONFIG:
-  buf.as_scalarcl_device_fp_config() =
- CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST;
+  buf.as_scalarcl_device_fp_config() = dev.single_fp_config();
+  break;
+
+   case CL_DEVICE_DOUBLE_FP_CONFIG:
+  buf.as_scalarcl_device_fp_config() = dev.double_fp_config();
+  break;
+
+   case CL_DEVICE_HALF_FP_CONFIG:
+  buf.as_scalarcl_device_fp_config() = dev.half_fp_config();
   break;
 
case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE:
diff --git a/src/gallium/state_trackers/clover/core/device.cpp 
b/src/gallium/state_trackers/clover/core/device.cpp
index b6078db..bc6b761 100644
--- a/src/gallium/state_trackers/clover/core/device.cpp
+++ b/src/gallium/state_trackers/clover/core/device.cpp
@@ -169,6 +169,30 @@ device::max_compute_units() const {
   PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS)[0];
 }
 
+cl_device_fp_config
+device::single_fp_config() const {
+   // TODO: Get these from somewhere.
+   return CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST;
+}
+
+cl_device_fp_config
+device::double_fp_config() const {
+   // TODO: Get these from somewhere. This is the mandated minimum double
+   // precision floating-point capability
+   return CL_FP_FMA
+   | CL_FP_ROUND_TO_NEAREST
+   | CL_FP_ROUND_TO_ZERO
+   | CL_FP_ROUND_TO_INF
+   | CL_FP_INF_NAN
+   | CL_FP_DENORM;
+}
+
+cl_device_fp_config
+device::half_fp_config() const {
+   // TODO: Get these from somewhere.
+   return CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST;
+}
+
 std::vectorsize_t
 device::max_block_size() const {
auto v = get_compute_paramuint64_t(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
diff --git a/src/gallium/state_trackers/clover/core/device.hpp 
b/src/gallium/state_trackers/clover/core/device.hpp
index 731c31e..16831ab 100644
--- a/src/gallium/state_trackers/clover/core/device.hpp
+++ b/src/gallium/state_trackers/clover/core/device.hpp
@@ -63,6 +63,9 @@ namespace clover {
   cl_ulong max_mem_alloc_size() const;
   cl_uint max_clock_frequency() const;
   cl_uint max_compute_units() const;
+  cl_device_fp_config single_fp_config() const;
+  cl_device_fp_config double_fp_config() const;
+  cl_device_fp_config half_fp_config() const;
 
   std::vectorsize_t max_block_size() const;
   std::string device_name() const;
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/5] clover: Fix not setting build log if the build succeeds v2

2014-06-25 Thread Tom Stellard
From: Matt Arsenault arse...@gmail.com

If there were only warnings, they would not be added to the log.
Also fixes valgrind use after free errors.

v2:
  - Use compat::string.
---
 src/gallium/state_trackers/clover/core/compiler.hpp   |  3 ++-
 src/gallium/state_trackers/clover/core/error.hpp  |  4 ++--
 src/gallium/state_trackers/clover/core/program.cpp| 11 +++
 src/gallium/state_trackers/clover/llvm/invocation.cpp | 16 ++--
 src/gallium/state_trackers/clover/util/compat.hpp |  3 +++
 5 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/src/gallium/state_trackers/clover/core/compiler.hpp 
b/src/gallium/state_trackers/clover/core/compiler.hpp
index 49cd022..6ef84d1 100644
--- a/src/gallium/state_trackers/clover/core/compiler.hpp
+++ b/src/gallium/state_trackers/clover/core/compiler.hpp
@@ -32,7 +32,8 @@ namespace clover {
module compile_program_llvm(const compat::string source,
pipe_shader_ir ir,
const compat::string target,
-   const compat::string opts);
+   const compat::string opts,
+   compat::string r_log);
 
module compile_program_tgsi(const compat::string source);
 }
diff --git a/src/gallium/state_trackers/clover/core/error.hpp 
b/src/gallium/state_trackers/clover/core/error.hpp
index 28459f3..cecbe9b 100644
--- a/src/gallium/state_trackers/clover/core/error.hpp
+++ b/src/gallium/state_trackers/clover/core/error.hpp
@@ -66,8 +66,8 @@ namespace clover {
 
class build_error : public error {
public:
-  build_error(const compat::string log) :
- error(CL_BUILD_PROGRAM_FAILURE, log) {
+  build_error(const compat::string what = ) :
+ error(CL_BUILD_PROGRAM_FAILURE, what) {
   }
};
 
diff --git a/src/gallium/state_trackers/clover/core/program.cpp 
b/src/gallium/state_trackers/clover/core/program.cpp
index 3aaa652..e09c3aa 100644
--- a/src/gallium/state_trackers/clover/core/program.cpp
+++ b/src/gallium/state_trackers/clover/core/program.cpp
@@ -52,15 +52,18 @@ program::build(const ref_vectordevice devs, const char 
*opts) {
 
  _opts.insert({ dev, opts });
 
+ compat::string log;
+
  try {
 auto module = (dev.ir_format() == PIPE_SHADER_IR_TGSI ?
compile_program_tgsi(_source) :
compile_program_llvm(_source, dev.ir_format(),
-dev.ir_target(), 
build_opts(dev)));
+dev.ir_target(), 
build_opts(dev),
+log));
 _binaries.insert({ dev, module });
-
- } catch (build_error e) {
-_logs.insert({ dev, e.what() });
+_logs.insert({ dev, std::string(log.c_str()) });
+ } catch (const build_error ) {
+_logs.insert({ dev, std::string(log.c_str()) });
 throw;
  }
   }
diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp 
b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index c3daa81..5d2efc4 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -120,7 +120,7 @@ namespace {
compile(llvm::LLVMContext llvm_ctx, const std::string source,
const std::string name, const std::string triple,
const std::string processor, const std::string opts,
-   clang::LangAS::Map address_spaces) {
+   clang::LangAS::Map address_spaces, compat::string r_log) {
 
   clang::CompilerInstance c;
   clang::EmitLLVMOnlyAction act(llvm_ctx);
@@ -224,11 +224,14 @@ namespace {
   c.getCodeGenOpts().LinkBitcodeFile = libclc_path;
 
   // Compile the code
-  if (!c.ExecuteAction(act))
- throw build_error(log);
+  bool ExecSuccess = c.ExecuteAction(act);
+  r_log = log;
+
+  if (!ExecSuccess)
+ throw build_error();
 
   // Get address spaces map to be able to find kernel argument address 
space
-  memcpy(address_spaces, c.getTarget().getAddressSpaceMap(), 
+  memcpy(address_spaces, c.getTarget().getAddressSpaceMap(),
 
sizeof(address_spaces));
 
   return act.takeModule();
@@ -391,7 +394,8 @@ module
 clover::compile_program_llvm(const compat::string source,
  enum pipe_shader_ir ir,
  const compat::string target,
- const compat::string opts) {
+ const compat::string opts,
+ compat::string r_log) {
 
std::vectorllvm::Function * kernels;
size_t processor_str_len = std::string(target.begin()).find_first_of(-);
@@ -405,7 +409,7 @@ clover::compile_program_llvm(const compat::string source,
// The input file 

Re: [Mesa-dev] [PATCH 1/1] r600: Fix use after free in compute_memory_promote_item.

2014-06-24 Thread Tom Stellard
On Tue, Jun 24, 2014 at 08:39:28AM -0400, Jan Vesely wrote:
 On Mon, 2014-06-23 at 17:15 +0200, Bruno Jimenez wrote:
  On Mon, 2014-06-23 at 10:39 -0400, Jan Vesely wrote:
   The dst pointer needs to be initialized after any calls to
compute_memory_grow_pool, as the function might change the pool->bo 
   pointer.
  
  You are completely right. Good catch.
  
  Reviewed-by: Bruno Jiménez brunoji...@gmail.com
 
 Thanks for the review; now we just need a volunteer to push this...
 

I just pushed this patch, thanks!

-Tom

  Sorry for any inconvenience this may have caused.
 
 no worries, none caused
 
  
   This fixes crashes and assertion failures in two gegl tests.
   
   Signed-off-by: Jan Vesely jan.ves...@rutgers.edu
   CC: Bruno Jimenez brunoji...@gmail.com
   CC: Tom Stellard thomas.stell...@amd.com
   ---
src/gallium/drivers/r600/compute_memory_pool.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
   
   diff --git a/src/gallium/drivers/r600/compute_memory_pool.c 
   b/src/gallium/drivers/r600/compute_memory_pool.c
   index a78ff1e..9cb16f8 100644
   --- a/src/gallium/drivers/r600/compute_memory_pool.c
   +++ b/src/gallium/drivers/r600/compute_memory_pool.c
   @@ -308,8 +308,8 @@ int compute_memory_promote_item(struct 
   compute_memory_pool *pool,
{
 struct pipe_screen *screen = (struct pipe_screen *)pool-screen;
 struct r600_context *rctx = (struct r600_context *)pipe;
   - struct pipe_resource *dst = (struct pipe_resource *)pool-bo;
 struct pipe_resource *src = (struct pipe_resource *)item-real_buffer;
   + struct pipe_resource *dst = NULL;
 struct pipe_box box;

 struct list_head *pos;
   @@ -339,6 +339,7 @@ int compute_memory_promote_item(struct 
   compute_memory_pool *pool,
 if (err == -1)
 return -1;
 }
   + dst = (struct pipe_resource *)pool-bo;
 COMPUTE_DBG(pool-screen,   + Found space for Item %p id = %u 
 start_in_dw = %u (%u bytes) size_in_dw = %u (%u 
   bytes)\n,
 item, item-id, start_in_dw, start_in_dw * 4,
  
  
 
 -- 
 Jan Vesely jan.ves...@rutgers.edu



 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeon/llvm: Use the llvm.rsq.clamped intrinsic for RSQ

2014-06-24 Thread Tom Stellard
https://bugs.freedesktop.org/show_bug.cgi?id=80015

CC: 10.1 10.2 mesa-sta...@lists.freedesktop.org
---
 src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 217fa32..119e613 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -1385,7 +1385,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context 
* ctx)
 
bld_base-rsq_action.emit = build_tgsi_intrinsic_nomem;
 #if HAVE_LLVM = 0x0305
-   bld_base-rsq_action.intr_name = llvm.AMDGPU.rsq.;
+   bld_base-rsq_action.intr_name = llvm.AMDGPU.rsq.clamped.f32;
 #else
bld_base-rsq_action.intr_name = llvm.AMDGPU.rsq;
 #endif
-- 
1.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/5] clover: Fix not setting build log if the build succeeds.

2014-06-23 Thread Tom Stellard
On Sat, Jun 21, 2014 at 06:33:17PM +0200, Francisco Jerez wrote:
 Tom Stellard thomas.stell...@amd.com writes:
 
  From: Matt Arsenault arse...@gmail.com
 
  If there were only warnings, they would not be added to the log.
  Also fixes valgrind use after free errors.
  ---
   src/gallium/state_trackers/clover/core/compiler.hpp   |  3 ++-
   src/gallium/state_trackers/clover/core/error.hpp  |  2 +-
   src/gallium/state_trackers/clover/core/program.cpp| 11 +++
   src/gallium/state_trackers/clover/llvm/invocation.cpp | 14 +++---
   4 files changed, 17 insertions(+), 13 deletions(-)
 
  diff --git a/src/gallium/state_trackers/clover/core/compiler.hpp 
  b/src/gallium/state_trackers/clover/core/compiler.hpp
  index 49cd022..3ce132f 100644
  --- a/src/gallium/state_trackers/clover/core/compiler.hpp
  +++ b/src/gallium/state_trackers/clover/core/compiler.hpp
  @@ -32,7 +32,8 @@ namespace clover {
  module compile_program_llvm(const compat::string source,
  pipe_shader_ir ir,
  const compat::string target,
  -   const compat::string opts);
  +   const compat::string opts,
  +   std::string log_out);
   
 
 This doesn't work.  I'm afraid you need to use compat::string on the
 compiler interface because the C++98 and C++11 versions of std::string
 are not guaranteed to be binary compatible.  This mess will go away once
 we can drop support for the non-C++11 versions of LLVM.  Have a look at
 the attached patch for the memory management issues with compat::string.
 

Even with your patch, I'm still having trouble getting this to work.  What
is the correct pattern here?  I know I need to use compat::string in the
function signature, but what type should I pass to the
compile_program_llvm() function from program::build()?  A std::string, a
compat::string, or something else?

-Tom

 And maybe rename the output argument to r_log as is usual everywhere
 else in clover?
 
  module compile_program_tgsi(const compat::string source);
   }
  diff --git a/src/gallium/state_trackers/clover/core/error.hpp 
  b/src/gallium/state_trackers/clover/core/error.hpp
  index 28459f3..9802195 100644
  --- a/src/gallium/state_trackers/clover/core/error.hpp
  +++ b/src/gallium/state_trackers/clover/core/error.hpp
  @@ -66,7 +66,7 @@ namespace clover {
   
  class build_error : public error {
  public:
  -  build_error(const compat::string log) :
  +  build_error(const compat::string log = ) :
 
 Can you rename the argument to "what", as it's no longer going to hold
 the compilation log?
 
error(CL_BUILD_PROGRAM_FAILURE, log) {
 }
  };
  diff --git a/src/gallium/state_trackers/clover/core/program.cpp 
  b/src/gallium/state_trackers/clover/core/program.cpp
  index 3aaa652..91ee553 100644
  --- a/src/gallium/state_trackers/clover/core/program.cpp
  +++ b/src/gallium/state_trackers/clover/core/program.cpp
  @@ -52,15 +52,18 @@ program::build(const ref_vectordevice devs, const 
  char *opts) {
   
_opts.insert({ dev, opts });
   
  + std::string build_log;
  +
try {
   auto module = (dev.ir_format() == PIPE_SHADER_IR_TGSI ?
  compile_program_tgsi(_source) :
  compile_program_llvm(_source, dev.ir_format(),
  -dev.ir_target(), 
  build_opts(dev)));
  +dev.ir_target(), 
  build_opts(dev),
  +build_log));
   _binaries.insert({ dev, module });
  -
  - } catch (build_error e) {
  -_logs.insert({ dev, e.what() });
  +_logs.insert({ dev, build_log });
  + } catch (const build_error ) {
  +_logs.insert({ dev, build_log });
   throw;
}
 }
  diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp 
  b/src/gallium/state_trackers/clover/llvm/invocation.cpp
  index 48810bd..0dc1f50 100644
  --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
  +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
  @@ -120,12 +120,11 @@ namespace {
  compile(llvm::LLVMContext llvm_ctx, const std::string source,
  const std::string name, const std::string triple,
  const std::string processor, const std::string opts,
  -   clang::LangAS::Map address_spaces) {
  +   clang::LangAS::Map address_spaces, std::string log_out) {
   
 clang::CompilerInstance c;
 clang::EmitLLVMOnlyAction act(llvm_ctx);
  -  std::string log;
  -  llvm::raw_string_ostream s_log(log);
  +  llvm::raw_string_ostream s_log(log_out);
 std::string libclc_path = LIBCLC_LIBEXECDIR + processor + -
 + triple + .bc

Re: [Mesa-dev] [PATCH 00/11] [RFC v2] Solve the mapping bug

2014-06-23 Thread Tom Stellard
On Sun, Jun 22, 2014 at 04:05:49PM +0200, Francisco Jerez wrote:
 Bruno Jimenez brunoji...@gmail.com writes:
 
  On Sat, 2014-06-21 at 17:39 +0200, Francisco Jerez wrote:
 [...]
  The implementation of PIPE_TRANSFER_MAP_DIRECTLY introduced in PATCH 10
  has somewhat worrying semantics: A mapping with this flag might become
  stale unpredictably if a kernel is run, maybe from a different command
  queue.  Clover's transfer functions don't hit that path right now on
  single-threaded applications, but they might in the future as we start
  accelerating the APIs we currently implement with soft_copy_op().  This
  is a bug IMHO: even direct mappings should last until the corresponding
  unmap call.
 
  I think I'm not fully understanding you here. I tried to use
  PIPE_TRANSFER_MAP_DIRECTLY only with clEnqueue{Write,Read} functions,
  which map the resource, copy it and unmap it when finished. Is it
  possible for another kernel to access the memory of a buffer that is
  being read/written?
 
 AFAICT, yes.  All command queues created on the same device share the
 same memory pool, so a kernel being queued for execution in one could
 invalidate a concurrent mapping done with PIPE_TRANSFER_MAP_DIRECTLY by
 one of the transfer functions.  On top of that the transfer functions
 might start queuing kernels themselves in the future to accelerate
 certain operations we currently do on the CPU, which would make this
 scenario more likely.
 
  I had no intention of having user mappings made with that flag.
  [Although a possible solution, with a lot of warnings of course, for the
  above problem could be to allow a user to use this flag]
 
  I'm not advocating a revert of the series because it fixes a serious
  bug, but please don't push patches 10-11, we should probably start
  looking for a different solution.  Some suggestions are:
 
  I also asked for them to not to be pushed. And with your reasons, until
  we find a better way or we change how buffers are handled, I won't
  propose them again.
 
   - Why do you even need a pool?  Wouldn't it be possible to create a
 huge RAT, e.g. covering a 4GB portion of the GPU memory and then use
 a special memory domain or some sort of flag to tell the kernel to
 allocate a buffer from that region (or relocate if it's already been
 allocated elsewhere)?  This is especially easy on hardware with
 virtual memory, as you could simply reserve an arbitrarily large
 block of virtual memory, bind it as e.g. RAT0, and then map other
 buffer objects into the block on-demand as they're bound to the
 compute pipeline -- There would be no need to move the actual bits
 around.  This is similar to the approach I used in my original
 proof-of-concept implementation of the compute API on nv50.
 
  This is one of the things I have been wondering about recently: given that
  radeonsi doesn't use a pool, why r600 needs one? I still have to
  understand AMD docs and how *exactly* everything works.
 
 
 Probably because on SI compute kernels can access random locations of
 memory without going through an RAT?  I have little actual experience
 with radeons, Tom should know the low-level details.


The reason there is no memory pool in radeonsi is because SI and newer support
virtual memory, so there is already one contiguous address space and also
because there is no limit to the number of resources that can be accessed by
a shader.

-Tom
 
  4GB seems like a big amount of memory for me, my little cedar has only
  512MB :)
 
   - If you insist on using a pool, you could (mostly) avoid the storage
 duplication and the mapping copies by allocating buffer objects
 directly from the pool as it was before this series, and then keep
 some sort of reference count specific to the pool storage that would
 be incremented on map and decremented on unmap.  Once you need to
 grow the pool you'd keep the old storage around temporarily and
 migrate buffers to the new storage lazily as they are required or
 unmapped.  Once the reference count drops to zero you'd be free to
 release the backing BO to the system.  The fact that you'd keep both
 storage buffers around for a bit means that you'd be able to use DMA
 to migrate the pool contents instead of the CPU copies you're doing
 now, which is likely to be substantially more efficient.
 
  I see how this would solve the slow mappings problem, but I think that
  it could mean a higher memory usage. In the case of a user creating some
  buffers, mapping one of them and then adding more so that the pool has
  to grow, we would have to keep the full size of the old pool just for a
  buffer, plus the new pool.
 
 
 That's a fair point, this solution would only get rid of the extra
 copying but it wouldn't solve memory usage problem in some situations
 (long-lived mappings).  IMHO the former is more worrying because it has
 an impact on every map operation no matter what, while the increased
 

Re: [Mesa-dev] [PATCH 00/11] [RFC v2] Solve the mapping bug

2014-06-20 Thread Tom Stellard
On Wed, Jun 18, 2014 at 05:01:50PM +0200, Bruno Jiménez wrote:
 Hi,
 
 This is my second attempt to fix the mapping bug adding all the
 suggestions that Tom Stellard sent, and, so far, it seems that
 it is resolved.
 
 This series changes completely how OpenCL buffers are handled
 by the r600g driver. Before this, we would add them directly to
 a pool, and this pool would grow whenever we needed more space.
 But this process implied destroying the pool and creating a new
 one. There could be cases where a buffer would be mapped and
 the pool would grow, leaving one side of the mapping pointed
 to where the item was. This is the 'mapping bug'
 
 Now, Items will have an intermediate resource, where all mappings
 can be done, and when a buffer is going to be used with a kernel
 it is promoted to the pool. In the case where a promoted item
 is going to be mapped, it is previously demoted, so even if
 the pool changes its location due to growing, the map remains
 valid. In the case of a buffer mapped for reading, and used
 by a kernel to read from it, we will duplicate this buffer,
 having the intermediate buffer, where the user has its map, and
 an item in the pool, which is the one that the kernel is going
 to use.


I've just pushed patches 1-9.  Nice work!

-Tom

 As a summary for v2:
 Patches 1-8: These are the main part of the series, and solve
 the mapping bug.
 Patches 1 and 7 now use less explicit castings
 Patch 2 is new and introduces the 'is_item_in_pool'
 function, which is used in patches 3 and 8
 
 Patch 9: Is a complete rewrite of v1 patch 8 using gallium
 utils for double lists
 
 Patches 10 and 11: These are just a proof of concept for avoiding
 transfers GPU - GPU when using all CL Read/Write functions.
 They are v1 patch 9 split in two to separate r600g changes
 from clover changes.
 Now, in clover's side it introduces and uses
 'CLOVER_TRANSFER_MAP_DIRECTLY' so it doesn't collide with
 any other OpenCL flag.
 
 Please review and Thanks :)
 
 Bruno Jiménez (11):
   r600g/compute: Add an intermediate resource for OpenCL buffers
   r600g/compute: Add an util function to know if an item is in the pool
   r600g/compute: Add statuses to the compute_memory_items
   r600g/compute: divide the item list in two
   r600g/compute: Only move to the pool the buffers marked for promoting
   r600g/compute: Avoid problems when promoting items mapped for reading
   r600g/compute: Implement compute_memory_demote_item
   r600g/compute: Map only against intermediate buffers
   r600g/compute: Use gallium util functions for double lists
   r600g/compute: Map directly the pool in some cases
   clover: Use PIPE_TRANSFER_MAP_DIRECTLY when writing/reading buffers
 
  src/gallium/drivers/r600/compute_memory_pool.c | 294 
 -
  src/gallium/drivers/r600/compute_memory_pool.h |  31 ++-
  src/gallium/drivers/r600/evergreen_compute.c   |  38 ++-
  src/gallium/state_trackers/clover/api/transfer.cpp |   4 +-
  src/gallium/state_trackers/clover/core/object.hpp  |   4 +
  .../state_trackers/clover/core/resource.cpp|   2 +
  6 files changed, 233 insertions(+), 140 deletions(-)
 
 -- 
 2.0.0
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallivm: Fix build after LLVM commit 211259

2014-06-20 Thread Tom Stellard
On Fri, Jun 20, 2014 at 07:14:43PM -0500, Aaron Watry wrote:
 Signed-off-by: Aaron Watry awa...@gmail.com
Reviewed-by: Tom Stellard thomas.stell...@amd.com
 ---
  src/gallium/auxiliary/gallivm/lp_bld_debug.cpp | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)
 
 diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp 
 b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
 index df26883..413a0c2 100644
 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
 +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
 @@ -51,7 +51,9 @@
  #include llvm/MC/MCInstPrinter.h
  #include llvm/MC/MCRegisterInfo.h
  
 -#if HAVE_LLVM = 0x0303
 +#if HAVE_LLVM = 0x0305
 +#define OwningPtr std::unique_ptr
 +#elif HAVE_LLVM = 0x0303
  #include llvm/ADT/OwningPtr.h
  #endif
  
 -- 
 1.9.1
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeon/llvm: Adapt to AMDGPU.rsq intrinsic change in LLVM 3.5

2014-06-19 Thread Tom Stellard
On Thu, Jun 19, 2014 at 03:53:42PM +0900, Michel Dänzer wrote:
 From: Michel Dänzer michel.daen...@amd.com
 

I just pushed this patch to fix the regressions.  We can update the
other intrinsics in a follow on patch.

We also need to change RSQ to use llvm.AMDGPU.rsq.clamped once
http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20140616/80.html
is committed in order to fix
https://bugs.freedesktop.org/show_bug.cgi?id=80015

Also, I forgot to do this before I committed it, but I think this patch
should go to stable.

-Tom

 Signed-off-by: Michel Dänzer michel.daen...@amd.com
 ---
  src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 4 
  1 file changed, 4 insertions(+)
 
 diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
 b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
 index f8be0df..217fa32 100644
 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
 +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
 @@ -1384,7 +1384,11 @@ void radeon_llvm_context_init(struct 
 radeon_llvm_context * ctx)
   bld_base-op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
  
   bld_base-rsq_action.emit = build_tgsi_intrinsic_nomem;
 +#if HAVE_LLVM = 0x0305
 + bld_base-rsq_action.intr_name = llvm.AMDGPU.rsq.;
 +#else
   bld_base-rsq_action.intr_name = llvm.AMDGPU.rsq;
 +#endif
  }
  
  void radeon_llvm_create_func(struct radeon_llvm_context * ctx,
 -- 
 2.0.0
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] r600: Fix possible endless loop in compute_memory_pool allocations.

2014-06-19 Thread Tom Stellard
On Thu, Jun 19, 2014 at 10:21:32AM -0400, Jan Vesely wrote:
 The important part is the change of the condition to <= 0. Otherwise the loop
 gets stuck never actually growing the pool.
 
 The change in the aux-need calculation guarantees max 2 iterations, and
 avoids wasting memory in case a smaller item can't fit into a relatively 
 larger
 pool.


Does this patch obsolete the XXX comment around line 292 of this file?  If so,
we should remove it.

Also, have you tried this with patches 1-9 of this series?
http://lists.freedesktop.org/archives/mesa-dev/2014-June/061742.html

-Tom
 
 Signed-off-by: Jan Vesely jan.ves...@rutgers.edu
 CC: Bruno Jimenez brunoji...@gmail.com
 ---
 
 This fixes hang in gegl colors.xml test
 
  src/gallium/drivers/r600/compute_memory_pool.c | 7 +--
  1 file changed, 5 insertions(+), 2 deletions(-)
 
 diff --git a/src/gallium/drivers/r600/compute_memory_pool.c 
 b/src/gallium/drivers/r600/compute_memory_pool.c
 index ec8c470..0b6d2da6 100644
 --- a/src/gallium/drivers/r600/compute_memory_pool.c
 +++ b/src/gallium/drivers/r600/compute_memory_pool.c
 @@ -320,8 +320,11 @@ int compute_memory_finalize_pending(struct 
 compute_memory_pool* pool,
   int64_t need = item-size_in_dw+2048 -
   (pool-size_in_dw - allocated);
  
 - if (need  0) {
 - need = pool-size_in_dw / 10;
 + if (need = 0) {
 + /* There's enough free space, but it's too
 +  * fragmented. Assume half of the item can fit
 +  * int the last chunk */
 + need = (item-size_in_dw / 2) + ITEM_ALIGNMENT;
   }
  
   need = align(need, ITEM_ALIGNMENT);
 -- 
 1.9.3
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] r600: Handle failures in compute_memory_pool_finalize

2014-06-19 Thread Tom Stellard
On Thu, Jun 19, 2014 at 11:22:28AM -0400, Jan Vesely wrote:
 On Thu, 2014-06-19 at 17:12 +0200, Bruno Jimenez wrote:
  Hi,
  
  To which failure are you referring? Could you please send me a
  test/program that I can try to track this down?
 
 well, the compute_memory_finalize_pending() function can possibly return
 -1 so it's prudent to check for it.
 
 as for the testcase, I replaced the inside of 'if (need <= 0)' in the
 previous patch with return -1 (to simulate failure). Then I used GEGL
 test op colors.xml to trigger the situation.
 
 but gegl needs some extra patches to get working on current mesa/clover.
 I can send you log with R600_DEBUG=compute if it helps.
 

Have you ever looked into integrating the gegl tests with piglit, like we've
done for opencv?  This would make it much easier for other devs to execute
these tests.

-Tom

 
 regards,
 Jan
 
 
  
  Thanks!
  Bruno
  
  On Thu, 2014-06-19 at 10:21 -0400, Jan Vesely wrote:
   Signed-off-by: Jan Vesely jan.ves...@rutgers.edu
   CC: Bruno Jimenez brunoji...@gmail.com
   ---
   
   The failure now hits assertion compute_memory_pool.c:408, instead of
   u_inlines.h:275:pipe_buffer_map_range: Assertion `offset < buffer->width0'
   
src/gallium/drivers/r600/evergreen_compute.c | 9 +++--
1 file changed, 7 insertions(+), 2 deletions(-)
   
   diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
   b/src/gallium/drivers/r600/evergreen_compute.c
   index a2abf15..bd6e720 100644
   --- a/src/gallium/drivers/r600/evergreen_compute.c
   +++ b/src/gallium/drivers/r600/evergreen_compute.c
   @@ -659,7 +659,10 @@ static void evergreen_set_global_binding(
 return;
 }

   - compute_memory_finalize_pending(pool, ctx_);
   + if (compute_memory_finalize_pending(pool, ctx_) == -1) {
   + /* XXX: Unset */
   + return;
   + }

 for (int i = 0; i  n; i++)
 {
   @@ -967,7 +970,9 @@ void *r600_compute_global_transfer_map(
 %u (box.x)\n, buffer-chunk-id, box-x);

   
   - compute_memory_finalize_pending(pool, ctx_);
   + if (compute_memory_finalize_pending(pool, ctx_) == -1) {
   + return NULL;
   + }

 assert(resource-target == PIPE_BUFFER);
 assert(resource-bind  PIPE_BIND_GLOBAL);
  
  
 
 -- 
 Jan Vesely jan.ves...@rutgers.edu



 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/23] Megadrivers galore

2014-06-18 Thread Tom Stellard
On Tue, Jun 17, 2014 at 07:38:16PM +0100, Emil Velikov wrote:
 Hi all,
 
 As a follow up to the static/shared pipe-drivers series here is the final
 series (if anyone is interested I may take a look at egl + opencl) of
 refactoring the gallium dri targets into a single library/provider.
 

Hi Emil,

One common problem I run into when using the pipe drivers is if there
is an undefined symbol in the pipe_*.so then it will silently fail to
load, and with OpenCL for example it won't report any devices which can
be confusing for users.  I would recommend adding some error handling to
util_dlopen, so that it prints an error message when it fails to load a
shared object.

Other than that, it's hard to review a series like this, but I'll
give it an:

Acked-by: Tom Stellard thomas.stell...@amd.com

Since I like the general approach.

-Tom

 In a nutshell:
  - Convert one target per patch.
  - Merge the drm and sw backends of our dri state-tracker.
  - Adds __driDriverGetExtensions_$drivername symbol for each driver.
  - Megadrivers.
  - ***
  - Profit.
 
 Things work like a charm for nouveau and swrast, and testing on other
 platforms is greatly appreciated.
 
 The complete series can be found in the static-or-shared-pipe-drivers-v2
 branch at my github repo.
 
 I would like to get this reviewed/pushed over the next month, although
 that depends on the number of bugs that come up with the previous batch.
 
 As always comments, suggestions and flame is welcome.
 
 Cheers,
 Emil
 
 Emil Velikov (23):
   targets/dri-swrast: use drm aware dricommon when building more than 
 swrast
   st/dri: Allow separate dri-targets
   st/dri/drm: Add a second libdridrm library
   targets/dri-nouveau: Convert to static/shared pipe-drivers
   targets/(r300|r600|radeonsi)/dri: Convert to static/shared pipe-drivers
   targets/dri-freedreno: Convert to static/shared pipe-drivers
   targets/dri-i915: Convert to static/shared pipe-drivers
   targets/dri-ilo: Convert to static/shared pipe-driver
   targets/dri-vmwgfx: Convert to static/shared pipe-drivers
   st/dri: Remove the old libdridrm library
   targets/dri: Add __driDriverGetExtensions_nouveau symbol
   targets/dri: Add __driDriverGetExtensions_(r300|r600|radeonsi) symbols
   targets/dri: Add __driDriverGetExtensions_freedreno symbol
   targets/dri: Add __driDriverGetExtensions_i915 symbol
   targets/dri: Add __driDriverGetExtensions_i965 symbol
   targets/dri: Add __driDriverGetExtensions_vmwgfx
   targets/dri: update scons build to handle 
 __driDriverGetExtensions_vmwgfx
   targets/dri: cleanup conversion leftovers
   st/dri/drm: remove __driDriverExtensions and driDriverAPI
   scons: build and use a single dri_common library
   targets/dri-swrast: convert to gallium megadrivers :)
   st/dri: merge dri/drm and dri/sw backends
   targets/dri-swrast: Convert to static/shared pipe-driver
 
  61 files changed, 536 insertions(+), 1375 deletions(-)
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/5] gallium: Add PIPE_SHADER_CAP_DOUBLES

2014-06-18 Thread Tom Stellard
On Wed, Jun 18, 2014 at 09:54:28AM +1000, Dave Airlie wrote:
 On 18 June 2014 05:08, Roland Scheidegger srol...@vmware.com wrote:
  This looks ok to me though since tgsi currently doesn't have any double
  opcodes (well the docs have them...) it doesn't really apply to most
  drivers (at least I assume you don't want to add support for it for tgsi).
 
 I've mostly forward ported the old gallium double code, and have
 written most of ARB_gpu_shader_fp64 on top,
 
 Though the question I did want to ask Tom is if he is just going to
 expose hw that has doubles, or does
 he plan on emulating doubles.
 

My intention was that this CAP would be enabled for drivers with either
native double support or with double emulation implemented in the driver.
I'm not currently planning to implement double emulation, so I would
only enable this for hardware with native double support.

 For a lot of GLSL4.0 GPUs from AMD fglrx emulates doubles using
 massive magic shaders, I'm unsure
 if we should have a lowering pass above/below the TGSI line for these
 types of situations and what that
 would mean for this CAP.


If someone implemented double lowering above the TGSI line, then it
should only be done with drivers that report 0 for this CAP.

-Tom

 Dave.
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] gallium/radeon: Only print a message for LLVM diagnostic errors

2014-06-18 Thread Tom Stellard
We were printing messages for all diagnostic types, which was
spamming the console for some OpenCL programs.
---
 src/gallium/drivers/radeon/radeon_llvm_emit.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c 
b/src/gallium/drivers/radeon/radeon_llvm_emit.c
index 891781f..6a394b2 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
@@ -87,11 +87,13 @@ static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef 
di, void *context) {
char *diaginfo_message;
 
diaginfo_message = LLVMGetDiagInfoDescription(di);
-   fprintf(stderr,LLVM triggered Diagnostic Handler: %s\n, 
diaginfo_message);
LLVMDisposeMessage(diaginfo_message);
 
diagnosticflag = (unsigned int *)context;
-   *diagnosticflag = ((LLVMDSError == LLVMGetDiagInfoSeverity(di)) ? 1 : 
0);
+   if (LLVMGetDiagInfoSeverity(di) == LLVMDSError) {
+   *diagnosticflag = 1;
+   fprintf(stderr,LLVM triggered Diagnostic Handler: %s\n, 
diaginfo_message);
+   }
 }
 
 #endif
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] R600/SI: add Gather4 intrinsics (v2)

2014-06-17 Thread Tom Stellard
On Mon, Jun 16, 2014 at 09:19:59PM +0200, Marek Olšák wrote:
 Why are there SDNodes for the other sample intrinsics then?
 

The reason there are SDnodes for sample intrinsics is because at one point he 
had
to modify the type of the resource description and the easiest way to do that
was by replacing the intrinsic with an SDNode.

-Tom

 Marek
 
 On Mon, Jun 16, 2014 at 5:45 PM, Tom Stellard t...@stellard.net wrote:
  On Thu, Jun 12, 2014 at 02:11:10AM +0200, Marek Olšák wrote:
  From: Marek Olšák marek.ol...@amd.com
 
  This adds a new type of intrinsic and SDNode: SampleRaw.
  All fields of the MIMG opcodes are exposed and can be set by Mesa,
  even DMASK. All GATHER4 variants are added and there are a lot of them.
 
  v2: document DMASK behavior
  ---
   lib/Target/R600/AMDGPUISelLowering.cpp | 24 +
   lib/Target/R600/AMDGPUISelLowering.h   | 31 +++
   lib/Target/R600/SIISelLowering.cpp | 72 +
   lib/Target/R600/SIISelLowering.h   |  2 +
   lib/Target/R600/SIInstrInfo.td | 91 
  
   lib/Target/R600/SIInstructions.td  | 96 
  +-
   lib/Target/R600/SIIntrinsics.td| 48 +
   7 files changed, 340 insertions(+), 24 deletions(-)
 
  diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp 
  b/lib/Target/R600/AMDGPUISelLowering.cpp
  index 849f169..359161c 100644
  --- a/lib/Target/R600/AMDGPUISelLowering.cpp
  +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
  @@ -1542,6 +1542,30 @@ const char* 
  AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
 NODE_NAME_CASE(SAMPLEB)
 NODE_NAME_CASE(SAMPLED)
 NODE_NAME_CASE(SAMPLEL)
  +  NODE_NAME_CASE(GATHER4)
  +  NODE_NAME_CASE(GATHER4_CL)
  +  NODE_NAME_CASE(GATHER4_L)
  +  NODE_NAME_CASE(GATHER4_B)
  +  NODE_NAME_CASE(GATHER4_B_CL)
  +  NODE_NAME_CASE(GATHER4_LZ)
  +  NODE_NAME_CASE(GATHER4_C)
  +  NODE_NAME_CASE(GATHER4_C_CL)
  +  NODE_NAME_CASE(GATHER4_C_L)
  +  NODE_NAME_CASE(GATHER4_C_B)
  +  NODE_NAME_CASE(GATHER4_C_B_CL)
  +  NODE_NAME_CASE(GATHER4_C_LZ)
  +  NODE_NAME_CASE(GATHER4_O)
  +  NODE_NAME_CASE(GATHER4_CL_O)
  +  NODE_NAME_CASE(GATHER4_L_O)
  +  NODE_NAME_CASE(GATHER4_B_O)
  +  NODE_NAME_CASE(GATHER4_B_CL_O)
  +  NODE_NAME_CASE(GATHER4_LZ_O)
  +  NODE_NAME_CASE(GATHER4_C_O)
  +  NODE_NAME_CASE(GATHER4_C_CL_O)
  +  NODE_NAME_CASE(GATHER4_C_L_O)
  +  NODE_NAME_CASE(GATHER4_C_B_O)
  +  NODE_NAME_CASE(GATHER4_C_B_CL_O)
  +  NODE_NAME_CASE(GATHER4_C_LZ_O)
 
  You don't need to add new SDNodes for all these instructions, you can just 
  use
  the intrinsic directly in the pattern.
 
  The only reason to add SDNodes, is if there are optimizations / special 
  lowering
  we can do for these instructions.
 
 NODE_NAME_CASE(STORE_MSKOR)
 NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
 }
  diff --git a/lib/Target/R600/AMDGPUISelLowering.h 
  b/lib/Target/R600/AMDGPUISelLowering.h
  index d5d821d..a9af195 100644
  --- a/lib/Target/R600/AMDGPUISelLowering.h
  +++ b/lib/Target/R600/AMDGPUISelLowering.h
  @@ -203,6 +203,37 @@ enum {
 SAMPLEB,
 SAMPLED,
 SAMPLEL,
  +
  +  // Gather4 opcodes
  +  GATHER4,
  +  GATHER4_CL,
  +  GATHER4_L,
  +  GATHER4_B,
  +  GATHER4_B_CL,
  +  GATHER4_LZ,
  +
  +  GATHER4_C,
  +  GATHER4_C_CL,
  +  GATHER4_C_L,
  +  GATHER4_C_B,
  +  GATHER4_C_B_CL,
  +  GATHER4_C_LZ,
  +
  +  GATHER4_O,
  +  GATHER4_CL_O,
  +  GATHER4_L_O,
  +  GATHER4_B_O,
  +  GATHER4_B_CL_O,
  +  GATHER4_LZ_O,
  +
  +  GATHER4_C_O,
  +  GATHER4_C_CL_O,
  +  GATHER4_C_L_O,
  +  GATHER4_C_B_O,
  +  GATHER4_C_B_CL_O,
  +  GATHER4_C_LZ_O,
  +
  +  // Nemory opcodes
 FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
 STORE_MSKOR,
 LOAD_CONSTANT,
  diff --git a/lib/Target/R600/SIISelLowering.cpp 
  b/lib/Target/R600/SIISelLowering.cpp
  index 1a861d4..909255d 100644
  --- a/lib/Target/R600/SIISelLowering.cpp
  +++ b/lib/Target/R600/SIISelLowering.cpp
  @@ -688,6 +688,59 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, 
  SelectionDAG DAG) const {
Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
  +
  +// Gather4 intrinsics
  +case AMDGPUIntrinsic::SI_gather4:
  +  return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4, Op, DAG);
  +case AMDGPUIntrinsic::SI_gather4_cl:
  +  return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_CL, Op, DAG);
  +case AMDGPUIntrinsic::SI_gather4_l:
  +  return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_L, Op, DAG);
  +case AMDGPUIntrinsic::SI_gather4_b:
  +  return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B, Op, DAG);
  +case AMDGPUIntrinsic::SI_gather4_b_cl:
  +  return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B_CL, Op, DAG);
  +case AMDGPUIntrinsic::SI_gather4_lz:
  +  return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_LZ, Op, DAG);
  +
  +case AMDGPUIntrinsic::SI_gather4_c:
  +  return

[Mesa-dev] [PATCH 4/5] gallium: Add PIPE_SHADER_CAP_DOUBLES

2014-06-17 Thread Tom Stellard
This is for reporting whether or not double precision floating-point
operations are supported.
---
 src/gallium/auxiliary/gallivm/lp_bld_limits.h | 2 ++
 src/gallium/auxiliary/tgsi/tgsi_exec.h| 2 ++
 src/gallium/docs/source/screen.rst| 2 ++
 src/gallium/drivers/r300/r300_screen.c| 4 
 src/gallium/drivers/r600/r600_pipe.c  | 2 ++
 src/gallium/drivers/radeonsi/si_pipe.c| 5 +
 src/gallium/drivers/svga/svga_screen.c| 4 
 src/gallium/include/pipe/p_defines.h  | 3 ++-
 8 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h 
b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
index 6cb0949..9ccaf46 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
@@ -126,6 +126,8 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
   return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
   return 1;
+   case PIPE_SHADER_CAP_DOUBLES:
+  return 0;
}
/* if we get here, we missed a shader cap above (and should have seen
 * a compiler warning.)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h 
b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index d53c4ba..56a7034 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -456,6 +456,8 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
   return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
   return 1;
+   case PIPE_SHADER_CAP_DOUBLES:
+  return 0;
}
/* if we get here, we missed a shader cap above (and should have seen
 * a compiler warning.)
diff --git a/src/gallium/docs/source/screen.rst 
b/src/gallium/docs/source/screen.rst
index b8e356f..2867bfc 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -282,6 +282,8 @@ to be 0.
   program.  It should be one of the ``pipe_shader_ir`` enum values.
 * ``PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS``: The maximum number of texture
   sampler views. Must not be lower than PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS.
+* ``PIPE_SHADER_CAP_DOUBLES``: Whether double precision floating-point
+  operations are supported.
 
 
 .. _pipe_compute_cap:
diff --git a/src/gallium/drivers/r300/r300_screen.c 
b/src/gallium/drivers/r300/r300_screen.c
index 82d30e7..e5ed59a 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -262,6 +262,8 @@ static int r300_get_shader_param(struct pipe_screen 
*pscreen, unsigned shader, e
 return 0;
 case PIPE_SHADER_CAP_PREFERRED_IR:
 return PIPE_SHADER_IR_TGSI;
+case PIPE_SHADER_CAP_DOUBLES:
+return 0;
 }
 break;
 case PIPE_SHADER_VERTEX:
@@ -313,6 +315,8 @@ static int r300_get_shader_param(struct pipe_screen 
*pscreen, unsigned shader, e
 return 0;
 case PIPE_SHADER_CAP_PREFERRED_IR:
 return PIPE_SHADER_IR_TGSI;
+case PIPE_SHADER_CAP_DOUBLES:
+return 0;
 }
 break;
 }
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 2b65056..24e3c1a 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -444,6 +444,8 @@ static int r600_get_shader_param(struct pipe_screen* 
pscreen, unsigned shader, e
} else {
return PIPE_SHADER_IR_TGSI;
}
+case PIPE_SHADER_CAP_DOUBLES:
+   return 0;
}
return 0;
 }
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 4b96f20..9eab1fe 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -321,6 +321,9 @@ static int si_get_shader_param(struct pipe_screen* pscreen, 
unsigned shader, enu
switch (param) {
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_LLVM;
+   case PIPE_SHADER_CAP_DOUBLES:
+   return 0; /* TODO: Report doubles as supported once
+   * the compiler is ready. */
default:
return 0;
}
@@ -372,6 +375,8 @@ static int si_get_shader_param(struct pipe_screen* pscreen, 
unsigned shader, enu
return 16;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+   case PIPE_SHADER_CAP_DOUBLES:
+   return 0;
}
return 0;
 }
diff --git a/src/gallium/drivers/svga/svga_screen.c 
b/src/gallium/drivers/svga/svga_screen.c
index 4e1e331..89d3c49 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -347,6 +347,8 @@ static int svga_get_shader_param(struct pipe_screen 
*screen, unsigned shader, en
  return 16;
 

[Mesa-dev] [PATCH 3/5] clover: Report default values for half and double fp configs

2014-06-17 Thread Tom Stellard
From: Matt Arsenault arse...@gmail.com

---
 src/gallium/state_trackers/clover/api/device.cpp  | 11 +--
 src/gallium/state_trackers/clover/core/device.cpp | 24 +++
 src/gallium/state_trackers/clover/core/device.hpp |  3 +++
 3 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/clover/api/device.cpp 
b/src/gallium/state_trackers/clover/api/device.cpp
index 1bc2692..dc8e22c 100644
--- a/src/gallium/state_trackers/clover/api/device.cpp
+++ b/src/gallium/state_trackers/clover/api/device.cpp
@@ -201,8 +201,15 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
   break;
 
case CL_DEVICE_SINGLE_FP_CONFIG:
-  buf.as_scalarcl_device_fp_config() =
- CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST;
+  buf.as_scalarcl_device_fp_config() = dev.single_fp_config();
+  break;
+
+   case CL_DEVICE_DOUBLE_FP_CONFIG:
+  buf.as_scalarcl_device_fp_config() = dev.double_fp_config();
+  break;
+
+   case CL_DEVICE_HALF_FP_CONFIG:
+  buf.as_scalarcl_device_fp_config() = dev.half_fp_config();
   break;
 
case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE:
diff --git a/src/gallium/state_trackers/clover/core/device.cpp 
b/src/gallium/state_trackers/clover/core/device.cpp
index bc3e3e6..6d52dd4 100644
--- a/src/gallium/state_trackers/clover/core/device.cpp
+++ b/src/gallium/state_trackers/clover/core/device.cpp
@@ -163,6 +163,30 @@ device::max_clock_frequency() const {
   PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY)[0];
 }
 
+cl_device_fp_config
+device::single_fp_config() const {
+   // TODO: Get these from somewhere.
+   return CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST;
+}
+
+cl_device_fp_config
+device::double_fp_config() const {
+   // TODO: Get these from somewhere. This is the mandated minimum double
+   // precision floating-point capability
+return CL_FP_FMA
+   | CL_FP_ROUND_TO_NEAREST
+   | CL_FP_ROUND_TO_ZERO
+   | CL_FP_ROUND_TO_INF
+   | CL_FP_INF_NAN
+   | CL_FP_DENORM;
+}
+
+cl_device_fp_config
+device::half_fp_config() const {
+   // TODO: Get these from somewhere.
+   return CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST;
+}
+
 std::vectorsize_t
 device::max_block_size() const {
auto v = get_compute_paramuint64_t(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
diff --git a/src/gallium/state_trackers/clover/core/device.hpp 
b/src/gallium/state_trackers/clover/core/device.hpp
index 3662c6b..380029e 100644
--- a/src/gallium/state_trackers/clover/core/device.hpp
+++ b/src/gallium/state_trackers/clover/core/device.hpp
@@ -62,6 +62,9 @@ namespace clover {
   size_t max_threads_per_block() const;
   cl_ulong max_mem_alloc_size() const;
   cl_uint max_clock_frequency() const;
+  cl_device_fp_config single_fp_config() const;
+  cl_device_fp_config double_fp_config() const;
+  cl_device_fp_config half_fp_config() const;
 
   std::vectorsize_t max_block_size() const;
   std::string device_name() const;
-- 
1.8.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/5] clover: Enable cl_khr_fp64 for devices that support doubles

2014-06-17 Thread Tom Stellard
---
 src/gallium/state_trackers/clover/api/device.cpp  | 4 +++-
 src/gallium/state_trackers/clover/core/device.cpp | 6 ++
 src/gallium/state_trackers/clover/core/device.hpp | 1 +
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/clover/api/device.cpp 
b/src/gallium/state_trackers/clover/api/device.cpp
index dc8e22c..275542d 100644
--- a/src/gallium/state_trackers/clover/api/device.cpp
+++ b/src/gallium/state_trackers/clover/api/device.cpp
@@ -290,7 +290,9 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
   break;
 
case CL_DEVICE_EXTENSIONS:
-  buf.as_string() = ;
+  // The trailing space is intentional. It is a spec-ism that there is a
+  // trailing space at the end of the list of extensions.
+  buf.as_string() = dev.cl_khr_fp64() ? cl_khr_fp64  : ;
   break;
 
case CL_DEVICE_PLATFORM:
diff --git a/src/gallium/state_trackers/clover/core/device.cpp 
b/src/gallium/state_trackers/clover/core/device.cpp
index 6d52dd4..51b54fa 100644
--- a/src/gallium/state_trackers/clover/core/device.cpp
+++ b/src/gallium/state_trackers/clover/core/device.cpp
@@ -187,6 +187,12 @@ device::half_fp_config() const {
return CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST;
 }
 
+bool
+device::cl_khr_fp64() const {
+   return pipe-get_shader_param(pipe, PIPE_SHADER_COMPUTE,
+ PIPE_SHADER_CAP_DOUBLES);
+}
+
 std::vectorsize_t
 device::max_block_size() const {
auto v = get_compute_paramuint64_t(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
diff --git a/src/gallium/state_trackers/clover/core/device.hpp 
b/src/gallium/state_trackers/clover/core/device.hpp
index 380029e..38bea54 100644
--- a/src/gallium/state_trackers/clover/core/device.hpp
+++ b/src/gallium/state_trackers/clover/core/device.hpp
@@ -65,6 +65,7 @@ namespace clover {
   cl_device_fp_config single_fp_config() const;
   cl_device_fp_config double_fp_config() const;
   cl_device_fp_config half_fp_config() const;
+  bool cl_khr_fp64() const;
 
   std::vectorsize_t max_block_size() const;
   std::string device_name() const;
-- 
1.8.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/5] clover: Don't use llvm's global context

2014-06-17 Thread Tom Stellard
An LLVMContext should only be accessed by a single thread, and using the global
context was causing crashes in multi-threaded environments.  Now we use
a separate context for each compile.
---
 src/gallium/state_trackers/clover/llvm/invocation.cpp | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp 
b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index 82e262f..48810bd 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -117,12 +117,13 @@ namespace {
 #endif
 
llvm::Module *
-   compile(const std::string source, const std::string name,
-   const std::string triple, const std::string processor,
-   const std::string opts, clang::LangAS::Map address_spaces) {
+   compile(llvm::LLVMContext llvm_ctx, const std::string source,
+   const std::string name, const std::string triple,
+   const std::string processor, const std::string opts,
+   clang::LangAS::Map address_spaces) {
 
   clang::CompilerInstance c;
-  clang::EmitLLVMOnlyAction act(llvm::getGlobalContext());
+  clang::EmitLLVMOnlyAction act(llvm_ctx);
   std::string log;
   llvm::raw_string_ostream s_log(log);
   std::string libclc_path = LIBCLC_LIBEXECDIR + processor + -
@@ -394,10 +395,12 @@ clover::compile_program_llvm(const compat::string source,
   target.size() - processor_str_len - 1);
clang::LangAS::Map address_spaces;
 
+   llvm::LLVMContext llvm_ctx;
+
// The input file name must have the .cl extension in order for the
// CompilerInvocation class to recognize it as an OpenCL source file.
-   llvm::Module *mod = compile(source, input.cl, triple, processor, opts,
-
address_spaces);
+   llvm::Module *mod = compile(llvm_ctx, source, input.cl, triple, processor,
+   opts, address_spaces);
 
find_kernels(mod, kernels);
 
-- 
1.8.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/5] clover: Fix not setting build log if the build succeeds.

2014-06-17 Thread Tom Stellard
From: Matt Arsenault arse...@gmail.com

If there were only warnings, they would not be added to the log.
Also fixes valgrind use-after-free errors.
---
 src/gallium/state_trackers/clover/core/compiler.hpp   |  3 ++-
 src/gallium/state_trackers/clover/core/error.hpp  |  2 +-
 src/gallium/state_trackers/clover/core/program.cpp| 11 +++
 src/gallium/state_trackers/clover/llvm/invocation.cpp | 14 +++---
 4 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/src/gallium/state_trackers/clover/core/compiler.hpp 
b/src/gallium/state_trackers/clover/core/compiler.hpp
index 49cd022..3ce132f 100644
--- a/src/gallium/state_trackers/clover/core/compiler.hpp
+++ b/src/gallium/state_trackers/clover/core/compiler.hpp
@@ -32,7 +32,8 @@ namespace clover {
module compile_program_llvm(const compat::string source,
pipe_shader_ir ir,
const compat::string target,
-   const compat::string opts);
+   const compat::string opts,
+   std::string log_out);
 
module compile_program_tgsi(const compat::string source);
 }
diff --git a/src/gallium/state_trackers/clover/core/error.hpp 
b/src/gallium/state_trackers/clover/core/error.hpp
index 28459f3..9802195 100644
--- a/src/gallium/state_trackers/clover/core/error.hpp
+++ b/src/gallium/state_trackers/clover/core/error.hpp
@@ -66,7 +66,7 @@ namespace clover {
 
class build_error : public error {
public:
-  build_error(const compat::string log) :
+  build_error(const compat::string log = ) :
  error(CL_BUILD_PROGRAM_FAILURE, log) {
   }
};
diff --git a/src/gallium/state_trackers/clover/core/program.cpp 
b/src/gallium/state_trackers/clover/core/program.cpp
index 3aaa652..91ee553 100644
--- a/src/gallium/state_trackers/clover/core/program.cpp
+++ b/src/gallium/state_trackers/clover/core/program.cpp
@@ -52,15 +52,18 @@ program::build(const ref_vectordevice devs, const char 
*opts) {
 
  _opts.insert({ dev, opts });
 
+ std::string build_log;
+
  try {
 auto module = (dev.ir_format() == PIPE_SHADER_IR_TGSI ?
compile_program_tgsi(_source) :
compile_program_llvm(_source, dev.ir_format(),
-dev.ir_target(), 
build_opts(dev)));
+dev.ir_target(), 
build_opts(dev),
+build_log));
 _binaries.insert({ dev, module });
-
- } catch (build_error e) {
-_logs.insert({ dev, e.what() });
+_logs.insert({ dev, build_log });
+ } catch (const build_error ) {
+_logs.insert({ dev, build_log });
 throw;
  }
   }
diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp 
b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index 48810bd..0dc1f50 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -120,12 +120,11 @@ namespace {
compile(llvm::LLVMContext llvm_ctx, const std::string source,
const std::string name, const std::string triple,
const std::string processor, const std::string opts,
-   clang::LangAS::Map address_spaces) {
+   clang::LangAS::Map address_spaces, std::string log_out) {
 
   clang::CompilerInstance c;
   clang::EmitLLVMOnlyAction act(llvm_ctx);
-  std::string log;
-  llvm::raw_string_ostream s_log(log);
+  llvm::raw_string_ostream s_log(log_out);
   std::string libclc_path = LIBCLC_LIBEXECDIR + processor + -
   + triple + .bc;
 
@@ -220,10 +219,10 @@ namespace {
 
   // Compile the code
   if (!c.ExecuteAction(act))
- throw build_error(log);
+ throw build_error();
 
   // Get address spaces map to be able to find kernel argument address 
space
-  memcpy(address_spaces, c.getTarget().getAddressSpaceMap(), 
+  memcpy(address_spaces, c.getTarget().getAddressSpaceMap(),
 
sizeof(address_spaces));
 
   return act.takeModule();
@@ -386,7 +385,8 @@ module
 clover::compile_program_llvm(const compat::string source,
  enum pipe_shader_ir ir,
  const compat::string target,
- const compat::string opts) {
+ const compat::string opts,
+ std::string log_out) {
 
std::vectorllvm::Function * kernels;
size_t processor_str_len = std::string(target.begin()).find_first_of(-);
@@ -400,7 +400,7 @@ clover::compile_program_llvm(const compat::string source,
// The input file name must have the .cl extension in order for the
// 

Re: [Mesa-dev] [PATCH 1/9] r600g/compute: Add an intermediate resource for OpenCL buffers

2014-06-16 Thread Tom Stellard
On Fri, Jun 13, 2014 at 10:35:30PM +0200, Bruno Jiménez wrote:
 This patch completely changes the way buffers are added to the
 compute_memory_pool. Before this, whenever we were going to
 map a buffer or write to or read from it, it would get placed
 into the pool. Now, every unallocated buffer has its own
 r600_resource until it is allocated in the pool.
 
 NOTE: This patch also increases the GPU memory usage at the moment
 of putting every buffer in its place. More or less, the memory
 usage is ~2x(sum of every buffer size)
 
 v2: Cleanup
 ---
  src/gallium/drivers/r600/compute_memory_pool.c | 21 -
  src/gallium/drivers/r600/compute_memory_pool.h |  2 ++
  src/gallium/drivers/r600/evergreen_compute.c   | 18 +-
  3 files changed, 35 insertions(+), 6 deletions(-)
 
 diff --git a/src/gallium/drivers/r600/compute_memory_pool.c 
 b/src/gallium/drivers/r600/compute_memory_pool.c
 index ec8c470..94ddcde 100644
 --- a/src/gallium/drivers/r600/compute_memory_pool.c
 +++ b/src/gallium/drivers/r600/compute_memory_pool.c
 @@ -71,7 +71,6 @@ static void compute_memory_pool_init(struct 
 compute_memory_pool * pool,
   if (pool-shadow == NULL)
   return;
  
 - pool-next_id = 1;
   pool-size_in_dw = initial_size_in_dw;
   pool-bo = (struct 
 r600_resource*)r600_compute_buffer_alloc_vram(pool-screen,
   pool-size_in_dw * 4);
 @@ -365,6 +364,18 @@ int compute_memory_finalize_pending(struct 
 compute_memory_pool* pool,
   pool-item_list = item;
   }
  
 + ((struct r600_context *)pipe)-b.b.resource_copy_region(pipe,
 + (struct pipe_resource *)pool-bo,
 + 0, item-start_in_dw * 4, 0 ,0,
 + (struct pipe_resource *)item-real_buffer,
 + 0, (struct pipe_box) {.width = 
 item-size_in_dw * 4,
 + .height = 1, .depth = 1});
 +
 + pool-screen-b.b.resource_destroy(
 + (struct pipe_screen *)pool-screen,
 + (struct pipe_resource *)item-real_buffer);

You should use temporary variables rather than inlining the casts everywhere.
It will make the code easier to read. Make sure to declare them at the beginning
of the function or basic block.

 + item-real_buffer = NULL;
 +
   allocated += item-size_in_dw;
   }
  
 @@ -393,6 +404,12 @@ void compute_memory_free(struct compute_memory_pool* 
 pool, int64_t id)
   item-next-prev = item-prev;
   }
  
 + if (item-real_buffer) {
 + pool-screen-b.b.resource_destroy(
 + (struct pipe_screen 
 *)pool-screen,
 + (struct pipe_resource 
 *)item-real_buffer);
 + }
 +

Same thing here with the casts.

   free(item);
  
   return;
 @@ -426,6 +443,8 @@ struct compute_memory_item* compute_memory_alloc(
   new_item-start_in_dw = -1; /* mark pending */
   new_item-id = pool-next_id++;
   new_item-pool = pool;
 + new_item-real_buffer = (struct 
 r600_resource*)r600_compute_buffer_alloc_vram(
 + pool-screen, 
 size_in_dw * 4);
  
   if (pool-item_list) {
   for (last_item = pool-item_list; last_item-next;
 diff --git a/src/gallium/drivers/r600/compute_memory_pool.h 
 b/src/gallium/drivers/r600/compute_memory_pool.h
 index c711c59..e94159c 100644
 --- a/src/gallium/drivers/r600/compute_memory_pool.h
 +++ b/src/gallium/drivers/r600/compute_memory_pool.h
 @@ -38,6 +38,8 @@ struct compute_memory_item
   int64_t start_in_dw; ///Start pointer in dwords relative in the pool bo
   int64_t size_in_dw; ///Size of the chunk in dwords
  
 + struct r600_resource *real_buffer;
 +
   struct compute_memory_pool* pool;
  
   struct compute_memory_item* prev;
 diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
 b/src/gallium/drivers/r600/evergreen_compute.c
 index a2abf15..c152e54 100644
 --- a/src/gallium/drivers/r600/evergreen_compute.c
 +++ b/src/gallium/drivers/r600/evergreen_compute.c
 @@ -958,6 +958,17 @@ void *r600_compute_global_transfer_map(
   struct r600_resource_global* buffer =
   (struct r600_resource_global*)resource;
  
 + struct pipe_resource *dst;
 + unsigned offset = box-x;
 +
 + if (buffer-chunk-real_buffer) {
 + dst = (struct pipe_resource*)buffer-chunk-real_buffer;
 + }
 + else {
 + dst = (struct pipe_resource*)buffer-chunk-pool-bo;
 + offset += (buffer-chunk-start_in_dw * 4);
 + }
 +
   COMPUTE_DBG(rctx-screen, * r600_compute_global_transfer_map()\n
   level = %u, usage = %u, box(x = %u, y 

Re: [Mesa-dev] [PATCH 2/9] r600g/compute: Add statuses to the compute_memory_items

2014-06-16 Thread Tom Stellard
On Fri, Jun 13, 2014 at 10:35:31PM +0200, Bruno Jiménez wrote:
 These statuses will help track whether the items are mapped
 or if they should be promoted to or demoted from the pool
 ---
  src/gallium/drivers/r600/compute_memory_pool.h |  7 ++-
  src/gallium/drivers/r600/evergreen_compute.c   | 12 
  2 files changed, 18 insertions(+), 1 deletion(-)
 
 diff --git a/src/gallium/drivers/r600/compute_memory_pool.h 
 b/src/gallium/drivers/r600/compute_memory_pool.h
 index e94159c..166093d 100644
 --- a/src/gallium/drivers/r600/compute_memory_pool.h
 +++ b/src/gallium/drivers/r600/compute_memory_pool.h
 @@ -27,13 +27,18 @@
  
  #include stdlib.h
  
 +#define ITEM_MAPPED_FOR_READING (10)
 +#define ITEM_MAPPED_FOR_WRITING (11)
 +#define ITEM_FOR_PROMOTING  (12)
 +#define ITEM_FOR_DEMOTING   (13)
 +
  struct compute_memory_pool;
  
  struct compute_memory_item
  {
   int64_t id; ///ID of the memory chunk
  
 - int untouched; ///True if the memory contains only junk, no need to 
 save it for defrag
 + uint32_t status; ///Will track the status of the item
  
   int64_t start_in_dw; ///Start pointer in dwords relative in the pool bo
   int64_t size_in_dw; ///Size of the chunk in dwords

This whole structure should use C-style comments /* */, but that should be
fixed in a follow-up patch.

 diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
 b/src/gallium/drivers/r600/evergreen_compute.c
 index c152e54..9123a40 100644
 --- a/src/gallium/drivers/r600/evergreen_compute.c
 +++ b/src/gallium/drivers/r600/evergreen_compute.c
 @@ -659,6 +659,15 @@ static void evergreen_set_global_binding(
   return;
   }
  
 + /* We mark these items for promotion to the pool if they
 +  * aren't already there */
 + for (int i = 0; i  n; i++) {
 + struct compute_memory_item *item = buffers[i]-chunk;
 +
 + if (item-start_in_dw == -1)

I would prefer to encapsulate this in an is_item_in_pool() helper function.

 + buffers[i]-chunk-status |= ITEM_FOR_PROMOTING;
 + }
 +
   compute_memory_finalize_pending(pool, ctx_);
  
   for (int i = 0; i  n; i++)
 @@ -969,6 +978,9 @@ void *r600_compute_global_transfer_map(
   offset += (buffer-chunk-start_in_dw * 4);
   }
  
 + if (usage  PIPE_TRANSFER_READ)
 + buffer-chunk-status |= ITEM_MAPPED_FOR_READING;
 +
   COMPUTE_DBG(rctx-screen, * r600_compute_global_transfer_map()\n
   level = %u, usage = %u, box(x = %u, y = %u, z = %u 
   width = %u, height = %u, depth = %u)\n, level, usage,
 -- 
 2.0.0
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/9] r600g/compute: divide the item list in two

2014-06-16 Thread Tom Stellard
On Fri, Jun 13, 2014 at 10:35:32PM +0200, Bruno Jiménez wrote:
 Now we will have a list with the items that are in the pool
 (item_list) and the items that are outside it (unallocated_list)

Reviewed-by: Tom Stellard thomas.stell...@amd.com

 ---
  src/gallium/drivers/r600/compute_memory_pool.c | 99 
 +-
  src/gallium/drivers/r600/compute_memory_pool.h |  1 +
  2 files changed, 49 insertions(+), 51 deletions(-)
 
 diff --git a/src/gallium/drivers/r600/compute_memory_pool.c 
 b/src/gallium/drivers/r600/compute_memory_pool.c
 index 94ddcde..5a5ef12 100644
 --- a/src/gallium/drivers/r600/compute_memory_pool.c
 +++ b/src/gallium/drivers/r600/compute_memory_pool.c
 @@ -108,13 +108,11 @@ int64_t compute_memory_prealloc_chunk(
   size_in_dw);
  
   for (item = pool-item_list; item; item = item-next) {
 - if (item-start_in_dw  -1) {
 - if (last_end + size_in_dw = item-start_in_dw) {
 - return last_end;
 - }
 -
 - last_end = item-start_in_dw + align(item-size_in_dw, 
 ITEM_ALIGNMENT);
 + if (last_end + size_in_dw = item-start_in_dw) {
 + return last_end;
   }
 +
 + last_end = item-start_in_dw + align(item-size_in_dw, 
 ITEM_ALIGNMENT);
   }
  
   if (pool-size_in_dw - last_end  size_in_dw) {
 @@ -226,7 +224,6 @@ void compute_memory_shadow(struct compute_memory_pool* 
 pool,
  int compute_memory_finalize_pending(struct compute_memory_pool* pool,
   struct pipe_context * pipe)
  {
 - struct compute_memory_item *pending_list = NULL, *end_p = NULL;
   struct compute_memory_item *item, *next;
  
   int64_t allocated = 0;
 @@ -244,45 +241,16 @@ int compute_memory_finalize_pending(struct 
 compute_memory_pool* pool,
   item-size_in_dw, item-size_in_dw * 4);
   }
  
 - /* Search through the list of memory items in the pool */
 + /* Calculate the total allocated size */
   for (item = pool-item_list; item; item = next) {
   next = item-next;
 + allocated += align(item-size_in_dw, ITEM_ALIGNMENT);
 + }
  
 - /* Check if the item is pending. */
 - if (item-start_in_dw == -1) {
 - /* It is pending, so add it to the pending_list... */
 - if (end_p) {
 - end_p-next = item;
 - }
 - else {
 - pending_list = item;
 - }
 -
 - /* ... and then remove it from the item list. */
 - if (item-prev) {
 - item-prev-next = next;
 - }
 - else {
 - pool-item_list = next;
 - }
 -
 - if (next) {
 - next-prev = item-prev;
 - }
 -
 - /* This sequence makes the item be at the end of the 
 list */
 - item-prev = end_p;
 - item-next = NULL;
 - end_p = item;
 -
 - /* Update the amount of space we will need to allocate. 
 */
 - unallocated += item-size_in_dw+1024;
 - }
 - else {
 - /* The item is not pending, so update the amount of 
 space
 -  * that has already been allocated. */
 - allocated += item-size_in_dw;
 - }
 + /* Calculate the total unallocated size */
 + for (item = pool-unallocated_list; item; item = next) {
 + next = item-next;
 + unallocated += align(item-size_in_dw, ITEM_ALIGNMENT);
   }
  
   /* If we require more space than the size of the pool, then grow the
 @@ -302,15 +270,15 @@ int compute_memory_finalize_pending(struct 
 compute_memory_pool* pool,
* In this case, there are 300 units of free space in the pool, but
* they aren't contiguous, so it will be impossible to allocate Item D.
*/
 - if (pool-size_in_dw  allocated+unallocated) {
 - err = compute_memory_grow_pool(pool, pipe, 
 allocated+unallocated);
 + if (pool-size_in_dw  allocated + unallocated) {
 + err = compute_memory_grow_pool(pool, pipe, allocated + 
 unallocated);
   if (err == -1)
   return -1;
   }
  
 - /* Loop through all the pending items, allocate space for them and
 -  * add them back to the item_list. */
 - for (item = pending_list; item; item = next) {
 + /* Loop through all the unallocated items, allocate space for them
 +  * and add them to the item_list. */
 + for (item = pool-unallocated_list; item; item = next) {
   next = item-next;
  
   /* Search for free space in the pool for this item

Re: [Mesa-dev] [PATCH] clover: Cache serialized binaries

2014-06-16 Thread Tom Stellard
On Sun, Jun 15, 2014 at 01:08:14PM +0200, Francisco Jerez wrote:
 Tom Stellard thomas.stell...@amd.com writes:
 
  We were serializing the binaries once when clGetProgramInfo was called
  with CL_PROGRAM_BINARY_SIZES and then again when it was called with
  CL_PROGRAM_BINARIES.  This was slowing down some OpenCV tests which were
  building binary kernel caches.
 
  This improves the run-time of OpenCV's OCL_ImgProc/CvtColor8u.*
  test from 7 minutes to 1 minute.
  ---
 
 Can you give the attached two patches a try?  I'm curious to see if they
 have a comparable effect -- If they do I'd prefer to fix the underlying
 object rather than caching binaries in serialized form.
 
 Thanks.
 
 [...]
 

These patches improve performance even more.  Now it only takes 10 seconds to
run the tests instead of 7 minutes.

For both patches:

Tested-by: Tom Stellard thomas.stell...@amd.com

 From a500126213b073793184b0b6f170a58229340778 Mon Sep 17 00:00:00 2001
 From: Francisco Jerez curroje...@riseup.net
 Date: Sat, 14 Jun 2014 20:53:35 +0200
 Subject: [PATCH 1/2] clover: Optimize module serialization for vectors of
  fundamental types.
 
 ---
  src/gallium/state_trackers/clover/core/module.cpp | 23 
 ++-
  1 file changed, 22 insertions(+), 1 deletion(-)
 
 diff --git a/src/gallium/state_trackers/clover/core/module.cpp 
 b/src/gallium/state_trackers/clover/core/module.cpp
 index 3e3ad99..41de734 100644
 --- a/src/gallium/state_trackers/clover/core/module.cpp
 +++ b/src/gallium/state_trackers/clover/core/module.cpp
 @@ -69,7 +69,9 @@ namespace {
  
 /// (De)serialize a vector.
 templatetypename T
 -   struct _serializercompat::vectorT {
 +   struct _serializercompat::vectorT,
 +  typename std::enable_if
 + !std::is_scalarT::value::type {
static void
proc(compat::ostream os, const compat::vectorT v) {
   _procuint32_t(os, v.size());
 @@ -87,6 +89,25 @@ namespace {
}
 };
  
 +   templatetypename T
 +   struct _serializercompat::vectorT,
 +  typename std::enable_if
 + std::is_scalarT::value::type {
 +  static void
 +  proc(compat::ostream os, const compat::vectorT v) {
 + _procuint32_t(os, v.size());
 + os.write(reinterpret_castconst char *(v.begin()),
 +  v.size() * sizeof(T));
 +  }
 +
 +  static void
 +  proc(compat::istream is, compat::vectorT v) {
 + v.reserve(_procuint32_t(is));
 + is.read(reinterpret_castchar *(v.begin()),
 + v.size() * sizeof(T));
 +  }
 +   };
 +
 /// (De)serialize a module::section.
 template
 struct _serializermodule::section {
 -- 
 1.9.2
 

 From 1267038c2b0621dddc3d5c7718eed7ef2beb111b Mon Sep 17 00:00:00 2001
 From: Francisco Jerez curroje...@riseup.net
 Date: Sat, 14 Jun 2014 21:03:02 +0200
 Subject: [PATCH 2/2] clover: Calculate the serialized size of a module
  efficiently.
 
 ---
  src/gallium/state_trackers/clover/api/program.cpp |  5 +---
  src/gallium/state_trackers/clover/core/module.cpp | 32 
 +++
  src/gallium/state_trackers/clover/core/module.hpp |  1 +
  3 files changed, 34 insertions(+), 4 deletions(-)
 
 diff --git a/src/gallium/state_trackers/clover/api/program.cpp 
 b/src/gallium/state_trackers/clover/api/program.cpp
 index fedc91d..a14baa3 100644
 --- a/src/gallium/state_trackers/clover/api/program.cpp
 +++ b/src/gallium/state_trackers/clover/api/program.cpp
 @@ -190,10 +190,7 @@ clGetProgramInfo(cl_program d_prog, cl_program_info 
 param,
  
 case CL_PROGRAM_BINARY_SIZES:
buf.as_vectorsize_t() = map([](const device dev) {
 -compat::ostream::buffer_t bin;
 -compat::ostream s(bin);
 -prog.binary(dev).serialize(s);
 -return bin.size();
 +return prog.binary(dev).size();
   },
   prog.devices());
break;
 diff --git a/src/gallium/state_trackers/clover/core/module.cpp 
 b/src/gallium/state_trackers/clover/core/module.cpp
 index 41de734..55ed91a 100644
 --- a/src/gallium/state_trackers/clover/core/module.cpp
 +++ b/src/gallium/state_trackers/clover/core/module.cpp
 @@ -52,6 +52,13 @@ namespace {
return x;
 }
  
 +   /// Calculate the size of the specified object.
 +   templatetypename T
 +   void
 +   _proc(module::size_t sz, const T x) {
 +  _serializerT::proc(sz, x);
 +   }
 +
 /// (De)serialize a scalar value.
 templatetypename T
 struct _serializerT, typename std::enable_if
 @@ -65,6 +72,11 @@ namespace {
proc(compat::istream is, T x) {
   is.read(reinterpret_castchar *(x), sizeof(x));
}
 +
 +  static void
 +  proc(module::size_t sz, const T x) {
 + sz += sizeof(x);
 +  }
 };
  
 /// (De)serialize a vector.
 @@ -87,6 +99,14 @@ namespace {
   for (size_t i = 0; i  v.size(); i++)
  new(v[i]) T(_procT

<    1   2   3   4   5   6   7   8   9   10   >