loads in this case can be merged to 4 from 8

Signed-off-by: rander.wang <rander.w...@intel.com>
---
 kernels/compiler_load_store_merging.cl | 18 ++++++++++++
 utests/CMakeLists.txt                  |  3 +-
 utests/compiler_load_store_merging.cpp | 51 ++++++++++++++++++++++++++++++++++
 3 files changed, 71 insertions(+), 1 deletion(-)
 create mode 100644 kernels/compiler_load_store_merging.cl
 create mode 100644 utests/compiler_load_store_merging.cpp

diff --git a/kernels/compiler_load_store_merging.cl b/kernels/compiler_load_store_merging.cl
new file mode 100644
index 0000000..4d78ec8
--- /dev/null
+++ b/kernels/compiler_load_store_merging.cl
@@ -0,0 +1,18 @@
+kernel void compiler_load_store_merging(global float *src, global float *dst) {
+  float result ;
+
+  int idx = get_global_id(0);
+  float p2 = src[idx+1];
+  float p4 = src[idx+32+4];
+  float p5 = src[idx+32+6];
+  float p3 = src[idx+2];
+  float p8 = src[idx+32*2+10];
+  float p6 = src[idx+32*2+8];
+  float p1 = src[idx];
+  float p7 = src[idx+32*2+9];
+
+  float dx = mad(2, p5 - p4, p3 - p1 + p8 - p6);
+  float dy = mad(2, p2 - p7, p3 - p8 + p1 - p6);
+
+  dst[idx] = dx*dy;
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index cd061b2..cd5c4fb 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -304,7 +304,8 @@ set (utests_sources
   runtime_pipe_query.cpp
   compiler_pipe_builtin.cpp
   compiler_device_enqueue.cpp
-  compiler_global_immediate_optimized)
+  compiler_global_immediate_optimized.cpp
+  compiler_load_store_merging.cpp)
 
 if (LLVM_VERSION_NODOT VERSION_GREATER 34)
   SET(utests_sources
diff --git a/utests/compiler_load_store_merging.cpp b/utests/compiler_load_store_merging.cpp
new file mode 100644
index 0000000..d89342d
--- /dev/null
+++ b/utests/compiler_load_store_merging.cpp
@@ -0,0 +1,51 @@
+#include <cmath>
+#include "utest_helper.hpp"
+
+void compiler_load_store_merging(void) {
+  const int n = 128;
+  float src[n];
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_load_store_merging");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = 1;
+  locals[0] = 1;
+
+  for (int j = 0; j < n ; j++) {
+    OCL_MAP_BUFFER(0);
+    for (int i = 0; i < n; ++i) {
+      src[i] = ((float*) buf_data[0])[i] = (j * n + i + 1) * 0.001f;
+    }
+    OCL_UNMAP_BUFFER(0);
+
+    OCL_NDRANGE(1);
+
+    OCL_MAP_BUFFER(1);
+    float *dst = (float*) buf_data[1];  // output buffer mapped just above; the kernel writes dst[idx]
+
+    float result ;
+
+    int idx = 0;
+    float p2 = src[idx+1];
+    float p4 = src[idx+32+4];
+    float p5 = src[idx+32+6];
+    float p3 = src[idx+2];
+    float p8 = src[idx+32*2+10];
+    float p6 = src[idx+32*2+8];
+    float p1 = src[idx];
+    float p7 = src[idx+32*2+9];
+
+    float dx = 2.0f * (p5 - p4) + (p3 - p1 + p8 - p6);
+    float dy = 2.0f * (p2 - p7) + (p3 - p8 + p1 - p6);
+
+    result = dx*dy;
+    OCL_ASSERT(dst[0] == result);
+
+    OCL_UNMAP_BUFFER(1);
+  }
+}
+
+MAKE_UTEST_FROM_FUNCTION (compiler_load_store_merging);
-- 
2.7.4

_______________________________________________
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet