Add a unit test for load/store merging: the 8 loads in this case can be merged into 4.

Signed-off-by: rander.wang <rander.w...@intel.com>
---
 kernels/compiler_load_store_merging.cl | 18 ++++++++++++
 utests/CMakeLists.txt                  |  3 +-
 utests/compiler_load_store_merging.cpp | 51 ++++++++++++++++++++++++++++++++++
 3 files changed, 71 insertions(+), 1 deletion(-)
 create mode 100644 kernels/compiler_load_store_merging.cl
 create mode 100644 utests/compiler_load_store_merging.cpp

diff --git a/kernels/compiler_load_store_merging.cl b/kernels/compiler_load_store_merging.cl
new file mode 100644
index 0000000..4d78ec8
--- /dev/null
+++ b/kernels/compiler_load_store_merging.cl
@@ -0,0 +1,18 @@
+kernel void compiler_load_store_merging(global float *src, global float *dst) {
+      float result ; // never read or written below
+      // Reads eight floats from src (offsets 0-2, 36, 38 and 72-74 past idx) in scrambled order.
+      int idx = get_global_id(0);
+      float p2 = src[idx+1];
+      float p4 = src[idx+32+4];
+      float p5 = src[idx+32+6];
+      float p3 = src[idx+2];
+      float p8 = src[idx+32*2+10];
+      float p6 = src[idx+32*2+8];
+      float p1 = src[idx];
+      float p7 = src[idx+32*2+9];
+      // Each mad(a, b, c) below is intended as a * b + c; the host test recomputes it as 2.0f * b + c.
+      float dx = mad(2, p5 - p4, p3 - p1 + p8 - p6);
+      float dy = mad(2, p2 - p7, p3 - p8 + p1 - p6);
+      // One result per work-item: the product of the two sums.
+      dst[idx] =  dx*dy;
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index cd061b2..cd5c4fb 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -304,7 +304,8 @@ set (utests_sources
   runtime_pipe_query.cpp
   compiler_pipe_builtin.cpp
   compiler_device_enqueue.cpp
-  compiler_global_immediate_optimized)
+  compiler_global_immediate_optimized.cpp
+  compiler_load_store_merging.cpp)
 
 if (LLVM_VERSION_NODOT VERSION_GREATER 34)
   SET(utests_sources
diff --git a/utests/compiler_load_store_merging.cpp b/utests/compiler_load_store_merging.cpp
new file mode 100644
index 0000000..d89342d
--- /dev/null
+++ b/utests/compiler_load_store_merging.cpp
@@ -0,0 +1,51 @@
+#include <cmath>
+#include "utest_helper.hpp"
+
+/* Host-side check for the compiler_load_store_merging kernel: launches it with
+ * globals[0] = locals[0] = 1 and compares dst[0] with the value computed here. */
+void compiler_load_store_merging(void) {
+       const int n = 128;
+       float src[n];   // host copy of the input, used for the reference value
+
+       // Setup kernel and buffers
+       OCL_CREATE_KERNEL("compiler_load_store_merging");
+       OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);
+       OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL);
+       OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+       OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+       globals[0] = 1;
+       locals[0] = 1;
+
+       // Every pass uploads a different ramp, so n distinct inputs are checked.
+       for (int j = 0; j < n ; j++) {
+               OCL_MAP_BUFFER(0);
+               for (int i = 0; i < n; ++i) {
+                       src[i] = ((float*) buf_data[0])[i] = (j * n + i + 1) * 0.001f;
+               }
+               OCL_UNMAP_BUFFER(0);
+
+               OCL_NDRANGE(1);
+
+               // The kernel writes to buffer 1; read the result from there, not from the input.
+               OCL_MAP_BUFFER(1);
+               const float *dst = (const float*) buf_data[1];
+
+               // Same taps and operand order as the kernel, for work-item 0.
+               int idx = 0;
+               float p2 = src[idx+1];
+               float p4 = src[idx+32+4];
+               float p5 = src[idx+32+6];
+               float p3 = src[idx+2];
+               float p8 = src[idx+32*2+10];
+               float p6 = src[idx+32*2+8];
+               float p1 = src[idx];
+               float p7 = src[idx+32*2+9];
+               float dx = 2.0f * (p5 - p4) + (p3 - p1 + p8 - p6);
+               float dy = 2.0f * (p2 - p7) + (p3 - p8 + p1 - p6);
+               const float result = dx * dy;
+               OCL_ASSERT(dst[0] == result);
+               OCL_UNMAP_BUFFER(1);
+       }
+}
+
+MAKE_UTEST_FROM_FUNCTION (compiler_load_store_merging); // presumably registers the function with the utest runner; confirm against the macro's definition
-- 
2.7.4

_______________________________________________
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to