Signed-off-by: Grigore Lupescu <grigore.lupe...@intel.com>
---
 benchmark/CMakeLists.txt     |   3 +-
 benchmark/benchmark_math.cpp | 163 ++++++++++++++++++++++++
 kernels/bench_math.cl        | 269 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 434 insertions(+), 1 deletion(-)
 create mode 100644 benchmark/benchmark_math.cpp
 create mode 100644 kernels/bench_math.cl

diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
index dd33829..4c3c933 100644
--- a/benchmark/CMakeLists.txt
+++ b/benchmark/CMakeLists.txt
@@ -18,7 +18,8 @@ set (benchmark_sources
   benchmark_copy_buffer_to_image.cpp
   benchmark_copy_image_to_buffer.cpp
   benchmark_copy_buffer.cpp
-  benchmark_copy_image.cpp)
+  benchmark_copy_image.cpp
+  benchmark_math.cpp)
 
 
 SET(CMAKE_CXX_FLAGS "-DBUILD_BENCHMARK ${CMAKE_CXX_FLAGS}")
diff --git a/benchmark/benchmark_math.cpp b/benchmark/benchmark_math.cpp
new file mode 100644
index 0000000..969aa08
--- /dev/null
+++ b/benchmark/benchmark_math.cpp
@@ -0,0 +1,163 @@
+#include "utests/utest_helper.hpp"
+#include <sys/time.h>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+
+/*
+ * Run one kernel from an OpenCL math benchmark file and report its throughput.
+ *
+ * str_filename: kernel source file handed to cl_kernel_init.
+ * str_kernel:   name of the kernel to build from that file.
+ *
+ * The kernel is given (src, dst, pwr, loop) and launched once over global_size
+ * work items; the timed region spans OCL_NDRANGE and clFinish. The result is
+ * BANDWIDTH(global_size * loop, elapsed), registered below in "Mop/s".
+ */
+double benchmark_generic_math(const char* str_filename,
+                              const char* str_kernel)
+{
+  double elapsed = 0;
+  struct timeval start,stop;
+  const size_t global_size = 1024 * 1024;
+  const size_t local_size = 64;
+
+  /* Compute math OP, loop times on global size */
+  cl_float base = 1.000002;
+  cl_float pwr = 1.0102003;
+  uint32_t loop = 1000;
+
+  /* Input set will be generated */
+  float* src = (float*)calloc(global_size, sizeof(float));
+  OCL_ASSERT(src != NULL);
+  for(uint32_t i = 0; i < global_size; i++)
+    src[i] = base + i * (base - 1);
+
+  /* Setup kernel and buffers */
+  OCL_CALL(cl_kernel_init, str_filename, str_kernel, SOURCE, NULL);
+
+  OCL_CREATE_BUFFER(buf[0], 0, (global_size) * sizeof(float), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, (global_size) * sizeof(float), NULL);
+
+  OCL_MAP_BUFFER(0);
+  memcpy(buf_data[0], src, global_size * sizeof(float));
+  OCL_UNMAP_BUFFER(0);
+
+  /* The input has been copied into buf[0]; release the host staging array */
+  free(src);
+
+  globals[0] = global_size;
+  locals[0] = local_size;
+
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(cl_float), &pwr);
+  OCL_SET_ARG(3, sizeof(cl_uint), &loop);
+
+  /* Measure performance; a failed clFinish must not yield a bogus timing */
+  gettimeofday(&start,0);
+  OCL_NDRANGE(1);
+  OCL_CALL(clFinish, queue);
+  gettimeofday(&stop,0);
+  elapsed = time_subtract(&stop, &start, 0);
+
+  /* Show compute results */
+  OCL_MAP_BUFFER(1);
+  for(uint32_t i = 0; i < global_size; i += 8192)
+    printf("\t%.3f", ((float*)buf_data[1])[i]);
+  OCL_UNMAP_BUFFER(1);
+
+  return BANDWIDTH(global_size * loop, elapsed);
+}
+
+double benchmark_math_pow(void){
+  return benchmark_generic_math("bench_math.cl", "bench_math_pow");
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_pow, "Mop/s");
+
+double benchmark_math_native_powr(void){
+  return benchmark_generic_math("bench_math.cl", "bench_math_native_powr");
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_native_powr, "Mop/s");
+
+double benchmark_math_exp2(void){
+  return benchmark_generic_math("bench_math.cl", "bench_math_exp2");
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_exp2, "Mop/s");
+
+double benchmark_math_exp10(void){
+  return benchmark_generic_math("bench_math.cl", "bench_math_exp10");
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_exp10, "Mop/s");
+
+double benchmark_math_native_exp10(void){
+  return benchmark_generic_math("bench_math.cl", "bench_math_native_exp10");
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_native_exp10, "Mop/s");
+
+double benchmark_math_log2(void){
+  return benchmark_generic_math("bench_math.cl", "bench_math_log2");
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_log2, "Mop/s");
+
+double benchmark_math_native_log2(void){
+  return benchmark_generic_math("bench_math.cl", "bench_math_native_log2");
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_native_log2, "Mop/s");
+
+double benchmark_math_log10(void){
+  return benchmark_generic_math("bench_math.cl", "bench_math_log10");
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_log10, "Mop/s");
+
+double benchmark_math_native_log10(void){
+  return benchmark_generic_math("bench_math.cl", "bench_math_native_log10");
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_native_log10, "Mop/s");
+
+double benchmark_math_sqrt(void){
+  return benchmark_generic_math("bench_math.cl", "bench_math_sqrt");
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_sqrt, "Mop/s");
+
+double benchmark_math_sin(void){
+  return benchmark_generic_math("bench_math.cl", "bench_math_sin");
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_sin, "Mop/s");
+
+double benchmark_math_native_sin(void){
+  return benchmark_generic_math("bench_math.cl", "bench_math_native_sin");
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_native_sin, "Mop/s");
+
+double benchmark_math_cos(void){
+  return benchmark_generic_math("bench_math.cl", "bench_math_cos");
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_cos, "Mop/s");
+
+double benchmark_math_native_cos(void){
+  return benchmark_generic_math("bench_math.cl", "bench_math_native_cos");
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_native_cos, "Mop/s");
+
+double benchmark_math_tan(void){
+  return benchmark_generic_math("bench_math.cl", "bench_math_tan");
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_tan, "Mop/s");
+
+double benchmark_math_native_tan(void){
+  return benchmark_generic_math("bench_math.cl", "bench_math_native_tan");
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_native_tan, "Mop/s");
+
+double benchmark_math_asin(void){
+  return benchmark_generic_math("bench_math.cl", "bench_math_asin");
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_asin, "Mop/s");
+
+double benchmark_math_acos(void){
+  return benchmark_generic_math("bench_math.cl", "bench_math_acos");
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_acos, "Mop/s");
diff --git a/kernels/bench_math.cl b/kernels/bench_math.cl
new file mode 100644
index 0000000..9a689be
--- /dev/null
+++ b/kernels/bench_math.cl
@@ -0,0 +1,269 @@
+/* benchmark pow performance: result = pow(result, pwr), loop times per work item */
+kernel void bench_math_pow(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  float result = src[get_global_id(0)];
+
+  for(; loop > 0; loop--)
+    result = pow(result, pwr);
+
+  dst[get_global_id(0)] = result;
+}
+
+/* benchmark native powr performance */
+kernel void bench_math_native_powr(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  float result = src[get_global_id(0)];
+
+  for(; loop > 0; loop--)
+    result = native_powr(result, pwr);
+
+  dst[get_global_id(0)] = result;
+}
+
+/* benchmark exp2 performance */
+kernel void bench_math_exp2(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  float result = src[get_global_id(0)];
+
+  for(; loop > 0; loop--)
+    result = exp2(pwr) - exp2(result);
+
+  dst[get_global_id(0)] = result;
+}
+
+/* benchmark exp10 performance */
+kernel void bench_math_exp10(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  float result = src[get_global_id(0)];
+
+  for(; loop > 0; loop--)
+    result = exp10(pwr) - exp10(result);
+
+  dst[get_global_id(0)] = result;
+}
+
+/* benchmark native exp10 performance */
+kernel void bench_math_native_exp10(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  float result = src[get_global_id(0)];
+
+  for(; loop > 0; loop--)
+    result = native_exp10(pwr) - native_exp10(result);
+
+  dst[get_global_id(0)] = result;
+}
+
+/* benchmark log2 performance */
+kernel void bench_math_log2(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  float result = src[get_global_id(0)];
+
+  for(; loop > 0; loop--)
+    result = log2(result) + pwr;
+
+  dst[get_global_id(0)] = result;
+}
+
+/* benchmark native log2 performance */
+kernel void bench_math_native_log2(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  float result = src[get_global_id(0)];
+
+  for(; loop > 0; loop--)
+    result = native_log2(result) + pwr;
+
+  dst[get_global_id(0)] = result;
+}
+
+/* benchmark log10 performance */
+kernel void bench_math_log10(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  float result = src[get_global_id(0)];
+
+  for(; loop > 0; loop--)
+    result = log10(result) + pwr;
+
+  dst[get_global_id(0)] = result;
+}
+
+/* benchmark native log10 performance */
+kernel void bench_math_native_log10(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  float result = src[get_global_id(0)];
+
+  for(; loop > 0; loop--)
+    result = native_log10(result) + pwr;
+
+  dst[get_global_id(0)] = result;
+}
+
+/* benchmark sqrt performance (two sqrt calls per loop iteration) */
+kernel void bench_math_sqrt(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  float result = src[get_global_id(0)];
+
+  for(; loop > 0; loop--)
+    result = sqrt(result) + sqrt(pwr + result);
+
+  dst[get_global_id(0)] = result;
+}
+
+/* benchmark sin performance */
+kernel void bench_math_sin(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  float result = src[get_global_id(0)];
+
+  for(; loop > 0; loop--)
+    result = sin(result);
+
+  dst[get_global_id(0)] = result;
+}
+
+/* benchmark native sin performance */
+kernel void bench_math_native_sin(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  float result = src[get_global_id(0)];
+
+  for(; loop > 0; loop--)
+    result = native_sin(result);
+
+  dst[get_global_id(0)] = result;
+}
+
+/* benchmark cos performance */
+kernel void bench_math_cos(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  float result = src[get_global_id(0)];
+
+  for(; loop > 0; loop--)
+    result = cos(result);
+
+  dst[get_global_id(0)] = result;
+}
+
+/* benchmark native cos performance */
+kernel void bench_math_native_cos(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  float result = src[get_global_id(0)];
+
+  for(; loop > 0; loop--)
+    result = native_cos(result);
+
+  dst[get_global_id(0)] = result;
+}
+
+/* benchmark tan performance */
+kernel void bench_math_tan(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  float result = src[get_global_id(0)];
+
+  for(; loop > 0; loop--)
+    result = tan(result);
+
+  dst[get_global_id(0)] = result;
+}
+
+/* benchmark native tan performance */
+kernel void bench_math_native_tan(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  float result = src[get_global_id(0)];
+
+  for(; loop > 0; loop--)
+    result = native_tan(result);
+
+  dst[get_global_id(0)] = result;
+}
+
+/* benchmark asin performance; the argument depends on result so the call cannot be hoisted out of the loop */
+kernel void bench_math_asin(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  float result = src[get_global_id(0)];
+
+  for(; loop > 0; loop--)
+    result = asin((pwr - 1) * (1 + result)); /* stays within [-1, 1] for the host's pwr and src values */
+
+  dst[get_global_id(0)] = result;
+}
+
+/* benchmark acos performance; the argument depends on result so the call cannot be hoisted out of the loop */
+kernel void bench_math_acos(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  float result = src[get_global_id(0)];
+
+  for(; loop > 0; loop--)
+    result = acos((pwr - 1) * (1 + result)); /* stays within [-1, 1] for the host's pwr and src values */
+
+  dst[get_global_id(0)] = result;
+}
-- 
2.5.0

_______________________________________________
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet