Resending - I forgot to add the list. On 11 Apr 2014 18:12, "CRV§ADER//KY" <[email protected]> wrote:
> I think the key is to look for a difference in the command-line > parameters passed to the OpenCL compiler between C++ and pyopencl. I'm not sure > how to trace that, though. > On 11 Apr 2014 16:56, "Kai Ruhl" <[email protected]> wrote: > >> Yes, here it is (sorry for the bad tabbing). >> >> >> #include "CL/cl.h" >> #include <fcntl.h> >> #include <stdio.h> >> #include <stdlib.h> >> #include <string.h> >> #include <math.h> >> #include <unistd.h> >> #include <sys/types.h> >> #include <sys/stat.h> >> >> // gcc testcl.c -lOpenCL -lm >> >> #define CHECK(x) if ((x) != CL_SUCCESS) exit(EXIT_FAILURE); >> #define CHECK0(x) if (!(x)) exit(EXIT_FAILURE); >> >> const char *KERNEL_SOURCE = \ >> "__kernel void square(global float* input, global float* output, const >> unsigned int count) {" \ >> " int i = get_global_id(0);" \ >> " if (i < count) output[i] = round(input[i]);" \ >> "}"; >> >> const size_t DATA_SIZE = 1024; >> >> int main(int argc, char** argv) { >> int err; >> >> size_t i = 0; >> float data[DATA_SIZE]; >> for (i = 0; i < DATA_SIZE; ++i) { >> data[i] = 100.0 * rand() / (float)RAND_MAX; >> } >> data[1023] = 88.9f; >> >> cl_platform_id platform_id; CHECK(clGetPlatformIDs(1, &platform_id, >> NULL)); >> cl_device_id device_id; CHECK(clGetDeviceIDs(platform_id, >> CL_DEVICE_TYPE_GPU, 1, &device_id, NULL)); >> cl_context context = clCreateContext(0, 1, &device_id, NULL, NULL, >> &err); CHECK(err); CHECK0(context); >> cl_command_queue commands = clCreateCommandQueue(context, device_id, >> 0, &err); CHECK(err); CHECK0(commands); >> cl_program program = clCreateProgramWithSource(context, 1, (const >> char **)&KERNEL_SOURCE, NULL, &err); CHECK(err); CHECK0(program); >> >> err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); >> if (err != CL_SUCCESS) { >> size_t len; >> char buffer[2048]; >> printf("Error: Failed to build program executable!\n"); >> clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, >> sizeof(buffer), buffer, &len); >> printf("%s\n", 
buffer); >> exit(EXIT_FAILURE); >> } >> >> cl_kernel kernel = clCreateKernel(program, "square", &err); >> CHECK(err); CHECK0(kernel); >> cl_mem input = clCreateBuffer(context, CL_MEM_READ_ONLY, >> sizeof(float) * DATA_SIZE, NULL, NULL); CHECK0(input); >> cl_mem output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, >> sizeof(float) * DATA_SIZE, NULL, NULL); CHECK0(output); >> CHECK(clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, >> sizeof(float) * DATA_SIZE, data, 0, NULL, NULL)); >> >> CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &input)); >> CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &output)); >> CHECK(clSetKernelArg(kernel, 2, sizeof(unsigned int), &DATA_SIZE)); >> >> // get the maximum work group size for executing the kernel on the >> device >> size_t global = DATA_SIZE, local; >> CHECK(clGetKernelWorkGroupInfo(kernel, device_id, >> CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL)); >> CHECK(clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, >> &local, 0, NULL, NULL)); >> clFinish(commands); >> >> float results[DATA_SIZE]; >> CHECK(clEnqueueReadBuffer(commands, output, CL_TRUE, 0, >> sizeof(float) * DATA_SIZE, results, 0, NULL, NULL)); >> >> err = 0; >> for (i = 0; i < DATA_SIZE; ++i) { >> printf("%f -> %f\n", data[i], results[i]); >> if (results[i] != round(results[i])) { >> fprintf(stderr, "%f -> %f != %f\n", data[i], >> results[i], >> round(results[i])); >> ++err; >> } >> } >> printf("%d errors\n", err); >> >> clReleaseMemObject(input); >> clReleaseMemObject(output); >> clReleaseProgram(program); >> clReleaseKernel(kernel); >> clReleaseCommandQueue(commands); >> clReleaseContext(context); >> >> return 0; >> } >> >> >> >> >> >> On 11.04.2014 17:47, CRV§ADER//KY wrote: >> > Could you post the c++ code as well? This doesn't make much sense... 
>> > >> > On 11 Apr 2014 16:21, "Kai Ruhl" <[email protected] >> > <mailto:[email protected]>> wrote: >> > >> > I have a *very* specific bug in pyopencl: When I use round(88.9f) >> with >> > pyopencl from git (2014-04-08) on a Nvidia Ti780, it will give me >> 88.0 >> > instead of 89.0. >> > >> > - If I change the gfx card to my older GTX590, it will work. >> > - If I round doubles instead of floats, it works on both cards. >> > - If I write a test in C++, it works on both cards. >> > >> > Can anyone with a Ti780 confirm this bug? >> > >> > >> > >> > Side info: I have compiled pyopencl with ENABLE_GL=True and >> > CL_PRETEND_VERSION="1.1" due to missing clCreateSubDevices in nvidia >> > OpenCL. >> > OS is Ubuntu 14.04. >> > >> > My test is: >> > >> > import pyopencl as cl >> > import numpy as np >> > ctx = cl.create_some_context() >> > que = cl.CommandQueue(ctx) >> > cl_prg = cl.Program(ctx, "__kernel void doit(__global float *a) { >> a[0] = >> > round(88.9f); }").build() >> > a = np.zeros(1, dtype=np.float32); A = cl.Buffer(ctx, >> > cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, hostbuf=a) >> > cl_prg.doit(que, [1], None, A); que.finish() >> > cl.enqueue_copy(que, a, A) >> > print a[0] >> > >> > >> > >> > _______________________________________________ >> > PyOpenCL mailing list >> > [email protected] <mailto:[email protected]> >> > http://lists.tiker.net/listinfo/pyopencl >> > >> >
_______________________________________________ PyOpenCL mailing list [email protected] http://lists.tiker.net/listinfo/pyopencl
