---- Resend to mailing list ----
Yes, here it is (sorry for the bad tabbing).
#include "CL/cl.h"
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
// gcc testcl.c -lOpenCL -lm
/*
 * CHECK(x):  terminate the process with EXIT_FAILURE unless the OpenCL status
 *            expression x equals CL_SUCCESS.
 * CHECK0(x): terminate the process with EXIT_FAILURE if x is zero / NULL.
 *
 * Both expand to a single statement (do { ... } while (0)), so they must be
 * followed by a semicolon and are safe inside unbraced if/else bodies.
 * The argument is evaluated exactly once.
 */
#define CHECK(x) do { if ((x) != CL_SUCCESS) exit(EXIT_FAILURE); } while (0)
#define CHECK0(x) do { if (!(x)) exit(EXIT_FAILURE); } while (0)
/*
 * OpenCL C source of the "square" kernel. Despite its name, the kernel writes
 * round(input[i]) to output[i] for every work-item whose global id is below
 * count. Adjacent string literals are concatenated by the compiler, so each
 * literal must be complete on its own source line.
 */
const char *KERNEL_SOURCE =
    "__kernel void square(global float* input, global float* output, const unsigned int count) {"
    " int i = get_global_id(0);"
    " if (i < count) output[i] = round(input[i]);"
    "}";
// Number of float elements in the host arrays and device buffers; main() also
// uses it as the 1-D global work size and as the kernel's element count.
const size_t DATA_SIZE = 1024;
int main(int argc, char** argv) {
int err;
size_t i = 0;
float data[DATA_SIZE];
for (i = 0; i < DATA_SIZE; ++i) {
data[i] = 100.0 * rand() / (float)RAND_MAX;
}
data[1023] = 88.9f;
cl_platform_id platform_id; CHECK(clGetPlatformIDs(1, &platform_id,
NULL));
cl_device_id device_id; CHECK(clGetDeviceIDs(platform_id,
CL_DEVICE_TYPE_GPU, 1, &device_id, NULL));
cl_context context = clCreateContext(0, 1, &device_id, NULL, NULL,
&err); CHECK(err); CHECK0(context);
cl_command_queue commands = clCreateCommandQueue(context, device_id,
0, &err); CHECK(err); CHECK0(commands);
cl_program program = clCreateProgramWithSource(context, 1, (const
char **)&KERNEL_SOURCE, NULL, &err); CHECK(err); CHECK0(program);
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
if (err != CL_SUCCESS) {
size_t len;
char buffer[2048];
printf("Error: Failed to build program executable!\n");
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG,
sizeof(buffer), buffer, &len);
printf("%s\n", buffer);
exit(EXIT_FAILURE);
}
cl_kernel kernel = clCreateKernel(program, "square", &err);
CHECK(err); CHECK0(kernel);
cl_mem input = clCreateBuffer(context, CL_MEM_READ_ONLY,
sizeof(float) * DATA_SIZE, NULL, NULL); CHECK0(input);
cl_mem output = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(float) * DATA_SIZE, NULL, NULL); CHECK0(output);
CHECK(clEnqueueWriteBuffer(commands, input, CL_TRUE, 0,
sizeof(float) * DATA_SIZE, data, 0, NULL, NULL));
CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &input));
CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &output));
CHECK(clSetKernelArg(kernel, 2, sizeof(unsigned int), &DATA_SIZE));
// get the maximum work group size for executing the kernel on the
device
size_t global = DATA_SIZE, local;
CHECK(clGetKernelWorkGroupInfo(kernel, device_id,
CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL));
CHECK(clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global,
&local, 0, NULL, NULL));
clFinish(commands);
float results[DATA_SIZE];
CHECK(clEnqueueReadBuffer(commands, output, CL_TRUE, 0,
sizeof(float) * DATA_SIZE, results, 0, NULL, NULL));
err = 0;
for (i = 0; i < DATA_SIZE; ++i) {
printf("%f -> %f\n", data[i], results[i]);
if (results[i] != round(results[i])) {
fprintf(stderr, "%f -> %f != %f\n", data[i], results[i],
round(results[i]));
++err;
}
}
printf("%d errors\n", err);
clReleaseMemObject(input);
clReleaseMemObject(output);
clReleaseProgram(program);
clReleaseKernel(kernel);
clReleaseCommandQueue(commands);
clReleaseContext(context);
return 0;
}
On 11.04.2014 17:47, CRV§ADER//KY wrote:
> Could you post the c++ code as well? This doesn't make much sense...
>
> On 11 Apr 2014 16:21, "Kai Ruhl" <[email protected]
> <mailto:[email protected]>> wrote:
>
> I have a *very* specific bug in pyopencl: When I use round(88.9f) with
> pyopencl from git (2014-04-08) on an Nvidia Ti780, it will give me 88.0
> instead of 89.0.
>
> - If I change the gfx card to my older GTX590, it will work.
> - If I round doubles instead of floats, it works on both cards.
> - If I write a test in C++, it works on both cards.
>
> Can anyone with a Ti780 confirm this bug?
>
>
>
> Side info: I have compiled pyopencl with ENABLE_GL=True and
> CL_PRETEND_VERSION="1.1" due to missing clCreateSubDevices in nvidia
> OpenCL.
> OS is Ubuntu 14.04.
>
> My test is:
>
> import pyopencl as cl
> import numpy as np
> ctx = cl.create_some_context()
> que = cl.CommandQueue(ctx)
> cl_prg = cl.Program(ctx, "__kernel void doit(__global float *a) { a[0] =
> round(88.9f); }").build()
> a = np.zeros(1, dtype=np.float32); A = cl.Buffer(ctx,
> cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, hostbuf=a)
> cl_prg.doit(que, [1], None, A); que.finish()
> cl.enqueue_copy(que, a, A)
> print a[0]
>
>
>
> _______________________________________________
> PyOpenCL mailing list
> [email protected] <mailto:[email protected]>
> http://lists.tiker.net/listinfo/pyopencl
>
_______________________________________________
PyOpenCL mailing list
[email protected]
http://lists.tiker.net/listinfo/pyopencl