---- Resend to mailing list ----

Yes, here it is (sorry for the bad tabbing).


#include "CL/cl.h"
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>

// gcc testcl.c -lOpenCL -lm

/*
 * Abort the program with EXIT_FAILURE when an OpenCL call fails.
 *
 * CHECK(x)  - x is a status code; anything other than CL_SUCCESS is fatal.
 * CHECK0(x) - x is a handle/value; a zero (NULL) value is fatal.
 *
 * Both expand to a single statement (do { ... } while (0)) so that
 * "CHECK(...);" behaves like an ordinary call, including inside an
 * unbraced if/else. The argument is evaluated exactly once.
 */
#define CHECK(x) do { if ((x) != CL_SUCCESS) exit(EXIT_FAILURE); } while (0)
#define CHECK0(x) do { if (!(x)) exit(EXIT_FAILURE); } while (0)

/*
 * OpenCL C source of the "square" kernel handed to clCreateProgramWithSource.
 * Despite its name, the kernel writes round(input[i]) to output[i] for every
 * work-item whose global id is below count.
 *
 * The source is built from adjacent string literals, which the compiler
 * concatenates; a literal must never be split across physical lines.
 */
const char *KERNEL_SOURCE =
    "__kernel void square(global float* input, global float* output, "
    "const unsigned int count) {"
    "  int i = get_global_id(0);"
    "  if (i < count) output[i] = round(input[i]);"
    "}";

/* Number of float elements in the host arrays and in each device buffer. */
const size_t DATA_SIZE = 1024;

int main(int argc, char** argv) {
    int err;

    size_t i = 0;
    float data[DATA_SIZE];
    for (i = 0; i < DATA_SIZE; ++i) {
         data[i] = 100.0 * rand() / (float)RAND_MAX;
    }
    data[1023] = 88.9f;

    cl_platform_id platform_id; CHECK(clGetPlatformIDs(1, &platform_id,
NULL));
    cl_device_id device_id; CHECK(clGetDeviceIDs(platform_id,
CL_DEVICE_TYPE_GPU, 1, &device_id, NULL));
    cl_context context = clCreateContext(0, 1, &device_id, NULL, NULL,
&err); CHECK(err); CHECK0(context);
    cl_command_queue commands = clCreateCommandQueue(context, device_id,
0, &err); CHECK(err); CHECK0(commands);
    cl_program program = clCreateProgramWithSource(context, 1, (const
char **)&KERNEL_SOURCE, NULL, &err); CHECK(err); CHECK0(program);

    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (err != CL_SUCCESS) {
        size_t len;
        char buffer[2048];
        printf("Error: Failed to build program executable!\n");
        clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG,
sizeof(buffer), buffer, &len);
        printf("%s\n", buffer);
        exit(EXIT_FAILURE);
    }

    cl_kernel kernel = clCreateKernel(program, "square", &err);
CHECK(err); CHECK0(kernel);
    cl_mem input = clCreateBuffer(context, CL_MEM_READ_ONLY,
sizeof(float) * DATA_SIZE, NULL, NULL); CHECK0(input);
    cl_mem output = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(float) * DATA_SIZE, NULL, NULL); CHECK0(output);
    CHECK(clEnqueueWriteBuffer(commands, input, CL_TRUE, 0,
sizeof(float) * DATA_SIZE, data, 0, NULL, NULL));

    CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &input));
    CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &output));
    CHECK(clSetKernelArg(kernel, 2, sizeof(unsigned int), &DATA_SIZE));

    // get the maximum work group size for executing the kernel on the
device
    size_t global = DATA_SIZE, local;
    CHECK(clGetKernelWorkGroupInfo(kernel, device_id,
CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL));
    CHECK(clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global,
&local, 0, NULL, NULL));
    clFinish(commands);

    float results[DATA_SIZE];
    CHECK(clEnqueueReadBuffer(commands, output, CL_TRUE, 0,
sizeof(float) * DATA_SIZE, results, 0, NULL, NULL));

        err = 0;
        for (i = 0; i < DATA_SIZE; ++i) {
                printf("%f -> %f\n", data[i], results[i]);
                if (results[i] != round(results[i])) {
                        fprintf(stderr, "%f -> %f != %f\n", data[i], results[i],
round(results[i]));
                        ++err;
                }
        }
        printf("%d errors\n", err);

    clReleaseMemObject(input);
    clReleaseMemObject(output);
    clReleaseProgram(program);
    clReleaseKernel(kernel);
    clReleaseCommandQueue(commands);
    clReleaseContext(context);

    return 0;
}





On 11.04.2014 17:47, CRV§ADER//KY wrote:
> Could you post the c++ code as well? This doesn't make much sense...
> 
> On 11 Apr 2014 16:21, "Kai Ruhl" <[email protected]
> <mailto:[email protected]>> wrote:
> 
>     I have a *very* specific bug in pyopencl: when I use round(88.9f) with
>     pyopencl from git (2014-04-08) on an Nvidia Ti780, it gives me 88.0
>     instead of 89.0.
> 
>     - If I change the gfx card to my older GTX590, it will work.
>     - If I round doubles instead of floats, it works on both cards.
>     - If I write a test in C++, it works on both cards.
> 
>     Can anyone with a Ti780 confirm this bug?
> 
> 
> 
>     Side info: I have compiled pyopencl with ENABLE_GL=True and
>     CL_PRETEND_VERSION="1.1" due to missing clCreateSubDevices in nvidia
>     OpenCL.
>     OS is Ubuntu 14.04.
> 
>     My test is:
> 
>     import pyopencl as cl
>     import numpy as np
>     ctx = cl.create_some_context()
>     que = cl.CommandQueue(ctx)
>     cl_prg = cl.Program(ctx, "__kernel void doit(__global float *a) { a[0] =
>     round(88.9f); }").build()
>     a = np.zeros(1, dtype=np.float32); A = cl.Buffer(ctx,
>     cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, hostbuf=a)
>     cl_prg.doit(que, [1], None, A); que.finish()
>     cl.enqueue_copy(que, a, A)
>     print a[0]
> 
> 
> 
>     _______________________________________________
>     PyOpenCL mailing list
>     [email protected] <mailto:[email protected]>
>     http://lists.tiker.net/listinfo/pyopencl
> 

_______________________________________________
PyOpenCL mailing list
[email protected]
http://lists.tiker.net/listinfo/pyopencl

Reply via email to