Hi, I am having trouble with the basic usage of pyopencl.  What is the
GLOBAL_SIZE parameter to the kernel calls?  I can't seem to find a
description of this in the documentation.  Perhaps it would help if anybody
knows where a few example programs could be found, outside of the three that
are included with the source?
Here's my failed attempt to multiply two square matrices:

import sys
import pyopencl as cl
from numpy import *

N = int(sys.argv[1])
precision = int(sys.argv[2])

m1 = random.rand(N,N).astype('float%s' % precision)
m2 = random.rand(N,N).astype('float%s' % precision)

print "m1:\n",m1
print "m2:\n",m2

ctx = cl.create_context_from_type(cl.device_type.ALL)
queue = cl.CommandQueue(ctx)

mf = cl.mem_flags
a_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=m1)
b_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=m2)
dest_buf = cl.Buffer(ctx, mf.WRITE_ONLY, m2.nbytes)

prg = cl.Program(ctx, """



__kernel void multwo(__global float *a,



                     __global float *b,



                     __global float *c)



{











  int row = get_global_id(1);



  int col = get_global_id(0);
  float sum = 0.0f;



  for (int i = 0; i < %d; i++) {



     sum += a[row*%d+i] * b[i*%d+col];
  }



  c[row*%d+col] = sum;



}



"""%(N,N,N,N)).build()

prg.multwo(queue, (N,N), a_buf, b_buf, dest_buf)

product = zeros([N,N],dtype='float%s' % precision)
cl.enqueue_read_buffer(queue, dest_buf, product).wait()

print product


I get a bogus result:

[[  1.21239019e+002   9.03297802e-315   0.00000000e+000]
 [  0.00000000e+000   0.00000000e+000   0.00000000e+000]
 [  0.00000000e+000   0.00000000e+000   0.00000000e+000]]
_______________________________________________
PyOpenCL mailing list
[email protected]
http://tiker.net/mailman/listinfo/pyopencl_tiker.net

Reply via email to