Hi, I'm modifying Theano to allow it to use the code generated by pycuda. While doing so I needed 2 modifications to pycuda.
1) elemwise1.patch: This modification allows passing the block and grid to the function generated by ElementwiseKernel. If they are not provided, it continues as before. 2) tools1.patch: recognize the npy_[u]int[8,16,32,64] and npy_float[32,64] data types. Do you have any questions/comments about those patches? I don't use the gpuarray class that is passed to the pycuda functions. I modified mine to mimic its interface. While doing so, I saw that you use the attributes size and mem_size, which seem to always have the same value. Is that true? If so, why both? thanks Frédéric Bastien
--- /home/bastienf/repos/pycuda.git/pycuda/elementwise.py 2010-06-14 13:32:19.000000000 -0400 +++ elementwise.py 2010-07-15 12:25:13.000000000 -0400 @@ -114,7 +114,7 @@ "ElementwiseKernel can only be used with functions that have at least one " \ "vector argument" - def __call__(self, *args): + def __call__(self, *args, **kwargs): vectors = [] invocation_args = [] @@ -127,8 +127,17 @@ repr_vec = vectors[0] invocation_args.append(repr_vec.mem_size) - self.func.set_block_shape(*repr_vec._block) - self.func.prepared_call(repr_vec._grid, *invocation_args) + _block = kwargs.get('block') + if _block: + self.func.set_block_shape(*_block) + else: + self.func.set_block_shape(*repr_vec._block) + + _grid = kwargs.get('grid') + if _grid: + self.func.prepared_call(_grid, *invocation_args) + else: + self.func.prepared_call(repr_vec._grid, *invocation_args)
--- /home/bastienf/repos/pycuda.git/pycuda/tools.py 2010-06-14 13:32:19.000000000 -0400 +++ tools.py 2010-07-15 10:25:10.000000000 -0400 @@ -432,12 +432,14 @@ tp = c_arg[:decl_match.start()] tp = " ".join(tp.split()) - if tp == "float": dtype = numpy.float32 - elif tp == "double": dtype = numpy.float64 + if tp in ["float", "npy_float32"]: dtype = numpy.float32 + elif tp in ["double", "npy_float64"]: dtype = numpy.float64 elif tp == "pycuda::complex<float>": dtype = numpy.complex64 elif tp == "pycuda::complex<double>": dtype = numpy.complex128 - elif tp in ["int", "signed int"]: dtype = numpy.int32 - elif tp in ["unsigned", "unsigned int"]: dtype = numpy.uint32 + elif tp in ["int", "signed int", "npy_int32"]: dtype = numpy.int32 + elif tp in ["unsigned", "unsigned int", "npy_uint32", "npy_ucs4"]: dtype = numpy.uint32 + elif tp in ["npy_int64"]: dtype = numpy.int64 + elif tp in ["npy_uint64"]: dtype = numpy.uint64 elif tp in ["long", "long int"]: if platform_bits() == 64: dtype = numpy.int64 @@ -448,12 +450,14 @@ dtype = numpy.uint64 else: dtype = numpy.uint32 - elif tp in ["short", "short int"]: dtype = numpy.int16 - elif tp in ["unsigned short", "unsigned short int"]: dtype = numpy.uint16 - elif tp in ["char"]: dtype = numpy.int8 - elif tp in ["unsigned char"]: dtype = numpy.uint8 + elif tp in ["short", "short int", "npy_int16"]: dtype = numpy.int16 + elif tp in ["unsigned short", "unsigned short int", "npy_uint16"]: dtype = numpy.uint16 + elif tp in ["char", "npy_int8"]: dtype = numpy.int8 + elif tp in ["unsigned char", "npy_uint8"]: dtype = numpy.uint8 elif tp in ["bool"]: dtype = numpy.bool - else: raise ValueError, "unknown type '%s'" % tp + else: + import pdb;pdb.set_trace() + raise ValueError, "unknown type '%s'" % tp return arg_class(dtype, name)
_______________________________________________ PyCUDA mailing list PyCUDA@tiker.net http://lists.tiker.net/listinfo/pycuda