Hi all,

I modified the example
http://documen.tician.de/pycuda/tutorial.html#advanced-topics by removing
the '__padding'  from the structure definition and got incorrect result.
The kernel is launched with 2 blocks and one thread in each block.

Each thread prints the 'len' field in structure, which should be 3 for
block 0 and 2 for block 1. However, the result I got is:

block 1: 2097664
block 0: 3

No such problem if I write the following program using C.  Any help is
appreciated.

Yifei




#include <stdio.h>
struct Vec {
       * int len;*

float* data;
};


__global__ void test(Vec *a) {
        Vec v = a[blockIdx.x];
        printf("block %d: %d\n", blockIdx.x, v.len);
}

-------------------------------------------------- end of kernel
---------------------------------------------------------------

import numpy
import pycuda.autoinit
import pycuda.driver as cuda
from pycuda.compiler import SourceModule

class DoubleOpStruct:
    # mem_size = 8 + numpy.intp(0).nbytes
    *mem_size = 4 + numpy.intp(0).nbytes*
    def __init__(self, array, struct_arr_ptr):
        data = cuda.to_device(array)
        cuda.memcpy_htod(int(struct_arr_ptr), numpy.int32(array.size))
        #cuda.memcpy_htod(int(struct_arr_ptr) + 8, numpy.intp(int(data)))
        *cuda.memcpy_htod(int(struct_arr_ptr) + 4, numpy.intp(int(data)))*



struct_arr = cuda.mem_alloc(2 * DoubleOpStruct.mem_size)
do2_ptr = int(struct_arr) + DoubleOpStruct.mem_size

array1 = DoubleOpStruct(numpy.array([1, 2, 3], dtype=numpy.float32),
                        struct_arr)
array2 = DoubleOpStruct(numpy.array([0, 4], dtype=numpy.float32),
                        do2_ptr)

with open('test.cu', 'r') as f:
        src  = f.read()

mod = SourceModule(src)
func = mod.get_function("test")
func(struct_arr, block = (1, 1, 1), grid=(2, 1))
_______________________________________________
PyCUDA mailing list
PyCUDA@tiker.net
http://lists.tiker.net/listinfo/pycuda

Reply via email to