[PyOpenCL] Segfault after copying to host

Sebastian Hoffmann Tue, 11 Feb 2014 06:57:35 -0800

Hi.

After fiddling with my (rudiments of a) particle simulation further, I ran
into some interesting bugs. Here's what the code is supposed to do:
* Set up an array named position of shape(number of particles, 3)
* Set up an array result with (smallest number >= number of particles that
is a multiple of the work group size, 3)
* Run the kernel which, for every global_id < number of particles
calculates the average of the position of every other particle and writes
it into the result buffer, and the zero vector for every other global_id
* Copy the result buffer into the result array
* Print the result array


What it actually *does* do is segfaulting when trying to print(result), or
printing an array mostly full of nan followed by "*** Error in `python':
free(): invalid next size (normal): 0x0000000001c6a000 ***" and a memory
map (though I think I might have just fixed that by .wait()ing for the
event returned by the kernel invocation; I assume that that means I tried
to print() the array before kernel was finished, meaning that I copied an
empty buffer into the array?).

Again, thanks in advance and for your patience.

Liebe Grüße,
Sebastian Hoffmann

#!/usr/bin/env python
"""Runs a particle simulation using OpenCL."""

import pyopencl as cl
import numpy as np
import random
from math import sqrt
import sys

# Functions for creating the initial particle data
def initial_position():
    """Returns a random equidistributed position on a radius 1.0 sphere."""
    pos_x = random.gauss(0, 1)
    pos_y = random.gauss(0, 1)
    pos_z = random.gauss(0, 1)
    distance = sqrt(pos_x**2 + pos_y**2 + pos_z**2)
    return [pos_x/distance, pos_y/distance, pos_z/distance]

# Main program
def simulate(num_particles = 10):
    """Generate a set of particles and run it through the simulation."""
    # Work group / items parameters
    print("Calculating parameters...")
    work_group_size = 256
    # work_items = "The smallest multiple of the work group size that is
    #               greater than the actually required number of work items"
    work_group_number = int(float(num_particles) / float(work_group_size))
    if work_group_number*work_group_size < num_particles:
        work_group_number += 1
    work_items = work_group_number * work_group_size
    extra_items = work_items - num_particles
    print("  %d work items (%d groups of size %d)" % (work_items,
                                                      work_group_number,
                                                      work_group_size))
    print("  %d superfluous work items" % (extra_items))
    # Setting up particles
    sys.stdout.write("Generating initial particles... ")
    position = np.empty((num_particles, 3)).astype(np.float32)
    results = np.empty((work_items, 3)).astype(np.float32)
    for i in xrange(0, num_particles):
        position[i] = initial_position()
        if (num_particles-i) % 100000 == 0:
            sys.stdout.write(str((num_particles-i)/100000) + "... ")
    sys.stdout.write("Done!\n")
    # OpenCL basics
    print("Setting up context and queue...")
    context = cl.create_some_context()
    queue = cl.CommandQueue(context)
    # OpenCL buffers
    print("Creating buffers...")
    memflag = cl.mem_flags
    position_buf = cl.Buffer(context,
                             memflag.READ_ONLY | memflag.COPY_HOST_PTR,
                             hostbuf = position)
    result_buf = cl.Buffer(context,
                           memflag.WRITE_ONLY,
                           results.nbytes)
    # The actual invocation
    print("Compiling OpenCL C code...")
    program = cl.Program(context, """
        __kernel void simulate(int num_particles,
                               __global const float3 *position,
                               __global float3 *result) {
          int wi_id = get_global_id(0);
          // The accumulator for this particles results
          float3 acc;
          // acc.xyz = (float3)(0.0, 0.0, 0.0);
          acc.s0 = 0.0;
          acc.s1 = 0.0;
          acc.s2 = 0.0;
          // Only perform calculations if this actually is a valid particle
          if (wi_id < num_particles) {
            for (int i = 0; i < num_particles; i++) {
              // Prevent a particle from interacting with itself
              if (i != wi_id) {
                acc += position[i];
              }
            }
          }
          result[wi_id] = acc / (num_particles - 1);
        }
        """).build()
    print("Running simulation...")
    execution = program.simulate(queue,
                                 (work_items,), (work_group_size,),
                                 np.int32(work_items),
                                 position_buf,
                                 result_buf)
    print(type(execution))
    execution.wait()
    # Copying the results back into the host
    print("Copying result...")
    # results = np.empty_like(position)
    print("  (target array created)")
    cl.enqueue_copy(queue, results, result_buf)
    print("Done!")
    print(results)

if __name__ == '__main__':
    if len(sys.argv) == 1:
        print("Defaulting to 25600 particles")
        simulate(25600)
    else:
        num_particles = int(sys.argv[1])
        print("Simulating %d particles" % num_particles)
        simulate (num_particles)

_______________________________________________
PyOpenCL mailing list
[email protected]
http://lists.tiker.net/listinfo/pyopencl

[PyOpenCL] Segfault after copying to host

Reply via email to