Hi.
After fiddling with my (rudiments of a) particle simulation further, I ran
into some interesting bugs. Here's what the code is supposed to do:
* Set up an array named position of shape(number of particles, 3)
* Set up an array result with (smallest number >= number of particles that
is a multiple of the work group size, 3)
* Run the kernel which, for every global_id < number of particles
calculates the average of the position of every other particle and writes
it into the result buffer, and the zero vector for every other global_id
* Copy the result buffer into the result array
* Print the result array
What it actually *does* do is segfaulting when trying to print(result), or
printing an array mostly full of nan followed by "*** Error in `python':
free(): invalid next size (normal): 0x0000000001c6a000 ***" and a memory
map (though I think I might have just fixed that by .wait()ing for the
event returned by the kernel invocation; I assume that that means I tried
to print() the array before kernel was finished, meaning that I copied an
empty buffer into the array?).
Again, thanks in advance and for your patience.
Liebe Grüße,
Sebastian Hoffmann
#!/usr/bin/env python
"""Runs a particle simulation using OpenCL."""
import pyopencl as cl
import numpy as np
import random
from math import sqrt
import sys
# Functions for creating the initial particle data
def initial_position():
"""Returns a random equidistributed position on a radius 1.0 sphere."""
pos_x = random.gauss(0, 1)
pos_y = random.gauss(0, 1)
pos_z = random.gauss(0, 1)
distance = sqrt(pos_x**2 + pos_y**2 + pos_z**2)
return [pos_x/distance, pos_y/distance, pos_z/distance]
# Main program
def simulate(num_particles = 10):
"""Generate a set of particles and run it through the simulation."""
# Work group / items parameters
print("Calculating parameters...")
work_group_size = 256
# work_items = "The smallest multiple of the work group size that is
# greater than the actually required number of work items"
work_group_number = int(float(num_particles) / float(work_group_size))
if work_group_number*work_group_size < num_particles:
work_group_number += 1
work_items = work_group_number * work_group_size
extra_items = work_items - num_particles
print(" %d work items (%d groups of size %d)" % (work_items,
work_group_number,
work_group_size))
print(" %d superfluous work items" % (extra_items))
# Setting up particles
sys.stdout.write("Generating initial particles... ")
position = np.empty((num_particles, 3)).astype(np.float32)
results = np.empty((work_items, 3)).astype(np.float32)
for i in xrange(0, num_particles):
position[i] = initial_position()
if (num_particles-i) % 100000 == 0:
sys.stdout.write(str((num_particles-i)/100000) + "... ")
sys.stdout.write("Done!\n")
# OpenCL basics
print("Setting up context and queue...")
context = cl.create_some_context()
queue = cl.CommandQueue(context)
# OpenCL buffers
print("Creating buffers...")
memflag = cl.mem_flags
position_buf = cl.Buffer(context,
memflag.READ_ONLY | memflag.COPY_HOST_PTR,
hostbuf = position)
result_buf = cl.Buffer(context,
memflag.WRITE_ONLY,
results.nbytes)
# The actual invocation
print("Compiling OpenCL C code...")
program = cl.Program(context, """
__kernel void simulate(int num_particles,
__global const float3 *position,
__global float3 *result) {
int wi_id = get_global_id(0);
// The accumulator for this particles results
float3 acc;
// acc.xyz = (float3)(0.0, 0.0, 0.0);
acc.s0 = 0.0;
acc.s1 = 0.0;
acc.s2 = 0.0;
// Only perform calculations if this actually is a valid particle
if (wi_id < num_particles) {
for (int i = 0; i < num_particles; i++) {
// Prevent a particle from interacting with itself
if (i != wi_id) {
acc += position[i];
}
}
}
result[wi_id] = acc / (num_particles - 1);
}
""").build()
print("Running simulation...")
execution = program.simulate(queue,
(work_items,), (work_group_size,),
np.int32(work_items),
position_buf,
result_buf)
print(type(execution))
execution.wait()
# Copying the results back into the host
print("Copying result...")
# results = np.empty_like(position)
print(" (target array created)")
cl.enqueue_copy(queue, results, result_buf)
print("Done!")
print(results)
if __name__ == '__main__':
if len(sys.argv) == 1:
print("Defaulting to 25600 particles")
simulate(25600)
else:
num_particles = int(sys.argv[1])
print("Simulating %d particles" % num_particles)
simulate (num_particles)
_______________________________________________
PyOpenCL mailing list
[email protected]
http://lists.tiker.net/listinfo/pyopencl