Dnia 2013-01-20, nie o godzinie 21:40 -0500, Andreas Kloeckner pisze: > Andreas Kloeckner <[email protected]> writes: > > I haven't yet figured out what's behind these "out of resources" errors, > > but I'll keep poking. I'd be glad to receive clues. On the whole, I find > > these results pretty encouraging, and I'd like to get 2013.1 out as soon > > as I can (before I get a chance to go back and break stuff again). > > I know now what they mean by "out of resources"--they mean "mem object > allocation failure" (i.e. deferred failure to allocate global memory): > > http://devgurus.amd.com/thread/160271 > > I've fixed a few more issues that I ran into as I was testing > today. Cypress appears happy with test_algorithms.py now, while > Devastator is still encountering some odd issue in the segmented scan. > > I'd very much like to hear more test results from other GPUs for the > current code in git. >
Loveland:
There are still some errors in scan, but the one I was fighting
does not appear on 13.1 drivers. I'll try to test and play with it
tomorrow.
test_algorithm.py ................s.............F.F.F.....F...F.
=================================== FAILURES
===================================
test_copy_if[ctx_factory=<context factory for <pyopencl.Device
'Loveland' on 'AMD Accelerated Parallel Processing' at 0x231be20>>]
ctx_factory = <pyopencl.tools.ContextFactory instance at 0x1f8cc20>
@pytools.test.mark_test.opencl
def test_copy_if(ctx_factory):
from pytest import importorskip
importorskip("mako")
context = ctx_factory()
queue = cl.CommandQueue(context)
from pyopencl.clrandom import rand as clrand
for n in scan_test_counts:
a_dev = clrand(queue, (n,), dtype=np.int32, a=0, b=1000)
a = a_dev.get()
from pyopencl.algorithm import copy_if
crit = a_dev.dtype.type(300)
selected = a[a>crit]
selected_dev, count_dev = copy_if(a_dev, "ary[i] > myval",
[("myval", crit)])
> assert (selected_dev.get()[:count_dev.get()] ==
selected).all()
E AttributeError: 'bool' object has no attribute 'all'
test_algorithm.py:552: AttributeError
test_partition[ctx_factory=<context factory for <pyopencl.Device
'Loveland' on 'AMD Accelerated Parallel Processing' at 0x231be20>>]
ctx_factory = <pyopencl.tools.ContextFactory instance at 0x1f8c638>
@pytools.test.mark_test.opencl
def test_partition(ctx_factory):
context = ctx_factory()
queue = cl.CommandQueue(context)
from pyopencl.clrandom import rand as clrand
for n in scan_test_counts:
print("part", n)
a_dev = clrand(queue, (n,), dtype=np.int32, a=0, b=1000)
a = a_dev.get()
crit = a_dev.dtype.type(300)
true_host = a[a>crit]
false_host = a[a<=crit]
from pyopencl.algorithm import partition
true_dev, false_dev, count_true_dev = partition(a_dev,
"ary[i] > myval", [("myval", crit)])
count_true_dev = count_true_dev.get()
> assert (true_dev.get()[:count_true_dev] == true_host).all()
E AttributeError: 'bool' object has no attribute 'all'
test_algorithm.py:577: AttributeError
------------------------------- Captured stdout
--------------------------------
('part', 10)
('part', 255)
('part', 256)
('part', 257)
('part', 1019)
('part', 1024)
('part', 1029)
('part', 4091)
('part', 4096)
('part', 4101)
('part', 786432)
('part', 786437)
test_unique[ctx_factory=<context factory for <pyopencl.Device
'Loveland' on 'AMD Accelerated Parallel Processing' at 0x231be20>>]
ctx_factory = <pyopencl.tools.ContextFactory instance at 0x224fb48>
@pytools.test.mark_test.opencl
def test_unique(ctx_factory):
context = ctx_factory()
queue = cl.CommandQueue(context)
from pyopencl.clrandom import rand as clrand
for n in scan_test_counts:
a_dev = clrand(queue, (n,), dtype=np.int32, a=0, b=1000)
a = a_dev.get()
a = np.sort(a)
a_dev = cl_array.to_device(queue, a)
a_unique_host = np.unique(a)
from pyopencl.algorithm import unique
a_unique_dev, count_unique_dev = unique(a_dev)
count_unique_dev = count_unique_dev.get()
> assert (a_unique_dev.get()[:count_unique_dev] ==
a_unique_host).all()
E AttributeError: 'bool' object has no attribute 'all'
test_algorithm.py:599: AttributeError
test_sort[ctx_factory=<context factory for <pyopencl.Device 'Loveland'
on 'AMD Accelerated Parallel Processing' at 0x231be20>>]
ctx_factory = <pyopencl.tools.ContextFactory instance at 0x1f905f0>
@pytools.test.mark_test.opencl
def test_sort(ctx_factory):
from pytest import importorskip
importorskip("mako")
context = ctx_factory()
queue = cl.CommandQueue(context)
dtype = np.int32
from pyopencl.algorithm import RadixSort
sort = RadixSort(context, "int *ary", key_expr="ary[i]",
sort_arg_names=["ary"])
from pyopencl.clrandom import RanluxGenerator
rng = RanluxGenerator(queue, seed=15)
from time import time
# intermediate arrays for largest size cause out-of-memory on
low-end GPUs
for n in scan_test_counts[:-1]:
print(n)
print(" rng")
a_dev = rng.uniform(queue, (n,), dtype=dtype, a=0, b=2**16)
a = a_dev.get()
dev_start = time()
print(" device")
a_dev_sorted, = sort(a_dev, key_bits=16)
queue.finish()
dev_end = time()
print(" numpy")
a_sorted = np.sort(a)
numpy_end = time()
numpy_elapsed = numpy_end-dev_end
dev_elapsed = dev_end-dev_start
print (" dev: %.2f MKeys/s numpy: %.2f MKeys/s ratio:
%.2fx" % (
1e-6*n/dev_elapsed, 1e-6*n/numpy_elapsed,
numpy_elapsed/dev_elapsed))
> assert (a_dev_sorted.get() == a_sorted).all()
E assert <built-in method all of numpy.ndarray object at
0x7fa3e01e1780>()
E + where <built-in method all of numpy.ndarray object at
0x7fa3e01e1780> = array([ 36, 47, 56, ..., 65467, 65521, 65535],
dtype=int32) == array([ 22, 32, 36, ..., 65419, 65423, 65529],
dtype=int32).all
E + where array([ 36, 47, 56, ..., 65467, 65521,
65535], dtype=int32) = <bound method Array.get of array([ 36, 47,
56, ..., 65467, 65521, 65535], dtype=int32)>()
E + where <bound method Array.get of array([ 36,
47, 56, ..., 65467, 65521, 65535], dtype=int32)> = array([ 36,
47, 56, ..., 65467, 65521, 65535], dtype=int32).get
test_algorithm.py:773: AssertionError
------------------------------- Captured stdout
--------------------------------
10
rng
device
numpy
dev: 0.00 MKeys/s numpy: 0.03 MKeys/s ratio: 0.00x
255
rng
device
numpy
dev: 0.02 MKeys/s numpy: 1.00 MKeys/s ratio: 0.02x
256
rng
device
numpy
dev: 0.03 MKeys/s numpy: 0.92 MKeys/s ratio: 0.03x
257
rng
device
numpy
dev: 0.03 MKeys/s numpy: 0.81 MKeys/s ratio: 0.03x
1019
rng
device
numpy
dev: 0.10 MKeys/s numpy: 2.82 MKeys/s ratio: 0.04x
1024
rng
device
numpy
dev: 0.11 MKeys/s numpy: 2.54 MKeys/s ratio: 0.04x
1029
rng
device
numpy
dev: 0.10 MKeys/s numpy: 2.63 MKeys/s ratio: 0.04x
4091
rng
device
numpy
dev: 0.25 MKeys/s numpy: 4.61 MKeys/s ratio: 0.05x
4096
rng
device
numpy
dev: 0.32 MKeys/s numpy: 3.22 MKeys/s ratio: 0.10x
4101
rng
device
numpy
dev: 0.31 MKeys/s numpy: 3.65 MKeys/s ratio: 0.09x
test_key_value_sorter[ctx_factory=<context factory for <pyopencl.Device
'Loveland' on 'AMD Accelerated Parallel Processing' at 0x231be20>>]
ctx_factory = <pyopencl.tools.ContextFactory instance at 0x1f853b0>
@pytools.test.mark_test.opencl
def test_key_value_sorter(ctx_factory):
from pytest import importorskip
importorskip("mako")
context = ctx_factory()
queue = cl.CommandQueue(context)
n = 10**5
nkeys = 2000
from pyopencl.clrandom import rand as clrand
keys = clrand(queue, n, np.int32, b=nkeys)
values = clrand(queue, n, np.int32, b=n).astype(np.int64)
assert np.max(keys.get()) < nkeys
from pyopencl.algorithm import KeyValueSorter
kvs = KeyValueSorter(context)
starts, lists = kvs(queue, keys, values, nkeys,
starts_dtype=np.int32)
starts = starts.get()
lists = lists.get()
mydict = dict()
for k, v in zip(keys.get(), values.get()):
mydict.setdefault(k, []).append(v)
for i in range(nkeys):
start, end = starts[i:i+2]
> assert sorted(mydict[i]) == sorted(lists[start:end])
E assert [4300, 7248, ...3, 10861, ...] == []
E Left contains more items, first extra item: 4300
test_algorithm.py:830: AssertionError
=============== 5 failed, 40 passed, 1 skipped in 290.93 seconds
--
Tomasz Rybak GPG/PGP key ID: 2AD5 9860
Fingerprint A481 824E 7DD3 9C0E C40A 488E C654 FB33 2AD5 9860
http://member.acm.org/~tomaszrybak
signature.asc
Description: This is a digitally signed message part
_______________________________________________ PyOpenCL mailing list [email protected] http://lists.tiker.net/listinfo/pyopencl
