Dnia 2013-01-20, nie o godzinie 21:40 -0500, Andreas Kloeckner pisze:
> Andreas Kloeckner <[email protected]> writes:
> > I haven't yet figured out what's behind these "out of resources" errors,
> > but I'll keep poking. I'd be glad to receive clues. On the whole, I find
> > these results pretty encouraging, and I'd like to get 2013.1 out as soon
> > as I can (before I get a chance to go back and break stuff again).
> 
> I know now what they mean by "out of resources"--they mean "mem object
> allocation failure" (i.e. deferred failure to allocate global memory):
> 
> http://devgurus.amd.com/thread/160271
> 
> I've fixed a few more issues that I ran into as I was testing
> today. Cypress appears happy with test_algorithms.py now, while
> Devastator is still encountering some odd issue in the segmented scan.
> 
> I'd very much like to hear more test results from other GPUs for the
> current code in git.
> 

Loveland:
There are still some errors in scan, but the one I was fighting
does not appear on 13.1 drivers. I'll try to test and play with it
tomorrow.

test_algorithm.py ................s.............F.F.F.....F...F.

=================================== FAILURES
===================================
 test_copy_if[ctx_factory=<context factory for <pyopencl.Device
'Loveland' on 'AMD Accelerated Parallel Processing' at 0x231be20>>] 

ctx_factory = <pyopencl.tools.ContextFactory instance at 0x1f8cc20>

    @pytools.test.mark_test.opencl
    def test_copy_if(ctx_factory):
        from pytest import importorskip
        importorskip("mako")
    
        context = ctx_factory()
        queue = cl.CommandQueue(context)
    
        from pyopencl.clrandom import rand as clrand
        for n in scan_test_counts:
            a_dev = clrand(queue, (n,), dtype=np.int32, a=0, b=1000)
            a = a_dev.get()
    
            from pyopencl.algorithm import copy_if
    
            crit = a_dev.dtype.type(300)
            selected = a[a>crit]
            selected_dev, count_dev = copy_if(a_dev, "ary[i] > myval",
[("myval", crit)])
    
>           assert (selected_dev.get()[:count_dev.get()] ==
selected).all()
E           AttributeError: 'bool' object has no attribute 'all'

test_algorithm.py:552: AttributeError
 test_partition[ctx_factory=<context factory for <pyopencl.Device
'Loveland' on 'AMD Accelerated Parallel Processing' at 0x231be20>>] 

ctx_factory = <pyopencl.tools.ContextFactory instance at 0x1f8c638>

    @pytools.test.mark_test.opencl
    def test_partition(ctx_factory):
        context = ctx_factory()
        queue = cl.CommandQueue(context)
    
        from pyopencl.clrandom import rand as clrand
        for n in scan_test_counts:
            print("part", n)
    
            a_dev = clrand(queue, (n,), dtype=np.int32, a=0, b=1000)
            a = a_dev.get()
    
            crit = a_dev.dtype.type(300)
            true_host = a[a>crit]
            false_host = a[a<=crit]
    
            from pyopencl.algorithm import partition
            true_dev, false_dev, count_true_dev = partition(a_dev,
"ary[i] > myval", [("myval", crit)])
    
            count_true_dev = count_true_dev.get()
    
>           assert (true_dev.get()[:count_true_dev] == true_host).all()
E           AttributeError: 'bool' object has no attribute 'all'

test_algorithm.py:577: AttributeError
------------------------------- Captured stdout
--------------------------------
('part', 10)
('part', 255)
('part', 256)
('part', 257)
('part', 1019)
('part', 1024)
('part', 1029)
('part', 4091)
('part', 4096)
('part', 4101)
('part', 786432)
('part', 786437)
 test_unique[ctx_factory=<context factory for <pyopencl.Device
'Loveland' on 'AMD Accelerated Parallel Processing' at 0x231be20>>] 

ctx_factory = <pyopencl.tools.ContextFactory instance at 0x224fb48>

    @pytools.test.mark_test.opencl
    def test_unique(ctx_factory):
        context = ctx_factory()
        queue = cl.CommandQueue(context)
    
        from pyopencl.clrandom import rand as clrand
        for n in scan_test_counts:
            a_dev = clrand(queue, (n,), dtype=np.int32, a=0, b=1000)
            a = a_dev.get()
            a = np.sort(a)
            a_dev = cl_array.to_device(queue, a)
    
            a_unique_host = np.unique(a)
    
            from pyopencl.algorithm import unique
            a_unique_dev, count_unique_dev = unique(a_dev)
    
            count_unique_dev = count_unique_dev.get()
    
>           assert (a_unique_dev.get()[:count_unique_dev] ==
a_unique_host).all()
E           AttributeError: 'bool' object has no attribute 'all'

test_algorithm.py:599: AttributeError
 test_sort[ctx_factory=<context factory for <pyopencl.Device 'Loveland'
on 'AMD Accelerated Parallel Processing' at 0x231be20>>] 

ctx_factory = <pyopencl.tools.ContextFactory instance at 0x1f905f0>

    @pytools.test.mark_test.opencl
    def test_sort(ctx_factory):
        from pytest import importorskip
        importorskip("mako")
    
        context = ctx_factory()
        queue = cl.CommandQueue(context)
    
        dtype = np.int32
    
        from pyopencl.algorithm import RadixSort
        sort = RadixSort(context, "int *ary", key_expr="ary[i]",
                sort_arg_names=["ary"])
    
        from pyopencl.clrandom import RanluxGenerator
        rng = RanluxGenerator(queue, seed=15)
    
        from time import time
    
        # intermediate arrays for largest size cause out-of-memory on
low-end GPUs
        for n in scan_test_counts[:-1]:
            print(n)
    
            print("  rng")
            a_dev = rng.uniform(queue, (n,), dtype=dtype, a=0, b=2**16)
            a = a_dev.get()
    
            dev_start = time()
            print("  device")
            a_dev_sorted, = sort(a_dev, key_bits=16)
            queue.finish()
            dev_end = time()
            print("  numpy")
            a_sorted = np.sort(a)
            numpy_end = time()
    
            numpy_elapsed = numpy_end-dev_end
            dev_elapsed = dev_end-dev_start
            print ("  dev: %.2f MKeys/s numpy: %.2f MKeys/s ratio:
%.2fx" % (
                    1e-6*n/dev_elapsed, 1e-6*n/numpy_elapsed,
numpy_elapsed/dev_elapsed))
>           assert (a_dev_sorted.get() == a_sorted).all()
E           assert <built-in method all of numpy.ndarray object at
0x7fa3e01e1780>()
E            +  where <built-in method all of numpy.ndarray object at
0x7fa3e01e1780> = array([   36,    47,    56, ..., 65467, 65521, 65535],
dtype=int32) == array([   22,    32,    36, ..., 65419, 65423, 65529],
dtype=int32).all
E            +    where array([   36,    47,    56, ..., 65467, 65521,
65535], dtype=int32) = <bound method Array.get of array([   36,    47,
56, ..., 65467, 65521, 65535], dtype=int32)>()
E            +      where <bound method Array.get of array([   36,
47,    56, ..., 65467, 65521, 65535], dtype=int32)> = array([   36,
47,    56, ..., 65467, 65521, 65535], dtype=int32).get

test_algorithm.py:773: AssertionError
------------------------------- Captured stdout
--------------------------------
10
  rng
  device
  numpy
  dev: 0.00 MKeys/s numpy: 0.03 MKeys/s ratio: 0.00x
255
  rng
  device
  numpy
  dev: 0.02 MKeys/s numpy: 1.00 MKeys/s ratio: 0.02x
256
  rng
  device
  numpy
  dev: 0.03 MKeys/s numpy: 0.92 MKeys/s ratio: 0.03x
257
  rng
  device
  numpy
  dev: 0.03 MKeys/s numpy: 0.81 MKeys/s ratio: 0.03x
1019
  rng
  device
  numpy
  dev: 0.10 MKeys/s numpy: 2.82 MKeys/s ratio: 0.04x
1024
  rng
  device
  numpy
  dev: 0.11 MKeys/s numpy: 2.54 MKeys/s ratio: 0.04x
1029
  rng
  device
  numpy
  dev: 0.10 MKeys/s numpy: 2.63 MKeys/s ratio: 0.04x
4091
  rng
  device
  numpy
  dev: 0.25 MKeys/s numpy: 4.61 MKeys/s ratio: 0.05x
4096
  rng
  device
  numpy
  dev: 0.32 MKeys/s numpy: 3.22 MKeys/s ratio: 0.10x
4101
  rng
  device
  numpy
  dev: 0.31 MKeys/s numpy: 3.65 MKeys/s ratio: 0.09x
 test_key_value_sorter[ctx_factory=<context factory for <pyopencl.Device
'Loveland' on 'AMD Accelerated Parallel Processing' at 0x231be20>>] 

ctx_factory = <pyopencl.tools.ContextFactory instance at 0x1f853b0>

    @pytools.test.mark_test.opencl
    def test_key_value_sorter(ctx_factory):
        from pytest import importorskip
        importorskip("mako")
    
        context = ctx_factory()
        queue = cl.CommandQueue(context)
    
        n = 10**5
        nkeys = 2000
        from pyopencl.clrandom import rand as clrand
        keys = clrand(queue, n, np.int32, b=nkeys)
        values = clrand(queue, n, np.int32, b=n).astype(np.int64)
    
        assert np.max(keys.get()) < nkeys
    
        from pyopencl.algorithm import KeyValueSorter
        kvs = KeyValueSorter(context)
        starts, lists = kvs(queue, keys, values, nkeys,
starts_dtype=np.int32)
    
        starts = starts.get()
        lists = lists.get()
    
        mydict = dict()
        for k, v in zip(keys.get(), values.get()):
            mydict.setdefault(k, []).append(v)
    
        for i in range(nkeys):
            start, end = starts[i:i+2]
>           assert sorted(mydict[i]) == sorted(lists[start:end])
E           assert [4300, 7248, ...3, 10861, ...] == []
E             Left contains more items, first extra item: 4300

test_algorithm.py:830: AssertionError
=============== 5 failed, 40 passed, 1 skipped in 290.93 seconds


-- 
Tomasz Rybak  GPG/PGP key ID: 2AD5 9860
Fingerprint A481 824E 7DD3 9C0E C40A  488E C654 FB33 2AD5 9860
http://member.acm.org/~tomaszrybak

Attachment: signature.asc
Description: This is a digitally signed message part

_______________________________________________
PyOpenCL mailing list
[email protected]
http://lists.tiker.net/listinfo/pyopencl

Reply via email to