Dnia 2012-09-27, czw o godzinie 19:27 +0200, Tomasz Rybak pisze: > Dnia 2012-09-27, czw o godzinie 15:23 +0100, Freddie Witherden pisze: > > Hello, > > > > While attempting to compile PyCUDA under Python 3: > > > [ cut ] > > Also, are there any other potential issues with PyCUDA and Python 3.x > > that I should be aware of? > > > > I was able to build Debian package python3-pycuda. > Version from git compiles without any problems. > I have not uploaded it into Debian though, as there > are problems with compiling kernels under Python 3, > and I was not able to fix it yet. >
When I try to run the tests (e.g. python3 test_cumath.py) I get the error
"TypeError: Type str doesn't support the buffer API" 20 times,
for example:
___________________________________ test_exp
___________________________________
def f(*args, **kwargs):
import pycuda.driver
# appears to be idempotent, i.e. no harm in calling it more than
once
pycuda.driver.init()
ctx = make_default_context()
try:
assert isinstance(ctx.get_device().name(), str)
assert isinstance(ctx.get_device().compute_capability(),
tuple)
assert isinstance(ctx.get_device().get_attributes(), dict)
> inner_f(*args, **kwargs)
/usr/lib/python3/dist-packages/pycuda/tools.py:432:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
def test():
gpu_func = getattr(cumath, name)
cpu_func = getattr(np, numpy_func_names.get(name, name))
for s in sizes:
for dtype in dtypes:
> args = gpuarray.arange(a, b, (b-a)/s, dtype=np.float32)
test_cumath.py:44:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
def arange(*args, **kwargs):
"""Create an array filled with numbers spaced `step` apart,
starting from `start` and ending at `stop`.
For floating point arguments, the length of the result is
`ceil((stop - start)/step)`. This rule may result in the last
element of the result being greater than stop.
"""
# argument processing
-----------------------------------------------------
# Yuck. Thanks, numpy developers. ;)
from pytools import Record
class Info(Record):
pass
explicit_dtype = False
inf = Info()
inf.start = None
inf.stop = None
inf.step = None
inf.dtype = None
if isinstance(args[-1], np.dtype):
dtype = args[-1]
args = args[:-1]
explicit_dtype = True
argc = len(args)
if argc == 0:
raise ValueError("stop argument required")
elif argc == 1:
inf.stop = args[0]
elif argc == 2:
inf.start = args[0]
inf.stop = args[1]
elif argc == 3:
inf.start = args[0]
inf.stop = args[1]
inf.step = args[2]
else:
raise ValueError("too many arguments")
admissible_names = ["start", "stop", "step", "dtype"]
for k, v in kwargs.items():
if k in admissible_names:
if getattr(inf, k) is None:
setattr(inf, k, v)
if k == "dtype":
explicit_dtype = True
else:
raise ValueError("may not specify '%s' by position
and keyword" % k)
else:
raise ValueError("unexpected keyword argument '%s'" % k)
if inf.start is None:
inf.start = 0
if inf.step is None:
inf.step = 1
if inf.dtype is None:
inf.dtype = np.array([inf.start, inf.stop, inf.step]).dtype
# actual functionality
----------------------------------------------------
dtype = np.dtype(inf.dtype)
start = dtype.type(inf.start)
step = dtype.type(inf.step)
stop = dtype.type(inf.stop)
if not explicit_dtype and dtype != np.float32:
from warnings import warn
warn("behavior change: arange guessed dtype other than
float32. "
"suggest specifying explicit dtype.")
from math import ceil
size = int(ceil((stop-start)/step))
result = GPUArray((size,), dtype)
> func = elementwise.get_arange_kernel(dtype)
/usr/lib/python3/dist-packages/pycuda/gpuarray.py:904:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
dtype = dtype('float32')
> ???
<string>:2:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
func = <function get_arange_kernel at 0x28faa68>
@decorator
def context_dependent_memoize(func, *args):
try:
ctx_dict = func._pycuda_ctx_dep_memoize_dic
except AttributeError:
# FIXME: This may keep contexts alive longer than desired.
# But I guess since the memory in them is freed, who cares.
ctx_dict = func._pycuda_ctx_dep_memoize_dic = {}
cur_ctx = cuda.Context.get_current()
try:
return ctx_dict[cur_ctx][args]
except KeyError:
context_dependent_memoized_functions.append(func)
arg_dict = ctx_dict.setdefault(cur_ctx, {})
> result = func(*args)
/usr/lib/python3/dist-packages/pycuda/tools.py:402:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
dtype = dtype('float32')
@context_dependent_memoize
def get_arange_kernel(dtype):
return get_elwise_kernel(
"%(tp)s *z, %(tp)s start, %(tp)s step" % {
"tp": dtype_to_ctype(dtype),
},
"z[i] = start + i*step",
> "arange")
/usr/lib/python3/dist-packages/pycuda/elementwise.py:534:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
arguments = 'float *z, float start, float step'
operation = 'z[i] = start + i*step', name = 'arange', keep = False
options = None
def get_elwise_kernel(arguments, operation,
name="kernel", keep=False, options=None, **kwargs):
"""Return a L{pycuda.driver.Function} that performs the same
scalar operation
on one or several vectors.
"""
func, arguments = get_elwise_kernel_and_types(
> arguments, operation, name, keep, options, **kwargs)
/usr/lib/python3/dist-packages/pycuda/elementwise.py:165:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
arguments = [VectorArg('z', float32), ScalarArg('start', float32),
ScalarArg('step', float32), ScalarArg('n', uint64)]
operation = 'z[i] = start + i*step', name = 'arange', keep = False
options = None, use_range = False
def get_elwise_kernel_and_types(arguments, operation,
name="kernel", keep=False, options=None, use_range=False,
**kwargs):
if isinstance(arguments, str):
from pycuda.tools import parse_c_arg
arguments = [parse_c_arg(arg) for arg in
arguments.split(",")]
if use_range:
arguments.extend([
ScalarArg(np.intp, "start"),
ScalarArg(np.intp, "stop"),
ScalarArg(np.intp, "step"),
])
else:
arguments.append(ScalarArg(np.uintp, "n"))
if use_range:
module_builder = get_elwise_range_module
else:
module_builder = get_elwise_module
mod = module_builder(arguments, operation, name,
> keep, options, **kwargs)
/usr/lib/python3/dist-packages/pycuda/elementwise.py:151:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
arguments = [VectorArg('z', float32), ScalarArg('start', float32),
ScalarArg('step', float32), ScalarArg('n', uint64)]
operation = 'z[i] = start + i*step', name = 'arange', keep = False
options = None, preamble = '', loop_prep = '', after_loop = ''
def get_elwise_module(arguments, operation,
name="kernel", keep=False, options=None,
preamble="", loop_prep="", after_loop=""):
from pycuda.compiler import SourceModule
return SourceModule("""
#include <pycuda-complex.hpp>
%(preamble)s
__global__ void %(name)s(%(arguments)s)
{
unsigned tid = threadIdx.x;
unsigned total_threads = gridDim.x*blockDim.x;
unsigned cta_start = blockDim.x*blockIdx.x;
unsigned i;
%(loop_prep)s;
for (i = cta_start + tid; i < n; i += total_threads)
{
%(operation)s;
}
%(after_loop)s;
}
""" % {
"arguments": ", ".join(arg.declarator() for arg in
arguments),
"operation": operation,
"name": name,
"preamble": preamble,
"loop_prep": loop_prep,
"after_loop": after_loop,
},
> options=options, keep=keep)
def get_elwise_range_module(arguments, operation,
name="kernel", keep=False, options=None,
preamble="", loop_prep="", after_loop=""):
/usr/lib/python3/dist-packages/pycuda/elementwise.py:75:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
self = <pycuda.compiler.SourceModule object at 0x2efb210>
source = '\n #include <pycuda-complex.hpp>\n\n \n\n
__global__ void arange(float *z, float start, float st... i +=
total_threads)\n {\n z[i] = start + i*step;
\n }\n\n ;\n }\n '
nvcc = 'nvcc', options = None, keep = False, no_extern_c = False, arch =
None
code = None, cache_dir = None, include_dirs = []
def __init__(self, source, nvcc="nvcc", options=None, keep=False,
no_extern_c=False, arch=None, code=None, cache_dir=None,
include_dirs=[]):
self._check_arch(arch)
cubin = compile(source, nvcc, options, keep, no_extern_c,
> arch, code, cache_dir, include_dirs)
/usr/lib/python3/dist-packages/pycuda/compiler.py:282:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
source = 'extern "C" {\n\n #include <pycuda-complex.hpp>\n\n
\n\n __global__ void arange(float *z, float s... total_threads)\n
{\n z[i] = start + i*step;\n }\n\n ;
\n }\n \n}\n'
nvcc = 'nvcc'
options = ['-arch', 'sm_21',
'-I/usr/lib/python3/dist-packages/pycuda/../../../../include/pycuda']
keep = False, no_extern_c = False, arch = 'sm_21', code = None
cache_dir = '/tmp/pycuda-compiler-cache-v1-uid1000'
include_dirs =
['/usr/lib/python3/dist-packages/pycuda/../../../../include/pycuda']
def compile(source, nvcc="nvcc", options=None, keep=False,
no_extern_c=False, arch=None, code=None, cache_dir=None,
include_dirs=[]):
if not no_extern_c:
source = 'extern "C" {\n%s\n}\n' % source
if options is None:
options = DEFAULT_NVCC_FLAGS
options = options[:]
if arch is None:
try:
from pycuda.driver import Context
arch = "sm_%d%d" %
Context.get_device().compute_capability()
except RuntimeError:
pass
from pycuda.driver import CUDA_DEBUGGING
if CUDA_DEBUGGING:
cache_dir = False
keep = True
options.extend(["-g", "-G"])
if cache_dir is None:
from os.path import join
from tempfile import gettempdir
cache_dir = join(gettempdir(),
"pycuda-compiler-cache-v1-%s" %
_get_per_user_string())
from os import mkdir
try:
mkdir(cache_dir)
except OSError as e:
from errno import EEXIST
if e.errno != EEXIST:
raise
if arch is not None:
options.extend(["-arch", arch])
if code is not None:
options.extend(["-code", code])
if 'darwin' in sys.platform and sys.maxsize ==
9223372036854775807:
options.append('-m64')
elif 'win32' in sys.platform and sys.maxsize ==
9223372036854775807:
options.append('-m64')
elif 'win32' in sys.platform and sys.maxsize == 2147483647:
options.append('-m32')
include_dirs = include_dirs + [_find_pycuda_include_path()]
for i in include_dirs:
options.append("-I"+i)
> return compile_plain(source, options, keep, nvcc, cache_dir)
/usr/lib/python3/dist-packages/pycuda/compiler.py:272:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
source = 'extern "C" {\n\n #include <pycuda-complex.hpp>\n\n
\n\n __global__ void arange(float *z, float s... total_threads)\n
{\n z[i] = start + i*step;\n }\n\n ;
\n }\n \n}\n'
options = ['-arch', 'sm_21',
'-I/usr/lib/python3/dist-packages/pycuda/../../../../include/pycuda']
keep = False, nvcc = 'nvcc', cache_dir =
'/tmp/pycuda-compiler-cache-v1-uid1000'
def compile_plain(source, options, keep, nvcc, cache_dir):
from os.path import join
if cache_dir:
checksum = _new_md5()
if '#include' in source:
checksum.update(preprocess_source(source, options,
nvcc))
else:
checksum.update(source.encode("utf-8"))
for option in options:
checksum.update(option.encode("utf-8"))
checksum.update(get_nvcc_version(nvcc))
from pycuda.characterize import platform_bits
checksum.update(str(platform_bits()).encode("utf-8"))
cache_file = checksum.hexdigest()
cache_path = join(cache_dir, cache_file + ".cubin")
try:
return open(cache_path, "rb").read()
except:
pass
from tempfile import mkdtemp
file_dir = mkdtemp()
file_root = "kernel"
cu_file_name = file_root + ".cu"
cu_file_path = join(file_dir, cu_file_name)
outf = open(cu_file_path, "w")
outf.write(str(source))
outf.close()
if keep:
options = options[:]
options.append("--keep")
print("*** compiler output in %s" % file_dir)
cmdline = [nvcc, "--cubin"] + options + [cu_file_name]
result, stdout, stderr = call_capture_output(cmdline,
cwd=file_dir, error_on_nonzero=False)
try:
cubin_f = open(join(file_dir, file_root + ".cubin"), "rb")
except IOError:
no_output = True
else:
no_output = False
if result != 0 or (no_output and (stdout or stderr)):
if result == 0:
from warnings import warn
warn("PyCUDA: nvcc exited with status 0, but appears to
have "
"encountered an error")
from pycuda.driver import CompileError
raise CompileError("nvcc compilation of %s failed" %
cu_file_path,
cmdline, stdout=stdout, stderr=stderr)
if stdout or stderr:
lcase_err_text = (stdout+stderr).lower()
from warnings import warn
> if "demoted" in lcase_err_text or "demoting" in
lcase_err_text:
E TypeError: Type str doesn't support the buffer API
/usr/lib/python3/dist-packages/pycuda/compiler.py:134: TypeError
--
Tomasz Rybak GPG/PGP key ID: 2AD5 9860
Fingerprint A481 824E 7DD3 9C0E C40A 488E C654 FB33 2AD5 9860
http://member.acm.org/~tomaszrybak
signature.asc
Description: This is a digitally signed message part
_______________________________________________ PyCUDA mailing list [email protected] http://lists.tiker.net/listinfo/pycuda
