Dnia 2012-09-27, czw o godzinie 19:27 +0200, Tomasz Rybak pisze: > Dnia 2012-09-27, czw o godzinie 15:23 +0100, Freddie Witherden pisze: > > Hello, > > > > While attempting to compile PyCUDA under Python 3: > > > [ cut ] > > Also, are there any other potential issues with PyCUDA and Python 3.x > > that I should be aware of? > > > > I was able to build Debian package python3-pycuda. > Version from git compiles without any problems. > I have not uploaded it into Debian though, as there > are problems with compiling kernels under Python 3, > and I was not able to fix it yet. >
When I try to run the tests (e.g. python3 test_cumath.py) I get the error
"TypeError: Type str doesn't support the buffer API" 20 times,
for example:
___________________________________ test_exp
___________________________________
def f(*args, **kwargs):
import pycuda.driver
# appears to be idempotent, i.e. no harm in calling it more than
once
pycuda.driver.init()
ctx = make_default_context()
try:
assert isinstance(ctx.get_device().name(), str)
assert isinstance(ctx.get_device().compute_capability(),
tuple)
assert isinstance(ctx.get_device().get_attributes(), dict)
> inner_f(*args, **kwargs)
/usr/lib/python3/dist-packages/pycuda/tools.py:432:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
def test():
gpu_func = getattr(cumath, name)
cpu_func = getattr(np, numpy_func_names.get(name, name))
for s in sizes:
for dtype in dtypes:
> args = gpuarray.arange(a, b, (b-a)/s, dtype=np.float32)
test_cumath.py:44:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
def arange(*args, **kwargs):
"""Create an array filled with numbers spaced `step` apart,
starting from `start` and ending at `stop`.
For floating point arguments, the length of the result is
`ceil((stop - start)/step)`. This rule may result in the last
element of the result being greater than stop.
"""
# argument processing
-----------------------------------------------------
# Yuck. Thanks, numpy developers. ;)
from pytools import Record
class Info(Record):
pass
explicit_dtype = False
inf = Info()
inf.start = None
inf.stop = None
inf.step = None
inf.dtype = None
if isinstance(args[-1], np.dtype):
dtype = args[-1]
args = args[:-1]
explicit_dtype = True
argc = len(args)
if argc == 0:
raise ValueError("stop argument required")
elif argc == 1:
inf.stop = args[0]
elif argc == 2:
inf.start = args[0]
inf.stop = args[1]
elif argc == 3:
inf.start = args[0]
inf.stop = args[1]
inf.step = args[2]
else:
raise ValueError("too many arguments")
admissible_names = ["start", "stop", "step", "dtype"]
for k, v in kwargs.items():
if k in admissible_names:
if getattr(inf, k) is None:
setattr(inf, k, v)
if k == "dtype":
explicit_dtype = True
else:
raise ValueError("may not specify '%s' by position
and keyword" % k)
else:
raise ValueError("unexpected keyword argument '%s'" % k)
if inf.start is None:
inf.start = 0
if inf.step is None:
inf.step = 1
if inf.dtype is None:
inf.dtype = np.array([inf.start, inf.stop, inf.step]).dtype
# actual functionality
----------------------------------------------------
dtype = np.dtype(inf.dtype)
start = dtype.type(inf.start)
step = dtype.type(inf.step)
stop = dtype.type(inf.stop)
if not explicit_dtype and dtype != np.float32:
from warnings import warn
warn("behavior change: arange guessed dtype other than
float32. "
"suggest specifying explicit dtype.")
from math import ceil
size = int(ceil((stop-start)/step))
result = GPUArray((size,), dtype)
> func = elementwise.get_arange_kernel(dtype)
/usr/lib/python3/dist-packages/pycuda/gpuarray.py:904:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
dtype = dtype('float32')
> ???
<string>:2:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
func = <function get_arange_kernel at 0x28faa68>
@decorator
def context_dependent_memoize(func, *args):
try:
ctx_dict = func._pycuda_ctx_dep_memoize_dic
except AttributeError:
# FIXME: This may keep contexts alive longer than desired.
# But I guess since the memory in them is freed, who cares.
ctx_dict = func._pycuda_ctx_dep_memoize_dic = {}
cur_ctx = cuda.Context.get_current()
try:
return ctx_dict[cur_ctx][args]
except KeyError:
context_dependent_memoized_functions.append(func)
arg_dict = ctx_dict.setdefault(cur_ctx, {})
> result = func(*args)
/usr/lib/python3/dist-packages/pycuda/tools.py:402:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
dtype = dtype('float32')
@context_dependent_memoize
def get_arange_kernel(dtype):
return get_elwise_kernel(
"%(tp)s *z, %(tp)s start, %(tp)s step" % {
"tp": dtype_to_ctype(dtype),
},
"z[i] = start + i*step",
> "arange")
/usr/lib/python3/dist-packages/pycuda/elementwise.py:534:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
arguments = 'float *z, float start, float step'
operation = 'z[i] = start + i*step', name = 'arange', keep = False
options = None
def get_elwise_kernel(arguments, operation,
name="kernel", keep=False, options=None, **kwargs):
"""Return a L{pycuda.driver.Function} that performs the same
scalar operation
on one or several vectors.
"""
func, arguments = get_elwise_kernel_and_types(
> arguments, operation, name, keep, options, **kwargs)
/usr/lib/python3/dist-packages/pycuda/elementwise.py:165:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
arguments = [VectorArg('z', float32), ScalarArg('start', float32),
ScalarArg('step', float32), ScalarArg('n', uint64)]
operation = 'z[i] = start + i*step', name = 'arange', keep = False
options = None, use_range = False
def get_elwise_kernel_and_types(arguments, operation,
name="kernel", keep=False, options=None, use_range=False,
**kwargs):
if isinstance(arguments, str):
from pycuda.tools import parse_c_arg
arguments = [parse_c_arg(arg) for arg in
arguments.split(",")]
if use_range:
arguments.extend([
ScalarArg(np.intp, "start"),
ScalarArg(np.intp, "stop"),
ScalarArg(np.intp, "step"),
])
else:
arguments.append(ScalarArg(np.uintp, "n"))
if use_range:
module_builder = get_elwise_range_module
else:
module_builder = get_elwise_module
mod = module_builder(arguments, operation, name,
> keep, options, **kwargs)
/usr/lib/python3/dist-packages/pycuda/elementwise.py:151:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
arguments = [VectorArg('z', float32), ScalarArg('start', float32),
ScalarArg('step', float32), ScalarArg('n', uint64)]
operation = 'z[i] = start + i*step', name = 'arange', keep = False
options = None, preamble = '', loop_prep = '', after_loop = ''
def get_elwise_module(arguments, operation,
name="kernel", keep=False, options=None,
preamble="", loop_prep="", after_loop=""):
from pycuda.compiler import SourceModule
return SourceModule("""
#include <pycuda-complex.hpp>
%(preamble)s
__global__ void %(name)s(%(arguments)s)
{
unsigned tid = threadIdx.x;
unsigned total_threads = gridDim.x*blockDim.x;
unsigned cta_start = blockDim.x*blockIdx.x;
unsigned i;
%(loop_prep)s;
for (i = cta_start + tid; i < n; i += total_threads)
{
%(operation)s;
}
%(after_loop)s;
}
""" % {
"arguments": ", ".join(arg.declarator() for arg in
arguments),
"operation": operation,
"name": name,
"preamble": preamble,
"loop_prep": loop_prep,
"after_loop": after_loop,
},
> options=options, keep=keep)
def get_elwise_range_module(arguments, operation,
name="kernel", keep=False, options=None,
preamble="", loop_prep="", after_loop=""):
/usr/lib/python3/dist-packages/pycuda/elementwise.py:75:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
self = <pycuda.compiler.SourceModule object at 0x2efb210>
source = '\n #include <pycuda-complex.hpp>\n\n \n\n
__global__ void arange(float *z, float start, float st... i +=
total_threads)\n {\n z[i] = start + i*step;
\n }\n\n ;\n }\n '
nvcc = 'nvcc', options = None, keep = False, no_extern_c = False, arch =
None
code = None, cache_dir = None, include_dirs = []
def __init__(self, source, nvcc="nvcc", options=None, keep=False,
no_extern_c=False, arch=None, code=None, cache_dir=None,
include_dirs=[]):
self._check_arch(arch)
cubin = compile(source, nvcc, options, keep, no_extern_c,
> arch, code, cache_dir, include_dirs)
/usr/lib/python3/dist-packages/pycuda/compiler.py:282:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
source = 'extern "C" {\n\n #include <pycuda-complex.hpp>\n\n
\n\n __global__ void arange(float *z, float s... total_threads)\n
{\n z[i] = start + i*step;\n }\n\n ;
\n }\n \n}\n'
nvcc = 'nvcc'
options = ['-arch', 'sm_21',
'-I/usr/lib/python3/dist-packages/pycuda/../../../../include/pycuda']
keep = False, no_extern_c = False, arch = 'sm_21', code = None
cache_dir = '/tmp/pycuda-compiler-cache-v1-uid1000'
include_dirs =
['/usr/lib/python3/dist-packages/pycuda/../../../../include/pycuda']
def compile(source, nvcc="nvcc", options=None, keep=False,
no_extern_c=False, arch=None, code=None, cache_dir=None,
include_dirs=[]):
if not no_extern_c:
source = 'extern "C" {\n%s\n}\n' % source
if options is None:
options = DEFAULT_NVCC_FLAGS
options = options[:]
if arch is None:
try:
from pycuda.driver import Context
arch = "sm_%d%d" %
Context.get_device().compute_capability()
except RuntimeError:
pass
from pycuda.driver import CUDA_DEBUGGING
if CUDA_DEBUGGING:
cache_dir = False
keep = True
options.extend(["-g", "-G"])
if cache_dir is None:
from os.path import join
from tempfile import gettempdir
cache_dir = join(gettempdir(),
"pycuda-compiler-cache-v1-%s" %
_get_per_user_string())
from os import mkdir
try:
mkdir(cache_dir)
except OSError as e:
from errno import EEXIST
if e.errno != EEXIST:
raise
if arch is not None:
options.extend(["-arch", arch])
if code is not None:
options.extend(["-code", code])
if 'darwin' in sys.platform and sys.maxsize ==
9223372036854775807:
options.append('-m64')
elif 'win32' in sys.platform and sys.maxsize ==
9223372036854775807:
options.append('-m64')
elif 'win32' in sys.platform and sys.maxsize == 2147483647:
options.append('-m32')
include_dirs = include_dirs + [_find_pycuda_include_path()]
for i in include_dirs:
options.append("-I"+i)
> return compile_plain(source, options, keep, nvcc, cache_dir)
/usr/lib/python3/dist-packages/pycuda/compiler.py:272:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _
source = 'extern "C" {\n\n #include <pycuda-complex.hpp>\n\n
\n\n __global__ void arange(float *z, float s... total_threads)\n
{\n z[i] = start + i*step;\n }\n\n ;
\n }\n \n}\n'
options = ['-arch', 'sm_21',
'-I/usr/lib/python3/dist-packages/pycuda/../../../../include/pycuda']
keep = False, nvcc = 'nvcc', cache_dir =
'/tmp/pycuda-compiler-cache-v1-uid1000'
def compile_plain(source, options, keep, nvcc, cache_dir):
from os.path import join
if cache_dir:
checksum = _new_md5()
if '#include' in source:
checksum.update(preprocess_source(source, options,
nvcc))
else:
checksum.update(source.encode("utf-8"))
for option in options:
checksum.update(option.encode("utf-8"))
checksum.update(get_nvcc_version(nvcc))
from pycuda.characterize import platform_bits
checksum.update(str(platform_bits()).encode("utf-8"))
cache_file = checksum.hexdigest()
cache_path = join(cache_dir, cache_file + ".cubin")
try:
return open(cache_path, "rb").read()
except:
pass
from tempfile import mkdtemp
file_dir = mkdtemp()
file_root = "kernel"
cu_file_name = file_root + ".cu"
cu_file_path = join(file_dir, cu_file_name)
outf = open(cu_file_path, "w")
outf.write(str(source))
outf.close()
if keep:
options = options[:]
options.append("--keep")
print("*** compiler output in %s" % file_dir)
cmdline = [nvcc, "--cubin"] + options + [cu_file_name]
result, stdout, stderr = call_capture_output(cmdline,
cwd=file_dir, error_on_nonzero=False)
try:
cubin_f = open(join(file_dir, file_root + ".cubin"), "rb")
except IOError:
no_output = True
else:
no_output = False
if result != 0 or (no_output and (stdout or stderr)):
if result == 0:
from warnings import warn
warn("PyCUDA: nvcc exited with status 0, but appears to
have "
"encountered an error")
from pycuda.driver import CompileError
raise CompileError("nvcc compilation of %s failed" %
cu_file_path,
cmdline, stdout=stdout, stderr=stderr)
if stdout or stderr:
lcase_err_text = (stdout+stderr).lower()
from warnings import warn
> if "demoted" in lcase_err_text or "demoting" in
lcase_err_text:
E TypeError: Type str doesn't support the buffer API
/usr/lib/python3/dist-packages/pycuda/compiler.py:134: TypeError
--
Tomasz Rybak GPG/PGP key ID: 2AD5 9860
Fingerprint A481 824E 7DD3 9C0E C40A 488E C654 FB33 2AD5 9860
http://member.acm.org/~tomaszrybak
signature.asc
Description: This is a digitally signed message part
_______________________________________________ PyCUDA mailing list [email protected] http://lists.tiker.net/listinfo/pycuda
