Hi,
I wrapped AMD's deprecated DGEMM as a form of matrix_multiply in pyOpenCL as
follows:
python matrix_multiply_setup.py build_ext --inplace
where matrix_multiply_setup.py is
# Build script for the matrix_multiply Cython extension.
# Run with:  python matrix_multiply_setup.py build_ext --inplace
from distutils.core import setup
from distutils.extension import Extension
from Cython.Build import cythonize
import numpy as np

# NOTE(review): the include/library paths below are machine-specific Windows
# install locations for the AMD APP SDK and clAmdBlas -- adjust as needed.
extensions = [
    Extension(
        name='matrix_multiply',
        sources=['matrix_multiply.pyx'],
        include_dirs=[
            "C:\\Program Files (x86)\\AMD APP SDK\\2.9-1\\include",
            "C:\\Program Files (x86)\\AMD\\clAmdBlas\\include",
            np.get_include(),
        ],
        library_dirs=[
            "C:\\Program Files (x86)\\AMD APP SDK\\2.9-1\\bin\\x86_64",
            "c:\\Program Files (x86)\\AMD\\clAmdBlas\\bin64",
        ],
        libraries=['clAmdBlas', 'OpenCL'],
    )
]
extensions = cythonize(extensions)
setup(
    ext_modules=extensions,
)
and matrix_multiply.pyx is:
# matrix_multiply.pyx -- thin Cython wrapper around clAmdBlasDgemm so that
# PyOpenCL arrays can be multiplied on the device without leaving Python.
import numpy as np
cimport numpy as np
import pyopencl as cl
import pyopencl.array as cla
import pyopencl.clrandom as clr
import pyopencl.clmath as clm
from clAmdBlas cimport *


def blas_setup():
    """Initialize clAmdBlas; must be called once before any BLAS call."""
    clAmdBlasSetup()


def blas_teardown():
    """Release clAmdBlas resources; call once when finished."""
    clAmdBlasTeardown()


def matrix_multiply(A_g, B_g, C_g, queue):
    """Compute C_g = A_g @ B_g on the device via clAmdBlasDgemm.

    A_g, B_g, C_g: pyopencl.array.Array instances of dtype float64
        (assumed C-contiguous / row-major -- TODO confirm callers
        guarantee this), with shapes (M, K), (K, N), (M, N).
    queue: a pyopencl CommandQueue on the target device.

    Returns the clAmdBlasStatus code from the DGEMM call
    (clAmdBlasSuccess == 0 on success).
    """
    (M, K) = A_g.shape
    N = B_g.shape[1]
    cdef cl_event event = NULL
    # Recover the raw OpenCL handles from the PyOpenCL wrapper objects.
    cdef intptr_t queue_p = <intptr_t> queue.int_ptr
    cdef cl_command_queue cq = <cl_command_queue> queue_p
    cdef intptr_t A_g_p = A_g.data.int_ptr
    cdef cl_mem bufA = <cl_mem> A_g_p
    cdef intptr_t B_g_p = B_g.data.int_ptr
    cdef cl_mem bufB = <cl_mem> B_g_p
    cdef intptr_t C_g_p = C_g.data.int_ptr
    cdef cl_mem bufC = <cl_mem> C_g_p
    # alpha = 1.0, beta = 0.0: plain product, no accumulation into C.
    # Leading dimensions are the row strides of the row-major matrices.
    err = clAmdBlasDgemm(clAmdBlasRowMajor, clAmdBlasNoTrans, clAmdBlasNoTrans,
                         M, N, K, 1.0, bufA, K, bufB, N, 0.0, bufC, N,
                         1, &cq, 0, NULL, &event)
    # Previously the status was computed and silently discarded; surface it
    # so callers can check for failure.
    return err
where clAmdBlas.pxd is:
# clAmdBlas.pxd -- minimal Cython declarations for the subset of the
# clAmdBlas C API used by matrix_multiply.pyx.
from libc.stdint cimport intptr_t, uintptr_t

cdef extern from "clAmdBlas.h":
    enum:
        CL_SUCCESS = 0
    enum clAmdBlasStatus:
        clAmdBlasSuccess = CL_SUCCESS
    enum clAmdBlasOrder:
        clAmdBlasRowMajor = 0
    enum clAmdBlasTranspose:
        clAmdBlasNoTrans = 0
    ctypedef unsigned int cl_uint
    ctypedef double cl_double
    # Opaque OpenCL handle types, declared as void* for wrapping purposes.
    ctypedef void* cl_mem
    ctypedef void* cl_command_queue
    ctypedef void* cl_event
    ctypedef void* cl_platform_id
    ctypedef void* cl_device_id
    ctypedef void* cl_context
    clAmdBlasStatus clAmdBlasSetup()
    void clAmdBlasTeardown()
    clAmdBlasStatus clAmdBlasDgemm(clAmdBlasOrder order,
                                   clAmdBlasTranspose transA,
                                   clAmdBlasTranspose transB,
                                   size_t M, size_t N, size_t K,
                                   cl_double alpha,
                                   const cl_mem A, size_t lda,
                                   const cl_mem B, size_t ldb,
                                   cl_double beta,
                                   cl_mem C, size_t ldc,
                                   cl_uint numCommandQueues,
                                   cl_command_queue *commandQueues,
                                   cl_uint numEventsInWaitList,
                                   const cl_event *eventWaitList,
                                   cl_event *events)
Once matrix_multiply.pyd is created, it can be used in a pure Python program
involving PyOpenCL for example as follows, where queue is a pyOpenCL queue:
# Example usage of the compiled matrix_multiply extension.
# `queue` is assumed to be an existing pyopencl CommandQueue.
import pyopencl.array as cla
import matrix_multiply
import numpy as np
import pyopencl as cl

A = np.ascontiguousarray(np.ones((2, 2)))
B = np.ascontiguousarray(np.ones((2, 2)))  # fixed: original read np.ones(2,2))) -- unbalanced parens
bufA = cla.to_device(queue, A)
bufB = cla.to_device(queue, B)
bufC = cla.zeros(queue, shape=(2, 2), dtype=np.float64)
matrix_multiply.blas_setup()
matrix_multiply.matrix_multiply(bufA, bufB, bufC, queue)
matrix_multiply.blas_teardown()
Note though that clAmdBlas is deprecated in favor of clBLAS on GitHub:
https://github.com/clMathLibraries/clBLAS, which doesn't have a Windows
installer, whereas clAmdBlas did.
The reason it doesn't have a Windows installer is that a process of post-build
host-based tuning which was used on clAmdBlas has been replaced by direct
access to driver properties in the compilation of clBlas. This means that each
user of the package has to compile it on their machine before they can use it,
which also means that wrappers for the package to pyOpenCL have to be built at
that time. In addition, if you have a machine with multiple OpenCL devices
(for example I have AMD and Intel OpenCL on my workstation, with the Intel CPU
chip acting as a separate platform and device), I don't know if the build is
correct and optimal for all devices and platforms on the machine at build time
or only correct and optimal for the AMD device.
Thanks,
Lars Ericson
Quantitative Analytics Consultant
Market & Institutional Risk Management
Wells Fargo Bank, N.A. | 301 S. College St., 4th Floor | Charlotte, NC
28202-6000
MAC D1053-04X
Tel 704-410-2219 | Cell 917-891-1639
[email protected]<mailto:[email protected]>
_______________________________________________
PyOpenCL mailing list
[email protected]
http://lists.tiker.net/listinfo/pyopencl