[SYSTEMML-540] Support sigmoid function on GPU Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/3ca91e68 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/3ca91e68 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/3ca91e68
// Branch: refs/heads/master
// Commit: 3ca91e68c3a2f68b6d4c77b154d004244377fb1d
// Parents: 6b877b0
// Author: Niketan Pansare <npan...@us.ibm.com>
// Authored: Sun Dec 3 09:32:44 2017 -0800
// Committer: Niketan Pansare <npan...@us.ibm.com>
// Committed: Sun Dec 3 10:46:24 2017 -0800
// ----------------------------------------------------------------------
//  src/main/cpp/kernels/SystemML.cu                | 24 +
//  src/main/cpp/kernels/SystemML.ptx               | 3008 ++++++++++--------
//  .../java/org/apache/sysml/hops/UnaryOp.java     | 1 +
//  .../instructions/GPUInstructionParser.java      | 1 +
//  .../instructions/gpu/GPUInstruction.java        | 1 +
//  .../gpu/MatrixBuiltinGPUInstruction.java        | 2 +
//  .../runtime/matrix/data/LibMatrixCUDA.java      | 17 +
//  7 files changed, 1673 insertions(+), 1381 deletions(-)
// ----------------------------------------------------------------------
// http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca91e68/src/main/cpp/kernels/SystemML.cu
// ----------------------------------------------------------------------
// diff --git a/src/main/cpp/kernels/SystemML.cu b/src/main/cpp/kernels/SystemML.cu
// index ade2dd1..8eac454 100644
// --- a/src/main/cpp/kernels/SystemML.cu
// +++ b/src/main/cpp/kernels/SystemML.cu
// @@ -1743,3 +1743,27 @@ extern "C" __global__ void matrix_sign_f(float *A, float *C,
//                                                unsigned int size) {
//    matrix_sign(A, C, size);
//  }
//
// Code added by the hunk above (appended after matrix_sign_f):

/**
 * Do a sigmoid over all the elements of a matrix.
 *
 * Each thread handles at most one element: the element index is derived from
 * the x-dimension block/thread coordinates only, and threads whose index is
 * not below `size` do nothing.
 *
 * The value is computed as 0.5 * tanh(0.5 * x) + 0.5, which is mathematically
 * equal to 1 / (1 + exp(-x)).
 *
 * @param A the input matrix (of length = size)
 * @param C the pre-allocated output matrix (of length = size)
 * @param size the length of the input and output matrices
 */
template <typename T>
__device__ void matrix_sigmoid(T *A, T *C, unsigned int size) {
  // blockIdx/blockDim/threadIdx components are unsigned; keeping the index
  // unsigned avoids a signed/unsigned comparison against `size`.
  unsigned int index = blockIdx.x * blockDim.x + threadIdx.x;
  if (index < size) {
    // Use a constant of type T rather than a bare 0.5 (a double literal), so
    // the float instantiation stays in single precision and selects the
    // float overload of tanh instead of promoting every element to double.
    const T half = static_cast<T>(0.5);
    C[index] = half * tanh(half * A[index]) + half;
  }
}

/**
 * Double-precision entry point for the element-wise sigmoid.
 *
 * @param A the input matrix (of length = size)
 * @param C the pre-allocated output matrix (of length = size)
 * @param size the length of the input and output matrices
 */
extern "C" __global__ void matrix_sigmoid_d(double *A, double *C,
                                            unsigned int size) {
  matrix_sigmoid(A, C, size);
}

/**
 * Single-precision entry point for the element-wise sigmoid.
 *
 * @param A the input matrix (of length = size)
 * @param C the pre-allocated output matrix (of length = size)
 * @param size the length of the input and output matrices
 */
extern "C" __global__ void matrix_sigmoid_f(float *A, float *C,
                                            unsigned int size) {
  matrix_sigmoid(A, C, size);
}