[SYSTEMML-540] Support sigmoid function on GPU

Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/3ca91e68
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/3ca91e68
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/3ca91e68

Branch: refs/heads/master
Commit: 3ca91e68c3a2f68b6d4c77b154d004244377fb1d
Parents: 6b877b0
Author: Niketan Pansare <npan...@us.ibm.com>
Authored: Sun Dec 3 09:32:44 2017 -0800
Committer: Niketan Pansare <npan...@us.ibm.com>
Committed: Sun Dec 3 10:46:24 2017 -0800

----------------------------------------------------------------------
 src/main/cpp/kernels/SystemML.cu                |   24 +
 src/main/cpp/kernels/SystemML.ptx               | 3008 ++++++++++--------
 .../java/org/apache/sysml/hops/UnaryOp.java     |    1 +
 .../instructions/GPUInstructionParser.java      |    1 +
 .../instructions/gpu/GPUInstruction.java        |    1 +
 .../gpu/MatrixBuiltinGPUInstruction.java        |    2 +
 .../runtime/matrix/data/LibMatrixCUDA.java      |   17 +
 7 files changed, 1673 insertions(+), 1381 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca91e68/src/main/cpp/kernels/SystemML.cu
----------------------------------------------------------------------
diff --git a/src/main/cpp/kernels/SystemML.cu b/src/main/cpp/kernels/SystemML.cu
index ade2dd1..8eac454 100644
--- a/src/main/cpp/kernels/SystemML.cu
+++ b/src/main/cpp/kernels/SystemML.cu
@@ -1743,3 +1743,27 @@ extern "C" __global__ void matrix_sign_f(float *A, float 
*C,
                                          unsigned int size) {
   matrix_sign(A, C, size);
 }
+
+/**
+ * Element-wise sigmoid of a matrix: C[i] = 0.5 * tanh(0.5 * A[i]) + 0.5,
+ * which equals 1 / (1 + exp(-A[i])).
+ * Each thread handles at most one element, addressed by its flat x index
+ * (blockIdx.x * blockDim.x + threadIdx.x); threads at or past size do nothing.
+ * @param A the input matrix (of length = size)
+ * @param C the pre-allocated output matrix (of length = size)
+ * @param size the length of the input and output matrices
+ */
+template <typename T>
+__device__ void matrix_sigmoid(T *A, T *C, unsigned int size) {
+  // Unsigned to match size, so the bounds check compares like with like.
+  unsigned int index = blockIdx.x * blockDim.x + threadIdx.x;
+  if (index < size) {
+    // Keep the constants in T: bare 0.5 literals are double and would promote
+    // the float instantiation to double-precision arithmetic.
+    const T one_half = static_cast<T>(0.5);
+    C[index] = one_half * tanh(one_half * A[index]) + one_half;
+  }
+}
+
+/**
+ * Double-precision kernel entry point (C linkage) for the element-wise
+ * sigmoid; forwards its arguments to matrix_sigmoid with T = double.
+ * @param A the input matrix (of length = size)
+ * @param C the pre-allocated output matrix (of length = size)
+ * @param size the length of the input and output matrices
+ */
+extern "C" __global__ void matrix_sigmoid_d(double *A, double *C,
+                                         unsigned int size) {
+  matrix_sigmoid<double>(A, C, size);
+}
+
+/**
+ * Single-precision kernel entry point (C linkage) for the element-wise
+ * sigmoid; forwards its arguments to matrix_sigmoid with T = float.
+ * @param A the input matrix (of length = size)
+ * @param C the pre-allocated output matrix (of length = size)
+ * @param size the length of the input and output matrices
+ */
+extern "C" __global__ void matrix_sigmoid_f(float *A, float *C,
+                                         unsigned int size) {
+  matrix_sigmoid<float>(A, C, size);
+}

Reply via email to