This is an automated email from the ASF dual-hosted git repository.

markd pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new 3faacac  [MINOR] Compilation issue with CUDA codegen's vectCountnnz() function fixed.
3faacac is described below

commit 3faacace2cc766c1684d3818dba0aca5a9b7781c
Author: Mark Dokter <[email protected]>
AuthorDate: Tue Apr 13 00:35:06 2021 +0200

    [MINOR] Compilation issue with CUDA codegen's vectCountnnz() function fixed.
---
 src/main/cuda/headers/operators.cuh   | 11 +++++++++++
 src/main/cuda/headers/spoof_utils.cuh |  5 ++---
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/src/main/cuda/headers/operators.cuh b/src/main/cuda/headers/operators.cuh
index c88a19d..a48c990 100644
--- a/src/main/cuda/headers/operators.cuh
+++ b/src/main/cuda/headers/operators.cuh
@@ -107,6 +107,17 @@ struct NotEqualOp {
 };
 
 template<typename T>
+struct NotZero {
+       __device__  __forceinline__ T operator()(T a, T b) const {
+               return (a != 0) ? 1.0 : 0.0;
+       }
+       
+       __device__  __forceinline__ static T exec(T a, T b) {
+               return (a != 0) ? 1.0 : 0.0;
+       }
+};
+
+template<typename T>
 struct XorOp {
        __device__  __forceinline__ static T exec(T a, T b) {
                return (a != 0.0) != (b != 0.0) ? 1.0 : 0.0;
diff --git a/src/main/cuda/headers/spoof_utils.cuh b/src/main/cuda/headers/spoof_utils.cuh
index 9bcaef5..5d9b101 100644
--- a/src/main/cuda/headers/spoof_utils.cuh
+++ b/src/main/cuda/headers/spoof_utils.cuh
@@ -693,9 +693,8 @@ Vector<T>& vectPow2Write(T* avals, uint32_t* aix, uint32_t ai, uint32_t alen, ui
 template<typename T>
 T vectCountnnz(T* a, uint32_t ai, uint32_t len) {
        SumOp<T> agg_op;
-       NotEqualOp<T> load_op;
-       T result = BLOCK_ROW_AGG(&a[ai], &a[ai], len, agg_op, load_op);
-       return result;
+       NotZero<T> load_op;
+       return BLOCK_ROW_AGG(&a[ai], &a[ai], len, agg_op, load_op);
 }
 
 template<typename T>

Reply via email to