This is an automated email from the ASF dual-hosted git repository.
markd pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push:
new 3faacac [MINOR] Compilation issue with CUDA codegen's vectCountnnz() function fixed.
3faacac is described below
commit 3faacace2cc766c1684d3818dba0aca5a9b7781c
Author: Mark Dokter <[email protected]>
AuthorDate: Tue Apr 13 00:35:06 2021 +0200
[MINOR] Compilation issue with CUDA codegen's vectCountnnz() function fixed.
---
src/main/cuda/headers/operators.cuh | 11 +++++++++++
src/main/cuda/headers/spoof_utils.cuh | 5 ++---
2 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/src/main/cuda/headers/operators.cuh b/src/main/cuda/headers/operators.cuh
index c88a19d..a48c990 100644
--- a/src/main/cuda/headers/operators.cuh
+++ b/src/main/cuda/headers/operators.cuh
@@ -107,6 +107,17 @@ struct NotEqualOp {
};
template<typename T>
+struct NotZero {
+ __device__ __forceinline__ T operator()(T a, T b) const {
+ return (a != 0) ? 1.0 : 0.0;
+ }
+
+ __device__ __forceinline__ static T exec(T a, T b) {
+ return (a != 0) ? 1.0 : 0.0;
+ }
+};
+
+template<typename T>
struct XorOp {
__device__ __forceinline__ static T exec(T a, T b) {
return (a != 0.0) != (b != 0.0) ? 1.0 : 0.0;
diff --git a/src/main/cuda/headers/spoof_utils.cuh b/src/main/cuda/headers/spoof_utils.cuh
index 9bcaef5..5d9b101 100644
--- a/src/main/cuda/headers/spoof_utils.cuh
+++ b/src/main/cuda/headers/spoof_utils.cuh
@@ -693,9 +693,8 @@ Vector<T>& vectPow2Write(T* avals, uint32_t* aix, uint32_t ai, uint32_t alen, ui
template<typename T>
T vectCountnnz(T* a, uint32_t ai, uint32_t len) {
SumOp<T> agg_op;
- NotEqualOp<T> load_op;
- T result = BLOCK_ROW_AGG(&a[ai], &a[ai], len, agg_op, load_op);
- return result;
+ NotZero<T> load_op;
+ return BLOCK_ROW_AGG(&a[ai], &a[ai], len, agg_op, load_op);
}
template<typename T>