Repository: systemml Updated Branches: refs/heads/master 54c52ab3c -> a22502583
[SYSTEMML-2266] Fix native BLAS integration for large dense blocks >16GB This patch fixes issues of native BLAS matrix multiply calls for large dense blocks (inputs or outputs), which currently access only the first block and thus fail with an exception indicating incorrect results. The native BLAS call is now conditioned on contiguous blocks; otherwise, we fall back to our default operations. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/c1a7f855 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/c1a7f855 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/c1a7f855 Branch: refs/heads/master Commit: c1a7f855b605d80eabc9de833d202b55b4319639 Parents: 54c52ab Author: Matthias Boehm <mboe...@gmail.com> Authored: Fri Apr 20 19:39:27 2018 -0700 Committer: Matthias Boehm <mboe...@gmail.com> Committed: Fri Apr 20 19:39:27 2018 -0700 ---------------------------------------------------------------------- .../org/apache/sysml/runtime/matrix/data/LibMatrixNative.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/c1a7f855/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java index cf4501f..e122e7f 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java @@ -79,9 +79,11 @@ public class LibMatrixNative return; } - if (NativeHelper.isNativeLibraryLoaded() + if( NativeHelper.isNativeLibraryLoaded() && !isMatMultMemoryBound(m1.rlen, m1.clen, m2.clen) - && !m1.isInSparseFormat() && !m2.isInSparseFormat()) + 
&& !m1.isInSparseFormat() && !m2.isInSparseFormat() + && m1.getDenseBlock().isContiguous() && m2.getDenseBlock().isContiguous() + && 8L * ret.getLength() < Integer.MAX_VALUE ) //contiguous but not allocated { ret.sparse = false; ret.allocateDenseBlock();