This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new d9e4f213fa [SYSTEMDS-3636] Ultra-sparse tsmm right w/ multi-threaded transpose
d9e4f213fa is described below

commit d9e4f213fa41e6698a23b07d4faebcc5f32f1bc0
Author: Matthias Boehm <[email protected]>
AuthorDate: Tue Oct 31 15:22:37 2023 +0100

    [SYSTEMDS-3636] Ultra-sparse tsmm right w/ multi-threaded transpose
    
    Further improvement of ultra-sparse tsmm right: the transpose is now
    multi-threaded. In a scenario of 10 repetitions of G %*% t(G), with G
    being germany_osm, the runtime changes as follows:
    
    OLD:
    matrix mult: 13.346214013s
    matrix mult: 5.498598342s
    matrix mult: 5.11548485s
    matrix mult: 5.573473983s
    matrix mult: 5.673529942s
    matrix mult: 6.08607291s
    matrix mult: 6.244303553s
    matrix mult: 6.422722927s
    matrix mult: 4.995632087s
    matrix mult: 9.085500786s
    
    SystemDS Statistics:
    Total elapsed time:             71.007 sec.
    Total compilation time:         0.792 sec.
    Total execution time:           70.215 sec.
    Cache hits (Mem/Li/WB/FS/HDFS): 11/0/0/0/1.
    Cache writes (Li/WB/FS/HDFS):   0/11/0/0.
    Cache times (ACQr/m, RLS, EXP): 4.696/0.000/9.415/0.000 sec.
    HOP DAGs recompiled (PRED, SB): 0/0.
    HOP DAGs recompile time:        0.000 sec.
    Total JIT compile time:         5.987 sec.
    Total JVM GC count:             1.
    Total JVM GC time:              0.059 sec.
    Heavy hitter instructions:
      1  tsmm          68.039     10
      2  !=             1.577      1
      3  uak+           0.585      1
      4  +              0.027     22
      5  print          0.008     12
      6  mvvar          0.001     31
      7  createvar      0.001     12
      8  rmvar          0.000     45
      9  time           0.000     20
     10  -              0.000     10
    
    NEW:
    
    matrix mult: 12.17142539s
    matrix mult: 5.063393773s
    matrix mult: 4.764698928s
    matrix mult: 4.771695393s
    matrix mult: 5.434539822s
    matrix mult: 4.640708695s
    matrix mult: 4.967180443s
    matrix mult: 5.156199379s
    matrix mult: 5.472330144s
    matrix mult: 5.310449401s
    
    SystemDS Statistics:
    Total elapsed time:             60.405 sec.
    Total compilation time:         0.880 sec.
    Total execution time:           59.525 sec.
    Cache hits (Mem/Li/WB/FS/HDFS): 11/0/0/0/1.
    Cache writes (Li/WB/FS/HDFS):   0/11/0/0.
    Cache times (ACQr/m, RLS, EXP): 4.223/0.000/8.626/0.000 sec.
    HOP DAGs recompiled (PRED, SB): 0/0.
    HOP DAGs recompile time:        0.000 sec.
    Total JIT compile time:         6.956 sec.
    Total JVM GC count:             1.
    Total JVM GC time:              0.062 sec.
    Heavy hitter instructions:
      1  tsmm          57.751     10
      2  !=             1.296      1
      3  uak+           0.465      1
      4  +              0.029     22
      5  print          0.008     12
      6  mvvar          0.001     31
      7  createvar      0.001     12
      8  rmvar          0.000     45
      9  time           0.000     20
     10  -              0.000     10
---
 src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java
index 3df09cbc61..98b3eaa1bb 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java
@@ -480,7 +480,7 @@ public class LibMatrixMult
                ret.sparse = isSparseOutputTSMM(m1, leftTranspose);
                ret.allocateBlock();
                MatrixBlock m1t = isSparseOutputTSMM(m1, leftTranspose, true) ?
-                       LibMatrixReorg.transpose(m1) : null;
+                       LibMatrixReorg.transpose(m1, k) : null;
                
                //core multi-threaded matrix mult computation
                ExecutorService pool = CommonThreadPool.get(k);

Reply via email to