This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git

commit 045f9e0f62f5f9df0d997d8bbd8496eea5d141f4
Author: Matthias Boehm <[email protected]>
AuthorDate: Wed Apr 3 19:50:35 2024 +0200

    [SYSTEMDS-3691] Multi-threaded dot-product matrix multiplication
    
    So far, dense dot-products were always executed in a single-threaded
    manner despite going through the multi-threaded code path because
    only a single task was created (single row in lhs matrix). We now
    use the existing logic for parallelizing over the common dimension
    instead.
    
    For pageRank on the europe_osm road network graph, the involved
    dot product (1x50912018 mmult 50912018x1) improved from ~50ms to 7ms
    on a machine with 24 pcores / 48 vcores.
---
 .../java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git 
a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java 
b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java
index 780afdad67..d71b6d479f 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java
@@ -1019,7 +1019,10 @@ public class LibMatrixMult
                        if( m==1 && n==1 ) {            //DOT PRODUCT
                                double[] avals = a.valuesAt(0);
                                double[] bvals = b.valuesAt(0);
-                               c.set(0, 0, dotProduct(avals, bvals, cd));
+                               if( ru > m ) //pm2r - parallelize over common 
dim
+                                       c.set(0, 0, dotProduct(avals, bvals, 
rl, rl, ru-rl));
+                               else
+                                       c.set(0, 0, dotProduct(avals, bvals, 
cd));
                        }
                        else if( n>1 && cd == 1 ) {     //OUTER PRODUCT
                                double[] avals = a.valuesAt(0);
@@ -4460,8 +4463,8 @@ public class LibMatrixMult
        private static boolean checkParMatrixMultRightInputRows( MatrixBlock 
m1, MatrixBlock m2, int k ) {
                //parallelize over rows in rhs matrix if number of rows in 
lhs/output is very small
                double jvmMem = InfrastructureAnalyzer.getLocalMaxMemory();
-               return (m1.rlen==1 && LOW_LEVEL_OPTIMIZATION && m2.clen>1 && 
!(m1.isUltraSparse()||m2.isUltraSparse()))
-                       || (m1.rlen<=16 && LOW_LEVEL_OPTIMIZATION && m2.clen>1 
&& m2.rlen > m1.rlen 
+               return (m1.rlen==1 && LOW_LEVEL_OPTIMIZATION && 
!(m1.isUltraSparse()||m2.isUltraSparse()))
+                       || (m1.rlen<=16 && LOW_LEVEL_OPTIMIZATION && m2.rlen > 
m1.rlen 
                           && ( !m1.isUltraSparse() && !(m1.sparse & m2.sparse) 
) //dense-dense / sparse-dense / dense-sparse
                           && (long)k * 8 * m1.rlen * m2.clen < 
Math.max(MEM_OVERHEAD_THRESHOLD,0.01*jvmMem) );
        }

Reply via email to