This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new 9eb99fa806 [MINOR] Fine-tuning multi-threading thresholds for unary 
aggregates
9eb99fa806 is described below

commit 9eb99fa8063a9d70e3ac5fdd90268ae9e855ac92
Author: Matthias Boehm <[email protected]>
AuthorDate: Fri Mar 15 18:58:18 2024 +0100

    [MINOR] Fine-tuning multi-threading thresholds for unary aggregates
    
    This patch makes the multi-threading thresholds of unary aggregates
    scalable in the number of threads, ensuring tasks are large enough
    to warrant multi-threading. Previously, they were hard-coded assuming
    common 16 or 24 core settings, which no longer holds on today's machines
    with 128+ cores. For an experiment of 1000x sum(X) operations on a machine
    with 2x Intel Xeon Gold 6338 (128 vcores), this patch improved
    performance as follows:
    
    1000x sum(1000 x 10): 0.131s (unchanged)
    1000x sum(2000 x 10): 0.558s -> 0.214s
    1000x sum(4000 x 10): 0.629s -> 0.375s
    1000x sum(8000 x 10): 0.765s -> 0.693s
    1000x sum(16000 x 10): 1.224s (unchanged)
---
 .../java/org/apache/sysds/runtime/matrix/data/LibMatrixAgg.java     | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git 
a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixAgg.java 
b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixAgg.java
index 5d5cbc14e8..f8ac37c443 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixAgg.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixAgg.java
@@ -99,7 +99,7 @@ public class LibMatrixAgg {
        //internal configuration parameters
        private static final boolean NAN_AWARENESS = false;
        private static final long PAR_NUMCELL_THRESHOLD1 = 1024*1024; //Min 1M 
elements
-       private static final long PAR_NUMCELL_THRESHOLD2 = 16*1024;   //Min 16K 
elements
+       private static final long PAR_NUMCELL_THRESHOLD2 = 1024;   //Min 1K 
elements
        private static final long PAR_INTERMEDIATE_SIZE_THRESHOLD = 
2*1024*1024; //Max 2MB
        
        ////////////////////////////////
@@ -663,13 +663,13 @@ public class LibMatrixAgg {
                boolean sharedTP = 
(InfrastructureAnalyzer.getLocalParallelism() == k);
                return k > 1 && out.isThreadSafe() && in.rlen > (sharedTP ? k/8 
: k/2)
                        && (uaop.indexFn instanceof ReduceCol || out.clen*8*k < 
PAR_INTERMEDIATE_SIZE_THRESHOLD) //size
-                       && in.nonZeros > (sharedTP ? PAR_NUMCELL_THRESHOLD2 : 
PAR_NUMCELL_THRESHOLD1);
+                       && in.nonZeros > (sharedTP ? k*PAR_NUMCELL_THRESHOLD2 : 
PAR_NUMCELL_THRESHOLD1);
        }
        
        public static boolean satisfiesMultiThreadingConstraints(MatrixBlock 
in, int k) {
                boolean sharedTP = 
(InfrastructureAnalyzer.getLocalParallelism() == k);
                return k > 1 && in.rlen > (sharedTP ? k/8 : k/2)
-                       && in.nonZeros > (sharedTP ? PAR_NUMCELL_THRESHOLD2 : 
PAR_NUMCELL_THRESHOLD1);
+                       && in.nonZeros > (sharedTP ? k*PAR_NUMCELL_THRESHOLD2 : 
PAR_NUMCELL_THRESHOLD1);
        }
        
        /**

Reply via email to