This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 9eb99fa806 [MINOR] Fine-tuning multi-threading thresholds for unary
aggregates
9eb99fa806 is described below
commit 9eb99fa8063a9d70e3ac5fdd90268ae9e855ac92
Author: Matthias Boehm <[email protected]>
AuthorDate: Fri Mar 15 18:58:18 2024 +0100
[MINOR] Fine-tuning multi-threading thresholds for unary aggregates
This patch makes the multi-threading thresholds of unary aggregates
scalable in the number of threads, ensuring tasks are large enough
to warrant multi-threading. Previously, they were hard-coded assuming
common 16- or 24-core settings, an assumption that no longer holds on
today's machines with 128+ cores. For an experiment of 1000x sum(X)
operations on a machine
with 2x Intel Xeon Gold 6338 (128 vcores), this patch improved
performance as follows:
1000x sum(1000 x 10): 0.131s (unchanged)
1000x sum(2000 x 10): 0.558s -> 0.214s
1000x sum(4000 x 10): 0.629s -> 0.375s
1000x sum(8000 x 10): 0.765s -> 0.693s
1000x sum(16000 x 10): 1.224s (unchanged)
---
.../java/org/apache/sysds/runtime/matrix/data/LibMatrixAgg.java | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git
a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixAgg.java
b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixAgg.java
index 5d5cbc14e8..f8ac37c443 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixAgg.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixAgg.java
@@ -99,7 +99,7 @@ public class LibMatrixAgg {
//internal configuration parameters
private static final boolean NAN_AWARENESS = false;
private static final long PAR_NUMCELL_THRESHOLD1 = 1024*1024; //Min 1M
elements
- private static final long PAR_NUMCELL_THRESHOLD2 = 16*1024; //Min 16K
elements
+ private static final long PAR_NUMCELL_THRESHOLD2 = 1024; //Min 16K
elements
private static final long PAR_INTERMEDIATE_SIZE_THRESHOLD =
2*1024*1024; //Max 2MB
////////////////////////////////
@@ -663,13 +663,13 @@ public class LibMatrixAgg {
boolean sharedTP =
(InfrastructureAnalyzer.getLocalParallelism() == k);
return k > 1 && out.isThreadSafe() && in.rlen > (sharedTP ? k/8
: k/2)
&& (uaop.indexFn instanceof ReduceCol || out.clen*8*k <
PAR_INTERMEDIATE_SIZE_THRESHOLD) //size
- && in.nonZeros > (sharedTP ? PAR_NUMCELL_THRESHOLD2 :
PAR_NUMCELL_THRESHOLD1);
+ && in.nonZeros > (sharedTP ? k*PAR_NUMCELL_THRESHOLD2 :
PAR_NUMCELL_THRESHOLD1);
}
public static boolean satisfiesMultiThreadingConstraints(MatrixBlock
in, int k) {
boolean sharedTP =
(InfrastructureAnalyzer.getLocalParallelism() == k);
return k > 1 && in.rlen > (sharedTP ? k/8 : k/2)
- && in.nonZeros > (sharedTP ? PAR_NUMCELL_THRESHOLD2 :
PAR_NUMCELL_THRESHOLD1);
+ && in.nonZeros > (sharedTP ? k*PAR_NUMCELL_THRESHOLD2 :
PAR_NUMCELL_THRESHOLD1);
}
/**