This is an automated email from the ASF dual-hosted git repository.
baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 5ac091a20b [MINOR] Increase Memory Estimate for Frames
5ac091a20b is described below
commit 5ac091a20bc9e8af7886ab2c45219e6c01e1d2ea
Author: Sebastian Baunsgaard <[email protected]>
AuthorDate: Sat Dec 28 14:05:36 2024 +0100
[MINOR] Increase Memory Estimate for Frames
This commit increases the default estimate of frame size.
Previously, frames were estimated similarly to Matrices.
The wrong estimate leads to problems on frames with more than Integer.MAX_VALUE rows.
To improve it, this commit defaults to 8-character strings in all cells
of a frame that has not been read yet.
Since there is no way of knowing if the input Frame contains longer
strings, it is still a subpar estimate. However,
it is an improvement over estimating everything as a dense double Matrix.
(The change happened because I encountered very incorrect estimates in
BEWARE)
Closes #2158
Signed-off-by: Sebastian Baunsgaard <[email protected]>
---
src/main/java/org/apache/sysds/hops/DataOp.java | 9 ++++-
.../java/org/apache/sysds/hops/OptimizerUtils.java | 13 +++++++
.../test/component/misc/MemoryEstimateTest.java | 1 -
.../test/component/misc/OptimizerUtilsTest.java | 43 ++++++++++++++++++++++
4 files changed, 63 insertions(+), 3 deletions(-)
diff --git a/src/main/java/org/apache/sysds/hops/DataOp.java
b/src/main/java/org/apache/sysds/hops/DataOp.java
index 82e5ecbbad..7be61f4129 100644
--- a/src/main/java/org/apache/sysds/hops/DataOp.java
+++ b/src/main/java/org/apache/sysds/hops/DataOp.java
@@ -359,8 +359,8 @@ public class DataOp extends Hop {
protected double computeOutputMemEstimate( long dim1, long dim2, long
nnz )
{
double ret = 0;
-
- if ( getDataType() == DataType.SCALAR )
+ final DataType dt = getDataType();
+ if ( dt == DataType.SCALAR )
{
switch( getValueType() )
{
@@ -379,6 +379,11 @@ public class DataOp extends Hop {
ret = 0;
}
}
+ else if(dt == DataType.FRAME) {
+ if(_op == OpOpData.PERSISTENTREAD || _op ==
OpOpData.TRANSIENTREAD) {
+ ret =
OptimizerUtils.estimateSizeExactFrame(dim1, dim2);
+ }
+ }
else //MATRIX / FRAME
{
if( _op == OpOpData.PERSISTENTREAD
diff --git a/src/main/java/org/apache/sysds/hops/OptimizerUtils.java
b/src/main/java/org/apache/sysds/hops/OptimizerUtils.java
index 6338ff7a70..a3161c5723 100644
--- a/src/main/java/org/apache/sysds/hops/OptimizerUtils.java
+++ b/src/main/java/org/apache/sysds/hops/OptimizerUtils.java
@@ -64,6 +64,7 @@ import org.apache.sysds.runtime.meta.MatrixCharacteristics;
import org.apache.sysds.runtime.util.IndexRange;
import org.apache.sysds.runtime.util.UtilFunctions;
import org.apache.sysds.utils.stats.InfrastructureAnalyzer;
+import org.apache.sysds.utils.MemoryEstimates;
public class OptimizerUtils
{
@@ -788,6 +789,18 @@ public class OptimizerUtils
double sp = getSparsity(nrows, ncols, nnz);
return estimateSizeExactSparsity(nrows, ncols, sp);
}
+
+
+	/**
+	 * Estimates the memory size of a frame with the given dimensions.
+	 *
+	 * Each column is costed as a String array with, on average, 8 characters per value, since the
+	 * actual string lengths are not known here. Frames with more than Integer.MAX_VALUE rows are
+	 * currently not supported and are reported as Long.MAX_VALUE, which forces spark operations.
+	 *
+	 * @param nRows number of rows of the frame
+	 * @param nCols number of columns of the frame
+	 * @return the estimated size, saturated at Long.MAX_VALUE instead of overflowing
+	 */
+	public static long estimateSizeExactFrame(long nRows, long nCols){
+		// Currently we do not support frames larger than INT.
+		// Therefore, we estimate their size to be extremely large.
+		// The large size forces spark operations.
+		if(nRows > Integer.MAX_VALUE)
+			return Long.MAX_VALUE;
+
+		// assuming String arrays and on average 8 characters per value.
+		final long colCost = (long) MemoryEstimates.stringArrayCost((int) nRows, 8);
+
+		// colCost * nCols can exceed the long range for very wide frames; saturate rather than
+		// wrap around to a small or negative estimate.
+		if(nCols > 0 && colCost > Long.MAX_VALUE / nCols)
+			return Long.MAX_VALUE;
+		return colCost * nCols;
+	}
/**
* Estimates the footprint (in bytes) for an in-memory representation
of a
diff --git
a/src/test/java/org/apache/sysds/test/component/misc/MemoryEstimateTest.java
b/src/test/java/org/apache/sysds/test/component/misc/MemoryEstimateTest.java
index 8c8e31535b..d68c30f836 100644
--- a/src/test/java/org/apache/sysds/test/component/misc/MemoryEstimateTest.java
+++ b/src/test/java/org/apache/sysds/test/component/misc/MemoryEstimateTest.java
@@ -87,7 +87,6 @@ public class MemoryEstimateTest {
assertEquals(MemoryEstimates.doubleArrayCost(length), measure(arrayDouble),
0.2);
break;
default:
-
System.out.println(arrayToMeasure.getClass().getSimpleName());
throw new
NotImplementedException(arrayToMeasure + " not implemented");
}
}
diff --git
a/src/test/java/org/apache/sysds/test/component/misc/OptimizerUtilsTest.java
b/src/test/java/org/apache/sysds/test/component/misc/OptimizerUtilsTest.java
new file mode 100644
index 0000000000..16e9b2c27b
--- /dev/null
+++ b/src/test/java/org/apache/sysds/test/component/misc/OptimizerUtilsTest.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.component.misc;
+
+import static org.junit.Assert.assertTrue;
+
+import org.apache.sysds.hops.OptimizerUtils;
+import org.junit.Test;
+
+/**
+ * Component tests for the frame size estimate provided by OptimizerUtils.
+ */
+public class OptimizerUtilsTest {
+
+	@Test
+	public void estimateFrameSize() {
+		// the estimate for a 10 x 10 frame must exceed the plain number of cells
+		final long size = OptimizerUtils.estimateSizeExactFrame(10, 10);
+		assertTrue(size > 10 * 10);
+	}
+
+	@Test
+	public void estimateFrameSizeMoreRowsThanInt() {
+		// Currently we do not support frames larger than INT.
+		// Therefore we estimate their size to be extremely large.
+		// The large size forces spark operations.
+		final long size = OptimizerUtils.estimateSizeExactFrame(Integer.MAX_VALUE + 1L, 10);
+
+		assertTrue(size == Long.MAX_VALUE);
+	}
+}