This is an automated email from the ASF dual-hosted git repository. mboehm7 pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
commit 2e59d667571c36dede2f3f445478d540423b72f2 Author: Matthias Boehm <[email protected]> AuthorDate: Wed Feb 23 22:38:53 2022 +0100 [SYSTEMDS-3295] Avoid unnecessary buffer pool pollution on replace Replace (pattern, replacement) operations first check if the input contains at least one value to replace, and only if necessary allocate the output and copy the modified values. However, we still created a new matrix object which unnecessarily polluted the buffer pool, leading to unnecessary evictions. We now simply return the input matrix object in such cases. While implementing the test for both matrices and frames, it turned out NaNs where always handled as strings in frame replace leading to mismatching types, and unnecessary slow downs for string conversions. Furthermore, the frame replace should also implement a shallow-copy. --- .../cp/ParameterizedBuiltinCPInstruction.java | 10 +++- .../sysds/runtime/matrix/data/FrameBlock.java | 8 ++- .../functions/caching/BufferpoolShallowCopies.java | 66 ++++++++++++++++++++++ .../functions/caching/BufferpoolShallow.dml | 34 +++++++++++ 4 files changed, 112 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/instructions/cp/ParameterizedBuiltinCPInstruction.java b/src/main/java/org/apache/sysds/runtime/instructions/cp/ParameterizedBuiltinCPInstruction.java index 233154a..be6c40a 100644 --- a/src/main/java/org/apache/sysds/runtime/instructions/cp/ParameterizedBuiltinCPInstruction.java +++ b/src/main/java/org/apache/sysds/runtime/instructions/cp/ParameterizedBuiltinCPInstruction.java @@ -243,12 +243,16 @@ public class ParameterizedBuiltinCPInstruction extends ComputationCPInstruction ec.setFrameOutput(output.getName(), ret); ec.releaseFrameInput(params.get("target")); } else{ - MatrixBlock target = ec.getMatrixInput(params.get("target")); + MatrixObject targetObj = ec.getMatrixObject(params.get("target")); + MatrixBlock target = targetObj.acquireRead(); double pattern = Double.parseDouble(params.get("pattern")); double replacement = Double.parseDouble(params.get("replacement")); MatrixBlock ret = target.replaceOperations(new MatrixBlock(), pattern, replacement); - ec.setMatrixOutput(output.getName(), ret); - ec.releaseMatrixInput(params.get("target")); + if( ret == target ) //shallow copy (avoid bufferpool pollution) + ec.setVariable(output.getName(), targetObj); + else + ec.setMatrixOutput(output.getName(), ret); + targetObj.release(); } } diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/FrameBlock.java b/src/main/java/org/apache/sysds/runtime/matrix/data/FrameBlock.java index bcdf431..2ab834d 100644 --- a/src/main/java/org/apache/sysds/runtime/matrix/data/FrameBlock.java +++ b/src/main/java/org/apache/sysds/runtime/matrix/data/FrameBlock.java @@ -2524,13 +2524,15 @@ public class FrameBlock implements CacheBlock, Externalizable { public <T> FrameBlock replaceOperations(String pattern, String replacement) { FrameBlock ret = new FrameBlock(this); - ValueType patternType = UtilFunctions.isBoolean(pattern) ? ValueType.BOOLEAN : (NumberUtils.isCreatable(pattern) ? + boolean NaNp = "NaN".equals(pattern); + boolean NaNr = "NaN".equals(replacement); + ValueType patternType = UtilFunctions.isBoolean(pattern) ? ValueType.BOOLEAN : (NumberUtils.isCreatable(pattern) | NaNp ? (UtilFunctions.isIntegerNumber(pattern) ? ValueType.INT64 : ValueType.FP64) : ValueType.STRING); - ValueType replacementType = UtilFunctions.isBoolean(replacement) ? ValueType.BOOLEAN : (NumberUtils.isCreatable(replacement) ? + ValueType replacementType = UtilFunctions.isBoolean(replacement) ? ValueType.BOOLEAN : (NumberUtils.isCreatable(replacement) | NaNr ? (UtilFunctions.isIntegerNumber(replacement) ? ValueType.INT64 : ValueType.FP64) : ValueType.STRING); if(patternType != replacementType || !ValueType.isSameTypeString(patternType, replacementType)) - throw new DMLRuntimeException("Pattern and replacement types should be same."); + throw new DMLRuntimeException("Pattern and replacement types should be same: "+patternType+" "+replacementType); for(int i = 0; i < ret.getNumColumns(); i++){ Array colData = ret._coldata[i]; diff --git a/src/test/java/org/apache/sysds/test/functions/caching/BufferpoolShallowCopies.java b/src/test/java/org/apache/sysds/test/functions/caching/BufferpoolShallowCopies.java new file mode 100644 index 0000000..97e9273 --- /dev/null +++ b/src/test/java/org/apache/sysds/test/functions/caching/BufferpoolShallowCopies.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.test.functions.caching; + +import org.junit.Assert; +import org.junit.Test; +import org.apache.sysds.runtime.controlprogram.caching.CacheStatistics; +import org.apache.sysds.test.AutomatedTestBase; +import org.apache.sysds.test.TestConfiguration; + +public class BufferpoolShallowCopies extends AutomatedTestBase +{ + private final static String TEST_NAME = "BufferpoolShallow"; + private final static String TEST_DIR = "functions/caching/"; + private final static String TEST_CLASS_DIR = TEST_DIR + BufferpoolShallowCopies.class.getSimpleName() + "/"; + + @Override + public void setUp() { + addTestConfiguration(TEST_NAME, + new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new String[] { "V" }) ); + } + + @Test + public void testShallowM() { + runTestBufferpoolShallow(2000, 1500, "M"); + } + + //TODO implement shallow copy for frame replace +// @Test +// public void testShallowF() { +// runTestBufferpoolShallow(2000, 1500, "F"); +// } + + private void runTestBufferpoolShallow(int rows, int cols, String type) { + TestConfiguration config = getTestConfiguration(TEST_NAME); + config.addVariable("rows", rows); + config.addVariable("cols", cols); + loadTestConfiguration(config); + + String HOME = SCRIPT_DIR + TEST_DIR; + fullDMLScriptName = HOME + TEST_NAME + ".dml"; + programArgs = new String[]{"-stats", "-args", + Integer.toString(rows), Integer.toString(cols), type}; + + //run test and check no buffer pool writes (lineage and same object) + runTest(true, false, null, -1); + Assert.assertEquals(0, CacheStatistics.getFSBuffWrites()); + } +} diff --git a/src/test/scripts/functions/caching/BufferpoolShallow.dml b/src/test/scripts/functions/caching/BufferpoolShallow.dml new file mode 100644 index 0000000..ebeaaaa --- /dev/null +++ b/src/test/scripts/functions/caching/BufferpoolShallow.dml @@ -0,0 +1,34 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +X = rand(rows=$1, cols=$2, min=1, max=10, seed=7); + +if( $3 == "F" ) { + Y = as.frame(X); + for(i in 1:3) + Y = replace(target=Y, pattern=NaN, replacement=0.0); + print(as.scalar(Y[1,1])) +} +else { # duplicated for loop because no conditional dt change + for(i in 1:3) + X = replace(target=X, pattern=NaN, replacement=0); + print(sum(X)) +}
