This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push:
new cdccbcd [SYSTEMDS-2927] Fix codegen memory estimates, cleanup, and
templates
cdccbcd is described below
commit cdccbcdd883d0f7400d281f1ce908b5e7be1284c
Author: Matthias Boehm <[email protected]>
AuthorDate: Mon May 24 22:36:08 2021 +0200
[SYSTEMDS-2927] Fix codegen memory estimates, cleanup, and templates
This patch fixes several smaller issues in the codegen compiler,
including corrupted memory estimates in forced singlenode execution
(thanks Mark for catching this issue), remove unused finalization code
(no object of the SpoofCompiler ever created), and move the java
cell/row templates back into the operators to avoid unnecessary jar
unpacking (should have read from resource stream), simplify debugging,
and avoid too verbose generated operators (no license necessary).
---
conf/SystemDS-config.xml.template | 2 +-
src/main/java/org/apache/sysds/api/DMLScript.java | 4 +-
.../apache/sysds/hops/codegen/SpoofCompiler.java | 44 ++++++++++------------
.../apache/sysds/hops/codegen/SpoofFusedOp.java | 25 +++++++-----
.../org/apache/sysds/hops/codegen/cplan/CNode.java | 4 +-
.../apache/sysds/hops/codegen/cplan/CNodeCell.java | 19 ++++++++++
.../apache/sysds/hops/codegen/cplan/CNodeRow.java | 20 ++++++++++
.../hops/codegen/cplan/java/Cellwise.java.template | 37 ------------------
.../hops/codegen/cplan/java/Rowwise.java.template | 43 ---------------------
9 files changed, 80 insertions(+), 118 deletions(-)
diff --git a/conf/SystemDS-config.xml.template
b/conf/SystemDS-config.xml.template
index b516161..141f044 100644
--- a/conf/SystemDS-config.xml.template
+++ b/conf/SystemDS-config.xml.template
@@ -43,7 +43,7 @@
<sysds.codegen.enabled>false</sysds.codegen.enabled>
<!-- set the codegen API (auto, java, cuda) -->
- <sysds.codegen.api>auto</sysds.codegen.api>
+ <sysds.codegen.api>auto</sysds.codegen.api>
<!-- set the codegen java compiler (auto, janino, javac, nvcc, nvrtc) -->
<sysds.codegen.compiler>auto</sysds.codegen.compiler>
diff --git a/src/main/java/org/apache/sysds/api/DMLScript.java
b/src/main/java/org/apache/sysds/api/DMLScript.java
index 390833a..c60fa3e 100644
--- a/src/main/java/org/apache/sysds/api/DMLScript.java
+++ b/src/main/java/org/apache/sysds/api/DMLScript.java
@@ -47,6 +47,7 @@ import org.apache.sysds.conf.ConfigurationManager;
import org.apache.sysds.conf.DMLConfig;
import org.apache.sysds.hops.OptimizerUtils;
import org.apache.sysds.hops.codegen.SpoofCompiler;
+import org.apache.sysds.hops.codegen.SpoofCompiler.GeneratorAPI;
import org.apache.sysds.lops.Lop;
import org.apache.sysds.parser.DMLProgram;
import org.apache.sysds.parser.DMLTranslator;
@@ -602,7 +603,8 @@ public class DMLScript
private static void configureCodeGen() {
// load native codegen if configured
if(ConfigurationManager.isCodegenEnabled()) {
- SpoofCompiler.GeneratorAPI configured_generator =
SpoofCompiler.GeneratorAPI.valueOf(ConfigurationManager.getDMLConfig().getTextValue(DMLConfig.CODEGEN_API).toUpperCase());
+ GeneratorAPI configured_generator =
GeneratorAPI.valueOf(
+
ConfigurationManager.getDMLConfig().getTextValue(DMLConfig.CODEGEN_API).toUpperCase());
try {
SpoofCompiler.loadNativeCodeGenerator(configured_generator);
}
diff --git a/src/main/java/org/apache/sysds/hops/codegen/SpoofCompiler.java
b/src/main/java/org/apache/sysds/hops/codegen/SpoofCompiler.java
index d3d638a..f31a640 100644
--- a/src/main/java/org/apache/sysds/hops/codegen/SpoofCompiler.java
+++ b/src/main/java/org/apache/sysds/hops/codegen/SpoofCompiler.java
@@ -120,9 +120,18 @@ public class SpoofCompiler {
public static PlanCachePolicy PLAN_CACHE_POLICY =
PlanCachePolicy.CSLH;
public static final int PLAN_CACHE_SIZE = 1024; //max 1K
classes
public static final RegisterAlloc REG_ALLOC_POLICY =
RegisterAlloc.EXACT_STATIC_BUFF;
- public static GeneratorAPI API = GeneratorAPI.JAVA;
- public static HashMap<GeneratorAPI, Long> native_contexts;
+ public static GeneratorAPI API = GeneratorAPI.JAVA;
+ public static HashMap<GeneratorAPI, Long> native_contexts = new
HashMap<>();
+ //plan cache for cplan->compiled source to avoid unnecessary
codegen/source code compile
+ //for equal operators from (1) different hop dags and (2) repeated
recompilation
+ //note: if PLAN_CACHE_SIZE is exceeded, we evict the
least-recently-used plan (LRU policy)
+ private static final PlanCache planCache = new
PlanCache(PLAN_CACHE_SIZE);
+
+ private static ProgramRewriter rewriteCSE = new ProgramRewriter(
+ new RewriteCommonSubexpressionElimination(true),
+ new RewriteRemoveUnnecessaryCasts());
+
public enum CompilerType {
AUTO,
JAVAC,
@@ -178,26 +187,20 @@ public class SpoofCompiler {
EXACT_STATIC_BUFF, //min number of live vector intermediates,
assuming static array ring buffer
}
- @Override
- protected void finalize() {
- SpoofCompiler.cleanupCodeGenerator();
- }
-
public static void loadNativeCodeGenerator(GeneratorAPI generator) {
if(DMLScript.getGlobalExecMode() == ExecMode.SPARK) {
LOG.warn("Not loading native codegen library in SPARK
execution mode!\n");
+ generator = GeneratorAPI.JAVA;
return;
}
// loading cuda codegen (the only supported API atm)
- if(generator == GeneratorAPI.AUTO && DMLScript.USE_ACCELERATOR)
- generator = GeneratorAPI.CUDA;
-
- if(generator == GeneratorAPI.CUDA && !DMLScript.USE_ACCELERATOR)
- generator = GeneratorAPI.JAVA;
-
- if(native_contexts == null)
- native_contexts = new HashMap<>();
+ if( generator == GeneratorAPI.AUTO | generator ==
GeneratorAPI.CUDA ) {
+ generator = DMLScript.USE_ACCELERATOR ?
+ GeneratorAPI.CUDA : GeneratorAPI.JAVA;
+ if( generator == GeneratorAPI.JAVA )
+ return;
+ }
if(!native_contexts.containsKey(generator)) {
String local_tmp =
ConfigurationManager.getDMLConfig().getTextValue(DMLConfig.LOCAL_TMP_DIR);
@@ -277,6 +280,7 @@ public class SpoofCompiler {
}
}
+ //FIXME completely remove or load via resource stream (see builtin
functions)
private static void extractCodegenSources(String resource_path, String
jar_path) throws IOException {
try(JarFile jar_file = new JarFile(jar_path)) {
Enumeration<JarEntry> files_in_jar = jar_file.entries();
@@ -301,15 +305,6 @@ public class SpoofCompiler {
private static native long initialize_cuda_context(int device_id,
String resource_path);
private static native void destroy_cuda_context(long ctx, int
device_id);
-
- //plan cache for cplan->compiled source to avoid unnecessary
codegen/source code compile
- //for equal operators from (1) different hop dags and (2) repeated
recompilation
- //note: if PLAN_CACHE_SIZE is exceeded, we evict the
least-recently-used plan (LRU policy)
- private static final PlanCache planCache = new
PlanCache(PLAN_CACHE_SIZE);
-
- private static ProgramRewriter rewriteCSE = new ProgramRewriter(
- new RewriteCommonSubexpressionElimination(true),
- new RewriteRemoveUnnecessaryCasts());
public static void generateCode(DMLProgram dmlprog) {
// for each namespace, handle function statement blocks
@@ -630,7 +625,6 @@ public class SpoofCompiler {
if(API != GeneratorAPI.JAVA)
unloadNativeCodeGenerator();
-
}
/**
diff --git a/src/main/java/org/apache/sysds/hops/codegen/SpoofFusedOp.java
b/src/main/java/org/apache/sysds/hops/codegen/SpoofFusedOp.java
index f46fa86..be3ff1c 100644
--- a/src/main/java/org/apache/sysds/hops/codegen/SpoofFusedOp.java
+++ b/src/main/java/org/apache/sysds/hops/codegen/SpoofFusedOp.java
@@ -21,6 +21,7 @@ package org.apache.sysds.hops.codegen;
import org.apache.sysds.common.Types.DataType;
import org.apache.sysds.common.Types.ValueType;
+import org.apache.sysds.conf.ConfigurationManager;
import org.apache.sysds.hops.codegen.SpoofCompiler.GeneratorAPI;
import org.apache.sysds.hops.Hop;
import org.apache.sysds.hops.MemoTable;
@@ -64,8 +65,8 @@ public class SpoofFusedOp extends MultiThreadedHop
}
- public SpoofFusedOp( String name, DataType dt, ValueType vt, Class<?>
cla, GeneratorAPI api, String genVarName,
- boolean dist,
SpoofOutputDimsType type ) {
+ public SpoofFusedOp( String name, DataType dt, ValueType vt, Class<?>
cla, GeneratorAPI api,
+ String genVarName, boolean dist, SpoofOutputDimsType type ) {
super(name, dt, vt);
_class = cla;
_distSupported = dist;
@@ -101,13 +102,19 @@ public class SpoofFusedOp extends MultiThreadedHop
@Override
protected double computeOutputMemEstimate(long dim1, long dim2, long
nnz) {
- if(_api == GeneratorAPI.JAVA) {
- return
_class.getGenericSuperclass().equals(SpoofRowwise.class) ?
- OptimizerUtils.estimateSize(dim1, dim2)
:
-
OptimizerUtils.estimatePartitionedSizeExactSparsity(dim1, dim2, getBlocksize(),
nnz);
- }
- else
- return
OptimizerUtils.estimatePartitionedSizeExactSparsity(dim1, dim2, getBlocksize(),
nnz);
+ // The output estimate influences the ExecType decision as
usual,
+ // but for codegen operators also various fusion decisions in
both
+ // local and distributed environments. For that reason, we use
the
+ // partitioned size as a more conservative estimate - for dense
this
+ // is almost the same, but for sparse it includes the block
indexes
+ // and overhead of row arrays per block. In forced singlenode
exec
+ // mode, the blocksize is however -1 and need appropriate
treatment.
+ boolean onlyDenseOut = (_api == GeneratorAPI.JAVA
+ &&
_class.getGenericSuperclass().equals(SpoofRowwise.class));
+ int blen = (getBlocksize() > 0) ? getBlocksize() :
ConfigurationManager.getBlocksize();
+ return onlyDenseOut ?
+ OptimizerUtils.estimateSize(dim1, dim2) :
+
OptimizerUtils.estimatePartitionedSizeExactSparsity(dim1, dim2, blen, nnz);
}
@Override
diff --git a/src/main/java/org/apache/sysds/hops/codegen/cplan/CNode.java
b/src/main/java/org/apache/sysds/hops/codegen/cplan/CNode.java
index fd5c2e6..36cc8f4 100644
--- a/src/main/java/org/apache/sysds/hops/codegen/cplan/CNode.java
+++ b/src/main/java/org/apache/sysds/hops/codegen/cplan/CNode.java
@@ -280,9 +280,9 @@ public abstract class CNode
else return null;
case JAVA:
if(caller instanceof CNodeCell)
- return
CodeTemplate.getTemplate("/java/org/apache/sysds/hops/codegen/cplan/java/Cellwise.java.template");
+ return CNodeCell.JAVA_TEMPLATE;
else if(caller instanceof CNodeRow)
- return
CodeTemplate.getTemplate("/java/org/apache/sysds/hops/codegen/cplan/java/Rowwise.java.template");
+ return CNodeRow.JAVA_TEMPLATE;
else return null;
default:
throw new RuntimeException("API not supported
by code generator: " + api.toString());
diff --git a/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeCell.java
b/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeCell.java
index 4e79fea..c8cd6e2 100644
--- a/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeCell.java
+++ b/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeCell.java
@@ -31,6 +31,25 @@ import org.apache.sysds.runtime.util.UtilFunctions;
public class CNodeCell extends CNodeTpl
{
+ protected static final String JAVA_TEMPLATE =
+ "package codegen;\n"
+ + "import
org.apache.sysds.runtime.codegen.LibSpoofPrimitives;\n"
+ + "import org.apache.sysds.runtime.codegen.SpoofCellwise;\n"
+ + "import
org.apache.sysds.runtime.codegen.SpoofCellwise.AggOp;\n"
+ + "import
org.apache.sysds.runtime.codegen.SpoofCellwise.CellType;\n"
+ + "import
org.apache.sysds.runtime.codegen.SpoofOperator.SideInput;\n"
+ + "import org.apache.commons.math3.util.FastMath;\n"
+ + "\n"
+ + "public final class %TMP% extends SpoofCellwise {\n"
+ + " public %TMP%() {\n"
+ + " super(CellType.%TYPE%, %SPARSE_SAFE%, %SEQ%,
%AGG_OP_NAME%);\n"
+ + " }\n"
+ + " protected double genexec(double a, SideInput[] b, double[]
scalars, int m, int n, long grix, int rix, int cix) { \n"
+ + "%BODY_dense%"
+ + " return %OUT%;\n"
+ + " }\n"
+ + "}\n";
+
private CellType _type = null;
private AggOp _aggOp = null;
private boolean _sparseSafe = false;
diff --git a/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeRow.java
b/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeRow.java
index 603fea9..88844d6 100644
--- a/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeRow.java
+++ b/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeRow.java
@@ -31,6 +31,26 @@ import java.util.ArrayList;
public class CNodeRow extends CNodeTpl
{
+ protected static final String JAVA_TEMPLATE =
+ "package codegen;\n"
+ + "import
org.apache.sysds.runtime.codegen.LibSpoofPrimitives;\n"
+ + "import
org.apache.sysds.runtime.codegen.SpoofOperator.SideInput;\n"
+ + "import org.apache.sysds.runtime.codegen.SpoofRowwise;\n"
+ + "import
org.apache.sysds.runtime.codegen.SpoofRowwise.RowType;\n"
+ + "import org.apache.commons.math3.util.FastMath;\n"
+ + "\n"
+ + "public final class %TMP% extends SpoofRowwise { \n"
+ + " public %TMP%() {\n"
+ + " super(RowType.%TYPE%, %CONST_DIM2%, %TB1%,
%VECT_MEM%);\n"
+ + " }\n"
+ + " protected void genexec(double[] a, int ai, SideInput[] b,
double[] scalars, double[] c, int ci, int len, long grix, int rix) { \n"
+ + "%BODY_dense%"
+ + " }\n"
+ + " protected void genexec(double[] avals, int[] aix, int ai,
SideInput[] b, double[] scalars, double[] c, int ci, int alen, int len, long
grix, int rix) { \n"
+ + "%BODY_sparse%"
+ + " }\n"
+ + "}\n";
+
private static final String TEMPLATE_ROWAGG_OUT = " c[rix] =
%IN%;\n";
private static final String TEMPLATE_FULLAGG_OUT = " c[0] +=
%IN%;\n";
private static final String TEMPLATE_NOAGG_OUT = "
LibSpoofPrimitives.vectWrite(%IN%, c, ci, %LEN%);\n";
diff --git
a/src/main/java/org/apache/sysds/hops/codegen/cplan/java/Cellwise.java.template
b/src/main/java/org/apache/sysds/hops/codegen/cplan/java/Cellwise.java.template
deleted file mode 100644
index 3f7c6fe..0000000
---
a/src/main/java/org/apache/sysds/hops/codegen/cplan/java/Cellwise.java.template
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package codegen;
-import org.apache.sysds.runtime.codegen.LibSpoofPrimitives;
-import org.apache.sysds.runtime.codegen.SpoofCellwise;
-import org.apache.sysds.runtime.codegen.SpoofCellwise.AggOp;
-import org.apache.sysds.runtime.codegen.SpoofCellwise.CellType;
-import org.apache.sysds.runtime.codegen.SpoofOperator.SideInput;
-import org.apache.commons.math3.util.FastMath;
-
-public final class %TMP% extends SpoofCellwise {
- public %TMP%() {
- super(CellType.%TYPE%, %SPARSE_SAFE%, %SEQ%, %AGG_OP_NAME%);
- }
-
- protected double genexec(double a, SideInput[] b, double[] scalars, int m,
int n, long grix, int rix, int cix) {
-%BODY_dense%
- return %OUT%;
- }
-}
diff --git
a/src/main/java/org/apache/sysds/hops/codegen/cplan/java/Rowwise.java.template
b/src/main/java/org/apache/sysds/hops/codegen/cplan/java/Rowwise.java.template
deleted file mode 100644
index 89f5015..0000000
---
a/src/main/java/org/apache/sysds/hops/codegen/cplan/java/Rowwise.java.template
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package codegen;
-import org.apache.sysds.runtime.codegen.LibSpoofPrimitives;
-import org.apache.sysds.runtime.codegen.SpoofOperator.SideInput;
-import org.apache.sysds.runtime.codegen.SpoofRowwise;
-import org.apache.sysds.runtime.codegen.SpoofRowwise.RowType;
-import org.apache.commons.math3.util.FastMath;
-
-public final class %TMP% extends SpoofRowwise {
- public %TMP%() {
- super(RowType.%TYPE%, %CONST_DIM2%, %TB1%, %VECT_MEM%);
- }
-
- protected void genexec(double[] a, int ai, SideInput[] b,
- double[] scalars, double[] c, int ci, int len, long grix, int rix)
- {
-%BODY_dense%
- }
-
- protected void genexec(double[] avals, int[] aix, int ai, SideInput[] b,
- double[] scalars, double[] c, int ci, int alen, int len, long grix, int
rix)
- {
-%BODY_sparse%
- }
-}