Limit JVM system props passed around in Spark jobs Prior to this commit, SGC indiscriminately set the entire job config as JVM system properties on Spark executors. It didn't account for the fact that some config values (e.g. spark.job.description) could have spaces. A value with a space wouldn't get quoted. This led to Spark workers failing to start, because part of the unquoted value would be erroneously interpreted as the JVM main class.
This commit makes SGC only pass two config settings as system props: * gremlin.io.registry * gremlin.io.kryoShimService Project: http://git-wip-us.apache.org/repos/asf/tinkerpop/repo Commit: http://git-wip-us.apache.org/repos/asf/tinkerpop/commit/f7b71b37 Tree: http://git-wip-us.apache.org/repos/asf/tinkerpop/tree/f7b71b37 Diff: http://git-wip-us.apache.org/repos/asf/tinkerpop/diff/f7b71b37 Branch: refs/heads/master Commit: f7b71b376fef46327b25bc4800860cfb404c8613 Parents: 18eae6d Author: Dan LaRocque <[email protected]> Authored: Thu Nov 17 11:33:58 2016 -0500 Committer: Marko A. Rodriguez <[email protected]> Committed: Tue Nov 29 04:58:19 2016 -0700 ---------------------------------------------------------------------- .../spark/process/computer/SparkGraphComputer.java | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/f7b71b37/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java ---------------------------------------------------------------------- diff --git a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java index 4e74088..ee3ebe1 100644 --- a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java +++ b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java @@ -71,11 +71,16 @@ import org.apache.tinkerpop.gremlin.spark.structure.io.PersistedOutputRDD; import org.apache.tinkerpop.gremlin.spark.structure.io.SparkContextStorage; import org.apache.tinkerpop.gremlin.spark.structure.io.gryo.kryoshim.unshaded.UnshadedKryoShimService; import org.apache.tinkerpop.gremlin.structure.Direction; +import org.apache.tinkerpop.gremlin.structure.io.IoRegistry; import org.apache.tinkerpop.gremlin.structure.io.Storage; import org.apache.tinkerpop.gremlin.structure.io.gryo.kryoshim.KryoShimServiceLoader; import java.io.File; import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Set; import java.util.concurrent.Executor; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -91,6 +96,16 @@ public final class SparkGraphComputer extends AbstractHadoopGraphComputer { private boolean workersSet = false; private final ThreadFactory threadFactoryBoss = new BasicThreadFactory.Builder().namingPattern(SparkGraphComputer.class.getSimpleName() + "-boss").build(); + private static final Set<String> KEYS_PASSED_IN_JVM_SYSTEM_PROPERTIES; + + static + { + Set<String> s = new HashSet<>(); + s.add(KryoShimServiceLoader.KRYO_SHIM_SERVICE); + s.add(IoRegistry.IO_REGISTRY); + KEYS_PASSED_IN_JVM_SYSTEM_PROPERTIES = Collections.unmodifiableSet(s); + } + /** * An {@code ExecutorService} that schedules up background work. Since a {@link GraphComputer} is only used once * for a {@link VertexProgram} a single threaded executor is sufficient. @@ -146,7 +161,7 @@ public final class SparkGraphComputer extends AbstractHadoopGraphComputer { /////////// final StringBuilder params = new StringBuilder(); this.sparkConfiguration.getKeys().forEachRemaining(key -> { - if (key.startsWith("gremlin") || key.startsWith("spark")) { + if (KEYS_PASSED_IN_JVM_SYSTEM_PROPERTIES.contains(key)) { params.append(" -D").append("tinkerpop.").append(key).append("=").append(this.sparkConfiguration.getProperty(key)); System.setProperty("tinkerpop." + key, this.sparkConfiguration.getProperty(key).toString()); }
