Updated Branches: refs/heads/trunk 4f88fc8fc -> d36dd5081
GIRAPH-592: yourkit profiling Project: http://git-wip-us.apache.org/repos/asf/giraph/repo Commit: http://git-wip-us.apache.org/repos/asf/giraph/commit/d36dd508 Tree: http://git-wip-us.apache.org/repos/asf/giraph/tree/d36dd508 Diff: http://git-wip-us.apache.org/repos/asf/giraph/diff/d36dd508 Branch: refs/heads/trunk Commit: d36dd50810929b5dad2d3bab2007e28802087ad6 Parents: 4f88fc8 Author: Nitay Joffe <[email protected]> Authored: Mon May 6 14:24:46 2013 -0400 Committer: Nitay Joffe <[email protected]> Committed: Mon May 6 14:24:46 2013 -0400 ---------------------------------------------------------------------- CHANGELOG | 2 + giraph-core/pom.xml | 4 + .../apache/giraph/conf/GiraphConfiguration.java | 60 +++++++ .../org/apache/giraph/conf/GiraphConstants.java | 5 + .../org/apache/giraph/utils/YourKitContext.java | 123 +++++++++++++++ .../org/apache/giraph/utils/YourKitProfiler.java | 72 +++++++++ .../org/apache/giraph/worker/BspServiceWorker.java | 31 ++-- pom.xml | 6 + 8 files changed, 287 insertions(+), 16 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/giraph/blob/d36dd508/CHANGELOG ---------------------------------------------------------------------- diff --git a/CHANGELOG b/CHANGELOG index 018c2a4..6439581 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,8 @@ Giraph Change Log Release 1.0.1 - unreleased + GIRAPH-592: YourKit profiler (nitay) + GIRAPH-618: Website Documentation: Aggregators (and sharded aggregators) (majakabiljo) http://git-wip-us.apache.org/repos/asf/giraph/blob/d36dd508/giraph-core/pom.xml ---------------------------------------------------------------------- diff --git a/giraph-core/pom.xml b/giraph-core/pom.xml index 56ff468..bfd894c 100644 --- a/giraph-core/pom.xml +++ b/giraph-core/pom.xml @@ -409,6 +409,10 @@ under the License. <dependencies> <!-- compile dependencies. sorted lexicographically. --> <dependency> + <groupId>com.facebook.thirdparty.yourkit-api</groupId> + <artifactId>yjp-controller-api-redist</artifactId> + </dependency> + <dependency> <groupId>com.google.guava</groupId> <artifactId>guava</artifactId> </dependency> http://git-wip-us.apache.org/repos/asf/giraph/blob/d36dd508/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java index 45a29ff..48f3d4b 100644 --- a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java +++ b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java @@ -38,6 +38,7 @@ import org.apache.giraph.partition.ReusesObjectsPartition; import org.apache.giraph.worker.WorkerContext; import org.apache.giraph.worker.WorkerObserver; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.net.DNS; import java.net.UnknownHostException; @@ -1009,4 +1010,63 @@ public class GiraphConfiguration extends Configuration super.setClass(name, theClass, xface); giraphSetParameters.setClass(name, theClass, xface); } + + /** + * Get the output directory to write YourKit snapshots to + * @param context Map context + * @return output directory + */ + public String getYourKitOutputDir(Mapper.Context context) { + final String cacheKey = "giraph.yourkit.outputDirCached"; + String outputDir = get(cacheKey); + if (outputDir == null) { + outputDir = getStringVars(YOURKIT_OUTPUT_DIR, YOURKIT_OUTPUT_DIR_DEFAULT, + context); + set(cacheKey, outputDir); + } + return outputDir; + } + + /** + * Get string, replacing variables in the output. + * + * %JOB_ID% => job id + * %TASK_ID% => task id + * %USER% => owning user name + * + * @param key name of key to lookup + * @param context mapper context + * @return value for key, with variables expanded + */ + public String getStringVars(String key, Mapper.Context context) { + return getStringVars(key, null, context); + } + + /** + * Get string, replacing variables in the output. + * + * %JOB_ID% => job id + * %TASK_ID% => task id + * %USER% => owning user name + * + * @param key name of key to lookup + * @param defaultValue value to return if no mapping exists. This can also + * have variables, which will be substituted. + * @param context mapper context + * @return value for key, with variables expanded + */ + public String getStringVars(String key, String defaultValue, + Mapper.Context context) { + String value = get(key); + if (value == null) { + if (defaultValue == null) { + return null; + } + value = defaultValue; + } + value = value.replace("%JOB_ID%", context.getJobID().toString()); + value = value.replace("%TASK_ID%", context.getTaskAttemptID().toString()); + value = value.replace("%USER%", get("user.name", "unknown_user")); + return value; + } } http://git-wip-us.apache.org/repos/asf/giraph/blob/d36dd508/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java index 6a5949e..0067c25 100644 --- a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java +++ b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java @@ -586,6 +586,11 @@ public interface GiraphConstants { BooleanConfOption USE_OUT_OF_CORE_GRAPH = new BooleanConfOption("giraph.useOutOfCoreGraph", false); + /** Directory to write YourKit snapshots to */ + String YOURKIT_OUTPUT_DIR = "giraph.yourkit.outputDir"; + /** Default directory to write YourKit snapshots to */ + String YOURKIT_OUTPUT_DIR_DEFAULT = "/tmp/giraph/%JOB_ID%/%TASK_ID%"; + /** Maximum number of partitions to hold in memory for each worker. */ IntConfOption MAX_PARTITIONS_IN_MEMORY = new IntConfOption("giraph.maxPartitionsInMemory", 10); http://git-wip-us.apache.org/repos/asf/giraph/blob/d36dd508/giraph-core/src/main/java/org/apache/giraph/utils/YourKitContext.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/utils/YourKitContext.java b/giraph-core/src/main/java/org/apache/giraph/utils/YourKitContext.java new file mode 100644 index 0000000..5a05113 --- /dev/null +++ b/giraph-core/src/main/java/org/apache/giraph/utils/YourKitContext.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.giraph.utils; + +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.log4j.Logger; + +import com.google.common.base.Joiner; +import com.google.common.io.Files; +import com.yourkit.api.Controller; +import com.yourkit.api.ProfilingModes; + +import java.io.File; +import java.io.IOException; + +/** + * Convenience context for profiling. Hides away all of the exception handling. + * Do not instantiate directly, use only through {@link YourKitProfiler}. + */ +public class YourKitContext { + /** Logger */ + private static final Logger LOG = Logger.getLogger(YourKitContext.class); + + /** Joiner on path separator */ + private static Joiner SLASH_JOINER = Joiner.on("/"); + + /** The YourKit profiling object, or null if no profiling going on. */ + private final Controller yourKitController; + + /** + * Constructor + * @param yourKitController profiling object + */ + YourKitContext(Controller yourKitController) { + this.yourKitController = yourKitController; + } + + /** + * Capture a snapshot + * @param flags See {@link com.yourkit.api.ProfilingModes} + * @param context map context + * @param name unique name for this snapshot + */ + private void snapshot(long flags, Mapper.Context context, String name) { + if (yourKitController != null) { + String path; + try { + path = yourKitController.captureSnapshot(flags); + // CHECKSTYLE: stop IllegalCatch + } catch (Exception e) { + // CHECKSTYLE: resume IllegalCatch + return; + } + File destFile = new File(SLASH_JOINER.join( + "/tmp", context.getJobID(), context.getTaskAttemptID(), + name + ".snapshot")); + try { + Files.createParentDirs(destFile); + Files.move(new File(path), destFile); + } catch (IOException e) { + LOG.error("Failed to move YourKit snapshot file from " + path + + " to " + destFile.getPath(), e); + } + } + } + + /** + * This method is just a convenient replacement of + * {@link #captureSnapshot(long, java.io.File)} with + * {@link com.yourkit.api.ProfilingModes.SNAPSHOT_WITH_HEAP} for the flags. + * + * WARNING: This is likely to be VERY slow for large jobs. + * + * @param context map context + * @param name unique name for this snapshot + */ + public void snapshotWithMemory(Mapper.Context context, String name) { + snapshot(ProfilingModes.SNAPSHOT_WITH_HEAP, context, name); + } + + /** + * This method is just a convenient replacement of + * {@link #captureSnapshot(long, java.io.File)} with + * {@link com.yourkit.api.ProfilingModes.SNAPSHOT_WITHOUT_HEAP} for the flags. + * + * @param context map context + * @param name unique name for this snapshot + */ + public void snapshotCPUOnly(Mapper.Context context, String name) { + snapshot(ProfilingModes.SNAPSHOT_WITHOUT_HEAP, context, name); + } + + /** + * Stop profiling CPU + */ + public void stop() { + if (yourKitController != null) { + try { + yourKitController.stopCPUProfiling(); + // CHECKSTYLE: stop IllegalCatch + } catch (Exception e) { + // CHECKSTYLE: resume IllegalCatch + LOG.error("Failed to stop YourKit CPU profiling", e); + } + } + } +} http://git-wip-us.apache.org/repos/asf/giraph/blob/d36dd508/giraph-core/src/main/java/org/apache/giraph/utils/YourKitProfiler.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/utils/YourKitProfiler.java b/giraph-core/src/main/java/org/apache/giraph/utils/YourKitProfiler.java new file mode 100644 index 0000000..c9688bd --- /dev/null +++ b/giraph-core/src/main/java/org/apache/giraph/utils/YourKitProfiler.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.giraph.utils; + +import org.apache.giraph.conf.GiraphConfiguration; +import org.apache.log4j.Logger; + +import com.yourkit.api.Controller; +import com.yourkit.api.ProfilingModes; + +/** + * Helper for YourKit profiling from within the code. + * + * See the following for information about usage: + * - http://www.yourkit.com/docs/95/help/api.jsp + * - http://www.yourkit.com/docs/95/api/index.html + * + * This class is a simple helper around the API mentioned above that allows you + * to easily wrap code with {@link YourKitProfiler#startProfile(GiraphConfiguration)} + * followed by any amount of snapshotX calls and finally {@link YourKitContext#stop()}. + * See also {@link YourKitContext}. + * + * As of 05/2013 YourKit is not publishing their API jars to Maven, but their + * license allows us to do it, so we have setup a repository to do this. + * See https://github.com/facebook/sonatype-yourkit for more info. + */ +public class YourKitProfiler { + /** Logger */ + private static final Logger LOG = Logger.getLogger(YourKitProfiler.class); + + /** Don't construct, allow inheritance */ + protected YourKitProfiler() { } + + /** + * Convenient replacement of {@link #startProfilingCPU(long)} with + * {@link ProfilingModes.CPU_TRACING} for the mode. + * + * @param conf GiraphConfiguration + * @return profiler context + */ + public static YourKitContext startProfile(GiraphConfiguration conf) { + Controller controller = null; + try { + controller = new Controller(); + controller.enableStackTelemetry(); + controller.startCPUProfiling(ProfilingModes.CPU_SAMPLING, + Controller.DEFAULT_FILTERS); + LOG.debug("Started YourKit profiling CPU"); + // CHECKSTYLE: stop IllegalCatch + } catch (Exception e) { + // CHECKSTYLE: resume IllegalCatch + LOG.debug("Failed to start YourKit CPU profiling", e); + } + return new YourKitContext(controller); + } +} http://git-wip-us.apache.org/repos/asf/giraph/blob/d36dd508/giraph-core/src/main/java/org/apache/giraph/worker/BspServiceWorker.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/worker/BspServiceWorker.java b/giraph-core/src/main/java/org/apache/giraph/worker/BspServiceWorker.java index 51edbac..03a4876 100644 --- a/giraph-core/src/main/java/org/apache/giraph/worker/BspServiceWorker.java +++ b/giraph-core/src/main/java/org/apache/giraph/worker/BspServiceWorker.java @@ -18,8 +18,8 @@ package org.apache.giraph.worker; - import org.apache.giraph.bsp.ApplicationState; +import org.apache.giraph.bsp.BspService; import org.apache.giraph.bsp.CentralizedServiceWorker; import org.apache.giraph.comm.ServerData; import org.apache.giraph.comm.WorkerClient; @@ -32,27 +32,18 @@ import org.apache.giraph.comm.netty.NettyWorkerClientRequestProcessor; import org.apache.giraph.comm.netty.NettyWorkerServer; import org.apache.giraph.conf.GiraphConstants; import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration; +import org.apache.giraph.graph.AddressesAndPartitionsWritable; +import org.apache.giraph.graph.FinishedSuperstepStats; +import org.apache.giraph.graph.GlobalStats; import org.apache.giraph.graph.GraphState; -import org.apache.giraph.bsp.BspService; import org.apache.giraph.graph.GraphTaskManager; -import org.apache.giraph.graph.VertexEdgeCount; -import org.apache.giraph.graph.InputSplitPaths; import org.apache.giraph.graph.InputSplitEvents; -import org.apache.giraph.graph.FinishedSuperstepStats; -import org.apache.giraph.graph.AddressesAndPartitionsWritable; -import org.apache.giraph.graph.GlobalStats; -import org.apache.giraph.io.superstep_output.SuperstepOutput; -import org.apache.giraph.utils.CallableFactory; -import org.apache.giraph.utils.JMapHistoDumper; +import org.apache.giraph.graph.InputSplitPaths; import org.apache.giraph.graph.Vertex; +import org.apache.giraph.graph.VertexEdgeCount; import org.apache.giraph.io.VertexOutputFormat; import org.apache.giraph.io.VertexWriter; -import org.apache.giraph.partition.Partition; -import org.apache.giraph.partition.PartitionExchange; -import org.apache.giraph.partition.PartitionOwner; -import org.apache.giraph.partition.PartitionStats; -import org.apache.giraph.partition.PartitionStore; -import org.apache.giraph.partition.WorkerGraphPartitioner; +import org.apache.giraph.io.superstep_output.SuperstepOutput; import org.apache.giraph.master.MasterInfo; import org.apache.giraph.metrics.GiraphMetrics; import org.apache.giraph.metrics.GiraphTimer; @@ -60,6 +51,14 @@ import org.apache.giraph.metrics.GiraphTimerContext; import org.apache.giraph.metrics.ResetSuperstepMetricsObserver; import org.apache.giraph.metrics.SuperstepMetricsRegistry; import org.apache.giraph.metrics.WorkerSuperstepMetrics; +import org.apache.giraph.partition.Partition; +import org.apache.giraph.partition.PartitionExchange; +import org.apache.giraph.partition.PartitionOwner; +import org.apache.giraph.partition.PartitionStats; +import org.apache.giraph.partition.PartitionStore; +import org.apache.giraph.partition.WorkerGraphPartitioner; +import org.apache.giraph.utils.CallableFactory; +import org.apache.giraph.utils.JMapHistoDumper; import org.apache.giraph.utils.LoggerUtils; import org.apache.giraph.utils.MemoryUtils; import org.apache.giraph.utils.ProgressableUtils; http://git-wip-us.apache.org/repos/asf/giraph/blob/d36dd508/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 7d7fec9..0afd675 100644 --- a/pom.xml +++ b/pom.xml @@ -262,6 +262,7 @@ under the License. <hive.version>0.10.0</hive.version> <facebook-hadoop.version>0.20.0</facebook-hadoop.version> + <yourkit-api.version>9.5.6</yourkit-api.version> <forHadoop>for-hadoop-${hadoop.version}</forHadoop> </properties> @@ -1018,6 +1019,11 @@ under the License. <version>1.2</version> </dependency> <dependency> + <groupId>com.facebook.thirdparty.yourkit-api</groupId> + <artifactId>yjp-controller-api-redist</artifactId> + <version>${yourkit-api.version}</version> + </dependency> + <dependency> <groupId>io.netty</groupId> <artifactId>netty</artifactId> <version>3.5.3.Final</version>
