Dataflow: Add option to upload heap dumps to GCS. This flag needs to go in before backend runner code that reads it. It will have no effect until that code is deployed.
Project: http://git-wip-us.apache.org/repos/asf/beam/repo Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/2fc1c055 Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/2fc1c055 Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/2fc1c055 Branch: refs/heads/tez-runner Commit: 2fc1c055396a7c16d2c7bcb3364e45877b55c5d3 Parents: e40e882 Author: bchambers <bchamb...@google.com> Authored: Wed Nov 8 12:50:44 2017 -0800 Committer: bchambers <bchamb...@google.com> Committed: Fri Nov 10 09:21:13 2017 -0800 ---------------------------------------------------------------------- .../options/DataflowPipelineDebugOptions.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/beam/blob/2fc1c055/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineDebugOptions.java ---------------------------------------------------------------------- diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineDebugOptions.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineDebugOptions.java index ec108da..ffc51d1 100644 --- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineDebugOptions.java +++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineDebugOptions.java @@ -23,6 +23,7 @@ import java.util.Map; import org.apache.beam.runners.dataflow.util.DataflowTransport; import org.apache.beam.runners.dataflow.util.GcsStager; import org.apache.beam.runners.dataflow.util.Stager; +import org.apache.beam.sdk.annotations.Experimental; import org.apache.beam.sdk.options.Default; import org.apache.beam.sdk.options.DefaultValueFactory; import org.apache.beam.sdk.options.Description; @@ -39,6 +40,7 @@ 
import org.apache.beam.sdk.util.InstanceBuilder; + "debugging and testing purposes.") @Hidden public interface DataflowPipelineDebugOptions extends ExperimentalOptions, PipelineOptions { + /** * The root URL for the Dataflow API. {@code dataflowEndpoint} can override this value * if it contains an absolute URL, otherwise {@code apiRootUrl} will be combined with @@ -178,6 +180,19 @@ public interface DataflowPipelineDebugOptions extends ExperimentalOptions, Pipel void setDumpHeapOnOOM(boolean dumpHeapBeforeExit); /** + * CAUTION: This option implies dumpHeapOnOOM, and has similar caveats. Specifically, heap + * dumps can be of comparable size to the default boot disk. Consider increasing the boot disk size + * before setting this flag to true. + */ + @Description( + "[EXPERIMENTAL] Set to a GCS bucket (directory) to upload heap dumps to the given location.\n" + + "Enabling this implies that heap dumps should be generated on OOM (--dumpHeapOnOOM=true)\n" + + "Uploads will continue until the pipeline is stopped or updated without this option.\n") + @Experimental + String getSaveHeapDumpsToGcsPath(); + void setSaveHeapDumpsToGcsPath(String gcsPath); + + /** * Creates a {@link Stager} object using the class specified in * {@link #getStagerClass()}. */