BEAM-879: Changing DeDupExample to DistinctExample
Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/e3dca4ca
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/e3dca4ca
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/e3dca4ca

Branch: refs/heads/gearpump-runner
Commit: e3dca4cab6914166465c70f5f0b4be4f06ddd088
Parents: 1abbb90
Author: Neelesh Srinivas Salian <nsal...@cloudera.com>
Authored: Thu Dec 1 20:28:43 2016 -0800
Committer: Dan Halperin <dhalp...@google.com>
Committed: Fri Dec 2 15:20:16 2016 -0800

----------------------------------------------------------------------
 .../beam/examples/cookbook/DeDupExample.java    | 96 --------------------
 .../beam/examples/cookbook/DistinctExample.java | 96 ++++++++++++++++++++
 2 files changed, 96 insertions(+), 96 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/e3dca4ca/examples/java/src/main/java/org/apache/beam/examples/cookbook/DeDupExample.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DeDupExample.java b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DeDupExample.java
deleted file mode 100644
index 34fb901..0000000
--- a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DeDupExample.java
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.examples.cookbook;
-
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.options.Default;
-import org.apache.beam.sdk.options.DefaultValueFactory;
-import org.apache.beam.sdk.options.Description;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.apache.beam.sdk.transforms.Distinct;
-import org.apache.beam.sdk.util.gcsfs.GcsPath;
-
-/**
- * This example uses as input Shakespeare's plays as plaintext files, and will remove any
- * duplicate lines across all the files. (The output does not preserve any input order).
- *
- * <p>Concepts: the Distinct transform, and how to wire transforms together.
- * Demonstrates {@link org.apache.beam.sdk.io.TextIO.Read}/
- * {@link Distinct}/{@link org.apache.beam.sdk.io.TextIO.Write}.
- *
- * <p>To execute this pipeline locally, specify a local output file or output prefix on GCS:
- *   --output=[YOUR_LOCAL_FILE | gs://YOUR_OUTPUT_PREFIX]
- *
- * <p>To change the runner, specify:
- * <pre>{@code
- *   --runner=YOUR_SELECTED_RUNNER
- * }
- * </pre>
- * See examples/java/README.md for instructions about how to configure different runners.
- *
- * <p>The input defaults to {@code gs://apache-beam-samples/shakespeare/*} and can be
- * overridden with {@code --input}.
- */
-public class DeDupExample {
-
-  /**
-   * Options supported by {@link DeDupExample}.
-   *
-   * <p>Inherits standard configuration options.
-   */
-  private interface Options extends PipelineOptions {
-    @Description("Path to the directory or GCS prefix containing files to read from")
-    @Default.String("gs://apache-beam-samples/shakespeare/*")
-    String getInput();
-    void setInput(String value);
-
-    @Description("Path of the file to write to")
-    @Default.InstanceFactory(OutputFactory.class)
-    String getOutput();
-    void setOutput(String value);
-
-    /** Returns gs://${TEMP_LOCATION}/"deduped.txt". */
-    class OutputFactory implements DefaultValueFactory<String> {
-      @Override
-      public String create(PipelineOptions options) {
-        if (options.getTempLocation() != null) {
-          return GcsPath.fromUri(options.getTempLocation())
-              .resolve("deduped.txt").toString();
-        } else {
-          throw new IllegalArgumentException("Must specify --output or --tempLocation");
-        }
-      }
-    }
-  }
-
-
-  public static void main(String[] args)
-      throws Exception {
-
-    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
-    Pipeline p = Pipeline.create(options);
-
-    p.apply("ReadLines", TextIO.Read.from(options.getInput()))
-     .apply(Distinct.<String>create())
-     .apply("DedupedShakespeare", TextIO.Write.to(options.getOutput()));
-
-    p.run().waitUntilFinish();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/e3dca4ca/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java
new file mode 100644
index 0000000..9670b7f
--- /dev/null
+++ b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.examples.cookbook;
+
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.io.TextIO;
+import org.apache.beam.sdk.options.Default;
+import org.apache.beam.sdk.options.DefaultValueFactory;
+import org.apache.beam.sdk.options.Description;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.transforms.Distinct;
+import org.apache.beam.sdk.util.gcsfs.GcsPath;
+
+/**
+ * This example uses as input Shakespeare's plays as plaintext files, and will remove any
+ * duplicate lines across all the files. (The output does not preserve any input order).
+ *
+ * <p>Concepts: the Distinct transform, and how to wire transforms together.
+ * Demonstrates {@link org.apache.beam.sdk.io.TextIO.Read}/
+ * {@link Distinct}/{@link org.apache.beam.sdk.io.TextIO.Write}.
+ *
+ * <p>To execute this pipeline locally, specify a local output file or output prefix on GCS:
+ *   --output=[YOUR_LOCAL_FILE | gs://YOUR_OUTPUT_PREFIX]
+ *
+ * <p>To change the runner, specify:
+ * <pre>{@code
+ *   --runner=YOUR_SELECTED_RUNNER
+ * }
+ * </pre>
+ * See examples/java/README.md for instructions about how to configure different runners.
+ *
+ * <p>The input defaults to {@code gs://apache-beam-samples/shakespeare/*} and can be
+ * overridden with {@code --input}.
+ */
+public class DistinctExample {
+
+  /**
+   * Options supported by {@link DistinctExample}.
+   *
+   * <p>Inherits standard configuration options.
+   */
+  private interface Options extends PipelineOptions {
+    @Description("Path to the directory or GCS prefix containing files to read from")
+    @Default.String("gs://apache-beam-samples/shakespeare/*")
+    String getInput();
+    void setInput(String value);
+
+    @Description("Path of the file to write to")
+    @Default.InstanceFactory(OutputFactory.class)
+    String getOutput();
+    void setOutput(String value);
+
+    /** Returns gs://${TEMP_LOCATION}/"deduped.txt". */
+    class OutputFactory implements DefaultValueFactory<String> {
+      @Override
+      public String create(PipelineOptions options) {
+        if (options.getTempLocation() != null) {
+          return GcsPath.fromUri(options.getTempLocation())
+              .resolve("deduped.txt").toString();
+        } else {
+          throw new IllegalArgumentException("Must specify --output or --tempLocation");
+        }
+      }
+    }
+  }
+
+
+  public static void main(String[] args)
+      throws Exception {
+
+    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
+    Pipeline p = Pipeline.create(options);
+
+    p.apply("ReadLines", TextIO.Read.from(options.getInput()))
+     .apply(Distinct.<String>create())
+     .apply("DedupedShakespeare", TextIO.Write.to(options.getOutput()));
+
+    p.run().waitUntilFinish();
+  }
+}