Repository: beam Updated Branches: refs/heads/master 1ea3b35ba -> ced1e5c3a
[BEAM-1778] Second clean up pass of dataflow references/URLs in Java SDK Project: http://git-wip-us.apache.org/repos/asf/beam/repo Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/07a50b48 Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/07a50b48 Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/07a50b48 Branch: refs/heads/master Commit: 07a50b48af9c176732ff172e3d612052d4e15386 Parents: 87c8ef0 Author: melissa <meliss...@google.com> Authored: Thu Mar 23 16:49:41 2017 -0700 Committer: Kenneth Knowles <k...@google.com> Committed: Mon Mar 27 13:15:48 2017 -0700 ---------------------------------------------------------------------- .../main/java/org/apache/beam/sdk/io/AvroSource.java | 2 +- .../java/org/apache/beam/sdk/io/BoundedSource.java | 4 +--- .../main/java/org/apache/beam/sdk/io/PubsubIO.java | 6 ------ .../org/apache/beam/sdk/io/PubsubUnboundedSink.java | 2 -- .../apache/beam/sdk/io/PubsubUnboundedSource.java | 2 -- .../main/java/org/apache/beam/sdk/io/TFRecordIO.java | 6 +++--- .../main/java/org/apache/beam/sdk/io/XmlSink.java | 4 ++-- .../org/apache/beam/sdk/io/range/ByteKeyRange.java | 4 +--- .../java/org/apache/beam/sdk/options/GcpOptions.java | 2 +- .../beam/sdk/testing/SerializableMatchers.java | 2 +- .../org/apache/beam/sdk/testing/StreamingIT.java | 2 +- .../java/org/apache/beam/sdk/util/CoderUtils.java | 15 +++++++-------- .../beam/sdk/coders/protobuf/ProtobufUtilTest.java | 1 - .../apache/beam/sdk/runners/PipelineRunnerTest.java | 2 +- .../io/gcp/bigquery/BigQueryTableRowIterator.java | 3 +-- .../beam/sdk/io/gcp/datastore/DatastoreV1.java | 2 +- .../org/apache/beam/sdk/io/hdfs/HDFSFileSink.java | 3 +-- .../java/org/apache/beam/sdk/io/jdbc/JdbcIO.java | 4 ---- .../java/org/apache/beam/sdk/io/kafka/KafkaIO.java | 4 +--- .../resources/META-INF/maven/archetype-metadata.xml | 2 +- .../resources/META-INF/maven/archetype-metadata.xml | 2 +- .../resources/META-INF/maven/archetype-metadata.xml | 2 +- 22 files changed, 26 insertions(+), 50 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSource.java ---------------------------------------------------------------------- diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSource.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSource.java index fe3ac5c..0c52dea 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSource.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSource.java @@ -109,7 +109,7 @@ import org.apache.commons.compress.utils.CountingInputStream; * than the end offset of the source. * * <p>To use XZ-encoded Avro files, please include an explicit dependency on {@code xz-1.5.jar}, - * which has been marked as optional in the Maven {@code sdk/pom.xml} for Google Cloud Dataflow: + * which has been marked as optional in the Maven {@code sdk/pom.xml}. * * <pre>{@code * <dependency> http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedSource.java ---------------------------------------------------------------------- diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedSource.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedSource.java index 8e5145c..8538e7f 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedSource.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedSource.java @@ -104,9 +104,7 @@ public abstract class BoundedSource<T> extends Source<T> { * * <p>Sources which support dynamic work rebalancing should use * {@link org.apache.beam.sdk.io.range.RangeTracker} to manage the (source-specific) - * range of positions that is being split. If your source supports dynamic work rebalancing, - * please use that class to implement it if possible; if not possible, please contact the team - * at <i>dataflow-feedb...@google.com</i>. + * range of positions that is being split. */ @Experimental(Experimental.Kind.SOURCE_SINK) public abstract static class BoundedReader<T> extends Source.Reader<T> { http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubIO.java ---------------------------------------------------------------------- diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubIO.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubIO.java index 806b7da..c1ad353 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubIO.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubIO.java @@ -839,9 +839,6 @@ public class PubsubIO { * <p>TODO: Consider replacing with BoundedReadFromUnboundedSource on top * of PubsubUnboundedSource. * - * <p>NOTE: This is not the implementation used when running on the Google Cloud Dataflow - * service in streaming mode. - * * <p>Public so can be suppressed by runners. */ public class PubsubBoundedReader extends DoFn<Void, T> { @@ -1132,9 +1129,6 @@ public class PubsubIO { /** * Writer to Pubsub which batches messages from bounded collections. * - * <p>NOTE: This is not the implementation used when running on the Google Cloud Dataflow - * service in streaming mode. - * * <p>Public so can be suppressed by runners. */ public class PubsubBoundedWriter extends DoFn<T, Void> { http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSink.java ---------------------------------------------------------------------- diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSink.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSink.java index f41b5b7..55605b3 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSink.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSink.java @@ -87,8 +87,6 @@ import org.joda.time.Duration; * <li>A failed bundle will cause messages to be resent. Thus we rely on the Pubsub consumer * to dedup messages. * </ul> - * - * <p>NOTE: This is not the implementation used when running on the Google Cloud Dataflow service. */ public class PubsubUnboundedSink<T> extends PTransform<PCollection<T>, PDone> { /** http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSource.java ---------------------------------------------------------------------- diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSource.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSource.java index 90bcc76..1184968 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSource.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSource.java @@ -106,8 +106,6 @@ import org.slf4j.LoggerFactory; * are blocking. We rely on the underlying runner to allow multiple * {@link UnboundedSource.UnboundedReader} instances to execute concurrently and thus hide latency. * </ul> - * - * <p>NOTE: This is not the implementation used when running on the Google Cloud Dataflow service. */ public class PubsubUnboundedSource<T> extends PTransform<PBegin, PCollection<T>> { private static final Logger LOG = LoggerFactory.getLogger(PubsubUnboundedSource.class); http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TFRecordIO.java ---------------------------------------------------------------------- diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TFRecordIO.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TFRecordIO.java index 243506c..0552236 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TFRecordIO.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TFRecordIO.java @@ -68,8 +68,8 @@ public class TFRecordIO { * Returns a transform for reading TFRecord files that reads from the file(s) * with the given filename or filename pattern. This can be a local path (if running locally), * or a Google Cloud Storage filename or filename pattern of the form - * {@code "gs://<bucket>/<filepath>"} (if running locally or via the Google Cloud Dataflow - * service). Standard <a href="http://docs.oracle.com/javase/tutorial/essential/io/find.html" + * {@code "gs://<bucket>/<filepath>"} (if running locally or using remote + * execution). Standard <a href="http://docs.oracle.com/javase/tutorial/essential/io/find.html" * >Java Filesystem glob patterns</a> ("*", "?", "[..]") are supported. */ public static Bound from(String filepattern) { @@ -284,7 +284,7 @@ public class TFRecordIO { * with the given prefix. This can be a local filename * (if running locally), or a Google Cloud Storage filename of * the form {@code "gs://<bucket>/<filepath>"} - * (if running locally or via the Google Cloud Dataflow service). + * (if running locally or using remote execution). * * <p>The files written will begin with this prefix, followed by * a shard identifier (see {@link TFRecordIO.Write.Bound#withNumShards(int)}, and end http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlSink.java ---------------------------------------------------------------------- diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlSink.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlSink.java index 0f25aea..6937e93 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlSink.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlSink.java @@ -146,7 +146,7 @@ public class XmlSink { * Returns an XmlSink that writes objects as XML entities. * * <p>Output files will have the name {@literal {baseOutputFilename}-0000i-of-0000n.xml} where n - * is the number of output bundles that the Dataflow service divides the output into. + * is the number of output bundles. * * @param klass the class of the elements to write. * @param rootElementName the enclosing root element. @@ -183,7 +183,7 @@ public class XmlSink { * Returns an XmlSink that writes to files with the given prefix. * * <p>Output files will have the name {@literal {filenamePrefix}-0000i-of-0000n.xml} where n is - * the number of output bundles that the Dataflow service divides the output into. + * the number of output bundles. */ public Bound<T> toFilenamePrefix(String baseOutputFilename) { return new Bound<>(classToBind, rootElementName, baseOutputFilename); http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/io/range/ByteKeyRange.java ---------------------------------------------------------------------- diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/range/ByteKeyRange.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/range/ByteKeyRange.java index 0212e8a..d5b2919 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/range/ByteKeyRange.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/range/ByteKeyRange.java @@ -48,9 +48,7 @@ import org.slf4j.LoggerFactory; * * <p>The primary role of {@link ByteKeyRange} is to provide functionality for * {@link #estimateFractionForKey(ByteKey)}, {@link #interpolateKey(double)}, and - * {@link #split(int)}, which are used for Google Cloud Dataflow's - * <a href="https://cloud.google.com/dataflow/service/dataflow-service-desc#AutoScaling">Autoscaling - * and Dynamic Work Rebalancing</a> features. + * {@link #split(int)}. * * <p>{@link ByteKeyRange} implements these features by treating a {@link ByteKey}'s underlying * {@code byte[]} as the binary expansion of floating point numbers in the range {@code [0.0, 1.0]}. http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcpOptions.java ---------------------------------------------------------------------- diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcpOptions.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcpOptions.java index c04e4f0..d01406f 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcpOptions.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcpOptions.java @@ -55,7 +55,7 @@ public interface GcpOptions extends GoogleApiDebugOptions, PipelineOptions { /** * Project id to use when launching jobs. */ - @Description("Project id. Required when running a Dataflow in the cloud. " + @Description("Project id. Required when using Google Cloud Platform services. " + "See https://cloud.google.com/storage/docs/projects for further details.") @Default.InstanceFactory(DefaultProjectFactory.class) String getProject(); http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/SerializableMatchers.java ---------------------------------------------------------------------- diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/SerializableMatchers.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/SerializableMatchers.java index 7f72805..ade146d 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/SerializableMatchers.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/SerializableMatchers.java @@ -41,7 +41,7 @@ import org.hamcrest.Matchers; * documentation there. Values retained by a {@link SerializableMatcher} are required to be * serializable, either via Java serialization or via a provided {@link Coder}. * - * <p>The following matchers are novel to Dataflow: + * <p>The following matchers are novel to Apache Beam: * <ul> * <li>{@link #kvWithKey} for matching just the key of a {@link KV}. * <li>{@link #kvWithValue} for matching just the value of a {@link KV}. http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/StreamingIT.java ---------------------------------------------------------------------- diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/StreamingIT.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/StreamingIT.java index 4922d83..427b908 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/StreamingIT.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/StreamingIT.java @@ -18,7 +18,7 @@ package org.apache.beam.sdk.testing; /** - * Category tag used to mark tests which execute using the Dataflow runner + * Category tag used to mark tests which execute * in streaming mode. Example usage: * <pre><code> * {@literal @}Test http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/util/CoderUtils.java ---------------------------------------------------------------------- diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/CoderUtils.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/CoderUtils.java index 5d03574..6d97868 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/CoderUtils.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/CoderUtils.java @@ -229,17 +229,16 @@ public final class CoderUtils { /** * A {@link com.fasterxml.jackson.databind.Module} that adds the type - * resolver needed for Coder definitions created by the Dataflow service. + * resolver needed for Coder definitions. */ static final class Jackson2Module extends SimpleModule { /** * The Coder custom type resolver. * - * <p>This resolver resolves coders. If the Coder ID is a particular - * well-known identifier supplied by the Dataflow service, it's replaced - * with the corresponding class. All other Coder instances are resolved - * by class name, using the package org.apache.beam.sdk.coders - * if there are no "."s in the ID. + * <p>This resolver resolves coders. If the Coder ID is a particular + * well-known identifier, it's replaced with the corresponding class. + * All other Coder instances are resolved by class name, using the package + * org.apache.beam.sdk.coders if there are no "."s in the ID. */ private static final class Resolver extends TypeIdResolverBase { @SuppressWarnings("unused") // Used via @JsonTypeIdResolver annotation on Mixin @@ -307,14 +306,14 @@ public final class CoderUtils { * {@link ObjectMapper}. * * <p>This is done via a mixin so that this resolver is <i>only</i> used - * during deserialization requested by the Dataflow SDK. + * during deserialization requested by the Apache Beam SDK. */ @JsonTypeIdResolver(Resolver.class) @JsonTypeInfo(use = Id.CUSTOM, include = As.PROPERTY, property = PropertyNames.OBJECT_TYPE_NAME) private static final class Mixin {} public Jackson2Module() { - super("DataflowCoders"); + super("BeamCoders"); setMixInAnnotation(Coder.class, Mixin.class); } } http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/protobuf/ProtobufUtilTest.java ---------------------------------------------------------------------- diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/protobuf/ProtobufUtilTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/protobuf/ProtobufUtilTest.java index 9736824..1408048 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/protobuf/ProtobufUtilTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/protobuf/ProtobufUtilTest.java @@ -118,7 +118,6 @@ public class ProtobufUtilTest { @Test public void testVerifyProto2() { - // Everything in Dataflow's Proto2TestMessages uses Proto2 syntax. checkProto2Syntax(MessageA.class, ExtensionRegistry.getEmptyRegistry()); checkProto2Syntax(MessageB.class, ExtensionRegistry.getEmptyRegistry()); checkProto2Syntax(MessageC.class, ExtensionRegistry.getEmptyRegistry()); http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/test/java/org/apache/beam/sdk/runners/PipelineRunnerTest.java ---------------------------------------------------------------------- diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/runners/PipelineRunnerTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/runners/PipelineRunnerTest.java index e980497..71ef311 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/runners/PipelineRunnerTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/runners/PipelineRunnerTest.java @@ -36,7 +36,7 @@ import org.mockito.Mock; import org.mockito.MockitoAnnotations; /** - * Tests for DataflowRunner. + * Tests for PipelineRunner. */ @RunWith(JUnit4.class) public class PipelineRunnerTest { http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableRowIterator.java ---------------------------------------------------------------------- diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableRowIterator.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableRowIterator.java index 5edc78c..59f2bb6 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableRowIterator.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableRowIterator.java @@ -139,8 +139,7 @@ class BigQueryTableRowIterator implements AutoCloseable { while (true) { if (iteratorOverCurrentBatch != null && iteratorOverCurrentBatch.hasNext()) { // Embed schema information into the raw row, so that values have an - // associated key. This matches how rows are read when using the - // DataflowRunner. + // associated key. current = getTypedTableRow(schema.getFields(), iteratorOverCurrentBatch.next()); return true; } http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/datastore/DatastoreV1.java ---------------------------------------------------------------------- diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/datastore/DatastoreV1.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/datastore/DatastoreV1.java index 400860f..73ac8df 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/datastore/DatastoreV1.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/datastore/DatastoreV1.java @@ -132,7 +132,7 @@ import org.slf4j.LoggerFactory; * .withQuery(query)); * } </pre> * - * <p><b>Note:</b> Normally, a Cloud Dataflow job will read from Cloud Datastore in parallel across + * <p><b>Note:</b> A runner may read from Cloud Datastore in parallel across * many workers. However, when the {@link Query} is configured with a limit using * {@link com.google.datastore.v1.Query.Builder#setLimit(Int32Value)} or if the Query contains * inequality filters like {@code GREATER_THAN, LESS_THAN} etc., then all returned results http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/io/hdfs/src/main/java/org/apache/beam/sdk/io/hdfs/HDFSFileSink.java ---------------------------------------------------------------------- diff --git a/sdks/java/io/hdfs/src/main/java/org/apache/beam/sdk/io/hdfs/HDFSFileSink.java b/sdks/java/io/hdfs/src/main/java/org/apache/beam/sdk/io/hdfs/HDFSFileSink.java index 0118249..9b085ca 100644 --- a/sdks/java/io/hdfs/src/main/java/org/apache/beam/sdk/io/hdfs/HDFSFileSink.java +++ b/sdks/java/io/hdfs/src/main/java/org/apache/beam/sdk/io/hdfs/HDFSFileSink.java @@ -408,8 +408,7 @@ public abstract class HDFSFileSink<T, K, V> extends Sink<T> { FileOutputFormat.setOutputPath(job, new Path(path)); // Each Writer is responsible for writing one bundle of elements and is represented by one - // unique Hadoop task based on uId/hash. All tasks share the same job ID. Since Dataflow - // handles retrying of failed bundles, each task has one attempt only. + // unique Hadoop task based on uId/hash. All tasks share the same job ID. JobID jobId = job.getJobID(); TaskID taskId = new TaskID(jobId, TaskType.REDUCE, hash); context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID(taskId, 0)); http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcIO.java ---------------------------------------------------------------------- diff --git a/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcIO.java b/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcIO.java index 1b83fe9..4754c98 100644 --- a/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcIO.java +++ b/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcIO.java @@ -306,10 +306,6 @@ public class JdbcIO { return input .apply(Create.of(getQuery())) .apply(ParDo.of(new ReadFn<>(this))).setCoder(getCoder()) - // generate a random key followed by a GroupByKey and then ungroup - // to prevent fusion - // see https://cloud.google.com/dataflow/service/dataflow-service-desc#preventing-fusion - // for details .apply(ParDo.of(new DoFn<T, KV<Integer, T>>() { private Random random; @Setup http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java ---------------------------------------------------------------------- diff --git a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java index 890fb2b..83cef4b 100644 --- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java +++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java @@ -151,8 +151,7 @@ import org.slf4j.LoggerFactory; * * <h3>Partition Assignment and Checkpointing</h3> * The Kafka partitions are evenly distributed among splits (workers). - * Dataflow checkpointing is fully supported and - * each split can resume from previous checkpoint. See + * Checkpointing is fully supported and each split can resume from previous checkpoint. See * {@link UnboundedKafkaSource#generateInitialSplits(int, PipelineOptions)} for more details on * splits and checkpoint support. * @@ -818,7 +817,6 @@ public class KafkaIO { checkState(checkpointMark.getPartitions().size() == partitions.size(), "checkPointMark and assignedPartitions should match"); - // we could consider allowing a mismatch, though it is not expected in current Dataflow for (int i = 0; i < partitions.size(); i++) { PartitionMark ckptMark = checkpointMark.getPartitions().get(i); http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/maven-archetypes/examples-java8/src/main/resources/META-INF/maven/archetype-metadata.xml ---------------------------------------------------------------------- diff --git a/sdks/java/maven-archetypes/examples-java8/src/main/resources/META-INF/maven/archetype-metadata.xml b/sdks/java/maven-archetypes/examples-java8/src/main/resources/META-INF/maven/archetype-metadata.xml index dbdd614..2781a43 100644 --- a/sdks/java/maven-archetypes/examples-java8/src/main/resources/META-INF/maven/archetype-metadata.xml +++ b/sdks/java/maven-archetypes/examples-java8/src/main/resources/META-INF/maven/archetype-metadata.xml @@ -17,7 +17,7 @@ --> <archetype-descriptor xsi:schemaLocation="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0 http://maven.apache.org/xsd/archetype-descriptor-1.0.0.xsd" - name="Google Cloud Dataflow Example Pipelines Archetype" + name="Apache Beam Example Pipelines Archetype" xmlns="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml ---------------------------------------------------------------------- diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml b/sdks/java/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml index a130b65..7f0430a 100644 --- a/sdks/java/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml +++ b/sdks/java/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml @@ -17,7 +17,7 @@ --> <archetype-descriptor xsi:schemaLocation="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0 http://maven.apache.org/xsd/archetype-descriptor-1.0.0.xsd" - name="Google Cloud Dataflow Example Pipelines Archetype" + name="Apache Beam Example Pipelines Archetype" xmlns="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml ---------------------------------------------------------------------- diff --git a/sdks/java/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml b/sdks/java/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml index 46c21c3..e550960 100644 --- a/sdks/java/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml +++ b/sdks/java/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml @@ -17,7 +17,7 @@ --> <archetype-descriptor xsi:schemaLocation="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0 http://maven.apache.org/xsd/archetype-descriptor-1.0.0.xsd" - name="Google Cloud Dataflow Starter Pipeline Archetype" + name="Apache Beam Starter Pipeline Archetype" xmlns="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> <requiredProperties>