[beam] branch nightly-refs/heads/master updated (bee56a6 -> bfff840)
This is an automated email from the ASF dual-hosted git repository. github-bot pushed a change to branch nightly-refs/heads/master in repository https://gitbox.apache.org/repos/asf/beam.git. from bee56a6 [BEAM-13288] improve logging for no rows present error (#16096) add 45955da Merge pull request #16117 from [BEAM-13368][Playground][Bugfix] Fix CI and failed unit tests on master add 0830a02 Run python SpannerIO IT with python 3.7 only to avoid overload spanner instance (#16112) add 862ece1 Merge pull request #15378 from [RFC] Define and document per-key ordering semantics for runners add 3422774 Bump python containers to beam-master-20211202 (#16129) add 29213ce [BEAM-13354, BEAM-13015, BEAM-12802, BEAM-12588] Support prefetch for multimap and set state making loading keys and values truly lazy (#16092) add 09bbb48 [BEAM-11936] Fix errorprone UnusedVariable in IO (#16036) add 3c948fc Fix failing RecommendationAICatalogItemIT add 89e2b23 Merge pull request #16132 from y1chi/fix_post add 91950d6 Clarify instructions on how to get contributor list. add dfb5726 Merge pull request #16133 from tvalentyn/tags_command add 6b9e374 Revert "Revert "Allow wildcards for java class lookup transform providers."" add 8faf018 Move stand-alone expansion service jar into its own project. add bfff840 Merge pull request #16044 Restore "Allow wildcards for java class lookup transform providers." No new revisions were added by this update. 
Summary of changes: ...Java_PortableValidatesRunner_Flink_Batch.groovy | 2 +- build.gradle.kts | 3 + playground/backend/build.gradle.kts| 2 +- playground/backend/cmd/server/controller_test.go | 21 +- runners/flink/job-server/flink_job_server.gradle | 2 + runners/spark/job-server/spark_job_server.gradle | 1 + ...edMetrics.java => UsesPerKeyOrderInBundle.java} | 10 +- ...leParDo.java => UsesPerKeyOrderedDelivery.java} | 9 +- .../org/apache/beam/sdk/transforms/ParDoTest.java | 3 +- .../beam/sdk/transforms/PerKeyOrderingTest.java| 328 +++ .../app}/build.gradle | 21 +- sdks/java/expansion-service/build.gradle | 5 +- .../expansion/service/ExpansionServiceOptions.java | 10 +- .../service/JavaClassLookupTransformProvider.java | 83 +++- .../JavaClassLookupTransformProviderTest.java | 42 ++ .../ml/RecommendationAICatalogItemIT.java | 4 +- .../beam/fn/harness/state/FnApiStateAccessor.java | 19 +- .../beam/fn/harness/state/MultimapUserState.java | 165 +--- .../fn/harness/state/MultimapUserStateTest.java| 449 +++-- sdks/java/io/amazon-web-services/build.gradle | 1 - sdks/java/io/amazon-web-services2/build.gradle | 1 - .../apache/beam/sdk/io/aws2/kinesis/KinesisIO.java | 6 - .../beam/sdk/io/aws2/s3/S3FileSystemTest.java | 2 +- sdks/java/io/amqp/build.gradle | 2 +- .../java/org/apache/beam/sdk/io/amqp/AmqpIO.java | 8 +- .../beam/sdk/io/amqp/AmqpMessageCoderTest.java | 2 +- sdks/java/io/azure/build.gradle| 1 - .../beam/sdk/io/azure/options/AzureModule.java | 6 - sdks/java/io/bigquery-io-perf-tests/build.gradle | 2 +- .../beam/sdk/bigqueryioperftests/BigQueryIOIT.java | 4 - sdks/java/io/cassandra/build.gradle| 1 - .../apache/beam/sdk/io/cassandra/CassandraIO.java | 1 - .../beam/sdk/io/cassandra/CassandraIOTest.java | 1 - sdks/java/io/clickhouse/build.gradle | 1 - sdks/java/io/common/build.gradle | 2 +- sdks/java/io/contextualtextio/build.gradle | 1 - .../contextualtextio/ContextualTextIOSource.java | 4 - sdks/java/io/debezium/build.gradle | 1 - 
.../io/debezium/expansion-service/build.gradle | 1 - .../io/debezium/DebeziumTransformRegistrar.java| 3 - .../apache/beam/io/debezium/SourceRecordJson.java | 4 +- .../apache/beam/io/debezium/DebeziumIOTest.java| 4 +- .../io/debezium/KafkaSourceConsumerFnTest.java | 23 +- .../elasticsearch-tests-5/build.gradle | 1 - .../elasticsearch-tests-6/build.gradle | 1 - .../elasticsearch-tests-7/build.gradle | 1 - .../elasticsearch-tests-common/build.gradle| 1 - sdks/java/io/elasticsearch/build.gradle| 2 +- sdks/java/io/expansion-service/build.gradle| 1 - sdks/java/io/file-based-io-tests/build.gradle | 2 +- .../java/org/apache/beam/sdk/io/avro/AvroIOIT.java | 4 - .../apache/beam/sdk/io/parquet/ParquetIOIT.java| 4 - .../java/org/apache/beam/sdk/io/text/TextIOIT.java | 7 - .../apache/beam/sdk/io/tfrecord/TFRecordIOIT.java | 4 - .../java/org/apache/beam/sdk/io/xml/XmlIOIT.java | 4 - sdks/java/io/google-cloud-platform/build.gradle| 1 -
[beam] branch master updated (dfb5726 -> bfff840)
This is an automated email from the ASF dual-hosted git repository. robertwb pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from dfb5726 Merge pull request #16133 from tvalentyn/tags_command add 6b9e374 Revert "Revert "Allow wildcards for java class lookup transform providers."" add 8faf018 Move stand-alone expansion service jar into its own project. add bfff840 Merge pull request #16044 Restore "Allow wildcards for java class lookup transform providers." No new revisions were added by this update. Summary of changes: .../app}/build.gradle | 22 +++--- sdks/java/expansion-service/build.gradle | 5 +- .../expansion/service/ExpansionServiceOptions.java | 10 +-- .../service/JavaClassLookupTransformProvider.java | 83 -- .../JavaClassLookupTransformProviderTest.java | 42 +++ sdks/python/apache_beam/transforms/external.py | 56 --- .../python/apache_beam/transforms/external_test.py | 24 +++ sdks/python/apache_beam/utils/processes_test.py| 8 +++ sdks/python/apache_beam/utils/subprocess_server.py | 47 +++- .../apache_beam/utils/subprocess_server_test.py| 51 + settings.gradle.kts| 1 + 11 files changed, 303 insertions(+), 46 deletions(-) copy sdks/java/{io/snowflake/expansion-service => expansion-service/app}/build.gradle (70%)
[beam] branch asf-site updated: Publishing website 2021/12/04 00:01:46 at commit dfb5726
This is an automated email from the ASF dual-hosted git repository. git-site-role pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/asf-site by this push: new a436624 Publishing website 2021/12/04 00:01:46 at commit dfb5726 a436624 is described below commit a4366242e6aa547c1b2e9e7b577db54179838a69 Author: jenkins AuthorDate: Sat Dec 4 00:01:47 2021 +0000 Publishing website 2021/12/04 00:01:46 at commit dfb5726 --- website/generated-content/contribute/index.xml| 2 +- website/generated-content/contribute/release-guide/index.html | 4 ++-- website/generated-content/sitemap.xml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/website/generated-content/contribute/index.xml b/website/generated-content/contribute/index.xml index 4da81a2..6b47b31 100644 --- a/website/generated-content/contribute/index.xml +++ b/website/generated-content/contribute/index.xml @@ -1041,7 +1041,7 @@ See a href="https://github.com/apache/beam/commit/a32a75ed0657c122c6625aee1a li>Copy the changes for the current release from code>CHANGES.md/code> to the blog post and edit as necessary./li> li>Be sure to add yourself to a href="https://github.com/apache/beam/blob/master/website/www/site/data/authors.yml;>authors.yml/a> if necessary./li> /ul> -p>strong>Tip/strong>: Use git log to find contributors to the releases. (e.g: code>git log --pretty='%aN' ^v2.10.0 v2.11.0 | sort | uniq/code>). +p>strong>Tip/strong>: Use git log to find contributors to the releases. (e.g: code>git fetch origin --tags; git log --pretty='%aN' ^v2.10.0 v2.11.0-RC1 | sort | uniq/code>). 
Make sure to clean it up, as there may be duplicate or incorrect user names./p> p>strong>NOTE/strong>: Make sure to include any breaking changes, even to code>@Experimental/code> features, all major features and bug fixes, and all known issues./p> diff --git a/website/generated-content/contribute/release-guide/index.html b/website/generated-content/contribute/release-guide/index.html index ec9234f..4259677 100644 --- a/website/generated-content/contribute/release-guide/index.html +++ b/website/generated-content/contribute/release-guide/index.html @@ -174,7 +174,7 @@ To avoid invalid redirects for the current version, merge these PR Once the PR is merged, the new contents will get picked up automatically and served to the Beam website, usually within an hour. A committer can manually trigger the https://ci-beam.apache.org/job/beam_PostCommit_Website_Publish/>beam_PostCommit_Website_Publish task in Jenkins to avoid waiting.PR 1: apache/beam-siteThis pull request is against the apache/beam-site repo, on the release-docs branch (https://github.com/apache/beam-site/pull/603>example). It is created by build_release_candidate.sh (see above).PR 2: apache/beamThis pull request is against the apache/beam repo, on the master branch (https://github.com/apache/beam/pull/15068>example).Update CHANGES.md to update release date and remove template.Update release version in website/www/site/config.toml.Add new release in website/www/site [...] 
-See https://github.com/apache/beam/commit/a32a75ed0657c122c6625aee1ace27994e7df195#diff-1e2b83a4f61dce8014a1989869b6d31eb3f80cb0d6dade42fb8df5d9407b4748>beam-2.31.0.md as an example.Copy the changes for the current release from CHANGES.md to the blog post and edit as necessary.Be sure to add yourself to https://github.com/apache/beam/blob/master/website/www/site/data/authors.yml>authors.yml if necessary.Tiphttps://github.com/apache/beam/commit/a32a75ed0657c122c6625aee1ace27994e7df195#diff-1e2b83a4f61dce8014a1989869b6d31eb3f80cb0d6dade42fb8df5d9407b4748>beam-2.31.0.md as an example.Copy the changes for the current release from CHANGES.md to the blog post and edit as necessary.Be sure to add yourself to https://github.com/apache/beam/blob/master/website/www/site/data/authors.yml>authors.yml if necessary.TipNOTE: Make sure to include any breaking changes, even to @Experimental features, all major features and bug fixes, and all known issues.Template:We are happy to present the new {$RELEASE_VERSION} release of Beam. This release includes both improvements and new functionality. @@ -424,7 +424,7 @@ If you end up getting permissions errors ask on the mailing list for assistance. Ask other contributors to do the same.Also, update https://en.wikipedia.org/wiki/Apache_Beam>the Wikipedia article on Apache Beam.Checklist to declare the process completedRelease announced on the user@ mailing list.Blog post published, if applicable.Release recorded in reporter.apache.org.Release announced on social media.Completion declared on the dev@ mailing list. [...] Once you’ve finished the release, please take a step back and look what areas of this process and be improved. Perhaps some part of the process can be simplified. Perhaps parts of this guide can
[beam] branch master updated: Clarify instructions on how to get contributor list.
This is an automated email from the ASF dual-hosted git repository. ibzib pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new 91950d6 Clarify instructions on how to get contributor list. new dfb5726 Merge pull request #16133 from tvalentyn/tags_command 91950d6 is described below commit 91950d60925dfb88e32869b5194313a589c15242 Author: Valentyn Tymofieiev AuthorDate: Fri Dec 3 11:57:16 2021 -0800 Clarify instructions on how to get contributor list. --- website/www/site/content/en/contribute/release-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/www/site/content/en/contribute/release-guide.md b/website/www/site/content/en/contribute/release-guide.md index 8a8b607..9896640 100644 --- a/website/www/site/content/en/contribute/release-guide.md +++ b/website/www/site/content/en/contribute/release-guide.md @@ -680,7 +680,7 @@ See [beam-2.31.0.md](https://github.com/apache/beam/commit/a32a75ed0657c122c6625 - Copy the changes for the current release from `CHANGES.md` to the blog post and edit as necessary. - Be sure to add yourself to [authors.yml](https://github.com/apache/beam/blob/master/website/www/site/data/authors.yml) if necessary. -__Tip__: Use git log to find contributors to the releases. (e.g: `git log --pretty='%aN' ^v2.10.0 v2.11.0 | sort | uniq`). +__Tip__: Use git log to find contributors to the releases. (e.g: `git fetch origin --tags; git log --pretty='%aN' ^v2.10.0 v2.11.0-RC1 | sort | uniq`). Make sure to clean it up, as there may be duplicate or incorrect user names. __NOTE__: Make sure to include any breaking changes, even to `@Experimental` features,
[beam] branch master updated: Fix failing RecommendationAICatalogItemIT
This is an automated email from the ASF dual-hosted git repository. yichi pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new 3c948fc Fix failing RecommendationAICatalogItemIT new 89e2b23 Merge pull request #16132 from y1chi/fix_post 3c948fc is described below commit 3c948fc1476c7e2b3ef97380b8931452c33f90d1 Author: Yichi Zhang AuthorDate: Fri Dec 3 11:24:25 2021 -0800 Fix failing RecommendationAICatalogItemIT --- .../apache/beam/sdk/extensions/ml/RecommendationAICatalogItemIT.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdks/java/extensions/ml/src/test/java/org/apache/beam/sdk/extensions/ml/RecommendationAICatalogItemIT.java b/sdks/java/extensions/ml/src/test/java/org/apache/beam/sdk/extensions/ml/RecommendationAICatalogItemIT.java index 4d45bc4..00e70d9 100644 --- a/sdks/java/extensions/ml/src/test/java/org/apache/beam/sdk/extensions/ml/RecommendationAICatalogItemIT.java +++ b/sdks/java/extensions/ml/src/test/java/org/apache/beam/sdk/extensions/ml/RecommendationAICatalogItemIT.java @@ -38,15 +38,15 @@ import org.apache.beam.sdk.transforms.SerializableFunction; import org.apache.beam.sdk.values.KV; import org.apache.beam.sdk.values.PCollectionTuple; import org.junit.AfterClass; +import org.junit.ClassRule; import org.junit.Ignore; -import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @RunWith(JUnit4.class) public class RecommendationAICatalogItemIT { - @Rule public static TestPipeline testPipeline = TestPipeline.create(); + @ClassRule public static TestPipeline testPipeline = TestPipeline.create(); private static String projectId = testPipeline.getOptions().as(GcpOptions.class).getProject(); private static final String randomId = "aitest-" + Instant.now().getEpochSecond() + UUID.randomUUID().toString();
[beam] branch master updated: [BEAM-11936] Fix errorprone UnusedVariable in IO (#16036)
This is an automated email from the ASF dual-hosted git repository. pabloem pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new 09bbb48 [BEAM-11936] Fix errorprone UnusedVariable in IO (#16036) 09bbb48 is described below commit 09bbb48187301f18bec6d9110741c69b955e2b5a Author: Benjamin Gonzalez <74670721+benw...@users.noreply.github.com> AuthorDate: Fri Dec 3 14:14:32 2021 -0600 [BEAM-11936] Fix errorprone UnusedVariable in IO (#16036) * [BEAM-11936] Fix errorprone UnusedVariable in io * [BEAM-11936] Change todo issue ref, and spotlessApply * [BEAM-11936] Remove unused code * [BEAM-11936] Remove suppressUnusedVariable flag * [BEAM-11936] Remove unsued code after merge --- sdks/java/io/amazon-web-services/build.gradle | 1 - sdks/java/io/amazon-web-services2/build.gradle | 1 - .../apache/beam/sdk/io/aws2/kinesis/KinesisIO.java | 6 - .../beam/sdk/io/aws2/s3/S3FileSystemTest.java | 2 +- sdks/java/io/amqp/build.gradle | 2 +- .../java/org/apache/beam/sdk/io/amqp/AmqpIO.java | 8 ++ .../beam/sdk/io/amqp/AmqpMessageCoderTest.java | 2 +- sdks/java/io/azure/build.gradle| 1 - .../beam/sdk/io/azure/options/AzureModule.java | 6 - sdks/java/io/bigquery-io-perf-tests/build.gradle | 2 +- .../beam/sdk/bigqueryioperftests/BigQueryIOIT.java | 4 --- sdks/java/io/cassandra/build.gradle| 1 - .../apache/beam/sdk/io/cassandra/CassandraIO.java | 1 - .../beam/sdk/io/cassandra/CassandraIOTest.java | 1 - sdks/java/io/clickhouse/build.gradle | 1 - sdks/java/io/common/build.gradle | 2 +- sdks/java/io/contextualtextio/build.gradle | 1 - .../contextualtextio/ContextualTextIOSource.java | 4 --- sdks/java/io/debezium/build.gradle | 1 - .../io/debezium/expansion-service/build.gradle | 1 - .../io/debezium/DebeziumTransformRegistrar.java| 3 --- .../apache/beam/io/debezium/SourceRecordJson.java | 4 +-- .../apache/beam/io/debezium/DebeziumIOTest.java| 4 +-- 
.../io/debezium/KafkaSourceConsumerFnTest.java | 23 - .../elasticsearch-tests-5/build.gradle | 1 - .../elasticsearch-tests-6/build.gradle | 1 - .../elasticsearch-tests-7/build.gradle | 1 - .../elasticsearch-tests-common/build.gradle| 1 - sdks/java/io/elasticsearch/build.gradle| 2 +- sdks/java/io/expansion-service/build.gradle| 1 - sdks/java/io/file-based-io-tests/build.gradle | 2 +- .../java/org/apache/beam/sdk/io/avro/AvroIOIT.java | 4 --- .../apache/beam/sdk/io/parquet/ParquetIOIT.java| 4 --- .../java/org/apache/beam/sdk/io/text/TextIOIT.java | 7 - .../apache/beam/sdk/io/tfrecord/TFRecordIOIT.java | 4 --- .../java/org/apache/beam/sdk/io/xml/XmlIOIT.java | 4 --- sdks/java/io/google-cloud-platform/build.gradle| 1 - .../expansion-service/build.gradle | 1 - .../sdk/io/gcp/bigquery/BigQueryQuerySource.java | 4 --- .../sdk/io/gcp/bigquery/BigQueryServicesImpl.java | 3 --- .../gcp/bigquery/BigQueryStorageArrowReader.java | 5 .../sdk/io/gcp/bigquery/BigQueryTableSource.java | 3 --- .../beam/sdk/io/gcp/bigquery/CreateTables.java | 6 - .../beam/sdk/io/gcp/bigquery/StorageApiLoads.java | 3 --- .../bigquery/StorageApiWritesShardedRecords.java | 5 +++- .../beam/sdk/io/gcp/bigtable/BigtableIO.java | 1 - .../apache/beam/sdk/io/gcp/healthcare/FhirIO.java | 1 - .../io/gcp/healthcare/HttpHealthcareApiClient.java | 2 +- .../sdk/io/gcp/pubsub/PubsubUnboundedSink.java | 4 --- .../beam/sdk/io/gcp/pubsub/TestPubsubSignal.java | 9 ++- .../sdk/io/gcp/spanner/MutationKeyEncoder.java | 4 +-- .../sdk/io/gcp/testing/FakeDatasetService.java | 1 - .../io/gcp/bigquery/BigQueryIOStorageReadTest.java | 3 --- .../io/gcp/bigquery/BigQueryNestedRecordsIT.java | 2 -- .../io/gcp/bigquery/BigQueryServicesImplTest.java | 4 --- .../sdk/io/gcp/bigquery/BigQueryToTableIT.java | 5 ++-- .../beam/sdk/io/gcp/bigtable/BigtableIOTest.java | 2 -- .../beam/sdk/io/gcp/datastore/DatastoreV1Test.java | 3 --- .../gcp/firestore/BaseFirestoreV1WriteFnTest.java | 5 +--- .../beam/sdk/io/gcp/healthcare/FhirIOLROIT.java| 4 
+-- .../beam/sdk/io/gcp/healthcare/FhirIOTestUtil.java | 3 --- .../beam/sdk/io/gcp/pubsub/PubsubReadIT.java | 3 --- .../sdk/io/gcp/spanner/ReadSpannerSchemaTest.java | 2 -- .../beam/sdk/io/gcp/spanner/SpannerIOReadTest.java | 30 ++ .../sdk/io/gcp/spanner/SpannerIOWriteTest.java | 1 - sdks/java/io/hadoop-common/build.gradle| 2 +- sdks/java/io/hadoop-file-system/build.gradle | 1 - sdks/java/io/hadoop-format/build.gradle| 1 -
[beam] branch master updated: [BEAM-13354, BEAM-13015, BEAM-12802, BEAM-12588] Support prefetch for multimap and set state making loading keys and values truly lazy (#16092)
This is an automated email from the ASF dual-hosted git repository. lcwik pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new 29213ce [BEAM-13354, BEAM-13015, BEAM-12802, BEAM-12588] Support prefetch for multimap and set state making loading keys and values truly lazy (#16092) 29213ce is described below commit 29213ce366e7f7ef55ba2ed0b943532289421c19 Author: Lukasz Cwik AuthorDate: Fri Dec 3 11:23:28 2021 -0800 [BEAM-13354, BEAM-13015, BEAM-12802, BEAM-12588] Support prefetch for multimap and set state making loading keys and values truly lazy (#16092) * [BEAM-13354, BEAM-13015, BEAM-12802, BEAM-12588] Support prefetch for multimap and set state making loading keys and values truly lazy. Also fix implementation to use structural values when comparing keys. --- .../beam/fn/harness/state/FnApiStateAccessor.java | 19 +- .../beam/fn/harness/state/MultimapUserState.java | 165 +--- .../fn/harness/state/MultimapUserStateTest.java| 449 +++-- 3 files changed, 450 insertions(+), 183 deletions(-) diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/FnApiStateAccessor.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/FnApiStateAccessor.java index 517be05..55052b4 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/FnApiStateAccessor.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/FnApiStateAccessor.java @@ -352,7 +352,7 @@ public class FnApiStateAccessor implements SideInputReader, StateBinder { @Override public ReadableState readLater() { -// TODO: Support prefetching. +impl.get(t).iterator().prefetch(); return this; } }; @@ -364,7 +364,6 @@ public class FnApiStateAccessor implements SideInputReader, StateBinder { if (isEmpty) { impl.put(t, null); } -// TODO: Support prefetching. 
return ReadableStates.immediate(isEmpty); } @@ -389,7 +388,7 @@ public class FnApiStateAccessor implements SideInputReader, StateBinder { @Override public ReadableState readLater() { -// TODO: Support prefetching. +impl.keys().iterator().prefetch(); return this; } }; @@ -402,7 +401,7 @@ public class FnApiStateAccessor implements SideInputReader, StateBinder { @Override public SetState readLater() { -// TODO: Support prefetching. +impl.keys().iterator().prefetch(); return this; } }; @@ -469,7 +468,7 @@ public class FnApiStateAccessor implements SideInputReader, StateBinder { @Override public ReadableState readLater() { -// TODO: Support prefetching. +impl.get(key).iterator().prefetch(); return this; } }; @@ -485,7 +484,7 @@ public class FnApiStateAccessor implements SideInputReader, StateBinder { @Override public ReadableState> readLater() { -// TODO: Support prefetching. +impl.keys().iterator().prefetch(); return this; } }; @@ -501,7 +500,7 @@ public class FnApiStateAccessor implements SideInputReader, StateBinder { @Override public ReadableState> readLater() { -// TODO: Support prefetching. +entries().readLater(); return this; } }; @@ -519,7 +518,9 @@ public class FnApiStateAccessor implements SideInputReader, StateBinder { @Override public ReadableState>> readLater() { -// TODO: Support prefetching. +// Start prefetching the keys. We would need to block to start prefetching +// the values. +keys().readLater(); return this; } }; @@ -535,7 +536,7 @@ public class FnApiStateAccessor implements SideInputReader, StateBinder { @Override public ReadableState readLater() { -// TODO: Support prefetching.
[beam] branch master updated: Bump python containers to beam-master-20211202 (#16129)
This is an automated email from the ASF dual-hosted git repository. tvalentyn pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new 3422774 Bump python containers to beam-master-20211202 (#16129) 3422774 is described below commit 342277407766c8c1237fb8e3e4f27cf4c7e5e897 Author: Yichi Zhang AuthorDate: Fri Dec 3 11:14:10 2021 -0800 Bump python containers to beam-master-20211202 (#16129) --- sdks/python/apache_beam/runners/dataflow/internal/names.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdks/python/apache_beam/runners/dataflow/internal/names.py b/sdks/python/apache_beam/runners/dataflow/internal/names.py index 0c2bbe5..2c5c4ef 100644 --- a/sdks/python/apache_beam/runners/dataflow/internal/names.py +++ b/sdks/python/apache_beam/runners/dataflow/internal/names.py @@ -36,10 +36,10 @@ SERIALIZED_SOURCE_KEY = 'serialized_source' # Update this version to the next version whenever there is a change that will # require changes to legacy Dataflow worker execution environment. -BEAM_CONTAINER_VERSION = 'beam-master-2026' +BEAM_CONTAINER_VERSION = 'beam-master-20211202' # Update this version to the next version whenever there is a change that # requires changes to SDK harness container or SDK harness launcher. -BEAM_FNAPI_CONTAINER_VERSION = 'beam-master-2026' +BEAM_FNAPI_CONTAINER_VERSION = 'beam-master-20211202' DATAFLOW_CONTAINER_IMAGE_REPOSITORY = 'gcr.io/cloud-dataflow/v1beta3'
[beam] branch asf-site updated: Publishing website 2021/12/03 18:03:23 at commit 862ece1
This is an automated email from the ASF dual-hosted git repository. git-site-role pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/asf-site by this push: new efdbfba Publishing website 2021/12/03 18:03:23 at commit 862ece1 efdbfba is described below commit efdbfbab9e1221d92319bbc4728d12382197ac73 Author: jenkins AuthorDate: Fri Dec 3 18:03:24 2021 +0000 Publishing website 2021/12/03 18:03:23 at commit 862ece1 --- website/generated-content/documentation/index.xml | 39 +- .../index.html | 2 +- .../runners/capability-matrix/index.html | 2 +- .../runners/capability-matrix/index.xml| 62 ++ .../documentation/runtime/model/index.html | 29 -- website/generated-content/sitemap.xml | 2 +- 6 files changed, 128 insertions(+), 8 deletions(-) diff --git a/website/generated-content/documentation/index.xml b/website/generated-content/documentation/index.xml index 7289a86..fc279a0 100644 --- a/website/generated-content/documentation/index.xml +++ b/website/generated-content/documentation/index.xml @@ -15395,7 +15395,9 @@ serializing the elements and broadcasting them to all the workers executing the code>ParDo/code>./li> li>Passing elements between transforms that are running on the same worker. This may allow the runner to avoid serializing elements; instead, the runner -can just pass the elements in memory./li> +can just pass the elements in memory. This is done as part of an +optimization that is known as +a href="https://beam.apache.org/documentation/glossary/#fusion;>fusion/a>./li> /ul> p>Some situations where the runner may serialize and persist elements are:/p> ol> @@ -15420,6 +15422,41 @@ choose an appropriate middle-ground between persisting results after every element, and having to retry everything if there is a failure. 
For example, a streaming runner may prefer to process and commit small bundles, and a batch runner may prefer to process larger bundles./p> +h3 id="data-partitioning-and-inter-stage-execution">Data partitioning and inter-stage execution/h3> +p>Partitioning and parallelization of element processing within a Beam pipeline is +dependent on two things:/p> +ul> +li>Data source implementation/li> +li>Inter-stage key parallelism/li> +/ul> +p>Beam pipelines read data from a source (e.g. code>KafkaIO/code>, code>BigQueryIO/code>, code>JdbcIO/code>, +or your own source implementation). To implement a Source in Beam one must +implement it as a Splittable code>DoFn/code>. A Splittable code>DoFn/code> provides the runner +with interfaces to facilitate the splitting of work./p> +p>When running key-based operations in Beam (e.g. code>GroupByKey/code>, code>Combine/code>, +code>Reshuffle.perKey/code>, and stateful code>DoFn/code>s), Beam runners perform serialization +and transfer of data known as em>shuffle/em>sup>1/sup>. Shuffle allows data +elements of the same key to be processed together./p> +p>The way in which runners em>shuffle/em> data may be slightly different for Batch and +Streaming execution modes./p> +p>sup>1/sup>Not to be confused with the code>shuffle/code> operation in some runners./p> +h4 id="data-ordering-in-a-pipeline-execution">Data ordering in a pipeline execution/h4> +p>The Beam model does not define strict guidelines regarding the order in which +runners process elements or transport them across code>PTransforms/code>. Runners are +free to implement data transfer semantics in different forms./p> +p>Some use cases exist where user pipelines may need to rely on specific ordering +semantics in pipeline execution. 
The a href="/documentation/runners/capability-matrix/additional-common-features-not-yet-part-of-the-beam-model/index.html">capability matrix documents/a> +runner behavior for strong>key-ordered delivery/strong>./p> +p>Consider a single Beam worker processing a series of bundles from the same Beam +transform, and consider a code>PTransform/code> that outputs data from this Stage into a +downstream code>PCollection/code>. Finally, consider two events em>with the same key/em> +emitted in a certain order by this worker (within the same bundle or as part of +different bundles)./p> +p>We say that the Beam runner supports strong>key-ordered delivery/strong> if it guarantees +that these two events will be observed in the same order by a PTransform that is +immediately downstream independently of the kind of data transmission method./p> +p>This characteristic will hold true in runners and operations that have +key-limited parallelism./p> h2 id="parallelism">Failures and parallelism within and between transforms/h2> p>In this section, we discuss how elements in the input collection are processed in parallel, and how transforms are retried when failures occur./p> diff --git
[beam] branch master updated (0830a02 -> 862ece1)
This is an automated email from the ASF dual-hosted git repository. pabloem pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from 0830a02 Run python SpannerIO IT with python 3.7 only to avoid overload spanner instance (#16112) add 862ece1 Merge pull request #15378 from [RFC] Define and document per-key ordering semantics for runners No new revisions were added by this update. Summary of changes: ...Java_PortableValidatesRunner_Flink_Batch.groovy | 2 +- runners/flink/job-server/flink_job_server.gradle | 2 + runners/spark/job-server/spark_job_server.gradle | 1 + ...edMetrics.java => UsesPerKeyOrderInBundle.java} | 10 +- ...leParDo.java => UsesPerKeyOrderedDelivery.java} | 9 +- .../org/apache/beam/sdk/transforms/ParDoTest.java | 3 +- .../beam/sdk/transforms/PerKeyOrderingTest.java| 328 + .../site/content/en/documentation/runtime/model.md | 50 +++- website/www/site/data/capability_matrix.yaml | 39 +++ .../documentation/capability-matrix-row.html | 2 + .../site/layouts/shortcodes/capability-matrix.html | 2 + .../documentation/capability-matrix-single.html| 6 +- 12 files changed, 440 insertions(+), 14 deletions(-) copy sdks/java/core/src/main/java/org/apache/beam/sdk/testing/{UsesCommittedMetrics.java => UsesPerKeyOrderInBundle.java} (71%) copy sdks/java/core/src/main/java/org/apache/beam/sdk/testing/{UsesBoundedSplittableParDo.java => UsesPerKeyOrderedDelivery.java} (75%) create mode 100644 sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/PerKeyOrderingTest.java
[beam] branch master updated: Run python SpannerIO IT with python 3.7 only to avoid overload spanner instance (#16112)
This is an automated email from the ASF dual-hosted git repository. yichi pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new 0830a02 Run python SpannerIO IT with python 3.7 only to avoid overload spanner instance (#16112) 0830a02 is described below commit 0830a02f8984383028a74ca9b89b9e3dc9f1597c Author: Yichi Zhang AuthorDate: Fri Dec 3 09:20:20 2021 -0800 Run python SpannerIO IT with python 3.7 only to avoid overload spanner instance (#16112) --- build.gradle.kts | 3 ++ .../io/gcp/experimental/spannerio_read_it_test.py | 20 ++-- .../io/gcp/experimental/spannerio_write_it_test.py | 10 +++--- sdks/python/test-suites/dataflow/common.gradle | 24 ++ sdks/python/test-suites/direct/common.gradle | 20 sdks/python/test-suites/portable/common.gradle | 38 +- 6 files changed, 99 insertions(+), 16 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index 37e0164..f5421ed 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -287,11 +287,14 @@ task("python36PostCommit") { task("python37PostCommit") { dependsOn(":sdks:python:test-suites:dataflow:py37:postCommitIT") + dependsOn(":sdks:python:test-suites:dataflow:py37:spannerioIT") dependsOn(":sdks:python:test-suites:direct:py37:postCommitIT") dependsOn(":sdks:python:test-suites:direct:py37:directRunnerIT") dependsOn(":sdks:python:test-suites:direct:py37:hdfsIntegrationTest") dependsOn(":sdks:python:test-suites:direct:py37:mongodbioIT") + dependsOn(":sdks:python:test-suites:direct:py37:spannerioIT") dependsOn(":sdks:python:test-suites:portable:py37:postCommitPy37") + dependsOn(":sdks:python:test-suites:portable:py37:xlangSpannerIOIT") } task("python38PostCommit") { diff --git a/sdks/python/apache_beam/io/gcp/experimental/spannerio_read_it_test.py b/sdks/python/apache_beam/io/gcp/experimental/spannerio_read_it_test.py index bce7a66..3be3e74 100644 --- 
a/sdks/python/apache_beam/io/gcp/experimental/spannerio_read_it_test.py +++ b/sdks/python/apache_beam/io/gcp/experimental/spannerio_read_it_test.py @@ -105,7 +105,7 @@ class SpannerReadIntegrationTest(unittest.TestCase): cls._add_dummy_entries() _LOGGER.info("Spanner Read IT Setup Complete...") - @pytest.mark.it_postcommit + @pytest.mark.spannerio_it def test_read_via_table(self): _LOGGER.info("Spanner Read via table") with beam.Pipeline(argv=self.args) as p: @@ -117,7 +117,7 @@ class SpannerReadIntegrationTest(unittest.TestCase): columns=["UserId", "Key"]) assert_that(r, equal_to(self._data)) - @pytest.mark.it_postcommit + @pytest.mark.spannerio_it def test_read_via_sql(self): _LOGGER.info("Running Spanner via sql") with beam.Pipeline(argv=self.args) as p: @@ -128,7 +128,7 @@ class SpannerReadIntegrationTest(unittest.TestCase): sql="select * from Users") assert_that(r, equal_to(self._data)) - @pytest.mark.it_postcommit + @pytest.mark.spannerio_it def test_transaction_table_metrics_ok_call(self): if 'DirectRunner' not in self.runner_name: raise unittest.SkipTest('This test only runs with DirectRunner.') @@ -151,7 +151,7 @@ class SpannerReadIntegrationTest(unittest.TestCase): self.verify_table_read_call_metric( self.project, self.TEST_DATABASE, 'Users', 'ok', 1) - @pytest.mark.it_postcommit + @pytest.mark.spannerio_it def test_transaction_table_metrics_error_call(self): if 'DirectRunner' not in self.runner_name: raise unittest.SkipTest('This test only runs with DirectRunner.') @@ -177,7 +177,7 @@ class SpannerReadIntegrationTest(unittest.TestCase): self.verify_table_read_call_metric( self.project, self.TEST_DATABASE, 'INVALID_TABLE', '404', 1) - @pytest.mark.it_postcommit + @pytest.mark.spannerio_it def test_transaction_sql_metrics_ok_call(self): if 'DirectRunner' not in self.runner_name: raise unittest.SkipTest('This test only runs with DirectRunner.') @@ -200,7 +200,7 @@ class SpannerReadIntegrationTest(unittest.TestCase): self.verify_sql_read_call_metric( 
self.project, self.TEST_DATABASE, 'query-1', 'ok', 1) - @pytest.mark.it_postcommit + @pytest.mark.spannerio_it def test_transaction_sql_metrics_error_call(self): if 'DirectRunner' not in self.runner_name: raise unittest.SkipTest('This test only runs with DirectRunner.') @@ -226,7 +226,7 @@ class SpannerReadIntegrationTest(unittest.TestCase): self.verify_sql_read_call_metric( self.project, self.TEST_DATABASE, 'query-2', '400', 1) - @pytest.mark.it_postcommit + @pytest.mark.spannerio_it def test_table_metrics_ok_call(self): if 'DirectRunner' not in self.runner_name: raise unittest.SkipTest('This test only runs with DirectRunner.') @@ -245,7 +245,7 @@ class
[beam] branch master updated: Merge pull request #16117 from [BEAM-13368][Playground][Bugfix] Fix CI and failed unit tests on master
This is an automated email from the ASF dual-hosted git repository. pabloem pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new 45955da Merge pull request #16117 from [BEAM-13368][Playground][Bugfix] Fix CI and failed unit tests on master 45955da is described below commit 45955daa14be8fec7e986474c2fef1622c13664c Author: Artur Khanin AuthorDate: Fri Dec 3 18:42:11 2021 +0300 Merge pull request #16117 from [BEAM-13368][Playground][Bugfix] Fix CI and failed unit tests on master * Fixed RunCode test in controller_test.go * Update tests for backend Co-authored-by: Sergey Kalinin --- playground/backend/build.gradle.kts | 2 +- playground/backend/cmd/server/controller_test.go | 21 +++-- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/playground/backend/build.gradle.kts b/playground/backend/build.gradle.kts index e73b00e..be66356 100644 --- a/playground/backend/build.gradle.kts +++ b/playground/backend/build.gradle.kts @@ -46,7 +46,7 @@ task("test") { doLast { exec { executable("go") - args("test", "internal/...") + args("test", "./...") } } } diff --git a/playground/backend/cmd/server/controller_test.go b/playground/backend/cmd/server/controller_test.go index c7ddbd6..8e7b730 100644 --- a/playground/backend/cmd/server/controller_test.go +++ b/playground/backend/cmd/server/controller_test.go @@ -125,10 +125,9 @@ func TestPlaygroundController_RunCode(t *testing.T) { request *pb.RunCodeRequest } tests := []struct { - name string - args args - wantStatus pb.Status - wantErr bool + name string + args args + wantErr bool }{ { // Test case with calling RunCode method with incorrect SDK. 
@@ -154,8 +153,7 @@ func TestPlaygroundController_RunCode(t *testing.T) { Sdk: pb.Sdk_SDK_JAVA, }, }, - wantStatus: pb.Status_STATUS_COMPILING, - wantErr:false, + wantErr: false, }, } for _, tt := range tests { @@ -175,19 +173,14 @@ func TestPlaygroundController_RunCode(t *testing.T) { t.Errorf("PlaygroundController_RunCode() response shoudn't be nil") } else { if response.PipelineUuid == "" { - t.Errorf("PlaygroundController_RunCode() response.pipeLineId shoudn't be nil") - } else { - path := os.Getenv("APP_WORK_DIR") + "/executable_files" - os.RemoveAll(path) + t.Errorf("PlaygroundController_RunCode() response.pipelineId shoudn't be nil") } status, _ := cacheService.GetValue(tt.args.ctx, uuid.MustParse(response.PipelineUuid), cache.Status) + path := os.Getenv("APP_WORK_DIR") + "/executable_files" + os.RemoveAll(path) if status == nil { t.Errorf("PlaygroundController_RunCode() status shoudn't be nil") } - if !reflect.DeepEqual(status, tt.wantStatus) { - t.Errorf("PlaygroundController_RunCode() status = %v, wantStatus %v", status, tt.wantStatus) - } - } } })