[beam] branch release-2.35.0 updated: [BEAM-13388] Cherry-pick more changes for google cloud dlp update. (#16268)

2021-12-16 Thread tvalentyn
This is an automated email from the ASF dual-hosted git repository.

tvalentyn pushed a commit to branch release-2.35.0
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/release-2.35.0 by this push:
 new edcfdc4  [BEAM-13388] Cherry-pick more changes for google cloud dlp 
update. (#16268)
edcfdc4 is described below

commit edcfdc40954b05dd47905cbefb13d7a7437ea99b
Author: tvalentyn 
AuthorDate: Thu Dec 16 22:18:33 2021 -0800

[BEAM-13388] Cherry-pick more changes for google cloud dlp update. (#16268)

Co-authored-by: Brian Hulette 
Co-authored-by: Yichi Zhang 
Co-authored-by: Kyle Weaver 
Co-authored-by: Andy Ye 
---
 .../apache_beam/io/gcp/pubsub_integration_test.py  |  2 ++
 sdks/python/apache_beam/ml/gcp/cloud_dlp.py| 22 -
 sdks/python/apache_beam/ml/gcp/cloud_dlp_test.py   | 37 ++
 3 files changed, 39 insertions(+), 22 deletions(-)

diff --git a/sdks/python/apache_beam/io/gcp/pubsub_integration_test.py 
b/sdks/python/apache_beam/io/gcp/pubsub_integration_test.py
index 541bb52..bbb914e 100644
--- a/sdks/python/apache_beam/io/gcp/pubsub_integration_test.py
+++ b/sdks/python/apache_beam/io/gcp/pubsub_integration_test.py
@@ -210,6 +210,8 @@ class PubSubIntegrationTest(unittest.TestCase):
 
   @pytest.mark.it_postcommit
   def test_streaming_with_attributes(self):
+if self.runner_name == 'TestDataflowRunner':
+  pytest.skip("BEAM-13218")
 self._test_streaming(with_attributes=True)
 
 
diff --git a/sdks/python/apache_beam/ml/gcp/cloud_dlp.py 
b/sdks/python/apache_beam/ml/gcp/cloud_dlp.py
index 93510c8..e3fddba 100644
--- a/sdks/python/apache_beam/ml/gcp/cloud_dlp.py
+++ b/sdks/python/apache_beam/ml/gcp/cloud_dlp.py
@@ -20,9 +20,11 @@ functionality.
 """
 
 import logging
+from typing import List
 
 from google.cloud import dlp_v2
 
+from apache_beam import typehints
 from apache_beam.options.pipeline_options import GoogleCloudOptions
 from apache_beam.transforms import DoFn
 from apache_beam.transforms import ParDo
@@ -35,6 +37,8 @@ _LOGGER = logging.getLogger(__name__)
 
 
 @experimental()
+@typehints.with_input_types(str)
+@typehints.with_output_types(str)
 class MaskDetectedDetails(PTransform):
   """Scrubs sensitive information detected in text.
   The ``PTransform`` returns a ``PCollection`` of ``str``
@@ -126,6 +130,8 @@ class MaskDetectedDetails(PTransform):
 
 
 @experimental()
+@typehints.with_input_types(str)
+@typehints.with_output_types(List[dlp_v2.types.dlp.Finding])
 class InspectForDetails(PTransform):
   """Inspects input text for sensitive information.
   the ``PTransform`` returns a ``PCollection`` of
@@ -190,13 +196,13 @@ class _DeidentifyFn(DoFn):
   self.client = dlp_v2.DlpServiceClient()
 self.params = {
 'timeout': self.timeout,
-'parent': self.client.project_path(self.project)
 }
-self.params.update(self.config)
+self.parent = self.client.common_project_path(self.project)
 
   def process(self, element, **kwargs):
-operation = self.client.deidentify_content(
-item={"value": element}, **self.params)
+request = {'item': {'value': element}, 'parent': self.parent}
+request.update(self.config)
+operation = self.client.deidentify_content(request=request, **self.params)
 yield operation.item.value
 
 
@@ -213,12 +219,12 @@ class _InspectFn(DoFn):
   self.client = dlp_v2.DlpServiceClient()
 self.params = {
 'timeout': self.timeout,
-"parent": self.client.project_path(self.project)
 }
-self.params.update(self.config)
+self.parent = self.client.common_project_path(self.project)
 
   def process(self, element, **kwargs):
-operation = self.client.inspect_content(
-item={"value": element}, **self.params)
+request = {'item': {'value': element}, 'parent': self.parent}
+request.update(self.config)
+operation = self.client.inspect_content(request=request, **self.params)
 hits = [x for x in operation.result.findings]
 yield hits
diff --git a/sdks/python/apache_beam/ml/gcp/cloud_dlp_test.py 
b/sdks/python/apache_beam/ml/gcp/cloud_dlp_test.py
index 111e5be..d4153e5 100644
--- a/sdks/python/apache_beam/ml/gcp/cloud_dlp_test.py
+++ b/sdks/python/apache_beam/ml/gcp/cloud_dlp_test.py
@@ -37,6 +37,7 @@ else:
   from apache_beam.ml.gcp.cloud_dlp import MaskDetectedDetails
   from apache_beam.ml.gcp.cloud_dlp import _DeidentifyFn
   from apache_beam.ml.gcp.cloud_dlp import _InspectFn
+  from google.cloud.dlp_v2.types import dlp
 # pylint: enable=wrong-import-order, wrong-import-position, ungrouped-imports
 
 _LOGGER = logging.getLogger(__name__)
@@ -56,6 +57,9 @@ class TestDeidentifyFn(unittest.TestCase):
   def test_deidentify_called(self):
 class ClientMock(object):
   def deidentify_content(self, *args, **kwargs):
+# Check that we can marshal a valid request.
+dlp.DeidentifyContentRequest(kwargs['request'])
+
 

[beam] branch master updated: [BEAM-13434] Bump google pubsublite on master. (#16265)

2021-12-16 Thread tvalentyn
This is an automated email from the ASF dual-hosted git repository.

tvalentyn pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new b150ace  [BEAM-13434] Bump google pubsublite on master. (#16265)
b150ace is described below

commit b150ace0884c88bc93da21f6dfe3b7684f886e94
Author: tvalentyn 
AuthorDate: Thu Dec 16 20:07:44 2021 -0800

[BEAM-13434] Bump google pubsublite on master. (#16265)
---
 .../src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy| 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git 
a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy 
b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
index 5f1a2a0..786a048 100644
--- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
+++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
@@ -452,7 +452,7 @@ class BeamModulePlugin implements Plugin {
 def errorprone_version = "2.3.4"
 def google_clients_version = "1.32.1"
 def google_cloud_bigdataoss_version = "2.2.4"
-def google_cloud_pubsublite_version = "1.4.5"
+def google_cloud_pubsublite_version = "1.4.6"
 def google_code_gson_version = "2.8.9"
 def google_oauth_clients_version = "1.32.1"
 // Try to keep grpc_version consistent with gRPC version in 
google_cloud_platform_libraries_bom
@@ -535,7 +535,7 @@ class BeamModulePlugin implements Plugin {
 commons_lang3   : 
"org.apache.commons:commons-lang3:3.9",
 commons_math3   : 
"org.apache.commons:commons-math3:3.6.1",
 error_prone_annotations : 
"com.google.errorprone:error_prone_annotations:$errorprone_version",
-flogger_system_backend  : 
"com.google.flogger:flogger-system-backend:0.7.2",
+flogger_system_backend  : 
"com.google.flogger:flogger-system-backend:0.7.3",
 gax : "com.google.api:gax", // 
google_cloud_platform_libraries_bom sets version
 gax_grpc: 
"com.google.api:gax-grpc", // google_cloud_platform_libraries_bom sets version
 gax_httpjson: 
"com.google.api:gax-httpjson", // google_cloud_platform_libraries_bom sets 
version


[beam] branch nightly-refs/heads/master updated (112b3cd -> a4cca44)

2021-12-16 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a change to branch nightly-refs/heads/master
in repository https://gitbox.apache.org/repos/asf/beam.git.


from 112b3cd  [BEAM-13399] Add infrastructure to start JARs from Go 
functions (#16214)
 add dcd984d  [BEAM-12164] Add Spanner Change Stream DAOs
 add 759da41  Merge pull request #16124 from [BEAM-12164] Add Spanner 
Change Stream DAOs
 add 4835700  Merge pull request #16061 from [BEAM-13428] [Playground] 
Integrate Google Analytics
 add 75fe1a6  Clarify CoGroupByKey creates Iterable, not list. (#16099)
 add 1504892  [BEAM-12931] Allow for DoFn#getAllowedTimestampSkew() when 
checking the output timestamp
 add e83b6f1   [BEAM-13467] Properly handle null argument types for logical 
types. (#16249)
 add 673507a  [BEAM-10277] Initial implementation for encoding position in 
Python RowCoder (#15410)
 add 251dd0c  [BEAM-11545] State & timer for batched RPC calls pattern 
(#13643)
 add 38dcb9e  Automatically prune local images before building an RC. 
(#16238)
 add a09a8ec  Add verbose error messages to container-related scripts. 
(#16056)
 add bb1104a  [BEAM-13456] Rollback #15890 to fix timeout in Java 
PostCommit (#16257)
 add a4cca44  [BEAM-13015] Add a state backed iterable that can be mutated 
under certain circumstances. (#16252)

No new revisions were added by this update.

Summary of changes:
 CHANGES.md |   1 +
 .../org/apache/beam/gradle/BeamModulePlugin.groovy |   6 +
 .../apache/beam/examples/snippets/Snippets.java|   8 -
 .../beam/model/fnexecution/v1/standard_coders.yaml |  33 ++
 playground/frontend/analysis_options.yaml  |   2 +-
 playground/frontend/build.gradle   |   2 +
 playground/frontend/gradle.properties  |   1 +
 .../toggle_theme_button/toggle_theme_button.dart   |   7 +-
 playground/frontend/lib/config.g.dart  |   1 +
 playground/frontend/lib/constants/colors.dart  |   2 +
 .../frontend/lib/constants/links.dart  |  21 +-
 .../actions/components/new_example_action.dart |   6 +-
 .../modules/actions/components/reset_action.dart   |   6 +-
 .../lib/modules/analytics/analytics_events.dart|  32 +-
 .../lib/modules/analytics/analytics_service.dart   | 114 +
 .../example_list/expansion_panel_item.dart |   2 +
 .../notifications/components/notification.dart |   1 -
 .../components/editor_textarea_wrapper.dart|   7 +-
 .../pages/playground/components/more_actions.dart  |  37 +-
 .../playground/components/playground_feedback.dart |  11 +-
 .../components/playground_page_footer.dart |  23 +-
 .../components/playground_page_providers.dart  |   2 +
 .../components/playground_privacy_policy.dart  |  85 
 .../lib/pages/playground/playground_page.dart  |   7 +-
 playground/frontend/pubspec.lock   |   7 +
 playground/frontend/pubspec.yaml   |   1 +
 playground/frontend/web/index.html |   8 +-
 .../src/main/scripts/build_release_candidate.sh|   2 +
 .../core/construction/PTransformMatchersTest.java  |  21 +
 .../apache/beam/runners/core/SimpleDoFnRunner.java |  92 +++-
 .../beam/runners/core/SimpleDoFnRunnerTest.java| 355 ++-
 .../beam/runners/direct/NanosOffsetClock.java  |   6 +-
 .../runners/direct/TransformEvaluatorRegistry.java |  16 +
 .../runners/direct/UnboundedReadDeduplicator.java  |   3 +-
 .../runners/direct/CloningBundleFactoryTest.java   |  67 +++
 .../flink/FlinkStreamingPipelineTranslator.java|   4 +-
 .../flink/FlinkStreamingTransformTranslators.java  |  17 +
 .../wrappers/streaming/DoFnOperator.java   |   2 +-
 .../state/FlinkBroadcastStateInternals.java| 131 ++
 .../streaming/ExecutableStageDoFnOperatorTest.java |   1 -
 .../beam/runners/dataflow/DataflowRunner.java  | 110 +
 .../runners/dataflow/DataflowPipelineJobTest.java  |  25 ++
 .../beam/runners/dataflow/worker/ReaderCache.java  |   3 +-
 .../beam/runners/dataflow/worker/StateFetcher.java |   4 +-
 .../dataflow/worker/StreamingDataflowWorker.java   |   2 +-
 .../fn/data/RemoteGrpcPortWriteOperation.java  |   6 +-
 .../common/worker/CachingShuffleBatchReader.java   |   4 +-
 .../control/DefaultJobBundleFactory.java   |  16 +-
 .../fnexecution/control/RemoteExecutionTest.java   |   2 +
 .../beam/runners/spark/io/MicrobatchSource.java|   2 +-
 .../translation/utils/SideInputStorage.java|   4 +-
 .../runners/spark/util/GlobalWatermarkHolder.java  |   4 +-
 .../beam/runners/spark/util/SideInputStorage.java  |   4 +-
 .../go/test/regression/coders/fromyaml/fromyaml.go |  15 +-
 .../src/main/java/org/apache/beam/sdk/io/Read.java |   3 +-
 .../org/apache/beam/sdk/schemas/SchemaCoder.java   |  20 +
 .../apache/beam/sdk/schemas/SchemaTranslation.java |  13 +-
 .../apache/beam/sdk/values/PCollectionViews.java   |

[beam] branch master updated (a4cca44 -> 4f2bbff)

2021-12-16 Thread ibzib
This is an automated email from the ASF dual-hosted git repository.

ibzib pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git.


from a4cca44  [BEAM-13015] Add a state backed iterable that can be mutated 
under certain circumstances. (#16252)
 add 4f2bbff  [BEAM-13388] Update Cloud DLP after breaking changes. (#16236)

No new revisions were added by this update.

Summary of changes:
 sdks/python/apache_beam/ml/gcp/cloud_dlp.py  | 22 ++--
 sdks/python/apache_beam/ml/gcp/cloud_dlp_test.py | 33 +++-
 2 files changed, 35 insertions(+), 20 deletions(-)


[beam] branch release-2.35.0 updated: [BEAM-13434] Upgrade to pubsublite 1.4.0.2 on the release branch. (#16264)

2021-12-16 Thread tvalentyn
This is an automated email from the ASF dual-hosted git repository.

tvalentyn pushed a commit to branch release-2.35.0
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/release-2.35.0 by this push:
 new 46e825a  [BEAM-13434] Upgrade to pubsublite 1.4.0.2 on the release 
branch. (#16264)
46e825a is described below

commit 46e825a0de1ac9331002a21d4287aeb250040738
Author: tvalentyn 
AuthorDate: Thu Dec 16 18:10:12 2021 -0800

[BEAM-13434] Upgrade to pubsublite 1.4.0.2 on the release branch. (#16264)
---
 .../src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy| 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git 
a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy 
b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
index 3cc9496..b5ea272 100644
--- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
+++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
@@ -452,7 +452,7 @@ class BeamModulePlugin implements Plugin {
 def errorprone_version = "2.3.4"
 def google_clients_version = "1.32.1"
 def google_cloud_bigdataoss_version = "2.2.4"
-def google_cloud_pubsublite_version = "1.4.0.1"
+def google_cloud_pubsublite_version = "1.4.0.2"
 def google_code_gson_version = "2.8.9"
 def google_oauth_clients_version = "1.32.1"
 // Try to keep grpc_version consistent with gRPC version in 
google_cloud_platform_libraries_bom
@@ -535,7 +535,7 @@ class BeamModulePlugin implements Plugin {
 commons_lang3   : 
"org.apache.commons:commons-lang3:3.9",
 commons_math3   : 
"org.apache.commons:commons-math3:3.6.1",
 error_prone_annotations : 
"com.google.errorprone:error_prone_annotations:$errorprone_version",
-flogger_system_backend  : 
"com.google.flogger:flogger-system-backend:0.7.2",
+flogger_system_backend  : 
"com.google.flogger:flogger-system-backend:0.7.3",
 gax : "com.google.api:gax", // 
google_cloud_platform_libraries_bom sets version
 gax_grpc: 
"com.google.api:gax-grpc", // google_cloud_platform_libraries_bom sets version
 gax_httpjson: 
"com.google.api:gax-httpjson", // google_cloud_platform_libraries_bom sets 
version


[beam] branch master updated (bb1104a -> a4cca44)

2021-12-16 Thread lcwik
This is an automated email from the ASF dual-hosted git repository.

lcwik pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git.


from bb1104a  [BEAM-13456] Rollback #15890 to fix timeout in Java 
PostCommit (#16257)
 add a4cca44  [BEAM-13015] Add a state backed iterable that can be mutated 
under certain circumstances. (#16252)

No new revisions were added by this update.

Summary of changes:
 .../org/apache/beam/sdk/fn/stream/DataStreams.java |  10 +-
 .../apache/beam/sdk/fn/stream/DataStreamsTest.java |  44 +-
 sdks/java/harness/build.gradle |   6 -
 .../java/org/apache/beam/fn/harness/Cache.java |  28 +-
 .../java/org/apache/beam/fn/harness/Caches.java| 239 +++
 .../fn/harness/control/ProcessBundleHandler.java   |   7 +-
 .../fn/harness/state/StateFetchingIterators.java   | 476 -
 .../org/apache/beam/fn/harness/CachesTest.java |  96 +++--
 .../harness/state/StateFetchingIteratorsTest.java  | 362 ++--
 9 files changed, 1046 insertions(+), 222 deletions(-)


[beam] branch master updated: [BEAM-13456] Rollback #15890 to fix timeout in Java PostCommit (#16257)

2021-12-16 Thread lcwik
This is an automated email from the ASF dual-hosted git repository.

lcwik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new bb1104a  [BEAM-13456] Rollback #15890 to fix timeout in Java 
PostCommit (#16257)
bb1104a is described below

commit bb1104a82878d5e3b48210089f6d00e9d460dfea
Author: Brian Hulette 
AuthorDate: Thu Dec 16 13:30:51 2021 -0800

[BEAM-13456] Rollback #15890 to fix timeout in Java PostCommit (#16257)

* Revert "[BEAM-11936] Fix errorprone warnings (#15890)"

This reverts commit 06a5e67332aae53ea90dedb4ef6421c2a7d65035.

* spotless
---
 .../org/apache/beam/gradle/BeamModulePlugin.groovy |   6 +
 .../apache/beam/examples/snippets/Snippets.java|   8 --
 .../core/construction/PTransformMatchersTest.java  |  21 
 .../beam/runners/direct/NanosOffsetClock.java  |   6 +-
 .../runners/direct/TransformEvaluatorRegistry.java |  16 +++
 .../runners/direct/UnboundedReadDeduplicator.java  |   3 +-
 .../runners/direct/CloningBundleFactoryTest.java   |  67 +++
 .../flink/FlinkStreamingPipelineTranslator.java|   4 +-
 .../flink/FlinkStreamingTransformTranslators.java  |  17 +++
 .../wrappers/streaming/DoFnOperator.java   |   2 +-
 .../state/FlinkBroadcastStateInternals.java| 131 +
 .../streaming/ExecutableStageDoFnOperatorTest.java |   1 -
 .../beam/runners/dataflow/DataflowRunner.java  | 110 +
 .../runners/dataflow/DataflowPipelineJobTest.java  |  25 
 .../beam/runners/dataflow/worker/ReaderCache.java  |   3 +-
 .../beam/runners/dataflow/worker/StateFetcher.java |   4 +-
 .../dataflow/worker/StreamingDataflowWorker.java   |   2 +-
 .../fn/data/RemoteGrpcPortWriteOperation.java  |   6 +-
 .../common/worker/CachingShuffleBatchReader.java   |   4 +-
 .../control/DefaultJobBundleFactory.java   |  16 +--
 .../fnexecution/control/RemoteExecutionTest.java   |   2 +
 .../beam/runners/spark/io/MicrobatchSource.java|   2 +-
 .../translation/utils/SideInputStorage.java|   4 +-
 .../runners/spark/util/GlobalWatermarkHolder.java  |   4 +-
 .../beam/runners/spark/util/SideInputStorage.java  |   4 +-
 .../src/main/java/org/apache/beam/sdk/io/Read.java |   3 +-
 .../org/apache/beam/sdk/schemas/SchemaCoder.java   |  20 
 .../apache/beam/sdk/values/PCollectionViews.java   |  59 ++
 .../apache/beam/sdk/coders/CoderRegistryTest.java  |   4 +
 .../apache/beam/sdk/testing/ExpectedLogsTest.java  |  10 +-
 .../beam/sdk/testing/SystemNanoTimeSleeper.java|   4 +-
 .../sdk/transforms/reflect/DoFnSignaturesTest.java |   9 ++
 .../GrowableOffsetRangeTrackerTest.java|   2 +-
 .../core/translate/TimestampExtractTransform.java  |   8 ++
 .../sql/meta/provider/kafka/BeamKafkaTable.java|   3 +-
 .../org/apache/beam/sdk/fn/CancellableQueue.java   |   4 +-
 .../harness/state/StateFetchingIteratorsTest.java  |   2 +-
 .../bigquery/StorageApiWriteUnshardedRecords.java  |   3 +-
 .../bigquery/StorageApiWritesShardedRecords.java   |   3 +-
 .../internal/LimitingTopicBacklogReader.java   |   6 +-
 .../beam/sdk/io/gcp/spanner/SpannerAccessor.java   |  24 
 .../sdk/io/hadoop/format/TestRowDBWritable.java|  10 ++
 .../beam/sdk/io/kafka/KafkaExactlyOnceSink.java|   3 +-
 .../beam/sdk/io/kafka/KafkaUnboundedReader.java|   4 +-
 .../org/apache/beam/sdk/io/xml/XmlSourceTest.java  |   4 +-
 45 files changed, 592 insertions(+), 61 deletions(-)

diff --git 
a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy 
b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
index 60d76c9..5f1a2a0 100644
--- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
+++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
@@ -1140,12 +1140,18 @@ class BeamModulePlugin implements Plugin {
 options.errorprone.errorproneArgs.add("-Xep:EqualsGetClass:OFF")
 options.errorprone.errorproneArgs.add("-Xep:EqualsUnsafeCast:OFF")
 options.errorprone.errorproneArgs.add("-Xep:ExtendsAutoValue:OFF")
+
options.errorprone.errorproneArgs.add("-Xep:FloatingPointAssertionWithinEpsilon:OFF")
 
options.errorprone.errorproneArgs.add("-Xep:JavaTimeDefaultTimeZone:OFF")
+options.errorprone.errorproneArgs.add("-Xep:LockNotBeforeTry:OFF")
 
options.errorprone.errorproneArgs.add("-Xep:MixedMutabilityReturnType:OFF")
+
options.errorprone.errorproneArgs.add("-Xep:PreferJavaTimeOverload:OFF")
+options.errorprone.errorproneArgs.add("-Xep:ModifiedButNotUsed:OFF")
 options.errorprone.errorproneArgs.add("-Xep:ThreadPriorityCheck:OFF")
 
options.errorprone.errorproneArgs.add("-Xep:TimeUnitConversionChecker:OFF")
 options.errorprone.errorproneArgs.add("-Xep:UndefinedEquals:OFF")
 options.errorprone.errorproneArgs.add("

[beam] branch master updated: Add verbose error messages to container-related scripts. (#16056)

2021-12-16 Thread tvalentyn
This is an automated email from the ASF dual-hosted git repository.

tvalentyn pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new a09a8ec  Add verbose error messages to container-related scripts. 
(#16056)
a09a8ec is described below

commit a09a8ecbcb971c6779b687a517128260471d3a87
Author: tvalentyn 
AuthorDate: Thu Dec 16 13:22:44 2021 -0800

Add verbose error messages to container-related scripts. (#16056)
---
 sdks/python/container/Dockerfile  |  2 +-
 sdks/python/container/run_generate_requirements.sh| 19 +++
 .../www/site/content/en/contribute/release-guide.md   | 19 ++-
 3 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/sdks/python/container/Dockerfile b/sdks/python/container/Dockerfile
index b97cb3a..182d7bd 100644
--- a/sdks/python/container/Dockerfile
+++ b/sdks/python/container/Dockerfile
@@ -69,7 +69,7 @@ RUN ccache --set-config=sloppiness=file_macro && ccache 
--set-config=hash_dir=fa
 
 COPY target/apache-beam.tar.gz /opt/apache/beam/tars/
 RUN pip install --no-deps -v /opt/apache/beam/tars/apache-beam.tar.gz[gcp]
-RUN pip check
+RUN pip check || (echo "Container does not include required Beam dependencies 
or has conflicting dependencies. If Beam dependencies have changed, you need to 
regenerate base_image_requirements.txt files. See: 
https://s.apache.org/beam-python-requirements-generate"; && exit 1)
 # Log complete list of what exact packages and versions are installed.
 RUN pip freeze --all
 
diff --git a/sdks/python/container/run_generate_requirements.sh 
b/sdks/python/container/run_generate_requirements.sh
index 9b533b7..a3b8f61 100755
--- a/sdks/python/container/run_generate_requirements.sh
+++ b/sdks/python/container/run_generate_requirements.sh
@@ -31,15 +31,26 @@
 # https://s.apache.org/beam-python-dev-wiki
 
 if [[ $# != 2 ]]; then
-  printf "Usage: \n$> ./sdks/python/container/run_generate_requirements.sh 
 "
-  printf "\n\tpython_version: [required] Python version to generate 
dependencies for."
-  printf " Use 3.7 for Python3.7, 3.8 for Python3.8 etc."
+  printf "Example usage: \n$> 
./sdks/python/container/run_generate_requirements.sh 3.8 "
+  printf "\n\twhere 3.8 is the Python major.minor version."
+  exit 1
 fi
 
-set -ex
 PY_VERSION=$1
 SDK_TARBALL=$2
 
+if ! python$PY_VERSION --version > /dev/null 2>&1 ; then
+  echo "Please install a python${PY_VERSION} interpreter. See 
s.apache.org/beam-python-dev-wiki for Python installation tips."
+  exit 1
+fi
+
+if ! python$PY_VERSION -m venv --help > /dev/null 2>&1 ; then
+  echo "Your python${PY_VERSION} installation does not have a required venv 
module. See s.apache.org/beam-python-dev-wiki for Python installation tips."
+  exit 1
+fi
+
+set -ex
+
 ENV_PATH="$PWD/build/python${PY_VERSION/./}_requirements_gen"
 rm -rf $ENV_PATH 2>/dev/null || true
 python${PY_VERSION} -m venv $ENV_PATH
diff --git a/website/www/site/content/en/contribute/release-guide.md 
b/website/www/site/content/en/contribute/release-guide.md
index 370a572..727c94b 100644
--- a/website/www/site/content/en/contribute/release-guide.md
+++ b/website/www/site/content/en/contribute/release-guide.md
@@ -233,17 +233,10 @@ If you are not a PMC, please ask for help in dev@ mailing 
list.
 
 ## 3. Update base image dependencies for Python container images
 
-1. Check the versions specified in 
sdks/python/container/base_image_requirements_manual.txt` and update them if 
necessary.
-1. Regenerate full dependency list by running:
-`./gradlew :sdks:python:container:generatePythonRequirementsAll` and commiting
-the changes. Exectution takes about ~5 min per Python version and is somewhat 
resource-demanding.
-You can also regenerate the dependencies indiviually per version with targets 
like `./gradlew :sdks:python:container:py38:generatePythonRequirements`.
+See instructions at: https://s.apache.org/beam-python-requirements-generate
 
-
-Ideally, do this at least a week before the release cut, so that any issues
+Ideally, do the update at least a week before the release cut, so that any 
issues
 related to the update have time to surface.
-You will need Python intepreters for all versions supported by Beam, see:
-https://s.apache.org/beam-python-dev-wiki for tips how to install them.
 
 ## 4. Investigate performance regressions
 
@@ -589,12 +582,12 @@ See the source of the script for more details, or to run 
commands manually in ca
   Please note that dependencies for the SDKs with different Python 
versions vary.
   Need to verify all Python images by replacing `${ver}` with each 
supported Python version `X.Y`.
   ```
-  docker run -it --entrypoint=/bin/bash 
apache/beam_python${ver}_sdk:${RELEASE}_rc{RC_NUM}
+  docker run --rm -it --entrypoint=/bin/bash 
apache/beam_python${ver}_sdk:${RELEASE}_rc{RC_NUM}
   ls -al /opt/apache/

[beam] branch master updated: Automatically prune local images before building an RC. (#16238)

2021-12-16 Thread tvalentyn
This is an automated email from the ASF dual-hosted git repository.

tvalentyn pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 38dcb9e  Automatically prune local images before building an RC. 
(#16238)
38dcb9e is described below

commit 38dcb9e13eac610a629cf05fa7f1ff08e5795e72
Author: tvalentyn 
AuthorDate: Thu Dec 16 12:07:37 2021 -0800

Automatically prune local images before building an RC. (#16238)
---
 release/src/main/scripts/build_release_candidate.sh | 2 ++
 website/www/site/content/en/contribute/release-guide.md | 4 
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/release/src/main/scripts/build_release_candidate.sh 
b/release/src/main/scripts/build_release_candidate.sh
index 8e60ea6..0c8e7ae 100755
--- a/release/src/main/scripts/build_release_candidate.sh
+++ b/release/src/main/scripts/build_release_candidate.sh
@@ -317,9 +317,11 @@ if [[ $confirmation = "y" ]]; then
 fi
 
 echo "[Current Step]: Stage docker images"
+echo "Note: this step will also prune your local docker image and container 
cache."
 echo "Do you want to proceed? [y|N]"
 read confirmation
 if [[ $confirmation = "y" ]]; then
+  docker system prune -a -f
   echo "Staging SDK docker images on docker hub="
   cd ~
   wipe_local_clone_dir
diff --git a/website/www/site/content/en/contribute/release-guide.md 
b/website/www/site/content/en/contribute/release-guide.md
index 9896640..370a572 100644
--- a/website/www/site/content/en/contribute/release-guide.md
+++ b/website/www/site/content/en/contribute/release-guide.md
@@ -561,10 +561,6 @@ See the source of the script for more details, or to run 
commands manually in ca
 
 ### Run build_release_candidate.sh to create a release candidate
 
-Before you start, run this command to make sure you'll be using the latest 
docker images:
-
-  docker system prune -a
-
 * **Script:** 
[build_release_candidate.sh](https://github.com/apache/beam/blob/master/release/src/main/scripts/build_release_candidate.sh)
 
 * **Usage**


[beam] branch master updated: [BEAM-11545] State & timer for batched RPC calls pattern (#13643)

2021-12-16 Thread mbae
This is an automated email from the ASF dual-hosted git repository.

mbae pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 251dd0c  [BEAM-11545] State & timer for batched RPC calls pattern 
(#13643)
251dd0c is described below

commit 251dd0c0f0eccf83ed7346e385981180d562e1b1
Author: Matthias Baetens 
AuthorDate: Thu Dec 16 20:08:53 2021 +0100

[BEAM-11545] State & timer for batched RPC calls pattern (#13643)

* [BEAM-11545] State & timer for batched RPC calls pattern
---
 ...lements-for-efficient-external-service-calls.md | 56 ++
 .../content/en/documentation/patterns/overview.md  |  3 ++
 .../partials/section-menu/en/documentation.html|  1 +
 3 files changed, 60 insertions(+)

diff --git 
a/website/www/site/content/en/documentation/patterns/grouping-elements-for-efficient-external-service-calls.md
 
b/website/www/site/content/en/documentation/patterns/grouping-elements-for-efficient-external-service-calls.md
new file mode 100644
index 000..9409e83
--- /dev/null
+++ 
b/website/www/site/content/en/documentation/patterns/grouping-elements-for-efficient-external-service-calls.md
@@ -0,0 +1,56 @@
+---
+title: "Pattern for grouping elements for efficient external service calls"
+---
+
+
+
+# Grouping elements for efficient external service calls using the 
`GroupIntoBatches`-transform
+
+{{< language-switcher java py >}}
+
+Usually, authoring an Apache Beam pipeline can be done with out-of-the-box 
tools and transforms like _ParDo_'s, _Window_'s and _GroupByKey_'s. However, 
when you want more tight control, you can keep state in an otherwise stateless 
_DoFn_.
+
+State is kept on a per-key and per-window basis, and as such, the input to 
your stateful DoFn needs to be keyed (e.g. by the customer identifier if you're 
tracking clicks from an e-commerce website).
+
+Examples of use cases are: assigning a unique ID to each element, joining 
streams of data in 'more exotic' ways, or batching up API calls to external 
services. In this section we'll go over the last one in particular.
+
+Make sure to check the 
[docs](https://beam.apache.org/documentation/programming-guide/#state-and-timers)
 for a deeper understanding of state and timers.
+
+The `GroupIntoBatches`-transform uses state and timers under the hood to allow 
the user to exercise tight control over the following parameters:
+
+- `maxBufferDuration`: limits the amount of waiting time for a batch to be 
emitted.
+- `batchSize`: limits the number of elements in one batch.
+- `batchSizeBytes`: (in Java only) limits the byte size of one batch (using 
input coder to determine element size).
+- `elementByteSize`: (in Java only) limits the byte size of one batch (using a 
user-defined function to determine element size).
+
+while abstracting away the implementation details from users.
+
+The `withShardedKey()` functionality increases parallelism by spreading one 
key over multiple threads.
+
+The transforms are used in the following way in Java & Python:
+
+{{< highlight java >}}
+input.apply(
+  "Batch Contents",
+  GroupIntoBatches.ofSize(batchSize)
+  .withMaxBufferingDuration(maxBufferingDuration)
+  .withShardedKey())
+{{< /highlight >}}
+
+{{< highlight py >}}
+input | GroupIntoBatches.WithShardedKey(batchSize, maxBufferingDuration)
+{{< /highlight >}}
+
+Applying these transforms will output groups of elements in a batch on a 
per-key basis, which you can then use to call an external API in bulk rather 
than on a per-element basis, resulting in a lower overhead in your pipeline.
diff --git a/website/www/site/content/en/documentation/patterns/overview.md 
b/website/www/site/content/en/documentation/patterns/overview.md
index b13c5d4..c5e6084 100644
--- a/website/www/site/content/en/documentation/patterns/overview.md
+++ b/website/www/site/content/en/documentation/patterns/overview.md
@@ -51,6 +51,9 @@ Pipeline patterns demonstrate common Beam use cases. Pipeline 
patterns are based
 **Cross-language patterns** - Patterns for creating cross-language pipelines
 * [Cross-language 
patterns](/documentation/patterns/cross-language/#cross-language-transforms)
 
+**State & timers patterns** - Patterns for using state & timers
+* [Grouping elements for efficient external service 
calls](/documentation/patterns/grouping-elements-for-efficient-external-service-calls/#grouping-elements-for-efficient-external-service-calls-using-the-`GroupIntoBatches`-transform)
+
 ## Contributing a pattern
 
 To contribute a new pipeline pattern, create an issue with the 
[`pipeline-patterns` 
label](https://issues.apache.org/jira/browse/BEAM-7449?jql=labels%20%3D%20pipeline-patterns)
 and add details to the issue description. See [Get started 
contributing](/contribute/) for more information.
diff --git 
a/website/www/site/layouts/partials/section-menu/en/documentation.html 
b/website/ww

[beam] branch master updated: [BEAM-10277] Initial implementation for encoding position in Python RowCoder (#15410)

2021-12-16 Thread bhulette
This is an automated email from the ASF dual-hosted git repository.

bhulette pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 673507a  [BEAM-10277] Initial implementation for encoding position in 
Python RowCoder (#15410)
673507a is described below

commit 673507a3c541715dc7c62ba073e4195d01be0899
Author: AlikRodriguez <74626882+alikrodrig...@users.noreply.github.com>
AuthorDate: Thu Dec 16 13:01:08 2021 -0600

[BEAM-10277] Initial implementation for encoding position in Python 
RowCoder (#15410)

* add test to encoding position

* respect encoding position

* enforcing encoding position on trivial test

* add coder_yaml test

* fix types for go fromyaml_test

* skip unimplemented contents

* encoding position from init

* revert encoding position in schemas

* added payload id on testcase in yaml file

* change go test exception in coders yaml

* change helper methods to coders test

* add condition to check encoding_position_set

* fix pylint

* encoding position in code decode with argsort

* check encoding position start at 0 and has no duplicates

* fix test, precompute argsort encoding position
---
 .../beam/model/fnexecution/v1/standard_coders.yaml | 33 ++
 .../go/test/regression/coders/fromyaml/fromyaml.go | 15 +++--
 sdks/python/apache_beam/coders/row_coder.py| 39 +---
 sdks/python/apache_beam/coders/row_coder_test.py   | 73 ++
 4 files changed, 147 insertions(+), 13 deletions(-)

diff --git 
a/model/fn-execution/src/main/resources/org/apache/beam/model/fnexecution/v1/standard_coders.yaml
 
b/model/fn-execution/src/main/resources/org/apache/beam/model/fnexecution/v1/standard_coders.yaml
index ef37772..df119ba 100644
--- 
a/model/fn-execution/src/main/resources/org/apache/beam/model/fnexecution/v1/standard_coders.yaml
+++ 
b/model/fn-execution/src/main/resources/org/apache/beam/model/fnexecution/v1/standard_coders.yaml
@@ -410,6 +410,39 @@ examples:
   
"\x01\x00\x00\x00\x00\x04\neverything\x00\x02is\x00\x05null!\x00\r\xc2\xaf\\_(\xe3\x83\x84)_/\xc2\xaf\x00":
 {f_map: {"everything": null, "is": null, "null!": null, "¯\\_(ツ)_/¯": null}}
 
 ---
+# Binary data generated with the python SDK:
+# schema1 = schema_pb2.Schema(
+# id="30ea5a25-dcd8-4cdb-abeb-5332d15ab4b9",
+# fields=[
+# schema_pb2.Field(
+# name="str",
+# type=schema_pb2.FieldType(atomic_type=schema_pb2.STRING),
+# encoding_position=1),
+# schema_pb2.Field(
+# name="f_bool",
+# type=schema_pb2.FieldType(atomic_type=schema_pb2.BOOLEAN),
+# encoding_position=2),
+# schema_pb2.Field(
+# name="i32",
+# type=schema_pb2.FieldType(
+# atomic_type=schema_pb2.INT32, nullable=True),
+# encoding_position=0)
+#   ],
+#   encoding_positions_set=True)
+#
+# coder = RowCoder(schema1)
+# c = coder.schema.SerializeToString()
+# print("payload = %s" % c)
+# test = typing.NamedTuple("test", [ ("f_bool", bool), ("i32", np.int32), 
("str", str) ])
+# example = coder.encode(test(False,21,"str2"))
+# print("example = %s" % example)
+coder:
+  urn: "beam:coder:row:v1"
+  payload: 
"\n\x0b\n\x03str\x1a\x02\x10\x07(\x01\n\x0e\n\x06f_bool\x1a\x02\x10\x08(\x02\n\x0b\n\x03i32\x1a\x04\x08\x01\x10\x03\x12$30ea5a25-dcd8-4cdb-abeb-5332d15ab4b9
 \x01"
+examples:
+  "\x03\x00\x15\x04str2\x00": {f_bool: False, i32: 21, str: "str2"}
+
+---
 
 coder:
   urn: "beam:coder:row:v1"
diff --git a/sdks/go/test/regression/coders/fromyaml/fromyaml.go 
b/sdks/go/test/regression/coders/fromyaml/fromyaml.go
index 337c48e..9147834 100644
--- a/sdks/go/test/regression/coders/fromyaml/fromyaml.go
+++ b/sdks/go/test/regression/coders/fromyaml/fromyaml.go
@@ -48,6 +48,11 @@ var unimplementedCoders = map[string]bool{
"beam:coder:custom_window:v1":true,
 }
 
+var filteredCases = []struct{ filter, reason string }{
+   {"logical", "BEAM-9615: Support logical types"},
+   {"30ea5a25-dcd8-4cdb-abeb-5332d15ab4b9", "BEAM-13043: Support encoding 
position."},
+}
+
 // Coder is a representation of a serialized beam coder.
 type Coder struct {
Urn  string  `yaml:"urn,omitempty"`
@@ -83,11 +88,13 @@ func (s *Spec) testStandardCoder() (err error) {
log.Printf("skipping unimplemented coder urn: %v", s.Coder.Urn)
return nil
}
-   // TODO(BEAM-9615): Support Logical types, and produce a better error 
message.
-   if strings.Contains(s.Coder.Payload, "logical") {
-   log.Printf("skipping coder with logical type. Unsupported in 
the Go SDK for now. Payload: %v", s.Coder.Payload)
-   return nil
+   for _, c := range filteredCases {
+   if strings.Contains(s.Coder.Payload, c.filter) {
+   

[beam] branch master updated (1504892 -> e83b6f1)

2021-12-16 Thread bhulette
This is an automated email from the ASF dual-hosted git repository.

bhulette pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git.


from 1504892  [BEAM-12931] Allow for DoFn#getAllowedTimestampSkew() when 
checking the output timestamp
 add e83b6f1   [BEAM-13467] Properly handle null argument types for logical 
types. (#16249)

No new revisions were added by this update.

Summary of changes:
 .../apache/beam/sdk/schemas/SchemaTranslation.java | 13 ---
 .../beam/sdk/schemas}/SchemaTranslationTest.java   | 40 --
 2 files changed, 46 insertions(+), 7 deletions(-)
 rename 
{runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction
 => 
sdks/java/core/src/test/java/org/apache/beam/sdk/schemas}/SchemaTranslationTest.java
 (93%)


[beam] branch master updated (75fe1a6 -> 1504892)

2021-12-16 Thread lcwik
This is an automated email from the ASF dual-hosted git repository.

lcwik pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git.


from 75fe1a6  Clarify CoGroupByKey creates Iterable, not list. (#16099)
 add 1504892  [BEAM-12931] Allow for DoFn#getAllowedTimestampSkew() when 
checking the output timestamp

No new revisions were added by this update.

Summary of changes:
 CHANGES.md |   1 +
 .../apache/beam/runners/core/SimpleDoFnRunner.java |  92 +-
 .../beam/runners/core/SimpleDoFnRunnerTest.java| 355 +++--
 .../org/apache/beam/sdk/transforms/ParDoTest.java  | 256 ++-
 .../apache/beam/fn/harness/FnApiDoFnRunner.java|  91 +-
 .../beam/fn/harness/FnApiDoFnRunnerTest.java   | 146 +
 6 files changed, 887 insertions(+), 54 deletions(-)


[beam] branch master updated (4835700 -> 75fe1a6)

2021-12-16 Thread bhulette
This is an automated email from the ASF dual-hosted git repository.

bhulette pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git.


from 4835700  Merge pull request #16061 from [BEAM-13428] [Playground] 
Integrate Google Analytics
 add 75fe1a6  Clarify CoGroupByKey creates Iterable, not list. (#16099)

No new revisions were added by this update.

Summary of changes:
 sdks/python/apache_beam/transforms/util.py | 16 +++-
 1 file changed, 11 insertions(+), 5 deletions(-)


[beam] branch master updated (759da41 -> 4835700)

2021-12-16 Thread pabloem
This is an automated email from the ASF dual-hosted git repository.

pabloem pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git.


from 759da41  Merge pull request #16124 from [BEAM-12164] Add Spanner 
Change Stream DAOs
 add 4835700  Merge pull request #16061 from [BEAM-13428] [Playground] 
Integrate Google Analytics

No new revisions were added by this update.

Summary of changes:
 playground/frontend/analysis_options.yaml  |   2 +-
 playground/frontend/build.gradle   |   2 +
 playground/frontend/gradle.properties  |   1 +
 .../toggle_theme_button/toggle_theme_button.dart   |   7 +-
 playground/frontend/lib/config.g.dart  |   1 +
 playground/frontend/lib/constants/colors.dart  |   2 +
 .../frontend/lib/constants/links.dart  |  21 ++--
 .../actions/components/new_example_action.dart |   6 +-
 .../modules/actions/components/reset_action.dart   |   6 +-
 .../lib/modules/analytics/analytics_events.dart|  32 +++---
 .../lib/modules/analytics/analytics_service.dart   | 114 +
 .../example_list/expansion_panel_item.dart |   2 +
 .../notifications/components/notification.dart |   1 -
 .../components/editor_textarea_wrapper.dart|   7 +-
 .../pages/playground/components/more_actions.dart  |  37 ---
 .../playground/components/playground_feedback.dart |  11 +-
 .../components/playground_page_footer.dart |  23 +++--
 .../components/playground_page_providers.dart  |   2 +
 .../components/playground_privacy_policy.dart  |  85 +++
 .../lib/pages/playground/playground_page.dart  |   7 +-
 playground/frontend/pubspec.lock   |   7 ++
 playground/frontend/pubspec.yaml   |   1 +
 playground/frontend/web/index.html |   8 +-
 23 files changed, 316 insertions(+), 69 deletions(-)
 copy .test-infra/jenkins/NoPhraseTriggeringPostCommitBuilder.groovy => 
playground/frontend/lib/constants/links.dart (61%)
 copy 
sdks/java/harness/src/main/java/org/apache/beam/fn/harness/logging/BeamFnLoggingMDC.java
 => playground/frontend/lib/modules/analytics/analytics_events.dart (57%)
 create mode 100644 
playground/frontend/lib/modules/analytics/analytics_service.dart
 create mode 100644 
playground/frontend/lib/pages/playground/components/playground_privacy_policy.dart