[beam] branch release-2.35.0 updated: [BEAM-13388] Cherry-pick more changes for google cloud dlp update. (#16268)
This is an automated email from the ASF dual-hosted git repository. tvalentyn pushed a commit to branch release-2.35.0 in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/release-2.35.0 by this push: new edcfdc4 [BEAM-13388] Cherry-pick more changes for google cloud dlp update. (#16268) edcfdc4 is described below commit edcfdc40954b05dd47905cbefb13d7a7437ea99b Author: tvalentyn AuthorDate: Thu Dec 16 22:18:33 2021 -0800 [BEAM-13388] Cherry-pick more changes for google cloud dlp update. (#16268) Co-authored-by: Brian Hulette Co-authored-by: Yichi Zhang Co-authored-by: Kyle Weaver Co-authored-by: Andy Ye --- .../apache_beam/io/gcp/pubsub_integration_test.py | 2 ++ sdks/python/apache_beam/ml/gcp/cloud_dlp.py| 22 - sdks/python/apache_beam/ml/gcp/cloud_dlp_test.py | 37 ++ 3 files changed, 39 insertions(+), 22 deletions(-) diff --git a/sdks/python/apache_beam/io/gcp/pubsub_integration_test.py b/sdks/python/apache_beam/io/gcp/pubsub_integration_test.py index 541bb52..bbb914e 100644 --- a/sdks/python/apache_beam/io/gcp/pubsub_integration_test.py +++ b/sdks/python/apache_beam/io/gcp/pubsub_integration_test.py @@ -210,6 +210,8 @@ class PubSubIntegrationTest(unittest.TestCase): @pytest.mark.it_postcommit def test_streaming_with_attributes(self): +if self.runner_name == 'TestDataflowRunner': + pytest.skip("BEAM-13218") self._test_streaming(with_attributes=True) diff --git a/sdks/python/apache_beam/ml/gcp/cloud_dlp.py b/sdks/python/apache_beam/ml/gcp/cloud_dlp.py index 93510c8..e3fddba 100644 --- a/sdks/python/apache_beam/ml/gcp/cloud_dlp.py +++ b/sdks/python/apache_beam/ml/gcp/cloud_dlp.py @@ -20,9 +20,11 @@ functionality. 
""" import logging +from typing import List from google.cloud import dlp_v2 +from apache_beam import typehints from apache_beam.options.pipeline_options import GoogleCloudOptions from apache_beam.transforms import DoFn from apache_beam.transforms import ParDo @@ -35,6 +37,8 @@ _LOGGER = logging.getLogger(__name__) @experimental() +@typehints.with_input_types(str) +@typehints.with_output_types(str) class MaskDetectedDetails(PTransform): """Scrubs sensitive information detected in text. The ``PTransform`` returns a ``PCollection`` of ``str`` @@ -126,6 +130,8 @@ class MaskDetectedDetails(PTransform): @experimental() +@typehints.with_input_types(str) +@typehints.with_output_types(List[dlp_v2.types.dlp.Finding]) class InspectForDetails(PTransform): """Inspects input text for sensitive information. the ``PTransform`` returns a ``PCollection`` of @@ -190,13 +196,13 @@ class _DeidentifyFn(DoFn): self.client = dlp_v2.DlpServiceClient() self.params = { 'timeout': self.timeout, -'parent': self.client.project_path(self.project) } -self.params.update(self.config) +self.parent = self.client.common_project_path(self.project) def process(self, element, **kwargs): -operation = self.client.deidentify_content( -item={"value": element}, **self.params) +request = {'item': {'value': element}, 'parent': self.parent} +request.update(self.config) +operation = self.client.deidentify_content(request=request, **self.params) yield operation.item.value @@ -213,12 +219,12 @@ class _InspectFn(DoFn): self.client = dlp_v2.DlpServiceClient() self.params = { 'timeout': self.timeout, -"parent": self.client.project_path(self.project) } -self.params.update(self.config) +self.parent = self.client.common_project_path(self.project) def process(self, element, **kwargs): -operation = self.client.inspect_content( -item={"value": element}, **self.params) +request = {'item': {'value': element}, 'parent': self.parent} +request.update(self.config) +operation = self.client.inspect_content(request=request, 
**self.params) hits = [x for x in operation.result.findings] yield hits diff --git a/sdks/python/apache_beam/ml/gcp/cloud_dlp_test.py b/sdks/python/apache_beam/ml/gcp/cloud_dlp_test.py index 111e5be..d4153e5 100644 --- a/sdks/python/apache_beam/ml/gcp/cloud_dlp_test.py +++ b/sdks/python/apache_beam/ml/gcp/cloud_dlp_test.py @@ -37,6 +37,7 @@ else: from apache_beam.ml.gcp.cloud_dlp import MaskDetectedDetails from apache_beam.ml.gcp.cloud_dlp import _DeidentifyFn from apache_beam.ml.gcp.cloud_dlp import _InspectFn + from google.cloud.dlp_v2.types import dlp # pylint: enable=wrong-import-order, wrong-import-position, ungrouped-imports _LOGGER = logging.getLogger(__name__) @@ -56,6 +57,9 @@ class TestDeidentifyFn(unittest.TestCase): def test_deidentify_called(self): class ClientMock(object): def deidentify_content(self, *args, **kwargs): +# Check that we can marshal a valid request. +dlp.DeidentifyContentRequest(kwargs['request']) +
[beam] branch master updated: [BEAM-13434] Bump google pubsublite on master. (#16265)
This is an automated email from the ASF dual-hosted git repository. tvalentyn pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new b150ace [BEAM-13434] Bump google pubsublite on master. (#16265) b150ace is described below commit b150ace0884c88bc93da21f6dfe3b7684f886e94 Author: tvalentyn AuthorDate: Thu Dec 16 20:07:44 2021 -0800 [BEAM-13434] Bump google pubsublite on master. (#16265) --- .../src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy| 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index 5f1a2a0..786a048 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -452,7 +452,7 @@ class BeamModulePlugin implements Plugin { def errorprone_version = "2.3.4" def google_clients_version = "1.32.1" def google_cloud_bigdataoss_version = "2.2.4" -def google_cloud_pubsublite_version = "1.4.5" +def google_cloud_pubsublite_version = "1.4.6" def google_code_gson_version = "2.8.9" def google_oauth_clients_version = "1.32.1" // Try to keep grpc_version consistent with gRPC version in google_cloud_platform_libraries_bom @@ -535,7 +535,7 @@ class BeamModulePlugin implements Plugin { commons_lang3 : "org.apache.commons:commons-lang3:3.9", commons_math3 : "org.apache.commons:commons-math3:3.6.1", error_prone_annotations : "com.google.errorprone:error_prone_annotations:$errorprone_version", -flogger_system_backend : "com.google.flogger:flogger-system-backend:0.7.2", +flogger_system_backend : "com.google.flogger:flogger-system-backend:0.7.3", gax : "com.google.api:gax", // google_cloud_platform_libraries_bom sets version gax_grpc: "com.google.api:gax-grpc", // 
google_cloud_platform_libraries_bom sets version gax_httpjson: "com.google.api:gax-httpjson", // google_cloud_platform_libraries_bom sets version
[beam] branch nightly-refs/heads/master updated (112b3cd -> a4cca44)
This is an automated email from the ASF dual-hosted git repository. github-bot pushed a change to branch nightly-refs/heads/master in repository https://gitbox.apache.org/repos/asf/beam.git. from 112b3cd [BEAM-13399] Add infrastructure to start JARs from Go functions (#16214) add dcd984d [BEAM-12164] Add Spanner Change Stream DAOs add 759da41 Merge pull request #16124 from [BEAM-12164] Add Spanner Change Stream DAOs add 4835700 Merge pull request #16061 from [BEAM-13428] [Playground] Integrate Google Analytics add 75fe1a6 Clarify CoGroupByKey creates Iterable, not list. (#16099) add 1504892 [BEAM-12931] Allow for DoFn#getAllowedTimestampSkew() when checking the output timestamp add e83b6f1 [BEAM-13467] Properly handle null argument types for logical types. (#16249) add 673507a [BEAM-10277] Initial implementation for encoding position in Python RowCoder (#15410) add 251dd0c [BEAM-11545] State & timer for batched RPC calls pattern (#13643) add 38dcb9e Automatically prune local images before building an RC. (#16238) add a09a8ec Add verbose error messages to container-related scripts. (#16056) add bb1104a [BEAM-13456] Rollback #15890 to fix timeout in Java PostCommit (#16257) add a4cca44 [BEAM-13015] Add a state backed iterable that can be mutated under certain circumstances. (#16252) No new revisions were added by this update. 
Summary of changes: CHANGES.md | 1 + .../org/apache/beam/gradle/BeamModulePlugin.groovy | 6 + .../apache/beam/examples/snippets/Snippets.java| 8 - .../beam/model/fnexecution/v1/standard_coders.yaml | 33 ++ playground/frontend/analysis_options.yaml | 2 +- playground/frontend/build.gradle | 2 + playground/frontend/gradle.properties | 1 + .../toggle_theme_button/toggle_theme_button.dart | 7 +- playground/frontend/lib/config.g.dart | 1 + playground/frontend/lib/constants/colors.dart | 2 + .../frontend/lib/constants/links.dart | 21 +- .../actions/components/new_example_action.dart | 6 +- .../modules/actions/components/reset_action.dart | 6 +- .../lib/modules/analytics/analytics_events.dart| 32 +- .../lib/modules/analytics/analytics_service.dart | 114 + .../example_list/expansion_panel_item.dart | 2 + .../notifications/components/notification.dart | 1 - .../components/editor_textarea_wrapper.dart| 7 +- .../pages/playground/components/more_actions.dart | 37 +- .../playground/components/playground_feedback.dart | 11 +- .../components/playground_page_footer.dart | 23 +- .../components/playground_page_providers.dart | 2 + .../components/playground_privacy_policy.dart | 85 .../lib/pages/playground/playground_page.dart | 7 +- playground/frontend/pubspec.lock | 7 + playground/frontend/pubspec.yaml | 1 + playground/frontend/web/index.html | 8 +- .../src/main/scripts/build_release_candidate.sh| 2 + .../core/construction/PTransformMatchersTest.java | 21 + .../apache/beam/runners/core/SimpleDoFnRunner.java | 92 +++- .../beam/runners/core/SimpleDoFnRunnerTest.java| 355 ++- .../beam/runners/direct/NanosOffsetClock.java | 6 +- .../runners/direct/TransformEvaluatorRegistry.java | 16 + .../runners/direct/UnboundedReadDeduplicator.java | 3 +- .../runners/direct/CloningBundleFactoryTest.java | 67 +++ .../flink/FlinkStreamingPipelineTranslator.java| 4 +- .../flink/FlinkStreamingTransformTranslators.java | 17 + .../wrappers/streaming/DoFnOperator.java | 2 +- 
.../state/FlinkBroadcastStateInternals.java| 131 ++ .../streaming/ExecutableStageDoFnOperatorTest.java | 1 - .../beam/runners/dataflow/DataflowRunner.java | 110 + .../runners/dataflow/DataflowPipelineJobTest.java | 25 ++ .../beam/runners/dataflow/worker/ReaderCache.java | 3 +- .../beam/runners/dataflow/worker/StateFetcher.java | 4 +- .../dataflow/worker/StreamingDataflowWorker.java | 2 +- .../fn/data/RemoteGrpcPortWriteOperation.java | 6 +- .../common/worker/CachingShuffleBatchReader.java | 4 +- .../control/DefaultJobBundleFactory.java | 16 +- .../fnexecution/control/RemoteExecutionTest.java | 2 + .../beam/runners/spark/io/MicrobatchSource.java| 2 +- .../translation/utils/SideInputStorage.java| 4 +- .../runners/spark/util/GlobalWatermarkHolder.java | 4 +- .../beam/runners/spark/util/SideInputStorage.java | 4 +- .../go/test/regression/coders/fromyaml/fromyaml.go | 15 +- .../src/main/java/org/apache/beam/sdk/io/Read.java | 3 +- .../org/apache/beam/sdk/schemas/SchemaCoder.java | 20 + .../apache/beam/sdk/schemas/SchemaTranslation.java | 13 +- .../apache/beam/sdk/values/PCollectionViews.java |
[beam] branch master updated (a4cca44 -> 4f2bbff)
This is an automated email from the ASF dual-hosted git repository. ibzib pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from a4cca44 [BEAM-13015] Add a state backed iterable that can be mutated under certain circumstances. (#16252) add 4f2bbff [BEAM-13388] Update Cloud DLP after breaking changes. (#16236) No new revisions were added by this update. Summary of changes: sdks/python/apache_beam/ml/gcp/cloud_dlp.py | 22 ++-- sdks/python/apache_beam/ml/gcp/cloud_dlp_test.py | 33 +++- 2 files changed, 35 insertions(+), 20 deletions(-)
[beam] branch release-2.35.0 updated: [BEAM-13434] Upgrade to pubsublite 1.4.0.2 on the release branch. (#16264)
This is an automated email from the ASF dual-hosted git repository. tvalentyn pushed a commit to branch release-2.35.0 in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/release-2.35.0 by this push: new 46e825a [BEAM-13434] Upgrade to pubsublite 1.4.0.2 on the release branch. (#16264) 46e825a is described below commit 46e825a0de1ac9331002a21d4287aeb250040738 Author: tvalentyn AuthorDate: Thu Dec 16 18:10:12 2021 -0800 [BEAM-13434] Upgrade to pubsublite 1.4.0.2 on the release branch. (#16264) --- .../src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy| 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index 3cc9496..b5ea272 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -452,7 +452,7 @@ class BeamModulePlugin implements Plugin { def errorprone_version = "2.3.4" def google_clients_version = "1.32.1" def google_cloud_bigdataoss_version = "2.2.4" -def google_cloud_pubsublite_version = "1.4.0.1" +def google_cloud_pubsublite_version = "1.4.0.2" def google_code_gson_version = "2.8.9" def google_oauth_clients_version = "1.32.1" // Try to keep grpc_version consistent with gRPC version in google_cloud_platform_libraries_bom @@ -535,7 +535,7 @@ class BeamModulePlugin implements Plugin { commons_lang3 : "org.apache.commons:commons-lang3:3.9", commons_math3 : "org.apache.commons:commons-math3:3.6.1", error_prone_annotations : "com.google.errorprone:error_prone_annotations:$errorprone_version", -flogger_system_backend : "com.google.flogger:flogger-system-backend:0.7.2", +flogger_system_backend : "com.google.flogger:flogger-system-backend:0.7.3", gax : "com.google.api:gax", // google_cloud_platform_libraries_bom sets version gax_grpc: 
"com.google.api:gax-grpc", // google_cloud_platform_libraries_bom sets version gax_httpjson: "com.google.api:gax-httpjson", // google_cloud_platform_libraries_bom sets version
[beam] branch master updated (bb1104a -> a4cca44)
This is an automated email from the ASF dual-hosted git repository. lcwik pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from bb1104a [BEAM-13456] Rollback #15890 to fix timeout in Java PostCommit (#16257) add a4cca44 [BEAM-13015] Add a state backed iterable that can be mutated under certain circumstances. (#16252) No new revisions were added by this update. Summary of changes: .../org/apache/beam/sdk/fn/stream/DataStreams.java | 10 +- .../apache/beam/sdk/fn/stream/DataStreamsTest.java | 44 +- sdks/java/harness/build.gradle | 6 - .../java/org/apache/beam/fn/harness/Cache.java | 28 +- .../java/org/apache/beam/fn/harness/Caches.java| 239 +++ .../fn/harness/control/ProcessBundleHandler.java | 7 +- .../fn/harness/state/StateFetchingIterators.java | 476 - .../org/apache/beam/fn/harness/CachesTest.java | 96 +++-- .../harness/state/StateFetchingIteratorsTest.java | 362 ++-- 9 files changed, 1046 insertions(+), 222 deletions(-)
[beam] branch master updated: [BEAM-13456] Rollback #15890 to fix timeout in Java PostCommit (#16257)
This is an automated email from the ASF dual-hosted git repository. lcwik pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new bb1104a [BEAM-13456] Rollback #15890 to fix timeout in Java PostCommit (#16257) bb1104a is described below commit bb1104a82878d5e3b48210089f6d00e9d460dfea Author: Brian Hulette AuthorDate: Thu Dec 16 13:30:51 2021 -0800 [BEAM-13456] Rollback #15890 to fix timeout in Java PostCommit (#16257) * Revert "[BEAM-11936] Fix errorprone warnings (#15890)" This reverts commit 06a5e67332aae53ea90dedb4ef6421c2a7d65035. * spotless --- .../org/apache/beam/gradle/BeamModulePlugin.groovy | 6 + .../apache/beam/examples/snippets/Snippets.java| 8 -- .../core/construction/PTransformMatchersTest.java | 21 .../beam/runners/direct/NanosOffsetClock.java | 6 +- .../runners/direct/TransformEvaluatorRegistry.java | 16 +++ .../runners/direct/UnboundedReadDeduplicator.java | 3 +- .../runners/direct/CloningBundleFactoryTest.java | 67 +++ .../flink/FlinkStreamingPipelineTranslator.java| 4 +- .../flink/FlinkStreamingTransformTranslators.java | 17 +++ .../wrappers/streaming/DoFnOperator.java | 2 +- .../state/FlinkBroadcastStateInternals.java| 131 + .../streaming/ExecutableStageDoFnOperatorTest.java | 1 - .../beam/runners/dataflow/DataflowRunner.java | 110 + .../runners/dataflow/DataflowPipelineJobTest.java | 25 .../beam/runners/dataflow/worker/ReaderCache.java | 3 +- .../beam/runners/dataflow/worker/StateFetcher.java | 4 +- .../dataflow/worker/StreamingDataflowWorker.java | 2 +- .../fn/data/RemoteGrpcPortWriteOperation.java | 6 +- .../common/worker/CachingShuffleBatchReader.java | 4 +- .../control/DefaultJobBundleFactory.java | 16 +-- .../fnexecution/control/RemoteExecutionTest.java | 2 + .../beam/runners/spark/io/MicrobatchSource.java| 2 +- .../translation/utils/SideInputStorage.java| 4 +- .../runners/spark/util/GlobalWatermarkHolder.java | 4 +- 
.../beam/runners/spark/util/SideInputStorage.java | 4 +- .../src/main/java/org/apache/beam/sdk/io/Read.java | 3 +- .../org/apache/beam/sdk/schemas/SchemaCoder.java | 20 .../apache/beam/sdk/values/PCollectionViews.java | 59 ++ .../apache/beam/sdk/coders/CoderRegistryTest.java | 4 + .../apache/beam/sdk/testing/ExpectedLogsTest.java | 10 +- .../beam/sdk/testing/SystemNanoTimeSleeper.java| 4 +- .../sdk/transforms/reflect/DoFnSignaturesTest.java | 9 ++ .../GrowableOffsetRangeTrackerTest.java| 2 +- .../core/translate/TimestampExtractTransform.java | 8 ++ .../sql/meta/provider/kafka/BeamKafkaTable.java| 3 +- .../org/apache/beam/sdk/fn/CancellableQueue.java | 4 +- .../harness/state/StateFetchingIteratorsTest.java | 2 +- .../bigquery/StorageApiWriteUnshardedRecords.java | 3 +- .../bigquery/StorageApiWritesShardedRecords.java | 3 +- .../internal/LimitingTopicBacklogReader.java | 6 +- .../beam/sdk/io/gcp/spanner/SpannerAccessor.java | 24 .../sdk/io/hadoop/format/TestRowDBWritable.java| 10 ++ .../beam/sdk/io/kafka/KafkaExactlyOnceSink.java| 3 +- .../beam/sdk/io/kafka/KafkaUnboundedReader.java| 4 +- .../org/apache/beam/sdk/io/xml/XmlSourceTest.java | 4 +- 45 files changed, 592 insertions(+), 61 deletions(-) diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index 60d76c9..5f1a2a0 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -1140,12 +1140,18 @@ class BeamModulePlugin implements Plugin { options.errorprone.errorproneArgs.add("-Xep:EqualsGetClass:OFF") options.errorprone.errorproneArgs.add("-Xep:EqualsUnsafeCast:OFF") options.errorprone.errorproneArgs.add("-Xep:ExtendsAutoValue:OFF") + options.errorprone.errorproneArgs.add("-Xep:FloatingPointAssertionWithinEpsilon:OFF") options.errorprone.errorproneArgs.add("-Xep:JavaTimeDefaultTimeZone:OFF") 
+options.errorprone.errorproneArgs.add("-Xep:LockNotBeforeTry:OFF") options.errorprone.errorproneArgs.add("-Xep:MixedMutabilityReturnType:OFF") + options.errorprone.errorproneArgs.add("-Xep:PreferJavaTimeOverload:OFF") +options.errorprone.errorproneArgs.add("-Xep:ModifiedButNotUsed:OFF") options.errorprone.errorproneArgs.add("-Xep:ThreadPriorityCheck:OFF") options.errorprone.errorproneArgs.add("-Xep:TimeUnitConversionChecker:OFF") options.errorprone.errorproneArgs.add("-Xep:UndefinedEquals:OFF") options.errorprone.errorproneArgs.add("
[beam] branch master updated: Add verbose error messages to container-related scripts. (#16056)
This is an automated email from the ASF dual-hosted git repository. tvalentyn pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new a09a8ec Add verbose error messages to container-related scripts. (#16056) a09a8ec is described below commit a09a8ecbcb971c6779b687a517128260471d3a87 Author: tvalentyn AuthorDate: Thu Dec 16 13:22:44 2021 -0800 Add verbose error messages to container-related scripts. (#16056) --- sdks/python/container/Dockerfile | 2 +- sdks/python/container/run_generate_requirements.sh| 19 +++ .../www/site/content/en/contribute/release-guide.md | 19 ++- 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/sdks/python/container/Dockerfile b/sdks/python/container/Dockerfile index b97cb3a..182d7bd 100644 --- a/sdks/python/container/Dockerfile +++ b/sdks/python/container/Dockerfile @@ -69,7 +69,7 @@ RUN ccache --set-config=sloppiness=file_macro && ccache --set-config=hash_dir=fa COPY target/apache-beam.tar.gz /opt/apache/beam/tars/ RUN pip install --no-deps -v /opt/apache/beam/tars/apache-beam.tar.gz[gcp] -RUN pip check +RUN pip check || (echo "Container does not include required Beam dependencies or has conflicting dependencies. If Beam dependencies have changed, you need to regenerate base_image_requirements.txt files. See: https://s.apache.org/beam-python-requirements-generate" && exit 1) # Log complete list of what exact packages and versions are installed. 
RUN pip freeze --all diff --git a/sdks/python/container/run_generate_requirements.sh b/sdks/python/container/run_generate_requirements.sh index 9b533b7..a3b8f61 100755 --- a/sdks/python/container/run_generate_requirements.sh +++ b/sdks/python/container/run_generate_requirements.sh @@ -31,15 +31,26 @@ # https://s.apache.org/beam-python-dev-wiki if [[ $# != 2 ]]; then - printf "Usage: \n$> ./sdks/python/container/run_generate_requirements.sh " - printf "\n\tpython_version: [required] Python version to generate dependencies for." - printf " Use 3.7 for Python3.7, 3.8 for Python3.8 etc." + printf "Example usage: \n$> ./sdks/python/container/run_generate_requirements.sh 3.8 " + printf "\n\twhere 3.8 is the Python major.minor version." + exit 1 fi -set -ex PY_VERSION=$1 SDK_TARBALL=$2 +if ! python$PY_VERSION --version > /dev/null 2>&1 ; then + echo "Please install a python${PY_VERSION} interpreter. See s.apache.org/beam-python-dev-wiki for Python installation tips." + exit 1 +fi + +if ! python$PY_VERSION -m venv --help > /dev/null 2>&1 ; then + echo "Your python${PY_VERSION} installation does not have a required venv module. See s.apache.org/beam-python-dev-wiki for Python installation tips." + exit 1 +fi + +set -ex + ENV_PATH="$PWD/build/python${PY_VERSION/./}_requirements_gen" rm -rf $ENV_PATH 2>/dev/null || true python${PY_VERSION} -m venv $ENV_PATH diff --git a/website/www/site/content/en/contribute/release-guide.md b/website/www/site/content/en/contribute/release-guide.md index 370a572..727c94b 100644 --- a/website/www/site/content/en/contribute/release-guide.md +++ b/website/www/site/content/en/contribute/release-guide.md @@ -233,17 +233,10 @@ If you are not a PMC, please ask for help in dev@ mailing list. ## 3. Update base image dependencies for Python container images -1. Check the versions specified in sdks/python/container/base_image_requirements_manual.txt` and update them if necessary. -1. 
Regenerate full dependency list by running: -`./gradlew :sdks:python:container:generatePythonRequirementsAll` and commiting -the changes. Exectution takes about ~5 min per Python version and is somewhat resource-demanding. -You can also regenerate the dependencies indiviually per version with targets like `./gradlew :sdks:python:container:py38:generatePythonRequirements`. +See instructions at: https://s.apache.org/beam-python-requirements-generate - -Ideally, do this at least a week before the release cut, so that any issues +Ideally, do the update at least a week before the release cut, so that any issues related to the update have time to surface. -You will need Python intepreters for all versions supported by Beam, see: -https://s.apache.org/beam-python-dev-wiki for tips how to install them. ## 4. Investigate performance regressions @@ -589,12 +582,12 @@ See the source of the script for more details, or to run commands manually in ca Please note that dependencies for the SDKs with different Python versions vary. Need to verify all Python images by replacing `${ver}` with each supported Python version `X.Y`. ``` - docker run -it --entrypoint=/bin/bash apache/beam_python${ver}_sdk:${RELEASE}_rc{RC_NUM} + docker run --rm -it --entrypoint=/bin/bash apache/beam_python${ver}_sdk:${RELEASE}_rc{RC_NUM} ls -al /opt/apache/
[beam] branch master updated: Automatically prune local images before building an RC. (#16238)
This is an automated email from the ASF dual-hosted git repository. tvalentyn pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new 38dcb9e Automatically prune local images before building an RC. (#16238) 38dcb9e is described below commit 38dcb9e13eac610a629cf05fa7f1ff08e5795e72 Author: tvalentyn AuthorDate: Thu Dec 16 12:07:37 2021 -0800 Automatically prune local images before building an RC. (#16238) --- release/src/main/scripts/build_release_candidate.sh | 2 ++ website/www/site/content/en/contribute/release-guide.md | 4 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/release/src/main/scripts/build_release_candidate.sh b/release/src/main/scripts/build_release_candidate.sh index 8e60ea6..0c8e7ae 100755 --- a/release/src/main/scripts/build_release_candidate.sh +++ b/release/src/main/scripts/build_release_candidate.sh @@ -317,9 +317,11 @@ if [[ $confirmation = "y" ]]; then fi echo "[Current Step]: Stage docker images" +echo "Note: this step will also prune your local docker image and container cache." echo "Do you want to proceed? 
[y|N]" read confirmation if [[ $confirmation = "y" ]]; then + docker system prune -a -f echo "Staging SDK docker images on docker hub=" cd ~ wipe_local_clone_dir diff --git a/website/www/site/content/en/contribute/release-guide.md b/website/www/site/content/en/contribute/release-guide.md index 9896640..370a572 100644 --- a/website/www/site/content/en/contribute/release-guide.md +++ b/website/www/site/content/en/contribute/release-guide.md @@ -561,10 +561,6 @@ See the source of the script for more details, or to run commands manually in ca ### Run build_release_candidate.sh to create a release candidate -Before you start, run this command to make sure you'll be using the latest docker images: - - docker system prune -a - * **Script:** [build_release_candidate.sh](https://github.com/apache/beam/blob/master/release/src/main/scripts/build_release_candidate.sh) * **Usage**
[beam] branch master updated: [BEAM-11545] State & timer for batched RPC calls pattern (#13643)
This is an automated email from the ASF dual-hosted git repository. mbae pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new 251dd0c [BEAM-11545] State & timer for batched RPC calls pattern (#13643) 251dd0c is described below commit 251dd0c0f0eccf83ed7346e385981180d562e1b1 Author: Matthias Baetens AuthorDate: Thu Dec 16 20:08:53 2021 +0100 [BEAM-11545] State & timer for batched RPC calls pattern (#13643) * [BEAM-11545] State & timer for batched RPC calls pattern --- ...lements-for-efficient-external-service-calls.md | 56 ++ .../content/en/documentation/patterns/overview.md | 3 ++ .../partials/section-menu/en/documentation.html| 1 + 3 files changed, 60 insertions(+) diff --git a/website/www/site/content/en/documentation/patterns/grouping-elements-for-efficient-external-service-calls.md b/website/www/site/content/en/documentation/patterns/grouping-elements-for-efficient-external-service-calls.md new file mode 100644 index 000..9409e83 --- /dev/null +++ b/website/www/site/content/en/documentation/patterns/grouping-elements-for-efficient-external-service-calls.md @@ -0,0 +1,56 @@ +--- +title: "Pattern for grouping elements for efficient external service calls" +--- + + + +# Grouping elements for efficient external service calls using the `GroupIntoBatches`-transform + +{{< language-switcher java py >}} + +Usually, authoring an Apache Beam pipeline can be done with out-of-the-box tools and transforms like _ParDo_'s, _Window_'s and _GroupByKey_'s. However, when you want more tight control, you can keep state in an otherwise stateless _DoFn_. + +State is kept on a per-key and per-windows basis, and as such, the input to your stateful DoFn needs to be keyed (e.g. by the customer identifier if you're tracking clicks from an e-commerce website). 
+ +Examples of use cases are: assigning a unique ID to each element, joining streams of data in 'more exotic' ways, or batching up API calls to external services. In this section we'll go over the last one in particular. + +Make sure to check the [docs](https://beam.apache.org/documentation/programming-guide/#state-and-timers) for deeper understanding on state and timers. + +The `GroupIntoBatches`-transform uses state and timers under the hood to allow the user to exercise tight control over the following parameters: + +- `maxBufferDuration`: limits the amount of waitingtime for a batch to be emitted. +- `batchSize`: limits the number of elements in one batch. +- `batchSizeBytes`: (in Java only) limits the bytesize of one batch (using input coder to determine elementsize). +- `elementByteSize`: (in Java only) limits the bytesize of one batch (using a user defined function to determine elementsize). + +while abstracting away the implementation details from users. + +The `withShardedKey()` functionality increases parallellism by spreading one key over multiple threads. + +The transforms are used in the following way in Java & Python: + +{{< highlight java >}} +input.apply( + "Batch Contents", + GroupIntoBatches.ofSize(batchSize) + .withMaxBufferingDuration(maxBufferingDuration) + .withShardedKey()) +{{< /highlight >}} + +{{< highlight py >}} +input | GroupIntoBatches.WithShardedKey(batchSize, maxBufferingDuration) +{{< /highlight >}} + +Applying these transforms will output groups of elements in a batch on a per-key basis, which you can then use to call an external API in bulk rather than on a per-element basis, resulting in a lower overhead in your pipeline. 
diff --git a/website/www/site/content/en/documentation/patterns/overview.md b/website/www/site/content/en/documentation/patterns/overview.md index b13c5d4..c5e6084 100644 --- a/website/www/site/content/en/documentation/patterns/overview.md +++ b/website/www/site/content/en/documentation/patterns/overview.md @@ -51,6 +51,9 @@ Pipeline patterns demonstrate common Beam use cases. Pipeline patterns are based **Cross-language patterns** - Patterns for creating cross-language pipelines * [Cross-language patterns](/documentation/patterns/cross-language/#cross-language-transforms) +**State & timers patterns** - Patterns for using state & timers +* [Grouping elements for efficient external service calls](/documentation/patterns/grouping-elements-for-efficient-external-service-calls/#grouping-elements-for-efficient-external-service-calls-using-the-`GroupIntoBatches`-transform) + ## Contributing a pattern To contribute a new pipeline pattern, create an issue with the [`pipeline-patterns` label](https://issues.apache.org/jira/browse/BEAM-7449?jql=labels%20%3D%20pipeline-patterns) and add details to the issue description. See [Get started contributing](/contribute/) for more information. diff --git a/website/www/site/layouts/partials/section-menu/en/documentation.html b/website/ww
[beam] branch master updated: [BEAM-10277] Initial implementation for encoding position in Python RowCoder (#15410)
This is an automated email from the ASF dual-hosted git repository. bhulette pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new 673507a [BEAM-10277] Initial implementation for encoding position in Python RowCoder (#15410) 673507a is described below commit 673507a3c541715dc7c62ba073e4195d01be0899 Author: AlikRodriguez <74626882+alikrodrig...@users.noreply.github.com> AuthorDate: Thu Dec 16 13:01:08 2021 -0600 [BEAM-10277] Initial implementation for encoding position in Python RowCoder (#15410) * add test to encoding position * respect encoding position * enforcing encoding postion on trivial test * add coder_yaml test * fix types for go fromyaml_test * skip unimplemented contents * encoding position from init * revert encoding position in schemas * added payload id on testcase inyaml file * change go test exeption in coders yaml * change helper methods to coders test * add condition to check encoding_position_set * fix pylint * encoding position in code decode with argsort * check encoding position start at 0 and has no duplicates * fix test, precompute argsort encoding position --- .../beam/model/fnexecution/v1/standard_coders.yaml | 33 ++ .../go/test/regression/coders/fromyaml/fromyaml.go | 15 +++-- sdks/python/apache_beam/coders/row_coder.py| 39 +--- sdks/python/apache_beam/coders/row_coder_test.py | 73 ++ 4 files changed, 147 insertions(+), 13 deletions(-) diff --git a/model/fn-execution/src/main/resources/org/apache/beam/model/fnexecution/v1/standard_coders.yaml b/model/fn-execution/src/main/resources/org/apache/beam/model/fnexecution/v1/standard_coders.yaml index ef37772..df119ba 100644 --- a/model/fn-execution/src/main/resources/org/apache/beam/model/fnexecution/v1/standard_coders.yaml +++ b/model/fn-execution/src/main/resources/org/apache/beam/model/fnexecution/v1/standard_coders.yaml @@ -410,6 +410,39 @@ examples: 
"\x01\x00\x00\x00\x00\x04\neverything\x00\x02is\x00\x05null!\x00\r\xc2\xaf\\_(\xe3\x83\x84)_/\xc2\xaf\x00": {f_map: {"everything": null, "is": null, "null!": null, "¯\\_(ツ)_/¯": null}} --- +# Binary data generated with the python SDK: +# schema1 = schema_pb2.Schema( +# id="30ea5a25-dcd8-4cdb-abeb-5332d15ab4b9", +# fields=[ +# schema_pb2.Field( +# name="str", +# type=schema_pb2.FieldType(atomic_type=schema_pb2.STRING), +# encoding_position=1), +# schema_pb2.Field( +# name="f_bool", +# type=schema_pb2.FieldType(atomic_type=schema_pb2.BOOLEAN), +# encoding_position=2), +# schema_pb2.Field( +# name="i32", +# type=schema_pb2.FieldType( +# atomic_type=schema_pb2.INT32, nullable=True), +# encoding_position=0) +# ], +# encoding_positions_set=True) +# +# coder = RowCoder(schema1) +# c = coder.schema.SerializeToString() +# print("payload = %s" % c) +# test = typing.NamedTuple("test", [ ("f_bool", bool), ("i32", np.int32), ("str", str) ]) +# example = coder.encode(test(False,21,"str2")) +# print("example = %s" % example) +coder: + urn: "beam:coder:row:v1" + payload: "\n\x0b\n\x03str\x1a\x02\x10\x07(\x01\n\x0e\n\x06f_bool\x1a\x02\x10\x08(\x02\n\x0b\n\x03i32\x1a\x04\x08\x01\x10\x03\x12$30ea5a25-dcd8-4cdb-abeb-5332d15ab4b9 \x01" +examples: + "\x03\x00\x15\x04str2\x00": {f_bool: False, i32: 21, str: "str2"} + +--- coder: urn: "beam:coder:row:v1" diff --git a/sdks/go/test/regression/coders/fromyaml/fromyaml.go b/sdks/go/test/regression/coders/fromyaml/fromyaml.go index 337c48e..9147834 100644 --- a/sdks/go/test/regression/coders/fromyaml/fromyaml.go +++ b/sdks/go/test/regression/coders/fromyaml/fromyaml.go @@ -48,6 +48,11 @@ var unimplementedCoders = map[string]bool{ "beam:coder:custom_window:v1":true, } +var filteredCases = []struct{ filter, reason string }{ + {"logical", "BEAM-9615: Support logical types"}, + {"30ea5a25-dcd8-4cdb-abeb-5332d15ab4b9", "BEAM-13043: Support encoding position."}, +} + // Coder is a representation a serialized beam coder. 
type Coder struct { Urn string `yaml:"urn,omitempty"` @@ -83,11 +88,13 @@ func (s *Spec) testStandardCoder() (err error) { log.Printf("skipping unimplemented coder urn: %v", s.Coder.Urn) return nil } - // TODO(BEAM-9615): Support Logical types, and produce a better error message. - if strings.Contains(s.Coder.Payload, "logical") { - log.Printf("skipping coder with logical type. Unsupported in the Go SDK for now. Payload: %v", s.Coder.Payload) - return nil + for _, c := range filteredCases { + if strings.Contains(s.Coder.Payload, c.filter) { +
[beam] branch master updated (1504892 -> e83b6f1)
This is an automated email from the ASF dual-hosted git repository. bhulette pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from 1504892 [BEAM-12931] Allow for DoFn#getAllowedTimestampSkew() when checking the output timestamp add e83b6f1 [BEAM-13467] Properly handle null argument types for logical types. (#16249) No new revisions were added by this update. Summary of changes: .../apache/beam/sdk/schemas/SchemaTranslation.java | 13 --- .../beam/sdk/schemas}/SchemaTranslationTest.java | 40 -- 2 files changed, 46 insertions(+), 7 deletions(-) rename {runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction => sdks/java/core/src/test/java/org/apache/beam/sdk/schemas}/SchemaTranslationTest.java (93%)
[beam] branch master updated (75fe1a6 -> 1504892)
This is an automated email from the ASF dual-hosted git repository. lcwik pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from 75fe1a6 Clarify CoGroupByKey creates Iterable, not list. (#16099) add 1504892 [BEAM-12931] Allow for DoFn#getAllowedTimestampSkew() when checking the output timestamp No new revisions were added by this update. Summary of changes: CHANGES.md | 1 + .../apache/beam/runners/core/SimpleDoFnRunner.java | 92 +- .../beam/runners/core/SimpleDoFnRunnerTest.java| 355 +++-- .../org/apache/beam/sdk/transforms/ParDoTest.java | 256 ++- .../apache/beam/fn/harness/FnApiDoFnRunner.java| 91 +- .../beam/fn/harness/FnApiDoFnRunnerTest.java | 146 + 6 files changed, 887 insertions(+), 54 deletions(-)
[beam] branch master updated (4835700 -> 75fe1a6)
This is an automated email from the ASF dual-hosted git repository. bhulette pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from 4835700 Merge pull request #16061 from [BEAM-13428] [Playground] Integrate Google Analytics add 75fe1a6 Clarify CoGroupByKey creates Iterable, not list. (#16099) No new revisions were added by this update. Summary of changes: sdks/python/apache_beam/transforms/util.py | 16 +++- 1 file changed, 11 insertions(+), 5 deletions(-)
[beam] branch master updated (759da41 -> 4835700)
This is an automated email from the ASF dual-hosted git repository. pabloem pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from 759da41 Merge pull request #16124 from [BEAM-12164] Add Spanner Change Stream DAOs add 4835700 Merge pull request #16061 from [BEAM-13428] [Playground] Integrate Google Analytics No new revisions were added by this update. Summary of changes: playground/frontend/analysis_options.yaml | 2 +- playground/frontend/build.gradle | 2 + playground/frontend/gradle.properties | 1 + .../toggle_theme_button/toggle_theme_button.dart | 7 +- playground/frontend/lib/config.g.dart | 1 + playground/frontend/lib/constants/colors.dart | 2 + .../frontend/lib/constants/links.dart | 21 ++-- .../actions/components/new_example_action.dart | 6 +- .../modules/actions/components/reset_action.dart | 6 +- .../lib/modules/analytics/analytics_events.dart| 32 +++--- .../lib/modules/analytics/analytics_service.dart | 114 + .../example_list/expansion_panel_item.dart | 2 + .../notifications/components/notification.dart | 1 - .../components/editor_textarea_wrapper.dart| 7 +- .../pages/playground/components/more_actions.dart | 37 --- .../playground/components/playground_feedback.dart | 11 +- .../components/playground_page_footer.dart | 23 +++-- .../components/playground_page_providers.dart | 2 + .../components/playground_privacy_policy.dart | 85 +++ .../lib/pages/playground/playground_page.dart | 7 +- playground/frontend/pubspec.lock | 7 ++ playground/frontend/pubspec.yaml | 1 + playground/frontend/web/index.html | 8 +- 23 files changed, 316 insertions(+), 69 deletions(-) copy .test-infra/jenkins/NoPhraseTriggeringPostCommitBuilder.groovy => playground/frontend/lib/constants/links.dart (61%) copy sdks/java/harness/src/main/java/org/apache/beam/fn/harness/logging/BeamFnLoggingMDC.java => playground/frontend/lib/modules/analytics/analytics_events.dart (57%) create mode 100644 
playground/frontend/lib/modules/analytics/analytics_service.dart create mode 100644 playground/frontend/lib/pages/playground/components/playground_privacy_policy.dart