[beam] branch master updated (5cb634e -> 0b9fb71)
This is an automated email from the ASF dual-hosted git repository. heejong pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from 5cb634e [BEAM-13098] Fix translation of repeated TableRow fields (#15779) new 9174d17 [BEAM-12978] Customizable dependency for Java external transform new 551c6d1 add ConfigT param to getDependencies method new 0b9fb71 Merge pull request #15606 from ihji/BEAM-12978 The 6 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .../sdk/transforms/ExternalTransformBuilder.java | 11 +++ .../sdk/expansion/service/ExpansionService.java| 85 +++--- 2 files changed, 68 insertions(+), 28 deletions(-)
svn commit: r50639 - /dev/beam/2.34.0/python/
Author: ibzib Date: Wed Oct 27 03:53:37 2021 New Revision: 50639 Log: Staging Python artifacts for Apache Beam 2.34.0 RC1 Added: dev/beam/2.34.0/python/ dev/beam/2.34.0/python/apache-beam-2.34.0.zip (with props) dev/beam/2.34.0/python/apache-beam-2.34.0.zip.asc dev/beam/2.34.0/python/apache-beam-2.34.0.zip.sha512 dev/beam/2.34.0/python/apache_beam-2.34.0-cp36-cp36m-macosx_10_9_x86_64.whl (with props) dev/beam/2.34.0/python/apache_beam-2.34.0-cp36-cp36m-macosx_10_9_x86_64.whl.asc dev/beam/2.34.0/python/apache_beam-2.34.0-cp36-cp36m-macosx_10_9_x86_64.whl.sha512 dev/beam/2.34.0/python/apache_beam-2.34.0-cp36-cp36m-manylinux1_i686.whl (with props) dev/beam/2.34.0/python/apache_beam-2.34.0-cp36-cp36m-manylinux1_i686.whl.asc dev/beam/2.34.0/python/apache_beam-2.34.0-cp36-cp36m-manylinux1_i686.whl.sha512 dev/beam/2.34.0/python/apache_beam-2.34.0-cp36-cp36m-manylinux1_x86_64.whl (with props) dev/beam/2.34.0/python/apache_beam-2.34.0-cp36-cp36m-manylinux1_x86_64.whl.asc dev/beam/2.34.0/python/apache_beam-2.34.0-cp36-cp36m-manylinux1_x86_64.whl.sha512 dev/beam/2.34.0/python/apache_beam-2.34.0-cp36-cp36m-manylinux2010_i686.whl (with props) dev/beam/2.34.0/python/apache_beam-2.34.0-cp36-cp36m-manylinux2010_i686.whl.asc dev/beam/2.34.0/python/apache_beam-2.34.0-cp36-cp36m-manylinux2010_i686.whl.sha512 dev/beam/2.34.0/python/apache_beam-2.34.0-cp36-cp36m-manylinux2010_x86_64.whl (with props) dev/beam/2.34.0/python/apache_beam-2.34.0-cp36-cp36m-manylinux2010_x86_64.whl.asc dev/beam/2.34.0/python/apache_beam-2.34.0-cp36-cp36m-manylinux2010_x86_64.whl.sha512 dev/beam/2.34.0/python/apache_beam-2.34.0-cp36-cp36m-manylinux2014_aarch64.whl (with props) dev/beam/2.34.0/python/apache_beam-2.34.0-cp36-cp36m-manylinux2014_aarch64.whl.asc dev/beam/2.34.0/python/apache_beam-2.34.0-cp36-cp36m-manylinux2014_aarch64.whl.sha512 dev/beam/2.34.0/python/apache_beam-2.34.0-cp36-cp36m-win32.whl (with props) dev/beam/2.34.0/python/apache_beam-2.34.0-cp36-cp36m-win32.whl.asc 
dev/beam/2.34.0/python/apache_beam-2.34.0-cp36-cp36m-win32.whl.sha512 dev/beam/2.34.0/python/apache_beam-2.34.0-cp36-cp36m-win_amd64.whl (with props) dev/beam/2.34.0/python/apache_beam-2.34.0-cp36-cp36m-win_amd64.whl.asc dev/beam/2.34.0/python/apache_beam-2.34.0-cp36-cp36m-win_amd64.whl.sha512 dev/beam/2.34.0/python/apache_beam-2.34.0-cp37-cp37m-macosx_10_9_x86_64.whl (with props) dev/beam/2.34.0/python/apache_beam-2.34.0-cp37-cp37m-macosx_10_9_x86_64.whl.asc dev/beam/2.34.0/python/apache_beam-2.34.0-cp37-cp37m-macosx_10_9_x86_64.whl.sha512 dev/beam/2.34.0/python/apache_beam-2.34.0-cp37-cp37m-manylinux1_i686.whl (with props) dev/beam/2.34.0/python/apache_beam-2.34.0-cp37-cp37m-manylinux1_i686.whl.asc dev/beam/2.34.0/python/apache_beam-2.34.0-cp37-cp37m-manylinux1_i686.whl.sha512 dev/beam/2.34.0/python/apache_beam-2.34.0-cp37-cp37m-manylinux1_x86_64.whl (with props) dev/beam/2.34.0/python/apache_beam-2.34.0-cp37-cp37m-manylinux1_x86_64.whl.asc dev/beam/2.34.0/python/apache_beam-2.34.0-cp37-cp37m-manylinux1_x86_64.whl.sha512 dev/beam/2.34.0/python/apache_beam-2.34.0-cp37-cp37m-manylinux2010_i686.whl (with props) dev/beam/2.34.0/python/apache_beam-2.34.0-cp37-cp37m-manylinux2010_i686.whl.asc dev/beam/2.34.0/python/apache_beam-2.34.0-cp37-cp37m-manylinux2010_i686.whl.sha512 dev/beam/2.34.0/python/apache_beam-2.34.0-cp37-cp37m-manylinux2010_x86_64.whl (with props) dev/beam/2.34.0/python/apache_beam-2.34.0-cp37-cp37m-manylinux2010_x86_64.whl.asc dev/beam/2.34.0/python/apache_beam-2.34.0-cp37-cp37m-manylinux2010_x86_64.whl.sha512 dev/beam/2.34.0/python/apache_beam-2.34.0-cp37-cp37m-manylinux2014_aarch64.whl (with props) dev/beam/2.34.0/python/apache_beam-2.34.0-cp37-cp37m-manylinux2014_aarch64.whl.asc dev/beam/2.34.0/python/apache_beam-2.34.0-cp37-cp37m-manylinux2014_aarch64.whl.sha512 dev/beam/2.34.0/python/apache_beam-2.34.0-cp37-cp37m-win32.whl (with props) dev/beam/2.34.0/python/apache_beam-2.34.0-cp37-cp37m-win32.whl.asc 
dev/beam/2.34.0/python/apache_beam-2.34.0-cp37-cp37m-win32.whl.sha512 dev/beam/2.34.0/python/apache_beam-2.34.0-cp37-cp37m-win_amd64.whl (with props) dev/beam/2.34.0/python/apache_beam-2.34.0-cp37-cp37m-win_amd64.whl.asc dev/beam/2.34.0/python/apache_beam-2.34.0-cp37-cp37m-win_amd64.whl.sha512 dev/beam/2.34.0/python/apache_beam-2.34.0-cp38-cp38-macosx_10_9_x86_64.whl (with props) dev/beam/2.34.0/python/apache_beam-2.34.0-cp38-cp38-macosx_10_9_x86_64.whl.asc dev/beam/2.34.0/python/apache_beam-2.34.0-cp38-cp38-macosx_10_9_x86_64.whl.sha512 dev/beam/2.34.0/python/apache_beam-2.34.0-cp38-cp38-manylinux1_i686.whl (with props) dev/beam/2.34.0/python/apache_beam-2.34.0-cp38-cp38-manylinux1_i686.whl.asc
[beam] tag nightly-master updated (fd4eb15 -> 5cb634e)
This is an automated email from the ASF dual-hosted git repository. github-bot pushed a change to tag nightly-master in repository https://gitbox.apache.org/repos/asf/beam.git. *** WARNING: tag nightly-master was modified! *** from fd4eb15 (commit) to 5cb634e (commit) from fd4eb15 [BEAM-12522] Enable side inputs on all splittable DoFn execution time related methods. add 49ae709 Merge pull request #15744 from [BEAM-13072][Playground] Executor builder add 136eadc [BEAM-13104] ParquetIO: SplitReadFn must read the whole block add 46c649a Merge pull request #15789 from aromanenko-dev/BEAM-13104-ParquetIO-filter add e096daf [BEAM-6721] Set numShards dynamically for TextIO.write() add 84e24ea Merge pull request #15500 from [BEAM-6721] Set numShards dynamically for TextIO.write() add 62b33a5 Change sql.Options to an interface under sqlx. (#15790) add 95d1f9a [BEAM-4149] Ensure that we always provide and require the worker id. add a9ac63b [BEAM-11758] Update basics page: Splittable DoFn add 0fb4170 Merge branch 'master' into concepts4 add cbce59f Address review feedback add 63e21b1 [BEAM-11758] Update basics page: Splittable DoFn add 5cb634e [BEAM-13098] Fix translation of repeated TableRow fields (#15779) No new revisions were added by this update. 
Summary of changes: playground/backend/internal/executors/executor.go | 97 -- .../backend/internal/executors/executor_builder.go | 151 +++ .../backend/internal/executors/executor_test.go| 207 + playground/backend/internal/executors/go_helper.go | 29 --- .../backend/internal/executors/java_helper.go | 69 --- .../backend/internal/executors/java_helper_test.go | 76 .../backend/internal/fs_tool/path_checker.go | 5 +- .../backend/internal/validators/java_validators.go | 33 ++-- .../backend/internal/validators/validator.go | 7 +- .../worker/fn/BeamFnControlServiceTest.java| 25 ++- .../worker/fn/data/BeamFnDataGrpcServiceTest.java | 87 +++-- .../fn/logging/BeamFnLoggingServiceTest.java | 48 +++-- .../control/FnApiControlClientPoolService.java | 6 - .../environment/EmbeddedEnvironmentFactory.java| 3 +- .../provisioning/StaticGrpcProvisionService.java | 3 +- .../runners/fnexecution/EmbeddedSdkHarness.java| 4 +- .../control/FnApiControlClientPoolServiceTest.java | 54 -- .../fnexecution/control/RemoteExecutionTest.java | 7 +- .../StaticGrpcProvisionServiceTest.java| 5 +- .../status/BeamWorkerStatusGrpcServiceTest.java| 2 +- sdks/go/pkg/beam/transforms/sql/sql.go | 41 ++-- sdks/go/pkg/beam/transforms/sql/sqlx/sqlx.go | 8 + .../main/java/org/apache/beam/sdk/io/TextIO.java | 25 ++- .../java/org/apache/beam/sdk/io/WriteFiles.java| 3 +- .../sdk/fn/channel}/AddHarnessIdInterceptor.java | 2 +- .../server/GrpcContextHeaderAccessorProvider.java | 11 +- .../beam/fn/harness/ProcessBundleBenchmark.java| 7 +- .../java/org/apache/beam/fn/harness/FnHarness.java | 7 +- .../io/gcp/bigquery/TableRowToStorageApiProto.java | 18 +- .../bigquery/TableRowToStorageApiProtoTest.java| 68 ++- .../org/apache/beam/sdk/io/parquet/ParquetIO.java | 6 +- .../apache/beam/sdk/io/parquet/ParquetIOTest.java | 3 +- .../www/site/content/en/documentation/basics.md| 44 + 33 files changed, 716 insertions(+), 445 deletions(-) create mode 100644 playground/backend/internal/executors/executor_builder.go create mode 
100644 playground/backend/internal/executors/executor_test.go delete mode 100644 playground/backend/internal/executors/go_helper.go delete mode 100644 playground/backend/internal/executors/java_helper.go delete mode 100644 playground/backend/internal/executors/java_helper_test.go copy learning/katas/go/io/textio/read/testdata/path.go => playground/backend/internal/validators/java_validators.go (61%) copy sdks/go/pkg/beam/core/runtime/graphx/v1/gen.go => playground/backend/internal/validators/validator.go (87%) rename sdks/java/{harness/src/main/java/org/apache/beam/fn/harness/control => fn-execution/src/main/java/org/apache/beam/sdk/fn/channel}/AddHarnessIdInterceptor.java (97%)
svn commit: r50638 - in /dev/beam/2.34.0: ./ apache-beam-2.34.0-source-release.zip apache-beam-2.34.0-source-release.zip.asc apache-beam-2.34.0-source-release.zip.sha512
Author: ibzib Date: Wed Oct 27 03:46:20 2021 New Revision: 50638 Log: Staging Java artifacts for Apache Beam 2.34.0 RC1 Added: dev/beam/2.34.0/ dev/beam/2.34.0/apache-beam-2.34.0-source-release.zip (with props) dev/beam/2.34.0/apache-beam-2.34.0-source-release.zip.asc dev/beam/2.34.0/apache-beam-2.34.0-source-release.zip.sha512 Added: dev/beam/2.34.0/apache-beam-2.34.0-source-release.zip == Binary file - no diff available. Propchange: dev/beam/2.34.0/apache-beam-2.34.0-source-release.zip -- svn:mime-type = application/octet-stream Added: dev/beam/2.34.0/apache-beam-2.34.0-source-release.zip.asc == --- dev/beam/2.34.0/apache-beam-2.34.0-source-release.zip.asc (added) +++ dev/beam/2.34.0/apache-beam-2.34.0-source-release.zip.asc Wed Oct 27 03:46:20 2021 @@ -0,0 +1,16 @@ +-BEGIN PGP SIGNATURE- + +iQIzBAABCgAdFiEE8R431/AG0IYjKHZ5e21mc8ea6nIFAmF4yxwACgkQe21mc8ea +6nL0Yw/9EXTJHbOOEkfxg7aqB3CeYD8+xAiCIiGE1a1vqa6nEYbA0HckdGJniShA +CXjjPW8gwqAkQtMGzzcDpm/XFMVWbVr+V6cvUeHsax0ynUZ3QcTW4CSLBfxaz9gj +G1H6AYol+17+zltifeXuvmsT7hlWZkTud70OXeM2UO6FhlxDeCwbBU/fbRSR9TwY +S1FlUsiQlgwTywh94bs4nP/2+8sKBSdQ65CbVzL65GTjiKpGRnWswGCB3ESkIZ1k +CQP5k9PCNtmGw5nIZgERrbpAil9mLnpLR8GVkOKO7rFJ5SISrye0hciXTD8+zl88 +napKlVrSHPrdPwKU1guFDP041KTOWnFBAEJcRxApD7rPE4yMUoHgznHfbC1Qci8A +2YHUijQfT6cwYw/XWwWOjET4yduMP16etBVwUR+8OSOJQF+a4kikLuzlaFU8lsN4 +OD+HDBnJAKFPWyv8DcWfmj8S+rFo3AjZ5BfN3RYW5KTESxhWG5eznDhek5g8X8yM +9Q4KyvbazqB72Oem7RiLbywFbTOWr6USlHvPrTp+CAfadU0dBfhNUyt4PEBvdvES +qlBF9S2SYE7S+ut0Whak/9uk4BC4J8dd2EjKD2TER+xh1IdU+wgQCH3XJOqqesRL +BlZT98Des2F0PcRMiW0mAngOMnuHxS+UVTGFDlzYxptNIkwwj6I= +=POBo +-END PGP SIGNATURE- Added: dev/beam/2.34.0/apache-beam-2.34.0-source-release.zip.sha512 == --- dev/beam/2.34.0/apache-beam-2.34.0-source-release.zip.sha512 (added) +++ dev/beam/2.34.0/apache-beam-2.34.0-source-release.zip.sha512 Wed Oct 27 03:46:20 2021 @@ -0,0 +1 @@ +c7ac361491e44c4a82a888d752280f11cce0ee261d570b12db50063472d38b51143be5bf1f377d4f56ed367fc5d1addd3e5aded0d7e3eec63d0f13cfdd1b1371 
apache-beam-2.34.0-source-release.zip
[beam] branch asf-site updated: Publishing website 2021/10/27 00:01:45 at commit 5cb634e
This is an automated email from the ASF dual-hosted git repository. git-site-role pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/asf-site by this push: new b5fbe9e Publishing website 2021/10/27 00:01:45 at commit 5cb634e b5fbe9e is described below commit b5fbe9e0dc7f19c65d536b81f83089af018a322f Author: jenkins AuthorDate: Wed Oct 27 00:01:46 2021 + Publishing website 2021/10/27 00:01:45 at commit 5cb634e --- .../documentation/basics/index.html| 26 +-- website/generated-content/documentation/index.xml | 38 ++ website/generated-content/sitemap.xml | 2 +- 3 files changed, 62 insertions(+), 4 deletions(-) diff --git a/website/generated-content/documentation/basics/index.html b/website/generated-content/documentation/basics/index.html index 14a033f..8dc0acd 100644 --- a/website/generated-content/documentation/basics/index.html +++ b/website/generated-content/documentation/basics/index.html @@ -18,7 +18,7 @@ function addPlaceholder(){$('input:text').attr('placeholder',"What are you looking for?");} function endSearch(){var search=document.querySelector(".searchBar");search.classList.add("disappear");var icons=document.querySelector("#iconsBar");icons.classList.remove("disappear");} function blockScroll(){$("body").toggleClass("fixedPosition");} -function openMenu(){addPlaceholder();blockScroll();}DocumentationUsing the DocumentationDocumentationUsing the DocumentationPipeline - A pipeline is a user-constructed graph of transformations that defines the desired data processing operations.PCollection - A PCollection is a data set or data @@ -33,7 +33,10 @@ a PCollection. 
The schema for a PCollection defines el PCollection as an ordered list of named fields.SDK - A language-specific library that lets pipeline authors build transforms, construct their pipelines, and submit them to a runner.Runner - A runner runs a Beam pipeline using the capabilities of -your chosen data processing engine.The following sections cover these concepts in more detail and provide links to +your chosen data processing engine.Splittable DoFn - Splittable DoFns let you process +elements in a non-monolithic way. You can checkpoint the processing of an +element, and the runner can split the remaining work to yield additional +parallelism.The following sections cover these concepts in more detail and provide links to additional documentation.PipelineA Beam pipeline is a graph (specifically, a https://en.wikipedia.org/wiki/Directed_acyclic_graph>directed acyclic graph) of all the data and computations in your data processing task. This includes @@ -182,7 +185,24 @@ Flink runner translates a Beam pipeline into a Flink job. The Direct Runner runs pipelines locally so you can test, debug, and validate that your pipeline adheres to the Apache Beam model as closely as possible.For an up-to-date list of Beam runners and which features of the Apache Beam model they support, see the runner -capability matrix.For more information about runners, see the following pages:Choosing a RunnerBeam Capability MatrixLast updated on 2021/10/25Have you found everything you were looking for?Was it all useful and clear? Is there an [...] +capability matrix.For more information about runners, see the following pages:Choosing a RunnerBeam Capability MatrixSplittable DoFnSplittable DoFn (SDF) is a generalization of DoFn that lets you process +elements in a non-monolithic way. Splittable DoFn makes it easier to create +complex, modular I/O connectors in Beam.A regular ParDo processes an entire element at a time, applying your regular +DoFn and waiting for the call to terminate. 
When you instead apply a +splittable DoFn to each element, the runner has the option of splitting the +element's processing into smaller tasks. You can checkpoint the processing of an +element, and you can split the remaining work to yield additional parallelism.For example, imagine you want to read every line from very large text files. +When you write your splittable DoFn, you can have separate pieces of logic to +read a segment of a file, split a segment of a file into sub-segments, and +report progress through the current segment. The runner can then invoke your +splittable DoFn intelligently to split up each input and read portions +separately, in parallel.A common computation pattern has the following steps:The runner splits an incoming element before starting any processing.The runner starts running your processing logic on each sub-element.If the runner notices that some sub-elements are taking longer than others, +the runner splits those sub-elements further and repeats step 2.The sub-element either finishes processing, or the user chooses to +checkpoint the sub-element and the runner repeats step 2.You can also write your splittable DoFn so the runner
[beam] annotated tag sdks/v2.34.0-RC1 updated (f635f86 -> 8759203)
This is an automated email from the ASF dual-hosted git repository. ibzib pushed a change to annotated tag sdks/v2.34.0-RC1 in repository https://gitbox.apache.org/repos/asf/beam.git. *** WARNING: tag sdks/v2.34.0-RC1 was modified! *** from f635f86 (commit) to 8759203 (tag) tagging f635f8683979027acadbf785c34396f1dceffdaf (commit) replaces jupyterlab-sidepanel-v1.0.0 by Kyle Weaver on Tue Oct 26 16:33:11 2021 -0700 - Log - sdks/v2.34.0-RC1 --- No new revisions were added by this update. Summary of changes:
[beam] annotated tag v2.34.0-RC1 updated (f635f86 -> f19a87b)
This is an automated email from the ASF dual-hosted git repository. ibzib pushed a change to annotated tag v2.34.0-RC1 in repository https://gitbox.apache.org/repos/asf/beam.git. *** WARNING: tag v2.34.0-RC1 was modified! *** from f635f86 (commit) to f19a87b (tag) tagging f635f8683979027acadbf785c34396f1dceffdaf (commit) replaces jupyterlab-sidepanel-v1.0.0 by Kyle Weaver on Tue Oct 26 16:26:20 2021 -0700 - Log - v2.34.0-RC1 --- No new revisions were added by this update. Summary of changes:
[beam] branch master updated (63e21b1 -> 5cb634e)
This is an automated email from the ASF dual-hosted git repository. reuvenlax pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from 63e21b1 [BEAM-11758] Update basics page: Splittable DoFn add 5cb634e [BEAM-13098] Fix translation of repeated TableRow fields (#15779) No new revisions were added by this update. Summary of changes: .../io/gcp/bigquery/TableRowToStorageApiProto.java | 18 ++ .../bigquery/TableRowToStorageApiProtoTest.java| 68 -- 2 files changed, 68 insertions(+), 18 deletions(-)
[beam] branch master updated (95d1f9a -> 63e21b1)
This is an automated email from the ASF dual-hosted git repository. lcwik pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from 95d1f9a [BEAM-4149] Ensure that we always provide and require the worker id. add a9ac63b [BEAM-11758] Update basics page: Splittable DoFn add 0fb4170 Merge branch 'master' into concepts4 add cbce59f Address review feedback add 63e21b1 [BEAM-11758] Update basics page: Splittable DoFn No new revisions were added by this update. Summary of changes: .../www/site/content/en/documentation/basics.md| 44 ++ 1 file changed, 44 insertions(+)
[beam] branch master updated (62b33a5 -> 95d1f9a)
This is an automated email from the ASF dual-hosted git repository. lcwik pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from 62b33a5 Change sql.Options to an interface under sqlx. (#15790) add 95d1f9a [BEAM-4149] Ensure that we always provide and require the worker id. No new revisions were added by this update. Summary of changes: .../worker/fn/BeamFnControlServiceTest.java| 25 +-- .../worker/fn/data/BeamFnDataGrpcServiceTest.java | 87 -- .../fn/logging/BeamFnLoggingServiceTest.java | 48 +++- .../control/FnApiControlClientPoolService.java | 6 -- .../environment/EmbeddedEnvironmentFactory.java| 3 +- .../provisioning/StaticGrpcProvisionService.java | 3 +- .../runners/fnexecution/EmbeddedSdkHarness.java| 4 +- .../control/FnApiControlClientPoolServiceTest.java | 54 +- .../fnexecution/control/RemoteExecutionTest.java | 7 +- .../StaticGrpcProvisionServiceTest.java| 5 +- .../status/BeamWorkerStatusGrpcServiceTest.java| 2 +- .../sdk/fn/channel}/AddHarnessIdInterceptor.java | 2 +- .../server/GrpcContextHeaderAccessorProvider.java | 11 +-- .../beam/fn/harness/ProcessBundleBenchmark.java| 7 +- .../java/org/apache/beam/fn/harness/FnHarness.java | 7 +- 15 files changed, 135 insertions(+), 136 deletions(-) rename sdks/java/{harness/src/main/java/org/apache/beam/fn/harness/control => fn-execution/src/main/java/org/apache/beam/sdk/fn/channel}/AddHarnessIdInterceptor.java (97%)
[beam] branch release-2.34.0 updated: [BEAM-13104] ParquetIO: SplitReadFn must read the whole block
This is an automated email from the ASF dual-hosted git repository. ibzib pushed a commit to branch release-2.34.0 in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/release-2.34.0 by this push: new 2d655ea [BEAM-13104] ParquetIO: SplitReadFn must read the whole block new 621a4f9 Merge pull request #15806 from ibzib/parquet-cp 2d655ea is described below commit 2d655eac45d036dc23962d09e45642b77c6f5cea Author: Alexey Romanenko AuthorDate: Mon Oct 25 17:21:47 2021 +0200 [BEAM-13104] ParquetIO: SplitReadFn must read the whole block --- .../src/main/java/org/apache/beam/sdk/io/parquet/ParquetIO.java | 6 +++--- .../src/test/java/org/apache/beam/sdk/io/parquet/ParquetIOTest.java | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/sdks/java/io/parquet/src/main/java/org/apache/beam/sdk/io/parquet/ParquetIO.java b/sdks/java/io/parquet/src/main/java/org/apache/beam/sdk/io/parquet/ParquetIO.java index 81f5978..c733f67 100644 --- a/sdks/java/io/parquet/src/main/java/org/apache/beam/sdk/io/parquet/ParquetIO.java +++ b/sdks/java/io/parquet/src/main/java/org/apache/beam/sdk/io/parquet/ParquetIO.java @@ -888,12 +888,12 @@ public class ParquetIO { continue; } if (record == null) { - // only happens with FilteredRecordReader at end of block + // it happens when a record is filtered out in this block LOG.debug( - "filtered record reader reached end of block in block {} in file {}", + "record is filtered out by reader in block {} in file {}", currentBlock, file.toString()); - break; + continue; } if (recordReader.shouldSkipCurrentRecord()) { // this record is being filtered via the filter2 package diff --git a/sdks/java/io/parquet/src/test/java/org/apache/beam/sdk/io/parquet/ParquetIOTest.java b/sdks/java/io/parquet/src/test/java/org/apache/beam/sdk/io/parquet/ParquetIOTest.java index d2609b4..261abd9 100644 --- a/sdks/java/io/parquet/src/test/java/org/apache/beam/sdk/io/parquet/ParquetIOTest.java +++ 
b/sdks/java/io/parquet/src/test/java/org/apache/beam/sdk/io/parquet/ParquetIOTest.java @@ -574,7 +574,8 @@ public class ParquetIOTest implements Serializable { readPipeline.apply( ParquetIO.read(SCHEMA) .from(temporaryFolder.getRoot().getAbsolutePath() + "/*") -.withConfiguration(configuration)); +.withConfiguration(configuration) +.withSplit()); PAssert.that(readBack).containsInAnyOrder(expectedRecords); readPipeline.run().waitUntilFinish(); }
[beam] branch master updated (84e24ea -> 62b33a5)
This is an automated email from the ASF dual-hosted git repository. lostluck pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from 84e24ea Merge pull request #15500 from [BEAM-6721] Set numShards dynamically for TextIO.write() add 62b33a5 Change sql.Options to an interface under sqlx. (#15790) No new revisions were added by this update. Summary of changes: sdks/go/pkg/beam/transforms/sql/sql.go | 41 ++-- sdks/go/pkg/beam/transforms/sql/sqlx/sqlx.go | 8 ++ 2 files changed, 28 insertions(+), 21 deletions(-)
[beam] branch master updated: [BEAM-6721] Set numShards dynamically for TextIO.write()
This is an automated email from the ASF dual-hosted git repository. pabloem pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new e096daf [BEAM-6721] Set numShards dynamically for TextIO.write() new 84e24ea Merge pull request #15500 from [BEAM-6721] Set numShards dynamically for TextIO.write() e096daf is described below commit e096daf9747a4837d8c054dcb384cf8d5c48023c Author: Minbo Bae AuthorDate: Fri Sep 10 01:08:04 2021 -0700 [BEAM-6721] Set numShards dynamically for TextIO.write() --- .../main/java/org/apache/beam/sdk/io/TextIO.java | 25 -- .../java/org/apache/beam/sdk/io/WriteFiles.java| 3 ++- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextIO.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextIO.java index baae960..8f8a699 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextIO.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextIO.java @@ -268,7 +268,6 @@ public class TextIO { .setDelimiter(new char[] {'\n'}) .setWritableByteChannelFactory(FileBasedSink.CompressionType.UNCOMPRESSED) .setWindowedWrites(false) -.setNumShards(0) .setNoSpilling(false) .build(); } @@ -623,7 +622,7 @@ public class TextIO { abstract @Nullable String getFooter(); /** Requested number of shards. 0 for automatic. */ -abstract int getNumShards(); +abstract @Nullable ValueProvider getNumShards(); /** The shard template of each file written, combined with prefix and suffix. 
*/ abstract @Nullable String getShardTemplate(); @@ -689,7 +688,8 @@ public class TextIO { abstract Builder setFormatFunction( @Nullable SerializableFunction formatFunction); - abstract Builder setNumShards(int numShards); + abstract Builder setNumShards( + @Nullable ValueProvider numShards); abstract Builder setWindowedWrites(boolean windowedWrites); @@ -846,6 +846,14 @@ public class TextIO { */ public TypedWrite withNumShards(int numShards) { checkArgument(numShards >= 0); + return withNumShards(StaticValueProvider.of(numShards)); +} + +/** + * Like {@link #withNumShards(int)}. Specifying {@code null} means runner-determined sharding. + */ +public TypedWrite withNumShards( +@Nullable ValueProvider numShards) { return toBuilder().setNumShards(numShards).build(); } @@ -1002,7 +1010,7 @@ public class TextIO { getHeader(), getFooter(), getWritableByteChannelFactory())); - if (getNumShards() > 0) { + if (getNumShards() != null) { write = write.withNumShards(getNumShards()); } if (getWindowedWrites()) { @@ -1020,8 +1028,8 @@ public class TextIO { resolveDynamicDestinations().populateDisplayData(builder); builder - .addIfNotDefault( - DisplayData.item("numShards", getNumShards()).withLabel("Maximum Output Shards"), 0) + .addIfNotNull( + DisplayData.item("numShards", getNumShards()).withLabel("Maximum Output Shards")) .addIfNotNull( DisplayData.item("tempDirectory", getTempDirectory()) .withLabel("Directory for temporary files")) @@ -1139,6 +1147,11 @@ public class TextIO { return new Write(inner.withNumShards(numShards)); } +/** See {@link TypedWrite#withNumShards(ValueProvider)}. */ +public Write withNumShards(@Nullable ValueProvider numShards) { + return new Write(inner.withNumShards(numShards)); +} + /** See {@link TypedWrite#withoutSharding()}. 
*/ public Write withoutSharding() { return new Write(inner.withoutSharding()); diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/WriteFiles.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/WriteFiles.java index 9dabb40..7afac9a 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/WriteFiles.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/WriteFiles.java @@ -884,7 +884,8 @@ public abstract class WriteFiles shardCount = context.sideInput(numShardsView); } else { checkNotNull(getNumShardsProvider()); -shardCount = getNumShardsProvider().get(); +shardCount = +checkNotNull(getNumShardsProvider().get(), "Must have non-null number of shards."); } checkArgument( shardCount > 0,
[beam] branch master updated: [BEAM-13104] ParquetIO: SplitReadFn must read the whole block
This is an automated email from the ASF dual-hosted git repository. ibzib pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new 136eadc [BEAM-13104] ParquetIO: SplitReadFn must read the whole block new 46c649a Merge pull request #15789 from aromanenko-dev/BEAM-13104-ParquetIO-filter 136eadc is described below commit 136eadc121e136e25aafc2b65f130526e7f20142 Author: Alexey Romanenko AuthorDate: Mon Oct 25 17:21:47 2021 +0200 [BEAM-13104] ParquetIO: SplitReadFn must read the whole block --- .../src/main/java/org/apache/beam/sdk/io/parquet/ParquetIO.java | 6 +++--- .../src/test/java/org/apache/beam/sdk/io/parquet/ParquetIOTest.java | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/sdks/java/io/parquet/src/main/java/org/apache/beam/sdk/io/parquet/ParquetIO.java b/sdks/java/io/parquet/src/main/java/org/apache/beam/sdk/io/parquet/ParquetIO.java index 81f5978..c733f67 100644 --- a/sdks/java/io/parquet/src/main/java/org/apache/beam/sdk/io/parquet/ParquetIO.java +++ b/sdks/java/io/parquet/src/main/java/org/apache/beam/sdk/io/parquet/ParquetIO.java @@ -888,12 +888,12 @@ public class ParquetIO { continue; } if (record == null) { - // only happens with FilteredRecordReader at end of block + // it happens when a record is filtered out in this block LOG.debug( - "filtered record reader reached end of block in block {} in file {}", + "record is filtered out by reader in block {} in file {}", currentBlock, file.toString()); - break; + continue; } if (recordReader.shouldSkipCurrentRecord()) { // this record is being filtered via the filter2 package diff --git a/sdks/java/io/parquet/src/test/java/org/apache/beam/sdk/io/parquet/ParquetIOTest.java b/sdks/java/io/parquet/src/test/java/org/apache/beam/sdk/io/parquet/ParquetIOTest.java index d2609b4..261abd9 100644 --- a/sdks/java/io/parquet/src/test/java/org/apache/beam/sdk/io/parquet/ParquetIOTest.java +++ 
b/sdks/java/io/parquet/src/test/java/org/apache/beam/sdk/io/parquet/ParquetIOTest.java @@ -574,7 +574,8 @@ public class ParquetIOTest implements Serializable { readPipeline.apply( ParquetIO.read(SCHEMA) .from(temporaryFolder.getRoot().getAbsolutePath() + "/*") -.withConfiguration(configuration)); +.withConfiguration(configuration) +.withSplit()); PAssert.that(readBack).containsInAnyOrder(expectedRecords); readPipeline.run().waitUntilFinish(); }
[beam] branch master updated: Merge pull request #15744 from [BEAM-13072][Playground] Executor builder
This is an automated email from the ASF dual-hosted git repository. pabloem pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new 49ae709 Merge pull request #15744 from [BEAM-13072][Playground] Executor builder 49ae709 is described below commit 49ae709e95c6f5fcd88dd4aba86398c80279 Author: daria.malkova AuthorDate: Tue Oct 26 10:21:54 2021 +0300 Merge pull request #15744 from [BEAM-13072][Playground] Executor builder * Add environment_service.go and structures for beam sdk, network envs and application envs * Add executor config structure to beam environment structure Fix tests, add new * add not yet supported * add java configuration file * Add executor builder * resolve conflicts with master * adapt server to new envs * Bugfix for a moved file * test bugfix Co-authored-by: Ilya Kozyrev --- playground/backend/internal/executors/executor.go | 97 -- .../backend/internal/executors/executor_builder.go | 151 +++ .../backend/internal/executors/executor_test.go| 207 + .../backend/internal/executors/java_helper.go | 69 --- .../backend/internal/executors/java_helper_test.go | 76 .../backend/internal/fs_tool/path_checker.go | 5 +- .../go_helper.go => validators/java_validators.go} | 28 ++- .../go_helper.go => validators/validator.go} | 16 +- 8 files changed, 417 insertions(+), 232 deletions(-) diff --git a/playground/backend/internal/executors/executor.go b/playground/backend/internal/executors/executor.go index b85641b..f34895b 100644 --- a/playground/backend/internal/executors/executor.go +++ b/playground/backend/internal/executors/executor.go @@ -17,81 +17,52 @@ package executors import ( - pb "beam.apache.org/playground/backend/internal/api/v1" - "beam.apache.org/playground/backend/internal/fs_tool" - "fmt" + "beam.apache.org/playground/backend/internal/validators" "os/exec" ) -type validatorWithArgs struct { - validator func(filePath string, args 
...interface{}) error - args []interface{} +//CmdConfiguration for base cmd code execution +type CmdConfiguration struct { + fileName string + workingDir string + commandName string + commandArgs []string } -// Executor interface for all executors (Java/Python/Go/SCIO) +// Executor struct for all executors (Java/Python/Go/SCIO) type Executor struct { - relativeFilePath string - absoulteFilePath string - dirPath string - executableDir string - validators []validatorWithArgs - compileName string - compileArgs []string - runName string - runArgs []string + compileArgs CmdConfiguration + runArgs CmdConfiguration + validators []validators.Validator } -// Validate checks that the file exists and that extension of the file matches the SDK. -// Return result of validation (true/false) and error if it occurs -func (ex *Executor) Validate() error { - for _, validator := range ex.validators { - err := validator.validator(ex.absoulteFilePath, validator.args...) - if err != nil { - return err +// Validate return the function that apply all validators of executor +func (ex *Executor) Validate() func() error { + return func() error { + for _, validator := range ex.validators { + err := validator.Validator(validator.Args...) + if err != nil { + return err + } } + return nil } - return nil } -// Compile compiles the code and creates executable file. -// Return error if it occurs -func (ex *Executor) Compile() error { - args := append(ex.compileArgs, ex.relativeFilePath) - cmd := exec.Command(ex.compileName, args...) - cmd.Dir = ex.dirPath - s := cmd.String() - fmt.Println(s) - out, err := cmd.CombinedOutput() - if err != nil { - return {string(out)} - } - return nil -} - -// Run runs the executable file. -// Return logs and error if it occurs -func (ex *Executor) Run(name string) (string, error) { - args := append(ex.runArgs, name) - cmd := exec.Command(ex.runName, args...) 
- cmd.Dir = ex.dirPath - out, err := cmd.Output() - return string(out), err -} - -// NewExecutor executes the compilation, running and validation of code -func NewExecutor(apacheBeamSdk pb.Sdk, fs *fs_tool.LifeCycle) (*Executor, error) { - switch apacheBeamSdk { - case pb.Sdk_SDK_JAVA: - return NewJavaExecutor(fs, GetJavaValidators()), nil -