[beam] branch nightly-refs/heads/master updated (e9afde092c1 -> 64a92d73895)
This is an automated email from the ASF dual-hosted git repository. github-bot pushed a change to branch nightly-refs/heads/master in repository https://gitbox.apache.org/repos/asf/beam.git from e9afde092c1 Merge pull request #17268: [BEAM-10529] enable nullable for kafkaIO xlang add 4070ea50496 Update python beam-master container image. (#17313) add 2db2f6e1fb3 [BEAM-14236] Parquet IO support for list to conform with Apache Parquet specification for Python SDK. add 5b0e92f6914 Merge pull request #17279 from Shiv22Wabale/BEAM-14236 add a1780922bec [BEAM-13767] Remove eclipse plugin as it generates a lot of unused tasks (#17106) add 1a22a935683 [BEAM-11714] Change spotBugs jenkins config add 8f7713cb06d [BEAM-11714] Add dummy class for testing add e607c81efb0 [BEAM-11714] Remove dummy class used for testing add 1d51c8f9d24 [BEAM-11714] Spotbugs print toJenkins UI precommit_Java17 add 6ae6f310341 Merge pull request #17273 from benWize/BEAM-11714 add 973752b07ad Nit: correct description for precommit cron jobs. add f7be5ae5a47 Merge pull request #17276 from ibzib/job-description add 70f9820e0ce [BEAM-10708] Updated beam_sql error message (#17314) add 2dc2ad455d1 [BEAM-14281] add as_deterministic_coder to nullable coder (#17322) add e7039b64644 Cleanup docs on Shared. add c1c4fb3bfaf Merge pull request #17272 Cleanup docs on Shared. add 2d27f44f581 Improvements to Beam/Spark quickstart. (#17129) add 18d41d22a84 Disable BigQueryIOStorageWriteIT for Runner v2 test suite add 64a92d73895 Merge pull request #17325: [BEAM-14263] Disable BigQueryIOStorageWriteIT for Runner v2 test suite No new revisions were added by this update. 
Summary of changes: .test-infra/jenkins/PrecommitJobBuilder.groovy | 4 +- .test-infra/jenkins/job_PreCommit_Java.groovy | 6 +-- .test-infra/jenkins/job_PreCommit_SQL.groovy | 6 +-- .../jenkins/job_PreCommit_SQL_Java11.groovy| 6 +-- .../jenkins/job_PreCommit_SQL_Java17.groovy| 6 +-- .../org/apache/beam/gradle/BeamModulePlugin.groovy | 6 --- runners/google-cloud-dataflow-java/build.gradle| 1 + sdks/python/apache_beam/coders/coders.py | 8 +++ sdks/python/apache_beam/coders/coders_test.py | 16 ++ sdks/python/apache_beam/io/parquetio.py| 21 +++- sdks/python/apache_beam/io/parquetio_test.py | 58 +- .../apache_beam/runners/dataflow/internal/names.py | 4 +- .../runners/interactive/sql/beam_sql_magics.py | 14 -- .../apache_beam/runners/interactive/sql/utils.py | 3 +- .../runners/interactive/sql/utils_test.py | 11 sdks/python/apache_beam/utils/shared.py| 31 +--- .../www/site/content/en/get-started/from-spark.md | 22 +--- 17 files changed, 162 insertions(+), 61 deletions(-)
svn commit: r53709 - /dev/beam/2.38.0/python/
Author: danoliveira Date: Sat Apr 9 03:55:41 2022 New Revision: 53709 Log: Staging Python artifacts for Apache Beam 2.38.0 RC1 Added: dev/beam/2.38.0/python/ dev/beam/2.38.0/python/apache-beam-2.38.0.zip (with props) dev/beam/2.38.0/python/apache-beam-2.38.0.zip.asc dev/beam/2.38.0/python/apache-beam-2.38.0.zip.sha512 dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-macosx_10_9_x86_64.whl (with props) dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-macosx_10_9_x86_64.whl.asc dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-macosx_10_9_x86_64.whl.sha512 dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux1_i686.whl (with props) dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux1_i686.whl.asc dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux1_i686.whl.sha512 dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux1_x86_64.whl (with props) dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux1_x86_64.whl.asc dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux1_x86_64.whl.sha512 dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux2010_i686.whl (with props) dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux2010_i686.whl.asc dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux2010_i686.whl.sha512 dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux2010_x86_64.whl (with props) dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux2010_x86_64.whl.asc dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux2010_x86_64.whl.sha512 dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux2014_aarch64.whl (with props) dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux2014_aarch64.whl.asc dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux2014_aarch64.whl.sha512 dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-win32.whl (with props) dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-win32.whl.asc 
dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-win32.whl.sha512 dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-win_amd64.whl (with props) dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-win_amd64.whl.asc dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-win_amd64.whl.sha512 dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-macosx_10_9_x86_64.whl (with props) dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-macosx_10_9_x86_64.whl.asc dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-macosx_10_9_x86_64.whl.sha512 dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux1_i686.whl (with props) dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux1_i686.whl.asc dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux1_i686.whl.sha512 dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux1_x86_64.whl (with props) dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux1_x86_64.whl.asc dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux1_x86_64.whl.sha512 dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux2010_i686.whl (with props) dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux2010_i686.whl.asc dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux2010_i686.whl.sha512 dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux2010_x86_64.whl (with props) dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux2010_x86_64.whl.asc dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux2010_x86_64.whl.sha512 dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux2014_aarch64.whl (with props) dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux2014_aarch64.whl.asc dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux2014_aarch64.whl.sha512 dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-win32.whl (with props) dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-win32.whl.asc 
dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-win32.whl.sha512 dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-win_amd64.whl (with props) dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-win_amd64.whl.asc dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-win_amd64.whl.sha512 dev/beam/2.38.0/python/apache_beam-2.38.0-cp38-cp38-macosx_10_9_x86_64.whl (with props) dev/beam/2.38.0/python/apache_beam-2.38.0-cp38-cp38-macosx_10_9_x86_64.whl.asc dev/beam/2.38.0/python/apache_beam-2.38.0-cp38-cp38-macosx_10_9_x86_64.whl.sha512 dev/beam/2.38.0/python/apache_beam-2.38.0-cp38-cp38-manylinux1_i686.whl (with props) dev/beam/2.38.0/python/apache_beam-2.38.0-cp38-cp38-manylinux1_i686.whl.asc dev/beam/2.38.0/python/apache_beam-2.38.0-c
svn commit: r53708 - in /dev/beam/2.38.0: ./ apache-beam-2.38.0-source-release.zip apache-beam-2.38.0-source-release.zip.asc apache-beam-2.38.0-source-release.zip.sha512
Author: danoliveira Date: Sat Apr 9 03:51:38 2022 New Revision: 53708 Log: Staging Java artifacts for Apache Beam 2.38.0 RC1 Added: dev/beam/2.38.0/ dev/beam/2.38.0/apache-beam-2.38.0-source-release.zip (with props) dev/beam/2.38.0/apache-beam-2.38.0-source-release.zip.asc dev/beam/2.38.0/apache-beam-2.38.0-source-release.zip.sha512 Added: dev/beam/2.38.0/apache-beam-2.38.0-source-release.zip == Binary file - no diff available. Propchange: dev/beam/2.38.0/apache-beam-2.38.0-source-release.zip -- svn:mime-type = application/octet-stream Added: dev/beam/2.38.0/apache-beam-2.38.0-source-release.zip.asc == --- dev/beam/2.38.0/apache-beam-2.38.0-source-release.zip.asc (added) +++ dev/beam/2.38.0/apache-beam-2.38.0-source-release.zip.asc Sat Apr 9 03:51:38 2022 @@ -0,0 +1,16 @@ +-BEGIN PGP SIGNATURE- + +iQIzBAABCgAdFiEE0Oe2nZEa2jwEgrqhxOay+McddC8FAmJRAo4ACgkQxOay+Mcd +dC8CyQ//UHmas5ahuv8ZkvKCXJwSetDub4o47Lvum7JR8CiBIppH3UwK0UOjx8Ot +CdzNS//zkOhay2UQeEW4DLNBpiJ16IDZ3IGTnWmp3RILtxOd03zutZBFhGYoNsrb +DPEaz3vZrMnhhrw4QZd16cXCcH/vYyIU8sdmaJbKV7RFRaGYvfn4fVnzsZWXY5Qv +oyToUr3ceyWCOp+lJrUvi1g6D48ZVqKODehqfHWLf1NXUDppnoHaVI7Kqr1JohEi +KtYO9lw3zjWC21MexK3EQl1pi5nNhlY3NGKtRlgG6HrMOHLdc0XHo4QtIC3jjVzc +sChHkwnlSgGN3XqhQgEsNjxH6BffEo2EjPIkGQDu2jg6D89ndu09pGTZrwartAI6 +Fp3501qTCK68JJcXAz3nj/p/Pz7HIkGGtsaWcKvGA0lbqfWEfK3ivnrzdiSJ5v99 +SkJce0WvDxXnalyF7iv9H8rFVKavU6bOEo0DFfnFh8mAORX2l103UAL8CdU7CZMD +qQYp6aE1DoL7ksUxChc81IpmkdHI9YwfB9N6hIjncEmTi+deGSkYnBRsn5dRMjek +7BtcNiLAkFkcOJUTZPxYsDgwbOS5OdhPHf3iVvuSFwxKN9TDbi772UrB6Mi1hrgh +NvCrcSgMLiIFU4WZTrEpHJBVuv4N5xr5shRWIrmrG6CY0jzHrN4= +=zLP4 +-END PGP SIGNATURE- Added: dev/beam/2.38.0/apache-beam-2.38.0-source-release.zip.sha512 == --- dev/beam/2.38.0/apache-beam-2.38.0-source-release.zip.sha512 (added) +++ dev/beam/2.38.0/apache-beam-2.38.0-source-release.zip.sha512 Sat Apr 9 03:51:38 2022 @@ -0,0 +1 @@ +56d156e07955241cc86c591b1146a931db6281517991efe0654b32d86076fe1b9ff68bd56ca4ab442dc9b1a157f6430302b4bcc59280ef20325acb17c139720c 
apache-beam-2.38.0-source-release.zip
[beam] branch master updated (2d27f44f581 -> 64a92d73895)
This is an automated email from the ASF dual-hosted git repository. chamikara pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git from 2d27f44f581 Improvements to Beam/Spark quickstart. (#17129) add 18d41d22a84 Disable BigQueryIOStorageWriteIT for Runner v2 test suite add 64a92d73895 Merge pull request #17325: [BEAM-14263] Disable BigQueryIOStorageWriteIT for Runner v2 test suite No new revisions were added by this update. Summary of changes: runners/google-cloud-dataflow-java/build.gradle | 1 + 1 file changed, 1 insertion(+)
[beam] branch pr-bot-state updated: Updating config from bot
This is an automated email from the ASF dual-hosted git repository. github-bot pushed a commit to branch pr-bot-state in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/pr-bot-state by this push: new bc0ec5f9f48 Updating config from bot bc0ec5f9f48 is described below commit bc0ec5f9f483ee449f11da671fb6f92b8987eb2f Author: github-actions AuthorDate: Fri Apr 8 23:04:31 2022 +0000 Updating config from bot --- scripts/ci/pr-bot/state/reviewers-for-label-go.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/ci/pr-bot/state/reviewers-for-label-go.json b/scripts/ci/pr-bot/state/reviewers-for-label-go.json index 6a80cc72870..59ef2533a65 100644 --- a/scripts/ci/pr-bot/state/reviewers-for-label-go.json +++ b/scripts/ci/pr-bot/state/reviewers-for-label-go.json @@ -2,7 +2,7 @@ "label": "go", "dateOfLastReviewAssignment": { "lostluck": 1649273609426, -"jrmccluskey": 1649084918234, +"jrmccluskey": 1649459069353, "youngoli": 1649271861723, "damccorm": 1649263365896, "riteshghorse": 1649354699193
[beam] branch pr-bot-state updated: Updating config from bot
This is an automated email from the ASF dual-hosted git repository. github-bot pushed a commit to branch pr-bot-state in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/pr-bot-state by this push: new d19b53430dd Updating config from bot d19b53430dd is described below commit d19b53430dd14dbe23a9310d9b73501247f02eb1 Author: github-actions AuthorDate: Fri Apr 8 23:04:30 2022 +0000 Updating config from bot --- scripts/ci/pr-bot/state/pr-state/pr-17324.json | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/scripts/ci/pr-bot/state/pr-state/pr-17324.json b/scripts/ci/pr-bot/state/pr-state/pr-17324.json new file mode 100644 index 00000000000..1c2c2e31975 --- /dev/null +++ b/scripts/ci/pr-bot/state/pr-state/pr-17324.json @@ -0,0 +1,10 @@ +{ + "commentedAboutFailingChecks": false, + "reviewersAssignedForLabels": { +"go": "jrmccluskey" + }, + "nextAction": "Reviewers", + "stopReviewerNotifications": false, + "remindAfterTestsPass": [], + "committerAssigned": false +} \ No newline at end of file
[beam] branch asf-site updated: Publishing website 2022/04/08 22:17:40 at commit 2d27f44
This is an automated email from the ASF dual-hosted git repository. git-site-role pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/asf-site by this push: new 3a202e7e536 Publishing website 2022/04/08 22:17:40 at commit 2d27f44 3a202e7e536 is described below commit 3a202e7e5364a61f3c1b9cf7b9f05acdc838de76 Author: jenkins AuthorDate: Fri Apr 8 22:17:41 2022 + Publishing website 2022/04/08 22:17:40 at commit 2d27f44 --- .../get-started/from-spark/index.html | 26 +- website/generated-content/get-started/index.xml| 26 +- website/generated-content/sitemap.xml | 2 +- 3 files changed, 33 insertions(+), 21 deletions(-) diff --git a/website/generated-content/get-started/from-spark/index.html b/website/generated-content/get-started/from-spark/index.html index 14a5fadc834..659e1951f3a 100644 --- a/website/generated-content/get-started/from-spark/index.html +++ b/website/generated-content/get-started/from-spark/index.html @@ -19,8 +19,8 @@ function addPlaceholder(){$('input:text').attr('placeholder',"What are you looki function endSearch(){var search=document.querySelector(".searchBar");search.classList.add("disappear");var icons=document.querySelector("#iconsBar");icons.classList.remove("disappear");} function blockScroll(){$("body").toggleClass("fixedPosition");} function openMenu(){addPlaceholder();blockScroll();}Get startedBeam OverviewTour of BeamApache Beam is familiar. -The Beam and Spark APIs are similar, so you already know the basic concepts.Spark stores data Spark DataFrames for structured data, +using Beam should be easy. +The basic concepts are the same, and the APIs are similar as well.Spark stores data Spark DataFrames for structured data, and in Resilient Distributed Datasets (RDD) for unstructured data. We are using RDDs for this guide.A Spark RDD represents a collection of elements, while in Beam it’s called a Parallel Collection (PCollection). 
@@ -46,7 +46,8 @@ methods like data.map(...), but they’re doing the same thing. | beam.Map(print) )ℹ️ Note that we called print inside a Map transform. That’s because we can only access the elements of a PCollection -from within a PTransform.Another thing to note is that Beam pipelines are constructed lazily. +from within a PTransform. +To inspect the data locally, you can use the InteractiveRunner (https://cloud.google.com/dataflow/docs/guides/interactive-pipeline-development#creating_your_pipeline). Another thing to note is that Beam pipelines are constructed lazily. This means that when you pipe | data you’re only declaring the transformations and the order you want them to happen, but the actual computation doesn’t happen. @@ -74,10 +75,11 @@ we can’t guarantee that the results we’ve calculated are available a sc = pyspark.SparkContext() values = sc.parallelize([1, 2, 3, 4]) -total = values.reduce(lambda x, y: x + y) +min_value = values.reduce(min) +max_value = values.reduce(max) -# We can simply use `total` since it's already a Python `int` value from `reduce`. -scaled_values = values.map(lambda x: x / total) +# We can simply use `min_value` and `max_value` since it's already a Python `int` value from `reduce`. +scaled_values = values.map(lambda x: (x - min_value) / (max_value - min_value)) # But to access `scaled_values`, we need to call `collect`. print(scaled_values.collect())In Beam the results from all transforms result in a PCollection. @@ -93,15 +95,19 @@ and access them as an https://docs.python.org/3/glossary.html#term-itera with beam.Pipeline() as pipeline: values = pipeline | beam.Create([1, 2, 3, 4]) -total = values | beam.CombineGlobally(sum) +min_value = values | beam.CombineGlobally(min) +max_value = values | beam.CombineGlobally(max) # To access `total`, we need to pass it as a side input. 
scaled_values = values | beam.Map( -lambda x, total: x / total, -total=beam.pvalue.AsSingleton(total)) +lambda x, min_value, max_value: x / lambda x: (x - min_value) / (max_va [...] +min_value =beam.pvalue.AsSingleton(min_value), +max_value =beam.pvalue.AsSingleton(max_value)) scaled_values | beam.Map(print)ℹ️ In Beam we need to pass a side input explicitly, but we get the -benefit that a reduction or aggregation does not have to fit into memory.Next StepsTake a look at all the available transforms in the Python transform gallery.Learn how to read from and write to files in the Pipeline I/O section of the Programming guideWalk through additional WordCount [...] +benefit that a reduction or aggregation does not have to fit into memory. +Lazily computing side inputs also allows us to compute values only once, +rather than for each distinct reduction (or requiring explicit caching of the RDD).Next StepsTake a look at all the available
[beam] branch master updated (c1c4fb3bfaf -> 2d27f44f581)
This is an automated email from the ASF dual-hosted git repository. robertwb pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git from c1c4fb3bfaf Merge pull request #17272 Cleanup docs on Shared. add 2d27f44f581 Improvements to Beam/Spark quickstart. (#17129) No new revisions were added by this update. Summary of changes: .../www/site/content/en/get-started/from-spark.md | 22 ++ 1 file changed, 14 insertions(+), 8 deletions(-)
[beam] branch master updated (2dc2ad455d1 -> c1c4fb3bfaf)
This is an automated email from the ASF dual-hosted git repository. robertwb pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git from 2dc2ad455d1 [BEAM-14281] add as_deterministic_coder to nullable coder (#17322) add e7039b64644 Cleanup docs on Shared. add c1c4fb3bfaf Merge pull request #17272 Cleanup docs on Shared. No new revisions were added by this update. Summary of changes: sdks/python/apache_beam/utils/shared.py | 31 --- 1 file changed, 12 insertions(+), 19 deletions(-)
[beam] branch master updated: [BEAM-14281] add as_deterministic_coder to nullable coder (#17322)
This is an automated email from the ASF dual-hosted git repository. bhulette pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new 2dc2ad455d1 [BEAM-14281] add as_deterministic_coder to nullable coder (#17322) 2dc2ad455d1 is described below commit 2dc2ad455d1a5a6877da851e05ad90d67c084871 Author: johnjcasey <95318300+johnjca...@users.noreply.github.com> AuthorDate: Fri Apr 8 16:46:58 2022 -0400 [BEAM-14281] add as_deterministic_coder to nullable coder (#17322) * [BEAM-14281] add as_deterministic_coder to nullable coder * Update coders_test.py Co-authored-by: Robert Bradshaw --- sdks/python/apache_beam/coders/coders.py | 8 sdks/python/apache_beam/coders/coders_test.py | 16 2 files changed, 24 insertions(+) diff --git a/sdks/python/apache_beam/coders/coders.py b/sdks/python/apache_beam/coders/coders.py index fce397df626..19463443386 100644 --- a/sdks/python/apache_beam/coders/coders.py +++ b/sdks/python/apache_beam/coders/coders.py @@ -627,6 +627,14 @@ class NullableCoder(FastCoder): # type: () -> bool return self._value_coder.is_deterministic() + def as_deterministic_coder(self, step_label, error_message=None): +if self.is_deterministic(): + return self +else: + deterministic_value_coder = self._value_coder.as_deterministic_coder( + step_label, error_message) + return NullableCoder(deterministic_value_coder) + def __eq__(self, other): return ( type(self) == type(other) and self._value_coder == other._value_coder) diff --git a/sdks/python/apache_beam/coders/coders_test.py b/sdks/python/apache_beam/coders/coders_test.py index 49ad33202f0..0a30a320e90 100644 --- a/sdks/python/apache_beam/coders/coders_test.py +++ b/sdks/python/apache_beam/coders/coders_test.py @@ -21,7 +21,9 @@ import logging import unittest import proto +import pytest +from apache_beam import typehints from apache_beam.coders import proto2_coder_test_messages_pb2 as test_message from 
apache_beam.coders import coders from apache_beam.coders.avro_record import AvroRecord @@ -220,6 +222,20 @@ class FallbackCoderTest(unittest.TestCase): self.assertEqual(DummyClass(), coder.decode(coder.encode(DummyClass( +class NullableCoderTest(unittest.TestCase): + def test_determinism(self): +deterministic = coders_registry.get_coder(typehints.Optional[int]) +deterministic.as_deterministic_coder('label') + +complex_deterministic = coders_registry.get_coder( +typehints.Optional[DummyClass]) +complex_deterministic.as_deterministic_coder('label') + +nondeterministic = coders.NullableCoder(coders.Base64PickleCoder()) +with pytest.raises(ValueError): + nondeterministic.as_deterministic_coder('label') + + if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) unittest.main()
[beam] branch master updated (f7be5ae5a47 -> 70f9820e0ce)
This is an automated email from the ASF dual-hosted git repository. ningk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git from f7be5ae5a47 Merge pull request #17276 from ibzib/job-description add 70f9820e0ce [BEAM-10708] Updated beam_sql error message (#17314) No new revisions were added by this update. Summary of changes: .../apache_beam/runners/interactive/sql/beam_sql_magics.py | 14 +- sdks/python/apache_beam/runners/interactive/sql/utils.py | 3 ++- .../apache_beam/runners/interactive/sql/utils_test.py | 11 +++ 3 files changed, 22 insertions(+), 6 deletions(-)
[beam] branch master updated: Nit: correct description for precommit cron jobs.
This is an automated email from the ASF dual-hosted git repository. ibzib pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new 973752b07ad Nit: correct description for precommit cron jobs. new f7be5ae5a47 Merge pull request #17276 from ibzib/job-description 973752b07ad is described below commit 973752b07ad95847375b1731289d08c73ed20047 Author: Kyle Weaver AuthorDate: Mon Apr 4 16:39:10 2022 -0700 Nit: correct description for precommit cron jobs. The default setting runs each job every 6 hours. --- .test-infra/jenkins/PrecommitJobBuilder.groovy | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.test-infra/jenkins/PrecommitJobBuilder.groovy b/.test-infra/jenkins/PrecommitJobBuilder.groovy index df2ef22d193..5a0df8eae89 100644 --- a/.test-infra/jenkins/PrecommitJobBuilder.groovy +++ b/.test-infra/jenkins/PrecommitJobBuilder.groovy @@ -57,11 +57,11 @@ class PrecommitJobBuilder { definePhraseJob additionalCustomization } - /** Create a pre-commit job which runs on a daily schedule. */ + /** Create a pre-commit job which runs on a regular schedule. */ private void defineCronJob(Closure additionalCustomization) { def job = createBaseJob 'Cron' job.with { - description buildDescription('on a daily schedule.') + description buildDescription('on a regular schedule.') commonJobProperties.setAutoJob delegate } job.with additionalCustomization
[beam] branch master updated (a1780922bec -> 6ae6f310341)
This is an automated email from the ASF dual-hosted git repository. kileysok pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git from a1780922bec [BEAM-13767] Remove eclipse plugin as it generates a lot of unused tasks (#17106) new 1a22a935683 [BEAM-11714] Change spotBugs jenkins config new 8f7713cb06d [BEAM-11714] Add dummy class for testing new e607c81efb0 [BEAM-11714] Remove dummy class used for testing new 1d51c8f9d24 [BEAM-11714] Spotbugs print toJenkins UI precommit_Java17 new 6ae6f310341 Merge pull request #17273 from benWize/BEAM-11714 The 35278 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .test-infra/jenkins/job_PreCommit_Java.groovy | 6 ++ .test-infra/jenkins/job_PreCommit_SQL.groovy| 6 ++ .test-infra/jenkins/job_PreCommit_SQL_Java11.groovy | 6 ++ .test-infra/jenkins/job_PreCommit_SQL_Java17.groovy | 6 ++ 4 files changed, 8 insertions(+), 16 deletions(-)
[beam] branch master updated: [BEAM-13767] Remove eclipse plugin as it generates a lot of unused tasks (#17106)
This is an automated email from the ASF dual-hosted git repository. yichi pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new a1780922bec [BEAM-13767] Remove eclipse plugin as it generates a lot of unused tasks (#17106) a1780922bec is described below commit a1780922becdaaf10859351aded1f7267c4dec70 Author: Yichi Zhang AuthorDate: Fri Apr 8 09:47:45 2022 -0700 [BEAM-13767] Remove eclipse plugin as it generates a lot of unused tasks (#17106) --- .../src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy | 6 -- 1 file changed, 6 deletions(-) diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index cc1284af283..797d52f181a 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -1066,12 +1066,6 @@ class BeamModulePlugin implements Plugin { } project.check.dependsOn project.javadoc - // Apply the eclipse plugins. This adds the "eclipse" task and - // connects the apt-eclipse plugin to update the eclipse project files - // with the instructions needed to run apt within eclipse to handle the AutoValue - // and additional annotations - project.apply plugin: 'eclipse' - // Enables a plugin which can apply code formatting to source. project.apply plugin: "com.diffplug.spotless" // scan CVE
[beam] branch master updated: [BEAM-14236] Parquet IO support for list to conform with Apache Parquet specification for Python SDK.
This is an automated email from the ASF dual-hosted git repository. heejong pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new 2db2f6e1fb3 [BEAM-14236] Parquet IO support for list to conform with Apache Parquet specification for Python SDK. new 5b0e92f6914 Merge pull request #17279 from Shiv22Wabale/BEAM-14236 2db2f6e1fb3 is described below commit 2db2f6e1fb3895a7003467451b9e6332880ed40a Author: Luke Cwik AuthorDate: Mon Apr 4 14:32:41 2022 -0700 [BEAM-14236] Parquet IO support for list to conform with Apache Parquet specification for Python SDK. --- sdks/python/apache_beam/io/parquetio.py | 21 +- sdks/python/apache_beam/io/parquetio_test.py | 58 +++- 2 files changed, 77 insertions(+), 2 deletions(-) diff --git a/sdks/python/apache_beam/io/parquetio.py b/sdks/python/apache_beam/io/parquetio.py index 872140d5d7f..67edb832099 100644 --- a/sdks/python/apache_beam/io/parquetio.py +++ b/sdks/python/apache_beam/io/parquetio.py @@ -369,6 +369,7 @@ class WriteToParquet(PTransform): record_batch_size=1000, codec='none', use_deprecated_int96_timestamps=False, + use_compliant_nested_type=False, file_name_suffix='', num_shards=0, shard_name_template=None, @@ -428,6 +429,7 @@ class WriteToParquet(PTransform): by the pyarrow specification is accepted. use_deprecated_int96_timestamps: Write nanosecond resolution timestamps to INT96 Parquet format. Defaults to False. + use_compliant_nested_type: Write compliant Parquet nested type (lists). file_name_suffix: Suffix for the files written. num_shards: The number of files (shards) used for output. If not set, the service will decide on the optimal number of shards. 
@@ -456,6 +458,7 @@ class WriteToParquet(PTransform): row_group_buffer_size, record_batch_size, use_deprecated_int96_timestamps, + use_compliant_nested_type, file_name_suffix, num_shards, shard_name_template, @@ -476,6 +479,7 @@ def _create_parquet_sink( row_group_buffer_size, record_batch_size, use_deprecated_int96_timestamps, +use_compliant_nested_type, file_name_suffix, num_shards, shard_name_template, @@ -488,6 +492,7 @@ def _create_parquet_sink( row_group_buffer_size, record_batch_size, use_deprecated_int96_timestamps, +use_compliant_nested_type, file_name_suffix, num_shards, shard_name_template, @@ -505,6 +510,7 @@ class _ParquetSink(filebasedsink.FileBasedSink): row_group_buffer_size, record_batch_size, use_deprecated_int96_timestamps, + use_compliant_nested_type, file_name_suffix, num_shards, shard_name_template, @@ -528,6 +534,12 @@ class _ParquetSink(filebasedsink.FileBasedSink): f"codec. Your pyarrow version: {pa.__version__}") self._row_group_buffer_size = row_group_buffer_size self._use_deprecated_int96_timestamps = use_deprecated_int96_timestamps +if use_compliant_nested_type and ARROW_MAJOR_VERSION < 4: + raise ValueError( + "With ARROW-11497, use_compliant_nested_type is only supported in " + "pyarrow version >= 4.x, please use a different pyarrow version. 
" + f"Your pyarrow version: {pa.__version__}") +self._use_compliant_nested_type = use_compliant_nested_type self._buffer = [[] for _ in range(len(schema.names))] self._buffer_size = record_batch_size self._record_batches = [] @@ -536,11 +548,18 @@ class _ParquetSink(filebasedsink.FileBasedSink): def open(self, temp_path): self._file_handle = super().open(temp_path) +if ARROW_MAJOR_VERSION < 4: + return pq.ParquetWriter( + self._file_handle, + self._schema, + compression=self._codec, + use_deprecated_int96_timestamps=self._use_deprecated_int96_timestamps) return pq.ParquetWriter( self._file_handle, self._schema, compression=self._codec, -use_deprecated_int96_timestamps=self._use_deprecated_int96_timestamps) +use_deprecated_int96_timestamps=self._use_deprecated_int96_timestamps, +use_compliant_nested_type=self._use_compliant_nested_type) def write_record(self, writer, value): if len(self._buffer[0]) >= self._buffer_size: diff --git a/sdks/python/apache_beam/io/parquetio_test.py b/sdks/python/apache_beam/io/parquetio_test.py index 0232bac45e0..6f47a0a5558 100644 --- a/sdks/python/apache_beam/io/parquetio_test.py +++ b/sdks/python/apache_beam/io/parquetio_test.py @@ -57,6 +57,8 @@ except ImportError: pl = None pq = None +ARROW_MAJOR_VERSION, _, _ = map(int, pa.__version__.split('.')) + @unittest.skipIf(pa is None, "PyArrow is not installed.") @pytest.mark.