[beam] branch nightly-refs/heads/master updated (e9afde092c1 -> 64a92d73895)

2022-04-08 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a change to branch nightly-refs/heads/master
in repository https://gitbox.apache.org/repos/asf/beam.git


from e9afde092c1 Merge pull request #17268: [BEAM-10529] enable nullable for kafkaIO xlang
 add 4070ea50496 Update python beam-master container image. (#17313)
 add 2db2f6e1fb3 [BEAM-14236] Parquet IO support for list to conform with Apache Parquet specification for Python SDK.
 add 5b0e92f6914 Merge pull request #17279 from Shiv22Wabale/BEAM-14236
 add a1780922bec [BEAM-13767] Remove eclipse plugin as it generates a lot of unused tasks (#17106)
 add 1a22a935683 [BEAM-11714] Change spotBugs jenkins config
 add 8f7713cb06d [BEAM-11714] Add dummy class for testing
 add e607c81efb0 [BEAM-11714] Remove dummy class used for testing
 add 1d51c8f9d24 [BEAM-11714] Spotbugs print toJenkins UI precommit_Java17
 add 6ae6f310341 Merge pull request #17273 from benWize/BEAM-11714
 add 973752b07ad Nit: correct description for precommit cron jobs.
 add f7be5ae5a47 Merge pull request #17276 from ibzib/job-description
 add 70f9820e0ce [BEAM-10708] Updated beam_sql error message (#17314)
 add 2dc2ad455d1 [BEAM-14281] add as_deterministic_coder to nullable coder (#17322)
 add e7039b64644 Cleanup docs on Shared.
 add c1c4fb3bfaf Merge pull request #17272 Cleanup docs on Shared.
 add 2d27f44f581 Improvements to Beam/Spark quickstart. (#17129)
 add 18d41d22a84 Disable BigQueryIOStorageWriteIT for Runner v2 test suite
 add 64a92d73895 Merge pull request #17325: [BEAM-14263] Disable BigQueryIOStorageWriteIT for Runner v2 test suite

No new revisions were added by this update.

Summary of changes:
 .test-infra/jenkins/PrecommitJobBuilder.groovy     |  4 +-
 .test-infra/jenkins/job_PreCommit_Java.groovy      |  6 +--
 .test-infra/jenkins/job_PreCommit_SQL.groovy       |  6 +--
 .../jenkins/job_PreCommit_SQL_Java11.groovy        |  6 +--
 .../jenkins/job_PreCommit_SQL_Java17.groovy        |  6 +--
 .../org/apache/beam/gradle/BeamModulePlugin.groovy |  6 ---
 runners/google-cloud-dataflow-java/build.gradle    |  1 +
 sdks/python/apache_beam/coders/coders.py           |  8 +++
 sdks/python/apache_beam/coders/coders_test.py      | 16 ++
 sdks/python/apache_beam/io/parquetio.py            | 21 +++-
 sdks/python/apache_beam/io/parquetio_test.py       | 58 +-
 .../apache_beam/runners/dataflow/internal/names.py |  4 +-
 .../runners/interactive/sql/beam_sql_magics.py     | 14 --
 .../apache_beam/runners/interactive/sql/utils.py   |  3 +-
 .../runners/interactive/sql/utils_test.py          | 11
 sdks/python/apache_beam/utils/shared.py            | 31 +---
 .../www/site/content/en/get-started/from-spark.md  | 22 +---
 17 files changed, 162 insertions(+), 61 deletions(-)



svn commit: r53709 - /dev/beam/2.38.0/python/

2022-04-08 Thread danoliveira
Author: danoliveira
Date: Sat Apr  9 03:55:41 2022
New Revision: 53709

Log:
Staging Python artifacts for Apache Beam 2.38.0 RC1

Added:
dev/beam/2.38.0/python/
dev/beam/2.38.0/python/apache-beam-2.38.0.zip   (with props)
dev/beam/2.38.0/python/apache-beam-2.38.0.zip.asc
dev/beam/2.38.0/python/apache-beam-2.38.0.zip.sha512
dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-macosx_10_9_x86_64.whl   (with props)
dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-macosx_10_9_x86_64.whl.asc
dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-macosx_10_9_x86_64.whl.sha512
dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux1_i686.whl   (with props)
dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux1_i686.whl.asc
dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux1_i686.whl.sha512
dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux1_x86_64.whl   (with props)
dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux1_x86_64.whl.asc
dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux1_x86_64.whl.sha512
dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux2010_i686.whl   (with props)
dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux2010_i686.whl.asc
dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux2010_i686.whl.sha512
dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux2010_x86_64.whl   (with props)
dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux2010_x86_64.whl.asc
dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux2010_x86_64.whl.sha512
dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux2014_aarch64.whl   (with props)
dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux2014_aarch64.whl.asc
dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-manylinux2014_aarch64.whl.sha512
dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-win32.whl   (with props)
dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-win32.whl.asc
dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-win32.whl.sha512
dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-win_amd64.whl   (with props)
dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-win_amd64.whl.asc
dev/beam/2.38.0/python/apache_beam-2.38.0-cp36-cp36m-win_amd64.whl.sha512
dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-macosx_10_9_x86_64.whl   (with props)
dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-macosx_10_9_x86_64.whl.asc
dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-macosx_10_9_x86_64.whl.sha512
dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux1_i686.whl   (with props)
dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux1_i686.whl.asc
dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux1_i686.whl.sha512
dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux1_x86_64.whl   (with props)
dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux1_x86_64.whl.asc
dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux1_x86_64.whl.sha512
dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux2010_i686.whl   (with props)
dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux2010_i686.whl.asc
dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux2010_i686.whl.sha512
dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux2010_x86_64.whl   (with props)
dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux2010_x86_64.whl.asc
dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux2010_x86_64.whl.sha512
dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux2014_aarch64.whl   (with props)
dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux2014_aarch64.whl.asc
dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-manylinux2014_aarch64.whl.sha512
dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-win32.whl   (with props)
dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-win32.whl.asc
dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-win32.whl.sha512
dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-win_amd64.whl   (with props)
dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-win_amd64.whl.asc
dev/beam/2.38.0/python/apache_beam-2.38.0-cp37-cp37m-win_amd64.whl.sha512
dev/beam/2.38.0/python/apache_beam-2.38.0-cp38-cp38-macosx_10_9_x86_64.whl   (with props)
dev/beam/2.38.0/python/apache_beam-2.38.0-cp38-cp38-macosx_10_9_x86_64.whl.asc
dev/beam/2.38.0/python/apache_beam-2.38.0-cp38-cp38-macosx_10_9_x86_64.whl.sha512
dev/beam/2.38.0/python/apache_beam-2.38.0-cp38-cp38-manylinux1_i686.whl   (with props)
dev/beam/2.38.0/python/apache_beam-2.38.0-cp38-cp38-manylinux1_i686.whl.asc
dev/beam/2.38.0/python/apache_beam-2.38.0-c

svn commit: r53708 - in /dev/beam/2.38.0: ./ apache-beam-2.38.0-source-release.zip apache-beam-2.38.0-source-release.zip.asc apache-beam-2.38.0-source-release.zip.sha512

2022-04-08 Thread danoliveira
Author: danoliveira
Date: Sat Apr  9 03:51:38 2022
New Revision: 53708

Log:
Staging Java artifacts for Apache Beam 2.38.0 RC1

Added:
dev/beam/2.38.0/
dev/beam/2.38.0/apache-beam-2.38.0-source-release.zip   (with props)
dev/beam/2.38.0/apache-beam-2.38.0-source-release.zip.asc
dev/beam/2.38.0/apache-beam-2.38.0-source-release.zip.sha512

Added: dev/beam/2.38.0/apache-beam-2.38.0-source-release.zip
==================================================================
Binary file - no diff available.

Propchange: dev/beam/2.38.0/apache-beam-2.38.0-source-release.zip
--
svn:mime-type = application/octet-stream

Added: dev/beam/2.38.0/apache-beam-2.38.0-source-release.zip.asc
==================================================================
--- dev/beam/2.38.0/apache-beam-2.38.0-source-release.zip.asc (added)
+++ dev/beam/2.38.0/apache-beam-2.38.0-source-release.zip.asc Sat Apr  9 03:51:38 2022
@@ -0,0 +1,16 @@
+-BEGIN PGP SIGNATURE-
+
+iQIzBAABCgAdFiEE0Oe2nZEa2jwEgrqhxOay+McddC8FAmJRAo4ACgkQxOay+Mcd
+dC8CyQ//UHmas5ahuv8ZkvKCXJwSetDub4o47Lvum7JR8CiBIppH3UwK0UOjx8Ot
+CdzNS//zkOhay2UQeEW4DLNBpiJ16IDZ3IGTnWmp3RILtxOd03zutZBFhGYoNsrb
+DPEaz3vZrMnhhrw4QZd16cXCcH/vYyIU8sdmaJbKV7RFRaGYvfn4fVnzsZWXY5Qv
+oyToUr3ceyWCOp+lJrUvi1g6D48ZVqKODehqfHWLf1NXUDppnoHaVI7Kqr1JohEi
+KtYO9lw3zjWC21MexK3EQl1pi5nNhlY3NGKtRlgG6HrMOHLdc0XHo4QtIC3jjVzc
+sChHkwnlSgGN3XqhQgEsNjxH6BffEo2EjPIkGQDu2jg6D89ndu09pGTZrwartAI6
+Fp3501qTCK68JJcXAz3nj/p/Pz7HIkGGtsaWcKvGA0lbqfWEfK3ivnrzdiSJ5v99
+SkJce0WvDxXnalyF7iv9H8rFVKavU6bOEo0DFfnFh8mAORX2l103UAL8CdU7CZMD
+qQYp6aE1DoL7ksUxChc81IpmkdHI9YwfB9N6hIjncEmTi+deGSkYnBRsn5dRMjek
+7BtcNiLAkFkcOJUTZPxYsDgwbOS5OdhPHf3iVvuSFwxKN9TDbi772UrB6Mi1hrgh
+NvCrcSgMLiIFU4WZTrEpHJBVuv4N5xr5shRWIrmrG6CY0jzHrN4=
+=zLP4
+-END PGP SIGNATURE-

Added: dev/beam/2.38.0/apache-beam-2.38.0-source-release.zip.sha512
==================================================================
--- dev/beam/2.38.0/apache-beam-2.38.0-source-release.zip.sha512 (added)
+++ dev/beam/2.38.0/apache-beam-2.38.0-source-release.zip.sha512 Sat Apr  9 03:51:38 2022
@@ -0,0 +1 @@
+56d156e07955241cc86c591b1146a931db6281517991efe0654b32d86076fe1b9ff68bd56ca4ab442dc9b1a157f6430302b4bcc59280ef20325acb17c139720c  apache-beam-2.38.0-source-release.zip
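
For anyone cross-checking the staged files above, here is a minimal, illustrative sketch (not part of the Beam release tooling) that recomputes the SHA-512 of a downloaded artifact and compares it with the staged .sha512 file. The file name comes from the listing above; the local paths are assumptions.

import hashlib

ZIP_NAME = "apache-beam-2.38.0-source-release.zip"  # assumed to sit next to its .sha512 file

def sha512_of(path, chunk_size=1 << 20):
    """Stream the file so large artifacts do not need to fit in memory."""
    digest = hashlib.sha512()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# The .sha512 file holds "<hex digest>  <file name>", as shown above.
with open(ZIP_NAME + ".sha512") as f:
    expected = f.read().split()[0]

print("OK" if sha512_of(ZIP_NAME) == expected else "MISMATCH")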




[beam] branch master updated (2d27f44f581 -> 64a92d73895)

2022-04-08 Thread chamikara
This is an automated email from the ASF dual-hosted git repository.

chamikara pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from 2d27f44f581 Improvements to Beam/Spark quickstart. (#17129)
 add 18d41d22a84 Disable BigQueryIOStorageWriteIT for Runner v2 test suite
 add 64a92d73895 Merge pull request #17325: [BEAM-14263] Disable BigQueryIOStorageWriteIT for Runner v2 test suite

No new revisions were added by this update.

Summary of changes:
 runners/google-cloud-dataflow-java/build.gradle | 1 +
 1 file changed, 1 insertion(+)



[beam] branch pr-bot-state updated: Updating config from bot

2022-04-08 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch pr-bot-state
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/pr-bot-state by this push:
 new bc0ec5f9f48 Updating config from bot
bc0ec5f9f48 is described below

commit bc0ec5f9f483ee449f11da671fb6f92b8987eb2f
Author: github-actions 
AuthorDate: Fri Apr 8 23:04:31 2022 +

Updating config from bot
---
 scripts/ci/pr-bot/state/reviewers-for-label-go.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/ci/pr-bot/state/reviewers-for-label-go.json 
b/scripts/ci/pr-bot/state/reviewers-for-label-go.json
index 6a80cc72870..59ef2533a65 100644
--- a/scripts/ci/pr-bot/state/reviewers-for-label-go.json
+++ b/scripts/ci/pr-bot/state/reviewers-for-label-go.json
@@ -2,7 +2,7 @@
   "label": "go",
   "dateOfLastReviewAssignment": {
 "lostluck": 1649273609426,
-"jrmccluskey": 1649084918234,
+"jrmccluskey": 1649459069353,
 "youngoli": 1649271861723,
 "damccorm": 1649263365896,
 "riteshghorse": 1649354699193



[beam] branch pr-bot-state updated: Updating config from bot

2022-04-08 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch pr-bot-state
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/pr-bot-state by this push:
 new d19b53430dd Updating config from bot
d19b53430dd is described below

commit d19b53430dd14dbe23a9310d9b73501247f02eb1
Author: github-actions 
AuthorDate: Fri Apr 8 23:04:30 2022 +

Updating config from bot
---
 scripts/ci/pr-bot/state/pr-state/pr-17324.json | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/scripts/ci/pr-bot/state/pr-state/pr-17324.json 
b/scripts/ci/pr-bot/state/pr-state/pr-17324.json
new file mode 100644
index 000..1c2c2e31975
--- /dev/null
+++ b/scripts/ci/pr-bot/state/pr-state/pr-17324.json
@@ -0,0 +1,10 @@
+{
+  "commentedAboutFailingChecks": false,
+  "reviewersAssignedForLabels": {
+"go": "jrmccluskey"
+  },
+  "nextAction": "Reviewers",
+  "stopReviewerNotifications": false,
+  "remindAfterTestsPass": [],
+  "committerAssigned": false
+}
\ No newline at end of file



[beam] branch asf-site updated: Publishing website 2022/04/08 22:17:40 at commit 2d27f44

2022-04-08 Thread git-site-role
This is an automated email from the ASF dual-hosted git repository.

git-site-role pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/asf-site by this push:
 new 3a202e7e536 Publishing website 2022/04/08 22:17:40 at commit 2d27f44
3a202e7e536 is described below

commit 3a202e7e5364a61f3c1b9cf7b9f05acdc838de76
Author: jenkins 
AuthorDate: Fri Apr 8 22:17:41 2022 +

Publishing website 2022/04/08 22:17:40 at commit 2d27f44
---
 .../get-started/from-spark/index.html  | 26 +-
 website/generated-content/get-started/index.xml| 26 +-
 website/generated-content/sitemap.xml  |  2 +-
 3 files changed, 33 insertions(+), 21 deletions(-)

diff --git a/website/generated-content/get-started/from-spark/index.html 
b/website/generated-content/get-started/from-spark/index.html
index 14a5fadc834..659e1951f3a 100644
--- a/website/generated-content/get-started/from-spark/index.html
+++ b/website/generated-content/get-started/from-spark/index.html
@@ -19,8 +19,8 @@ function 
addPlaceholder(){$('input:text').attr('placeholder',"What are you looki
 function endSearch(){var 
search=document.querySelector(".searchBar");search.classList.add("disappear");var
 icons=document.querySelector("#iconsBar");icons.classList.remove("disappear");}
 function blockScroll(){$("body").toggleClass("fixedPosition");}
 function openMenu(){addPlaceholder();blockScroll();}Get 
startedBeam 
OverviewTour of 
BeamApache Beam is familiar.
-The Beam and Spark APIs are similar, so you already know the basic 
concepts.Spark stores data Spark DataFrames for structured data,
+using Beam should be easy.
+The basic concepts are the same, and the APIs are similar as well.Spark 
stores data Spark DataFrames for structured data,
 and in Resilient Distributed Datasets (RDD) for unstructured data.
 We are using RDDs for this guide.A Spark RDD represents a collection of 
elements,
 while in Beam it’s called a Parallel Collection (PCollection).
@@ -46,7 +46,8 @@ methods like data.map(...), but they’re 
doing the same thing.
 | beam.Map(print)
 )ℹ️ 
Note that we called print inside a Map transform.
 That’s because we can only access the elements of a PCollection
-from within a PTransform.Another thing to note is that 
Beam pipelines are constructed lazily.
+from within a PTransform.
+To inspect the data locally, you can use the https://cloud.google.com/dataflow/docs/guides/interactive-pipeline-development#creating_your_pipeline>InteractiveRunnerAnother
 thing to note is that Beam pipelines are constructed lazily.
 This means that when you pipe | data you’re only declaring 
the
 transformations and the order you want them to happen,
 but the actual computation doesn’t happen.
@@ -74,10 +75,11 @@ we can’t guarantee that the results we’ve 
calculated are available a
 
 sc = pyspark.SparkContext()
 values = sc.parallelize([1, 2, 3, 4])
-total = values.reduce(lambda x, y: x + y)
+min_value = values.reduce(min)
+max_value = values.reduce(max)
 
-# We can simply use `total` since it's already a Python 
`int` value from `reduce`.
-scaled_values = values.map(lambda x: x / total)
+# We can simply use `min_value` and `max_value` since it's 
already a Python `int` value from `reduce`.
+scaled_values = values.map(lambda x: (x - min_value) / (max_value - min_value))
 
 # But to access `scaled_values`, we need to call 
`collect`.
 print(scaled_values.collect())In Beam the results from 
all transforms result in a PCollection.
@@ -93,15 +95,19 @@ and access them as an https://docs.python.org/3/glossary.html#term-itera
 
 with beam.Pipeline() as pipeline:
 values = pipeline | beam.Create([1, 2, 3, 4])
-total = values | beam.CombineGlobally(sum)
+min_value = values | beam.CombineGlobally(min)
+max_value = values | beam.CombineGlobally(max)
 
 # To access `total`, we need to pass it as a side 
input.
 scaled_values = values | beam.Map(
-lambda x, total: x / total,
-total=beam.pvalue.AsSingleton(total))
+lambda x, min_value, max_value: x / lambda x: (x - min_value) / (max_va [...]
+min_value =beam.pvalue.AsSingleton(min_value),
+max_value =beam.pvalue.AsSingleton(max_value))
 
 scaled_values | beam.Map(print)ℹ️ In Beam we 
need to pass a side input explicitly, but we get the
-benefit that a reduction or aggregation does not have to fit into 
memory.Next StepsTake a look at 
all the available transforms in the Python transform 
gallery.Learn how to read from and write to files in the Pipeline I/O 
section of the Programming guideWalk through additional 
WordCount  [...]
+benefit that a reduction or aggregation does not have to fit into 
memory.
+Lazily computing side inputs also allows us to compute values 
only once,
+rather than for each distinct reduction (or requiring explicit caching of the 
RDD).Next StepsTake a look at 
all the available
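
The generated-HTML diff above is hard to read in plain text. As a hedged reconstruction of the Beam snippet the updated page documents (pieced together from the visible fragments; the lambda parameter names lo and hi are mine), the min/max scaling with side inputs looks roughly like this:

import apache_beam as beam

with beam.Pipeline() as pipeline:
    values = pipeline | beam.Create([1, 2, 3, 4])

    # Each CombineGlobally produces a single-element PCollection.
    min_value = values | 'Min' >> beam.CombineGlobally(min)
    max_value = values | 'Max' >> beam.CombineGlobally(max)

    # To use those results inside Map they are passed as side inputs, which is
    # the point the page makes about lazy, memory-friendly reductions.
    scaled_values = values | beam.Map(
        lambda x, lo, hi: (x - lo) / (hi - lo),
        lo=beam.pvalue.AsSingleton(min_value),
        hi=beam.pvalue.AsSingleton(max_value))

    scaled_values | beam.Map(print)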

[beam] branch master updated (c1c4fb3bfaf -> 2d27f44f581)

2022-04-08 Thread robertwb
This is an automated email from the ASF dual-hosted git repository.

robertwb pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from c1c4fb3bfaf Merge pull request #17272 Cleanup docs on Shared.
 add 2d27f44f581 Improvements to Beam/Spark quickstart. (#17129)

No new revisions were added by this update.

Summary of changes:
 .../www/site/content/en/get-started/from-spark.md  | 22 ++
 1 file changed, 14 insertions(+), 8 deletions(-)



[beam] branch master updated (2dc2ad455d1 -> c1c4fb3bfaf)

2022-04-08 Thread robertwb
This is an automated email from the ASF dual-hosted git repository.

robertwb pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from 2dc2ad455d1 [BEAM-14281] add as_deterministic_coder to nullable coder (#17322)
 add e7039b64644 Cleanup docs on Shared.
 add c1c4fb3bfaf Merge pull request #17272 Cleanup docs on Shared.

No new revisions were added by this update.

Summary of changes:
 sdks/python/apache_beam/utils/shared.py | 31 ---
 1 file changed, 12 insertions(+), 19 deletions(-)



[beam] branch master updated: [BEAM-14281] add as_deterministic_coder to nullable coder (#17322)

2022-04-08 Thread bhulette
This is an automated email from the ASF dual-hosted git repository.

bhulette pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 2dc2ad455d1 [BEAM-14281] add as_deterministic_coder to nullable coder (#17322)
2dc2ad455d1 is described below

commit 2dc2ad455d1a5a6877da851e05ad90d67c084871
Author: johnjcasey <95318300+johnjca...@users.noreply.github.com>
AuthorDate: Fri Apr 8 16:46:58 2022 -0400

[BEAM-14281] add as_deterministic_coder to nullable coder (#17322)

* [BEAM-14281] add as_deterministic_coder to nullable coder

* Update coders_test.py

Co-authored-by: Robert Bradshaw 
---
 sdks/python/apache_beam/coders/coders.py  |  8 
 sdks/python/apache_beam/coders/coders_test.py | 16 
 2 files changed, 24 insertions(+)

diff --git a/sdks/python/apache_beam/coders/coders.py 
b/sdks/python/apache_beam/coders/coders.py
index fce397df626..19463443386 100644
--- a/sdks/python/apache_beam/coders/coders.py
+++ b/sdks/python/apache_beam/coders/coders.py
@@ -627,6 +627,14 @@ class NullableCoder(FastCoder):
 # type: () -> bool
 return self._value_coder.is_deterministic()
 
+  def as_deterministic_coder(self, step_label, error_message=None):
+if self.is_deterministic():
+  return self
+else:
+  deterministic_value_coder = self._value_coder.as_deterministic_coder(
+  step_label, error_message)
+  return NullableCoder(deterministic_value_coder)
+
   def __eq__(self, other):
 return (
 type(self) == type(other) and self._value_coder == other._value_coder)
diff --git a/sdks/python/apache_beam/coders/coders_test.py 
b/sdks/python/apache_beam/coders/coders_test.py
index 49ad33202f0..0a30a320e90 100644
--- a/sdks/python/apache_beam/coders/coders_test.py
+++ b/sdks/python/apache_beam/coders/coders_test.py
@@ -21,7 +21,9 @@ import logging
 import unittest
 
 import proto
+import pytest
 
+from apache_beam import typehints
 from apache_beam.coders import proto2_coder_test_messages_pb2 as test_message
 from apache_beam.coders import coders
 from apache_beam.coders.avro_record import AvroRecord
@@ -220,6 +222,20 @@ class FallbackCoderTest(unittest.TestCase):
 self.assertEqual(DummyClass(), coder.decode(coder.encode(DummyClass(
 
 
+class NullableCoderTest(unittest.TestCase):
+  def test_determinism(self):
+deterministic = coders_registry.get_coder(typehints.Optional[int])
+deterministic.as_deterministic_coder('label')
+
+complex_deterministic = coders_registry.get_coder(
+typehints.Optional[DummyClass])
+complex_deterministic.as_deterministic_coder('label')
+
+nondeterministic = coders.NullableCoder(coders.Base64PickleCoder())
+with pytest.raises(ValueError):
+  nondeterministic.as_deterministic_coder('label')
+
+
 if __name__ == '__main__':
   logging.getLogger().setLevel(logging.INFO)
   unittest.main()
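
A quick, hedged illustration of the behaviour the patch above adds (mirroring the new test rather than any Beam documentation): NullableCoder.as_deterministic_coder returns the coder itself when the wrapped value coder is already deterministic, otherwise tries to convert the wrapped coder, and still raises ValueError when no deterministic form exists.

from apache_beam import typehints
from apache_beam.coders import coders
from apache_beam.coders.typecoders import registry

# Optional[int] resolves to a NullableCoder around a deterministic coder,
# so the conversion is effectively a no-op.
coder = registry.get_coder(typehints.Optional[int])
print(coder.as_deterministic_coder('my-step'))

# A NullableCoder around a coder with no deterministic form still raises.
try:
    coders.NullableCoder(coders.Base64PickleCoder()).as_deterministic_coder('my-step')
except ValueError as err:
    print('still not deterministic:', err)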



[beam] branch master updated (f7be5ae5a47 -> 70f9820e0ce)

2022-04-08 Thread ningk
This is an automated email from the ASF dual-hosted git repository.

ningk pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from f7be5ae5a47 Merge pull request #17276 from ibzib/job-description
 add 70f9820e0ce [BEAM-10708] Updated beam_sql error message (#17314)

No new revisions were added by this update.

Summary of changes:
 .../apache_beam/runners/interactive/sql/beam_sql_magics.py | 14 +-
 sdks/python/apache_beam/runners/interactive/sql/utils.py   |  3 ++-
 .../apache_beam/runners/interactive/sql/utils_test.py  | 11 +++
 3 files changed, 22 insertions(+), 6 deletions(-)



[beam] branch master updated: Nit: correct description for precommit cron jobs.

2022-04-08 Thread ibzib
This is an automated email from the ASF dual-hosted git repository.

ibzib pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 973752b07ad Nit: correct description for precommit cron jobs.
 new f7be5ae5a47 Merge pull request #17276 from ibzib/job-description
973752b07ad is described below

commit 973752b07ad95847375b1731289d08c73ed20047
Author: Kyle Weaver 
AuthorDate: Mon Apr 4 16:39:10 2022 -0700

Nit: correct description for precommit cron jobs.

The default setting runs each job every 6 hours.
---
 .test-infra/jenkins/PrecommitJobBuilder.groovy | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.test-infra/jenkins/PrecommitJobBuilder.groovy 
b/.test-infra/jenkins/PrecommitJobBuilder.groovy
index df2ef22d193..5a0df8eae89 100644
--- a/.test-infra/jenkins/PrecommitJobBuilder.groovy
+++ b/.test-infra/jenkins/PrecommitJobBuilder.groovy
@@ -57,11 +57,11 @@ class PrecommitJobBuilder {
 definePhraseJob additionalCustomization
   }
 
-  /** Create a pre-commit job which runs on a daily schedule. */
+  /** Create a pre-commit job which runs on a regular schedule. */
   private void defineCronJob(Closure additionalCustomization) {
 def job = createBaseJob 'Cron'
 job.with {
-  description buildDescription('on a daily schedule.')
+  description buildDescription('on a regular schedule.')
   commonJobProperties.setAutoJob delegate
 }
 job.with additionalCustomization



[beam] branch master updated (a1780922bec -> 6ae6f310341)

2022-04-08 Thread kileysok
This is an automated email from the ASF dual-hosted git repository.

kileysok pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


from a1780922bec [BEAM-13767] Remove eclipse plugin as it generates a lot of unused tasks (#17106)
 new 1a22a935683 [BEAM-11714] Change spotBugs jenkins config
 new 8f7713cb06d [BEAM-11714] Add dummy class for testing
 new e607c81efb0 [BEAM-11714] Remove dummy class used for testing
 new 1d51c8f9d24 [BEAM-11714] Spotbugs print toJenkins UI precommit_Java17
 new 6ae6f310341 Merge pull request #17273 from benWize/BEAM-11714

The 35278 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .test-infra/jenkins/job_PreCommit_Java.groovy   | 6 ++
 .test-infra/jenkins/job_PreCommit_SQL.groovy| 6 ++
 .test-infra/jenkins/job_PreCommit_SQL_Java11.groovy | 6 ++
 .test-infra/jenkins/job_PreCommit_SQL_Java17.groovy | 6 ++
 4 files changed, 8 insertions(+), 16 deletions(-)



[beam] branch master updated: [BEAM-13767] Remove eclipse plugin as it generates a lot of unused tasks (#17106)

2022-04-08 Thread yichi
This is an automated email from the ASF dual-hosted git repository.

yichi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new a1780922bec [BEAM-13767] Remove eclipse plugin as it generates a lot of unused tasks (#17106)
a1780922bec is described below

commit a1780922becdaaf10859351aded1f7267c4dec70
Author: Yichi Zhang 
AuthorDate: Fri Apr 8 09:47:45 2022 -0700

[BEAM-13767] Remove eclipse plugin as it generates a lot of unused tasks (#17106)
---
 .../src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy  | 6 --
 1 file changed, 6 deletions(-)

diff --git 
a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy 
b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
index cc1284af283..797d52f181a 100644
--- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
+++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
@@ -1066,12 +1066,6 @@ class BeamModulePlugin implements Plugin {
   }
   project.check.dependsOn project.javadoc
 
-  // Apply the eclipse plugins.  This adds the "eclipse" task and
-  // connects the apt-eclipse plugin to update the eclipse project files
-  // with the instructions needed to run apt within eclipse to handle the 
AutoValue
-  // and additional annotations
-  project.apply plugin: 'eclipse'
-
   // Enables a plugin which can apply code formatting to source.
   project.apply plugin: "com.diffplug.spotless"
   // scan CVE



[beam] branch master updated: [BEAM-14236] Parquet IO support for list to conform with Apache Parquet specification for Python SDK.

2022-04-08 Thread heejong
This is an automated email from the ASF dual-hosted git repository.

heejong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
 new 2db2f6e1fb3 [BEAM-14236] Parquet IO support for list to conform with Apache Parquet specification for Python SDK.
 new 5b0e92f6914 Merge pull request #17279 from Shiv22Wabale/BEAM-14236
2db2f6e1fb3 is described below

commit 2db2f6e1fb3895a7003467451b9e6332880ed40a
Author: Luke Cwik 
AuthorDate: Mon Apr 4 14:32:41 2022 -0700

[BEAM-14236] Parquet IO support for list to conform with Apache Parquet specification for Python SDK.
---
 sdks/python/apache_beam/io/parquetio.py  | 21 +-
 sdks/python/apache_beam/io/parquetio_test.py | 58 +++-
 2 files changed, 77 insertions(+), 2 deletions(-)

diff --git a/sdks/python/apache_beam/io/parquetio.py 
b/sdks/python/apache_beam/io/parquetio.py
index 872140d5d7f..67edb832099 100644
--- a/sdks/python/apache_beam/io/parquetio.py
+++ b/sdks/python/apache_beam/io/parquetio.py
@@ -369,6 +369,7 @@ class WriteToParquet(PTransform):
   record_batch_size=1000,
   codec='none',
   use_deprecated_int96_timestamps=False,
+  use_compliant_nested_type=False,
   file_name_suffix='',
   num_shards=0,
   shard_name_template=None,
@@ -428,6 +429,7 @@ class WriteToParquet(PTransform):
 by the pyarrow specification is accepted.
   use_deprecated_int96_timestamps: Write nanosecond resolution timestamps 
to
 INT96 Parquet format. Defaults to False.
+  use_compliant_nested_type: Write compliant Parquet nested type (lists).
   file_name_suffix: Suffix for the files written.
   num_shards: The number of files (shards) used for output. If not set, the
 service will decide on the optimal number of shards.
@@ -456,6 +458,7 @@ class WriteToParquet(PTransform):
   row_group_buffer_size,
   record_batch_size,
   use_deprecated_int96_timestamps,
+  use_compliant_nested_type,
   file_name_suffix,
   num_shards,
   shard_name_template,
@@ -476,6 +479,7 @@ def _create_parquet_sink(
 row_group_buffer_size,
 record_batch_size,
 use_deprecated_int96_timestamps,
+use_compliant_nested_type,
 file_name_suffix,
 num_shards,
 shard_name_template,
@@ -488,6 +492,7 @@ def _create_parquet_sink(
 row_group_buffer_size,
 record_batch_size,
 use_deprecated_int96_timestamps,
+use_compliant_nested_type,
 file_name_suffix,
 num_shards,
 shard_name_template,
@@ -505,6 +510,7 @@ class _ParquetSink(filebasedsink.FileBasedSink):
   row_group_buffer_size,
   record_batch_size,
   use_deprecated_int96_timestamps,
+  use_compliant_nested_type,
   file_name_suffix,
   num_shards,
   shard_name_template,
@@ -528,6 +534,12 @@ class _ParquetSink(filebasedsink.FileBasedSink):
   f"codec. Your pyarrow version: {pa.__version__}")
 self._row_group_buffer_size = row_group_buffer_size
 self._use_deprecated_int96_timestamps = use_deprecated_int96_timestamps
+if use_compliant_nested_type and ARROW_MAJOR_VERSION < 4:
+  raise ValueError(
+  "With ARROW-11497, use_compliant_nested_type is only supported in "
+  "pyarrow version >= 4.x, please use a different pyarrow version. "
+  f"Your pyarrow version: {pa.__version__}")
+self._use_compliant_nested_type = use_compliant_nested_type
 self._buffer = [[] for _ in range(len(schema.names))]
 self._buffer_size = record_batch_size
 self._record_batches = []
@@ -536,11 +548,18 @@ class _ParquetSink(filebasedsink.FileBasedSink):
 
   def open(self, temp_path):
 self._file_handle = super().open(temp_path)
+if ARROW_MAJOR_VERSION < 4:
+  return pq.ParquetWriter(
+  self._file_handle,
+  self._schema,
+  compression=self._codec,
+  
use_deprecated_int96_timestamps=self._use_deprecated_int96_timestamps)
 return pq.ParquetWriter(
 self._file_handle,
 self._schema,
 compression=self._codec,
-use_deprecated_int96_timestamps=self._use_deprecated_int96_timestamps)
+use_deprecated_int96_timestamps=self._use_deprecated_int96_timestamps,
+use_compliant_nested_type=self._use_compliant_nested_type)
 
   def write_record(self, writer, value):
 if len(self._buffer[0]) >= self._buffer_size:
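
For context, a hedged usage sketch of the new flag added above. The schema, element values, and output prefix are made up for illustration, and, as the version check above enforces, use_compliant_nested_type needs pyarrow >= 4.

import pyarrow as pa
import apache_beam as beam
from apache_beam.io.parquetio import WriteToParquet

# Illustrative schema containing a list column, the case the compliant
# nested-type layout is about.
schema = pa.schema([('name', pa.string()), ('scores', pa.list_(pa.int64()))])

with beam.Pipeline() as p:
    _ = (
        p
        | beam.Create([{'name': 'a', 'scores': [1, 2]},
                       {'name': 'b', 'scores': [3]}])
        | WriteToParquet(
            '/tmp/beam-out/records',          # hypothetical output prefix
            schema,
            use_compliant_nested_type=True))  # new in this change
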
diff --git a/sdks/python/apache_beam/io/parquetio_test.py 
b/sdks/python/apache_beam/io/parquetio_test.py
index 0232bac45e0..6f47a0a5558 100644
--- a/sdks/python/apache_beam/io/parquetio_test.py
+++ b/sdks/python/apache_beam/io/parquetio_test.py
@@ -57,6 +57,8 @@ except ImportError:
   pl = None
   pq = None
 
+ARROW_MAJOR_VERSION, _, _ = map(int, pa.__version__.split('.'))
+
 
 @unittest.skipIf(pa is None, "PyArrow is not installed.")
 @pytest.mark.