[beam] branch master updated: Add test for timestamp from date in default timezone
This is an automated email from the ASF dual-hosted git repository. robinyqiu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new 4eee2dc Add test for timestamp from date in default timezone new 1aa4fb3 Merge pull request #12154 from robinyqiu/timezone 4eee2dc is described below commit 4eee2dc58d771ba16a826939278b935716e69553 Author: Yueyang Qiu AuthorDate: Wed Jul 1 12:21:26 2020 -0700 Add test for timestamp from date in default timezone --- .../sql/zetasql/ZetaSQLDialectSpecTest.java| 116 - 1 file changed, 66 insertions(+), 50 deletions(-) diff --git a/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSQLDialectSpecTest.java b/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSQLDialectSpecTest.java index 4d47043..532103e 100644 --- a/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSQLDialectSpecTest.java +++ b/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSQLDialectSpecTest.java @@ -2793,56 +2793,6 @@ public class ZetaSQLDialectSpecTest extends ZetaSQLTestBase { } @Test - public void testSelectNullIntersectDistinct() { -String sql = "SELECT NULL INTERSECT DISTINCT SELECT 2"; - -ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config); -BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql); -PCollection stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode); -System.err.println("SCHEMA " + stream.getSchema()); - -PAssert.that(stream).empty(); - pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES)); - } - - @Test - public void testSelectNullIntersectAll() { -String sql = "SELECT NULL INTERSECT ALL SELECT 2"; - -ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config); -BeamRelNode 
beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql); -PCollection stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode); -System.err.println("SCHEMA " + stream.getSchema()); - -PAssert.that(stream).empty(); - pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES)); - } - - @Test - public void testSelectNullExceptDistinct() { -String sql = "SELECT NULL EXCEPT DISTINCT SELECT 2"; - -ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config); -BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql); -PCollection stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode); - -PAssert.that(stream).containsInAnyOrder(Row.nullRow(stream.getSchema())); - pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES)); - } - - @Test - public void testSelectNullExceptAll() { -String sql = "SELECT NULL EXCEPT ALL SELECT 2"; - -ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config); -BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql); -PCollection stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode); - -PAssert.that(stream).containsInAnyOrder(Row.nullRow(stream.getSchema())); - pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES)); - } - - @Test public void testMultipleSelectStatementsThrowsException() { String sql = "SELECT 1; SELECT 2;"; ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config); @@ -3087,6 +3037,22 @@ public class ZetaSQLDialectSpecTest extends ZetaSQLTestBase { } @Test + public void testTimestampFromDate() { +String sql = "SELECT TIMESTAMP(DATE '2014-01-31')"; + +ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config); +BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql); +PCollection stream = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode); + +PAssert.that(stream) +.containsInAnyOrder( + 
Row.withSchema(Schema.builder().addDateTimeField("f_timestamp").build()) +.addValues(parseTimestampWithTimeZone("2014-01-31 00:00:00+00")) +.build()); + pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES)); + } + + @Test public void testTimestampAdd() { String sql = "SELECT " @@ -3850,6 +3816,56 @@ public class ZetaSQLDialectSpecTest extends ZetaSQLTestBase { } @Test + public void testSelectNullIntersectDistinct() { +String sql = "SELECT NULL INTERSECT DISTINCT SELECT 2"; + +ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config); +BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql); +PCollection stream =
[beam] tag nightly-master updated (23d0c47 -> 0fb562e)
This is an automated email from the ASF dual-hosted git repository. github-bot pushed a change to tag nightly-master in repository https://gitbox.apache.org/repos/asf/beam.git. *** WARNING: tag nightly-master was modified! *** from 23d0c47 (commit) to 0fb562e (commit) from 23d0c47 Remove misleading unsupported features mention for Python streaming (#12140) add 09258f2 [BEAM-10321] retain environments in flatten for preventing it from being fused into stages running in foreign language SDKs add 1805c66 Merge pull request #12087 from [BEAM-10321] retain environments in flatten for preventing it from be… add c982a1c Bug fix: non 200 response streams are read twice which cause "Cannot read from closed stream" errors. add e183ea6 Merge pull request #12103 from lastomato/fix_api_client: Bug fix: non 200 response streams are read twice which cause "Cannot … add ac0f4f0 MetricFilter matching by Metric objects. add 407b014 Merge pull request #12123 from rainwoodman/query-by-metric: MetricFilter matching by Metric objects. add 1817402 Update ZetaSQL to 2020.06.1 release. add 4863968 Merge pull request #12138 from ibzib/zetasql-release add f1eebfa [BEAM-10291] Adding full thread dump upon lull detection (#12047) add 592ab72 [BEAM-10322] allow only single assignment to producing stages by pcollection map add 709c2c5 Merge pull request #12086 from [BEAM-10322] allow only single assignment to producing stages by pcol… add 78be15f Add nexmark option to allow cancel streaming query job after complete add ea928a2 Merge pull request #12119 from y1chi/nexmark add 46276b5 Change GroupIntoBatches to group for real add 8f86cec Merge pull request #12129 from aaltay/gib add a4f3019 [BEAM-10391] Add attribute map null check in dataflow worker (#12136) add 68a3a73 [BEAM-10327] Create a pattern that shows use of Schema using Joins (#12097) add a5c32f3 [BEAM-9953] [ZetaSQL] Implement CREATE FUNCTION and scalar UDF. (#12153) add e5ee374 [BEAM-9754] Add Py 3.8 support to Dataflow runner. 
add 190b69d Print a warning when users install version of Beam that has not been sufficiently tested. add d6fcc1e Merge pull request #12156 from tvalentyn/py38_support add 02b184e Moving to 2.24.0-SNAPSHOT on master branch. add ea4aa6b [BEAM-10318] fix uninitialized grpc_server in FnApiRunner add 05fea6b Merge pull request #12085 from [BEAM-10318] fix uninitialized grpc_server in FnApiRunner add ea6aa88 Add Branching to Go SDK katas (#12113) add 0fb562e Fix typo (#12160) No new revisions were added by this update. Summary of changes: CHANGES.md | 6 +- .../org/apache/beam/gradle/BeamModulePlugin.groovy | 2 +- .../apache/beam/examples/snippets/Snippets.java| 70 .../beam/examples/snippets/SnippetsTest.java | 71 gradle.properties | 4 +- .../Flatten => Branching/Branching}/cmd/main.go| 12 +- .../Branching}/go.mod | 4 +- .../Branching}/go.sum | 21 +--- .../Branching}/pkg/task/task.go| 29 +++-- .../Branching}/task-info.yaml | 24 ++-- .../Branching/Branching/task-remote-info.yaml | 2 + .../Core Transforms/Branching/Branching/task.md| 16 ++- .../Branching}/test/task_test.go | 18 +-- .../Core Transforms/Branching/lesson-info.yaml | 0 .../Branching/lesson-remote-info.yaml | 3 + .../katas/go/Core Transforms/section-info.yaml | 1 + learning/katas/go/course-remote-info.yaml | 2 +- .../beam/runners/dataflow/worker/PubsubSink.java | 12 +- .../sql/meta/provider/FullNameTableProvider.java | 2 +- sdks/java/extensions/sql/zetasql/build.gradle | 2 +- .../sdk/extensions/sql/zetasql/SqlAnalyzer.java| 68 ++- .../extensions/sql/zetasql/ZetaSQLPlannerImpl.java | 59 +++--- .../translation/AggregateScanConverter.java| 7 +- .../translation/ArrayScanColumnRefToUncollect.java | 4 +- .../zetasql/translation/ExpressionConverter.java | 94 .../zetasql/translation/FilterScanConverter.java | 4 +- .../sql/zetasql/translation/JoinScanConverter.java | 5 +- .../LimitOffsetScanToLimitConverter.java | 6 +- .../LimitOffsetScanToOrderByLimitConverter.java| 6 +- .../sql/zetasql/ZetaSQLDialectSpecTest.java| 125 
+ .../io/gcp/healthcare/HttpHealthcareApiClient.java | 22 ++-- .../beam/sdk/io/gcp/pubsub/PubsubMessage.java | 10 +- .../apache/beam/sdk/nexmark/NexmarkLauncher.java | 3 + .../apache/beam/sdk/nexmark/NexmarkOptions.java| 6 + sdks/python/apache_beam/__init__.py| 9 +- .../aggregation/groupintobatches_test.py | 12 +-
[beam] branch master updated (ea6aa88 -> 0fb562e)
This is an automated email from the ASF dual-hosted git repository. bhulette pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from ea6aa88 Add Branching to Go SDK katas (#12113) add 0fb562e Fix typo (#12160) No new revisions were added by this update. Summary of changes: CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
[beam] branch lukecwik-fix-typo created (now 7d2cf5c)
This is an automated email from the ASF dual-hosted git repository. lcwik pushed a change to branch lukecwik-fix-typo in repository https://gitbox.apache.org/repos/asf/beam.git. at 7d2cf5c Fix typo This branch includes the following new commits: new 7d2cf5c Fix typo The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference.
[beam] 01/01: Fix typo
This is an automated email from the ASF dual-hosted git repository. lcwik pushed a commit to branch lukecwik-fix-typo in repository https://gitbox.apache.org/repos/asf/beam.git commit 7d2cf5c370f51321cb79a1e384d979e0755dc416 Author: Lukasz Cwik AuthorDate: Wed Jul 1 17:45:31 2020 -0700 Fix typo --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 7586734..4cbc90f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -76,7 +76,7 @@ Previously nulls could only be represented with explicit null values, as in `{"foo": "bar", "baz": null}`, whereas an implicit null like `{"foo": "bar"}` would raise an exception. Now both JSON strings will yield the same result by default. This behavior can be - overriden with `RowJson.RowJsonDeserializer#withNullBehavior`. + overridden with `RowJson.RowJsonDeserializer#withNullBehavior`. * Fixed a bug in `GroupIntoBatches` experimental transform in Python to actually group batches by key. This changes the output type for this transform ([BEAM-6696](https://issues.apache.org/jira/browse/BEAM-6696)). * X behavior was changed ([BEAM-X](https://issues.apache.org/jira/browse/BEAM-X)).
[beam] branch master updated (05fea6b -> ea6aa88)
This is an automated email from the ASF dual-hosted git repository. lostluck pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from 05fea6b Merge pull request #12085 from [BEAM-10318] fix uninitialized grpc_server in FnApiRunner add ea6aa88 Add Branching to Go SDK katas (#12113) No new revisions were added by this update. Summary of changes: .../Flatten => Branching/Branching}/cmd/main.go| 12 - .../Branching}/go.mod | 4 +-- .../Branching}/go.sum | 21 ++-- .../Branching}/pkg/task/task.go| 29 -- .../Branching}/task-info.yaml | 24 +- .../Branching/Branching/task-remote-info.yaml | 2 ++ .../Core Transforms/Branching/Branching/task.md| 16 +++- .../Branching}/test/task_test.go | 18 -- .../Core Transforms/Branching/lesson-info.yaml | 0 .../Branching/lesson-remote-info.yaml | 3 +++ .../katas/go/Core Transforms/section-info.yaml | 1 + learning/katas/go/course-remote-info.yaml | 2 +- 12 files changed, 71 insertions(+), 61 deletions(-) copy learning/katas/go/Core Transforms/{Flatten/Flatten => Branching/Branching}/cmd/main.go (83%) copy learning/katas/go/Core Transforms/{Additional Outputs/Additional Outputs => Branching/Branching}/go.mod (87%) copy learning/katas/go/Core Transforms/{Additional Outputs/Additional Outputs => Branching/Branching}/go.sum (95%) copy learning/katas/go/Core Transforms/{Additional Outputs/Additional Outputs => Branching/Branching}/pkg/task/task.go (64%) copy learning/katas/go/Core Transforms/{Combine/CombineFn => Branching/Branching}/task-info.yaml (87%) create mode 100644 learning/katas/go/Core Transforms/Branching/Branching/task-remote-info.yaml copy learning/katas/{java => go}/Core Transforms/Branching/Branching/task.md (64%) copy learning/katas/go/Core Transforms/{Map/ParDo => Branching/Branching}/test/task_test.go (67%) copy learning/katas/{python => go}/Core Transforms/Branching/lesson-info.yaml (100%) create mode 100644 learning/katas/go/Core Transforms/Branching/lesson-remote-info.yaml
[beam] branch master updated: [BEAM-10318] fix uninitialized grpc_server in FnApiRunner
This is an automated email from the ASF dual-hosted git repository. pabloem pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new ea4aa6b [BEAM-10318] fix uninitialized grpc_server in FnApiRunner new 05fea6b Merge pull request #12085 from [BEAM-10318] fix uninitialized grpc_server in FnApiRunner ea4aa6b is described below commit ea4aa6b64d88f59ba299605d5ea296d32b1873a4 Author: Heejong Lee AuthorDate: Wed Jun 24 18:25:48 2020 -0700 [BEAM-10318] fix uninitialized grpc_server in FnApiRunner --- .../apache_beam/runners/portability/fn_api_runner/worker_handlers.py| 2 ++ 1 file changed, 2 insertions(+) diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/worker_handlers.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/worker_handlers.py index 5bf3282..9d27549 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/worker_handlers.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/worker_handlers.py @@ -824,6 +824,8 @@ class WorkerHandlerManager(object): self._grpc_server = GrpcServer( self.state_servicer, self._job_provision_info, self) grpc_server = self._grpc_server +else: + grpc_server = self._grpc_server worker_handler_list = self._cached_handlers[environment_id] if len(worker_handler_list) < num_workers:
[beam] branch release-2.23.0 created (now 7a8ec98)
This is an automated email from the ASF dual-hosted git repository. tvalentyn pushed a change to branch release-2.23.0 in repository https://gitbox.apache.org/repos/asf/beam.git. at 7a8ec98 Create release branch for version 2.23.0. This branch includes the following new commits: new 7a8ec98 Create release branch for version 2.23.0. The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference.
[beam] 01/01: Create release branch for version 2.23.0.
This is an automated email from the ASF dual-hosted git repository. tvalentyn pushed a commit to branch release-2.23.0 in repository https://gitbox.apache.org/repos/asf/beam.git commit 7a8ec98715265f04c88972c3880b35d9eca646b6 Author: Valentyn Tymofieiev AuthorDate: Wed Jul 1 16:42:06 2020 -0700 Create release branch for version 2.23.0. --- gradle.properties | 2 +- runners/google-cloud-dataflow-java/build.gradle | 2 +- sdks/python/apache_beam/version.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gradle.properties b/gradle.properties index 22f2c25..2a27030 100644 --- a/gradle.properties +++ b/gradle.properties @@ -24,7 +24,7 @@ signing.gnupg.executable=gpg signing.gnupg.useLegacyGpg=true version=2.23.0-SNAPSHOT -sdk_version=2.23.0.dev +sdk_version=2.23.0 javaVersion=1.8 diff --git a/runners/google-cloud-dataflow-java/build.gradle b/runners/google-cloud-dataflow-java/build.gradle index afb3071..41d111d 100644 --- a/runners/google-cloud-dataflow-java/build.gradle +++ b/runners/google-cloud-dataflow-java/build.gradle @@ -39,7 +39,7 @@ processResources { filter org.apache.tools.ant.filters.ReplaceTokens, tokens: [ 'dataflow.legacy_environment_major_version' : '8', 'dataflow.fnapi_environment_major_version' : '8', -'dataflow.container_version' : 'beam-master-20200629' +'dataflow.container_version' : 'beam-2.23.0' ] } diff --git a/sdks/python/apache_beam/version.py b/sdks/python/apache_beam/version.py index 964e464..d6d389c 100644 --- a/sdks/python/apache_beam/version.py +++ b/sdks/python/apache_beam/version.py @@ -17,4 +17,4 @@ """Apache Beam SDK version information and utilities.""" -__version__ = '2.23.0.dev' +__version__ = '2.23.0'
[beam] branch master updated: Moving to 2.24.0-SNAPSHOT on master branch.
This is an automated email from the ASF dual-hosted git repository. tvalentyn pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new 02b184e Moving to 2.24.0-SNAPSHOT on master branch. 02b184e is described below commit 02b184e4020dfed9b07c307c2d6657ebfdfe441b Author: Valentyn Tymofieiev AuthorDate: Wed Jul 1 16:41:08 2020 -0700 Moving to 2.24.0-SNAPSHOT on master branch. --- .../src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy| 2 +- gradle.properties | 4 ++-- sdks/python/apache_beam/version.py| 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index 114c72a..ac6e5b6 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -317,7 +317,7 @@ class BeamModulePlugin implements Plugin { // Automatically use the official release version if we are performing a release // otherwise append '-SNAPSHOT' -project.version = '2.23.0' +project.version = '2.24.0' if (!isRelease(project)) { project.version += '-SNAPSHOT' } diff --git a/gradle.properties b/gradle.properties index 22f2c25..49a5819 100644 --- a/gradle.properties +++ b/gradle.properties @@ -23,8 +23,8 @@ offlineRepositoryRoot=offline-repository signing.gnupg.executable=gpg signing.gnupg.useLegacyGpg=true -version=2.23.0-SNAPSHOT -sdk_version=2.23.0.dev +version=2.24.0-SNAPSHOT +sdk_version=2.24.0.dev javaVersion=1.8 diff --git a/sdks/python/apache_beam/version.py b/sdks/python/apache_beam/version.py index 964e464..e1d3ef1 100644 --- a/sdks/python/apache_beam/version.py +++ b/sdks/python/apache_beam/version.py @@ -17,4 +17,4 @@ """Apache Beam SDK version information and utilities.""" -__version__ = '2.23.0.dev' +__version__ = 
'2.24.0.dev'
[beam] branch master updated (a5c32f3 -> d6fcc1e)
This is an automated email from the ASF dual-hosted git repository. tvalentyn pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from a5c32f3 [BEAM-9953] [ZetaSQL] Implement CREATE FUNCTION and scalar UDF. (#12153) new e5ee374 [BEAM-9754] Add Py 3.8 support to Dataflow runner. new 190b69d Print a warning when users install version of Beam that has not been sufficiently tested. new d6fcc1e Merge pull request #12156 from tvalentyn/py38_support The 3 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: sdks/python/apache_beam/__init__.py| 9 ++- .../runners/dataflow/dataflow_runner_test.py | 63 - .../runners/dataflow/internal/apiclient.py | 6 +- .../runners/dataflow/internal/apiclient_test.py| 66 -- .../apache_beam/runners/dataflow/internal/names.py | 4 +- .../runners/dataflow/template_runner_test.py | 3 - sdks/python/setup.py | 10 +++- 7 files changed, 22 insertions(+), 139 deletions(-)
[beam] branch master updated (68a3a73 -> a5c32f3)
This is an automated email from the ASF dual-hosted git repository. ibzib pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from 68a3a73 [BEAM-10327] Create a pattern that shows use of Schema using Joins (#12097) add a5c32f3 [BEAM-9953] [ZetaSQL] Implement CREATE FUNCTION and scalar UDF. (#12153) No new revisions were added by this update. Summary of changes: .../sql/meta/provider/FullNameTableProvider.java | 2 +- .../sdk/extensions/sql/zetasql/SqlAnalyzer.java| 68 ++- .../extensions/sql/zetasql/ZetaSQLPlannerImpl.java | 59 +++--- .../translation/AggregateScanConverter.java| 7 +- .../translation/ArrayScanColumnRefToUncollect.java | 4 +- .../zetasql/translation/ExpressionConverter.java | 94 .../zetasql/translation/FilterScanConverter.java | 4 +- .../sql/zetasql/translation/JoinScanConverter.java | 5 +- .../LimitOffsetScanToLimitConverter.java | 6 +- .../LimitOffsetScanToOrderByLimitConverter.java| 6 +- .../sql/zetasql/ZetaSQLDialectSpecTest.java| 125 + 11 files changed, 333 insertions(+), 47 deletions(-)
[beam] branch master updated (68a3a73 -> a5c32f3)
This is an automated email from the ASF dual-hosted git repository. ibzib pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from 68a3a73 [BEAM-10327] Create a pattern that shows use of Schema using Joins (#12097) add a5c32f3 [BEAM-9953] [ZetaSQL] Implement CREATE FUNCTION and scalar UDF. (#12153) No new revisions were added by this update. Summary of changes: .../sql/meta/provider/FullNameTableProvider.java | 2 +- .../sdk/extensions/sql/zetasql/SqlAnalyzer.java| 68 ++- .../extensions/sql/zetasql/ZetaSQLPlannerImpl.java | 59 +++--- .../translation/AggregateScanConverter.java| 7 +- .../translation/ArrayScanColumnRefToUncollect.java | 4 +- .../zetasql/translation/ExpressionConverter.java | 94 .../zetasql/translation/FilterScanConverter.java | 4 +- .../sql/zetasql/translation/JoinScanConverter.java | 5 +- .../LimitOffsetScanToLimitConverter.java | 6 +- .../LimitOffsetScanToOrderByLimitConverter.java| 6 +- .../sql/zetasql/ZetaSQLDialectSpecTest.java| 125 + 11 files changed, 333 insertions(+), 47 deletions(-)
[beam] branch master updated (a4f3019 -> 68a3a73)
This is an automated email from the ASF dual-hosted git repository. tvalentyn pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from a4f3019 [BEAM-10391] Add attribute map null check in dataflow worker (#12136) add 68a3a73 [BEAM-10327] Create a pattern that shows use of Schema using Joins (#12097) No new revisions were added by this update. Summary of changes: .../apache/beam/examples/snippets/Snippets.java| 70 + .../beam/examples/snippets/SnippetsTest.java | 71 ++ .../content/en/documentation/patterns/overview.md | 3 + .../content/en/documentation/patterns/schema.md| 57 + .../partials/section-menu/en/documentation.html| 1 + 5 files changed, 202 insertions(+) create mode 100644 website/www/site/content/en/documentation/patterns/schema.md
[beam] branch master updated (8f86cec -> a4f3019)
This is an automated email from the ASF dual-hosted git repository. bhulette pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from 8f86cec Merge pull request #12129 from aaltay/gib add a4f3019 [BEAM-10391] Add attribute map null check in dataflow worker (#12136) No new revisions were added by this update. Summary of changes: .../org/apache/beam/runners/dataflow/worker/PubsubSink.java | 12 ++-- .../org/apache/beam/sdk/io/gcp/pubsub/PubsubMessage.java | 10 ++ 2 files changed, 12 insertions(+), 10 deletions(-)
[beam] branch master updated: Change GroupIntoBatches to group for real
This is an automated email from the ASF dual-hosted git repository. altay pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new 46276b5 Change GroupIntoBatches to group for real new 8f86cec Merge pull request #12129 from aaltay/gib 46276b5 is described below commit 46276b5191bd21359c05e03b692dbcb40e688e02 Author: Ahmet Altay AuthorDate: Mon Jun 29 19:58:12 2020 -0700 Change GroupIntoBatches to group for real --- CHANGES.md | 4 +++- .../snippets/transforms/aggregation/groupintobatches_test.py | 12 ++-- sdks/python/apache_beam/transforms/util.py | 9 +++-- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index a8aceeb..7586734 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -77,7 +77,9 @@ `{"foo": "bar", "baz": null}`, whereas an implicit null like `{"foo": "bar"}` would raise an exception. Now both JSON strings will yield the same result by default. This behavior can be overriden with `RowJson.RowJsonDeserializer#withNullBehavior`. - +* Fixed a bug in `GroupIntoBatches` experimental transform in Python to actually group batches by key. + This changes the output type for this transform ([BEAM-6696](https://issues.apache.org/jira/browse/BEAM-6696)). +* X behavior was changed ([BEAM-X](https://issues.apache.org/jira/browse/BEAM-X)). ## Deprecations diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/groupintobatches_test.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/groupintobatches_test.py index 374401d..76c94da 100644 --- a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/groupintobatches_test.py +++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/groupintobatches_test.py @@ -31,14 +31,14 @@ from . 
import groupintobatches def check_batches_with_keys(actual): expected = '''[START batches_with_keys] -[('spring', ''), ('spring', '凌'), ('spring', '')] -[('summer', '凌'), ('summer', ''), ('summer', '')] -[('spring', '')] -[('fall', '凌'), ('fall', '')] -[('winter', '')] +('spring', ['', '凌', '']) +('summer', ['凌', '', '']) +('spring', ['']) +('fall', ['凌', '']) +('winter', ['']) [END batches_with_keys]'''.splitlines()[1:-1] assert_matches_stdout( - actual, expected, lambda batch: (batch[0][0], len(batch))) + actual, expected, lambda batch: (batch[0], len(batch[1]))) @mock.patch('apache_beam.Pipeline', TestPipeline) diff --git a/sdks/python/apache_beam/transforms/util.py b/sdks/python/apache_beam/transforms/util.py index afb4192..2f68d08 100644 --- a/sdks/python/apache_beam/transforms/util.py +++ b/sdks/python/apache_beam/transforms/util.py @@ -741,6 +741,7 @@ def WithKeys(pcoll, k): @experimental() @typehints.with_input_types(Tuple[K, V]) +@typehints.with_output_types(Tuple[K, Iterable[V]]) class GroupIntoBatches(PTransform): """PTransform that batches the input into desired batch size. Elements are buffered until they are equal to batch size provided in the argument at which @@ -786,7 +787,9 @@ def _pardo_group_into_batches(batch_size, input_coder): count = count_state.read() if count >= batch_size: batch = [element for element in element_state.read()] -yield batch +key, _ = batch[0] +batch_values = [v for (k, v) in batch] +yield (key, batch_values) element_state.clear() count_state.clear() @@ -797,7 +800,9 @@ def _pardo_group_into_batches(batch_size, input_coder): count_state=DoFn.StateParam(COUNT_STATE)): batch = [element for element in element_state.read()] if batch: -yield batch +key, _ = batch[0] +batch_values = [v for (k, v) in batch] +yield (key, batch_values) element_state.clear() count_state.clear()
[beam] branch master updated: Add nexmark option to allow cancel streaming query job after complete
This is an automated email from the ASF dual-hosted git repository. pabloem pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new 78be15f Add nexmark option to allow cancel streaming query job after complete new ea928a2 Merge pull request #12119 from y1chi/nexmark 78be15f is described below commit 78be15f5efe11d9dd0476d181430b55569caccd0 Author: Yichi Zhang AuthorDate: Mon Jun 29 11:25:35 2020 -0700 Add nexmark option to allow cancel streaming query job after complete --- .../src/main/java/org/apache/beam/sdk/nexmark/NexmarkLauncher.java | 3 +++ .../src/main/java/org/apache/beam/sdk/nexmark/NexmarkOptions.java | 6 ++ 2 files changed, 9 insertions(+) diff --git a/sdks/java/testing/nexmark/src/main/java/org/apache/beam/sdk/nexmark/NexmarkLauncher.java b/sdks/java/testing/nexmark/src/main/java/org/apache/beam/sdk/nexmark/NexmarkLauncher.java index 66385af..619a517 100644 --- a/sdks/java/testing/nexmark/src/main/java/org/apache/beam/sdk/nexmark/NexmarkLauncher.java +++ b/sdks/java/testing/nexmark/src/main/java/org/apache/beam/sdk/nexmark/NexmarkLauncher.java @@ -480,6 +480,9 @@ public class NexmarkLauncher { && quietFor.isLongerThan(DONE_DELAY)) { NexmarkUtils.console("streaming query appears to have finished waiting for completion."); waitingForShutdown = true; + if (options.getCancelStreamingJobAfterFinish()) { +cancelJob = true; + } } else if (quietFor.isLongerThan(STUCK_TERMINATE_DELAY)) { NexmarkUtils.console( "ERROR: streaming query appears to have been stuck for %d minutes, cancelling job.", diff --git a/sdks/java/testing/nexmark/src/main/java/org/apache/beam/sdk/nexmark/NexmarkOptions.java b/sdks/java/testing/nexmark/src/main/java/org/apache/beam/sdk/nexmark/NexmarkOptions.java index db9b97e..ea2be42 100644 --- a/sdks/java/testing/nexmark/src/main/java/org/apache/beam/sdk/nexmark/NexmarkOptions.java +++ 
b/sdks/java/testing/nexmark/src/main/java/org/apache/beam/sdk/nexmark/NexmarkOptions.java @@ -139,6 +139,12 @@ public interface NexmarkOptions void setStreamTimeout(Integer streamTimeout); + @Description("Proactively cancels streaming job after query is completed") + @Default.Boolean(false) + boolean getCancelStreamingJobAfterFinish(); + + void setCancelStreamingJobAfterFinish(boolean cancelStreamingJobAfterFinish); + @Description("Number of unbounded sources to create events.") @Nullable Integer getNumEventGenerators();
[beam] branch master updated: [BEAM-10322] allow only single assignment to producing stages by pcollection map
This is an automated email from the ASF dual-hosted git repository. pabloem pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new 592ab72 [BEAM-10322] allow only single assignment to producing stages by pcollection map new 709c2c5 Merge pull request #12086 from [BEAM-10322] allow only single assignment to producing stages by pcol… 592ab72 is described below commit 592ab72d4c98f54eac52e0e37834177af31b5298 Author: Heejong Lee AuthorDate: Wed Jun 24 18:28:24 2020 -0700 [BEAM-10322] allow only single assignment to producing stages by pcollection map --- sdks/python/apache_beam/runners/portability/fn_api_runner/execution.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/execution.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/execution.py index acf04b7..251a696 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/execution.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/execution.py @@ -375,6 +375,8 @@ class FnApiRunnerExecutionContext(object): for o in transform.outputs.values(): if o in s.side_inputs(): continue + if o in producing_stages_by_pcoll: +continue producing_stages_by_pcoll[o] = s for side_pc in all_side_inputs:
[beam] branch master updated: [BEAM-10291] Adding full thread dump upon lull detection (#12047)
This is an automated email from the ASF dual-hosted git repository. tvalentyn pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new f1eebfa [BEAM-10291] Adding full thread dump upon lull detection (#12047) f1eebfa is described below commit f1eebfa32cebad89e8ca629cfc4010c468e1835c Author: David Yan AuthorDate: Wed Jul 1 10:51:35 2020 -0700 [BEAM-10291] Adding full thread dump upon lull detection (#12047) --- .../apache_beam/runners/worker/sdk_worker.py | 26 ++-- .../apache_beam/runners/worker/sdk_worker_test.py | 49 ++ 2 files changed, 72 insertions(+), 3 deletions(-) diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker.py b/sdks/python/apache_beam/runners/worker/sdk_worker.py index ecf93d7..5cbc498 100644 --- a/sdks/python/apache_beam/runners/worker/sdk_worker.py +++ b/sdks/python/apache_beam/runners/worker/sdk_worker.py @@ -65,6 +65,7 @@ from apache_beam.runners.worker.data_plane import PeriodicThread from apache_beam.runners.worker.statecache import StateCache from apache_beam.runners.worker.worker_id_interceptor import WorkerIdInterceptor from apache_beam.runners.worker.worker_status import FnApiWorkerStatusHandler +from apache_beam.runners.worker.worker_status import thread_dump from apache_beam.utils import thread_pool_executor if TYPE_CHECKING: @@ -75,11 +76,14 @@ _LOGGER = logging.getLogger(__name__) # This SDK harness will (by default), log a "lull" in processing if it sees no # transitions in over 5 minutes. -# 5 minutes * 60 seconds * 1020 millis * 1000 micros * 1000 nanoseconds +# 5 minutes * 60 seconds * 1000 millis * 1000 micros * 1000 nanoseconds DEFAULT_LOG_LULL_TIMEOUT_NS = 5 * 60 * 1000 * 1000 * 1000 DEFAULT_BUNDLE_PROCESSOR_CACHE_SHUTDOWN_THRESHOLD_S = 60 +# Full thread dump is performed at most every 20 minutes. 
+LOG_LULL_FULL_THREAD_DUMP_S = 20 * 60 + class ShortIdCache(object): """ Cache for MonitoringInfo "short ids" @@ -465,6 +469,7 @@ class SdkWorker(object): self.profiler_factory = profiler_factory self.log_lull_timeout_ns = ( log_lull_timeout_ns or DEFAULT_LOG_LULL_TIMEOUT_NS) +self._last_full_thread_dump_secs = 0 def do_instruction(self, request): # type: (beam_fn_api_pb2.InstructionRequest) -> beam_fn_api_pb2.InstructionResponse @@ -545,8 +550,10 @@ class SdkWorker(object): error='Instruction not running: %s' % instruction_id) def _log_lull_in_bundle_processor(self, processor): -state_sampler = processor.state_sampler -sampler_info = state_sampler.get_info() +sampler_info = processor.state_sampler.get_info() +self._log_lull_sampler_info(sampler_info) + + def _log_lull_sampler_info(self, sampler_info): if (sampler_info and sampler_info.time_since_transition and sampler_info.time_since_transition > self.log_lull_timeout_ns): step_name = sampler_info.state_name.step_name @@ -570,6 +577,19 @@ class SdkWorker(object): step_name_log, stack_trace) + if self._should_log_full_thread_dump(): +self._log_full_thread_dump() + + def _should_log_full_thread_dump(self): +now = time.time() +if self._last_full_thread_dump_secs + LOG_LULL_FULL_THREAD_DUMP_S < now: + self._last_full_thread_dump_secs = now + return True +return False + + def _log_full_thread_dump(self): +thread_dump() + def process_bundle_progress(self, request, # type: beam_fn_api_pb2.ProcessBundleProgressRequest instruction_id # type: str diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker_test.py b/sdks/python/apache_beam/runners/worker/sdk_worker_test.py index 59baf15..a950ded 100644 --- a/sdks/python/apache_beam/runners/worker/sdk_worker_test.py +++ b/sdks/python/apache_beam/runners/worker/sdk_worker_test.py @@ -25,11 +25,14 @@ from __future__ import print_function import contextlib import logging +import threading +import time import unittest from builtins import range from collections import 
namedtuple import grpc +import mock from apache_beam.coders import VarIntCoder from apache_beam.portability.api import beam_fn_api_pb2 @@ -38,8 +41,11 @@ from apache_beam.portability.api import beam_runner_api_pb2 from apache_beam.portability.api import metrics_pb2 from apache_beam.runners.worker import sdk_worker from apache_beam.runners.worker import statecache +from apache_beam.runners.worker import statesampler from apache_beam.runners.worker.sdk_worker import CachingStateHandler +from apache_beam.runners.worker.sdk_worker import SdkWorker from apache_beam.utils import thread_pool_executor +from apache_beam.utils.counters import CounterName _LOGGER = logging.getLogger(__name__) @@ -121,6 +127,49 @@ class SdkWorkerTest(unittest.TestCase): def test_fn_registration(self):
[beam] branch master updated: Update ZetaSQL to 2020.06.1 release.
This is an automated email from the ASF dual-hosted git repository. ibzib pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new 1817402 Update ZetaSQL to 2020.06.1 release. new 4863968 Merge pull request #12138 from ibzib/zetasql-release 1817402 is described below commit 1817402493f2e3490b438ea3530539689c0a4ddb Author: Kyle Weaver AuthorDate: Tue Jun 30 14:13:35 2020 -0700 Update ZetaSQL to 2020.06.1 release. --- sdks/java/extensions/sql/zetasql/build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/java/extensions/sql/zetasql/build.gradle b/sdks/java/extensions/sql/zetasql/build.gradle index 1327aaa..eda287b 100644 --- a/sdks/java/extensions/sql/zetasql/build.gradle +++ b/sdks/java/extensions/sql/zetasql/build.gradle @@ -25,7 +25,7 @@ applyJavaNature(automaticModuleName: 'org.apache.beam.sdk.extensions.sql.zetasql description = "Apache Beam :: SDKs :: Java :: Extensions :: SQL :: ZetaSQL" ext.summary = "ZetaSQL to Calcite translator" -def zetasql_version = "2020.04.1" +def zetasql_version = "2020.06.1" dependencies { compile project(":sdks:java:core")
[GitHub] [beam-wheels] TobKed opened a new pull request #18: Deprecate repository (depends on https://github.com/apache/beam/pull/12150)
TobKed opened a new pull request #18: URL: https://github.com/apache/beam-wheels/pull/18 When the GitHub Actions used to build the Python source distribution and wheels are used in the release process, this repository may be deprecated. https://github.com/apache/beam/pull/12150 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[beam] branch asf-site updated: Publishing website 2020/07/01 06:01:51 at commit 407b014
This is an automated email from the ASF dual-hosted git repository. git-site-role pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/asf-site by this push: new 289a54d Publishing website 2020/07/01 06:01:51 at commit 407b014 289a54d is described below commit 289a54defc19c96fe5275e5475a7955e70d04185 Author: jenkins AuthorDate: Wed Jul 1 06:01:51 2020 + Publishing website 2020/07/01 06:01:51 at commit 407b014 --- .../documentation/sdks/python-streaming/index.html | 6 +++--- website/generated-content/sitemap.xml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/website/generated-content/documentation/sdks/python-streaming/index.html b/website/generated-content/documentation/sdks/python-streaming/index.html index 88a8c22..af25a28 100644 --- a/website/generated-content/documentation/sdks/python-streaming/index.html +++ b/website/generated-content/documentation/sdks/python-streaming/index.html @@ -1,7 +1,7 @@ Apache Beam Python Streaming PipelinesGet StartedDocumentationLanguagesRUNNERSRoadmap< [...] +Get StartedDocumentationLanguagesRUNNERSRoadmap< [...] limitations) starting with Beam SDK version 2.5.0.Why use streaming execution?Beam creates an unbounded PCollection if your pipeline reads from a streaming or continously-updating data source (such as Cloud Pub/Sub). A runner must process an unbounded PCollection using a streaming job that runs continuously, @@ -62,7 +62,7 @@ example streaming pipeline. 
Specify your Cloud Pub/Sub project and input topic python -m apache_beam.examples.streaming_wordcount \ --input_topic projects/YOUR_PUBSUB_PROJECT_NAME/topics/YOUR_INPUT_TOPIC \ --output_topic projects/YOUR_PUBSUB_PROJECT_NAME/topics/YOUR_OUTPUT_TOPIC \ - --streamingSee https://beam.apache.org/roadmap/portability/#python-on-flink for more information.See https://beam.apache.org/documentation/runners/flink/ for more information.See https://beam.apache.org/roadmap/portability/#python-on-spark for more information.See https://beam.apache.org/documentation/runners/flink/ for more information.See https://beam.apache.org/documentation/runners/spark/ for more information.# As part of the initial setup, install Google Cloud Platform specific extra components. pip install apache-beam[gcp] # DataflowRunner requires the --streaming option @@ -74,7 +74,7 @@ python -m apache_beam.examples.streaming_wordcount \ --input_topic projects/YOUR_PUBSUB_PROJECT_NAME/topics/YOUR_INPUT_TOPIC \ --output_topic projects/YOUR_PUBSUB_PROJECT_NAME/topics/YOUR_OUTPUT_TOPIC \ --streamingCheck your runners documentation for any additional runner-specific information -about executing streaming pipelines:DirectRunner streaming executionDataflowRunner streaming executionUnsupported featuresPython streaming execution does not currently support the following features.General Beam featuresThese unsupported Beam features appl [...] +about executing streaming pipelines:DirectRunner streaming executionDataflowRunner streaming executionPortable Flink runnerUnsupported featuresPython streaming execution does not currently support the following features:Custom source [...] http://www.apache.org>The Apache Software Foundation | Privacy Policy | RSS FeedApache Beam, Apache, Beam, the Beam logo, and the Apache feather logo are either registered trademarks or trademarks of The Apache Software Foundation. 
All other products or name brands are trademarks of their respective holders, including The Apache Software Foundation. \ No newline at end of file diff --git a/website/generated-content/sitemap.xml b/website/generated-content/sitemap.xml index e8f7963..0968d83 100644 --- a/website/generated-content/sitemap.xml +++ b/website/generated-content/sitemap.xml @@ -1 +1 @@ -http://www.sitemaps.org/schemas/sitemap/0.9; xmlns:xhtml="http://www.w3.org/1999/xhtml;>/blog/beam-2.22.0/2020-06-08T14:13:37-07:00/categories/blog/2020-06-08T14:13:37-07:00/blog/2020-06-08T14:13:37-07:00/categories/2020-06-08T14:13:37-07:00/blog/b [...] \ No newline at end of file +http://www.sitemaps.org/schemas/sitemap/0.9; xmlns:xhtml="http://www.w3.org/1999/xhtml;>/blog/beam-2.22.0/2020-06-08T14:13:37-07:00/categories/blog/2020-06-08T14:13:37-07:00/blog/2020-06-08T14:13:37-07:00/categories/2020-06-08T14:13:37-07:00/blog/b [...] \ No newline at end of file