[beam] Diff for: [GitHub] reuvenlax merged pull request #7500: [BEAM-4076] Add antlr4 to beam
diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index dcc78a5416ab..eb8a57d639e5 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -359,6 +359,8 @@ class BeamModulePlugin implements Plugin { activemq_junit : "org.apache.activemq.tooling:activemq-junit:5.13.1", activemq_kahadb_store : "org.apache.activemq:activemq-kahadb-store:5.13.1", activemq_mqtt : "org.apache.activemq:activemq-mqtt:5.13.1", +antlr : "org.antlr:antlr4:4.7", +antlr_runtime : "org.antlr:antlr4-runtime:4.7", apex_common : "org.apache.apex:apex-common:$apex_core_version", apex_engine : "org.apache.apex:apex-engine:$apex_core_version", args4j : "args4j:args4j:2.33", @@ -626,7 +628,7 @@ class BeamModulePlugin implements Plugin { '-Xlint:all', '-Werror', '-XepDisableWarningsInGeneratedCode', - '-XepExcludedPaths:(.*/)?(build/generated.*avro-java|build/generated)/.*', + '-XepExcludedPaths:(.*/)?(build/generated-src|build/generated.*avro-java|build/generated)/.*', '-Xep:MutableConstantField:OFF' // Guava's immutable collections cannot appear on API surface. ] + (defaultLintSuppressions + configuration.disableLintWarnings).collect { "-Xlint:-${it}" }) @@ -1442,6 +1444,20 @@ class BeamModulePlugin implements Plugin { // or be left here. project.ext.applyAvroNature = { project.apply plugin: "com.commercehub.gradle.plugin.avro" } +project.ext.applyAntlrNature = { + project.apply plugin: 'antlr' + def generatedDir = "${project.buildDir}/generated/source-src/antlr/main/java/" + project.sourceSets { +generated { java.srcDir generatedDir } + } + project.idea { +module { + sourceDirs += project.file(generatedDir) + generatedSourceDirs += project.file(generatedDir) +} + } +} + // Creates a task to run the quickstart for a runner. 
// Releases version and URL, can be overridden for an RC release with // ./gradlew :release:runJavaExamplesValidationTask -Pver=2.3.0 -Prepourl=https://repository.apache.org/content/repositories/orgapachebeam-1027 diff --git a/sdks/java/build-tools/src/main/resources/beam/findbugs-filter.xml b/sdks/java/build-tools/src/main/resources/beam/findbugs-filter.xml index c8aa2f291719..545c2f8cc26c 100644 --- a/sdks/java/build-tools/src/main/resources/beam/findbugs-filter.xml +++ b/sdks/java/build-tools/src/main/resources/beam/findbugs-filter.xml @@ -410,6 +410,13 @@ + + + + + + diff --git a/sdks/java/core/build.gradle b/sdks/java/core/build.gradle index 689efea49ac2..c6d29fc615fd 100644 --- a/sdks/java/core/build.gradle +++ b/sdks/java/core/build.gradle @@ -22,13 +22,20 @@ applyJavaNature(shadowClosure: DEFAULT_SHADOW_CLOSURE << { include(dependency(library.java.protobuf_java)) include(dependency(library.java.byte_buddy)) include(dependency("org.apache.commons:.*")) +include(dependency(library.java.antlr_runtime)) } relocate "com.google.thirdparty", getJavaRelocatedPath("com.google.thirdparty") relocate "com.google.protobuf", getJavaRelocatedPath("com.google.protobuf") relocate "net.bytebuddy", getJavaRelocatedPath("net.bytebuddy") relocate "org.apache.commons", getJavaRelocatedPath("org.apache.commons") + relocate "org.antlr.v4", getJavaRelocatedPath("org.antlr.v4") }) applyAvroNature() +applyAntlrNature() + +generateGrammarSource { + arguments += ["-visitor"] +} description = "Apache Beam :: SDKs :: Java :: Core" ext.summary = """Beam SDK Java All provides a simple, Java-based @@ -51,9 +58,11 @@ test { } dependencies { + antlr library.java.antlr // Required to load constants from the model, e.g. 
max timestamp for global window shadow project(path: ":beam-model-pipeline", configuration: "shadow") shadow library.java.vendored_guava_20_0 + compile library.java.antlr_runtime compile library.java.protobuf_java compile library.java.byte_buddy compile library.java.commons_compress diff --git a/sdks/java/core/src/main/antlr/java/org/apache/beam/sdk/schemas/parser/generated/FieldSpecifierNotation.g4 b/sdks/java/core/src/main/antlr/java/org/apache/beam/sdk/schemas/parser/generated/FieldSpecifierNotation.g4 new file mode 100644 index ..a869304bcf4f --- /dev/null +++ b/sdks/java/core/src/main/antlr/java/org/apache/beam/sdk/schemas/parser/generated/FieldSpecifierNotation.g4 @@ -0,0
[beam] branch master updated (40b889e -> be1b166)
This is an automated email from the ASF dual-hosted git repository. reuvenlax pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from 40b889e Merge pull request #7517: [BEAM-6440] Fix leakage of timer de-duplication map add 6cea24f Add ANTLR dependency to beam along with initial grammar. add be1b166 Merge pull request #7500: [BEAM-4076] Add antlr4 to beam No new revisions were added by this update. Summary of changes: .../org/apache/beam/gradle/BeamModulePlugin.groovy | 18 - .../src/main/resources/beam/findbugs-filter.xml| 7 ++ .../src/main/resources/beam/suppressions.xml | 1 + sdks/java/core/build.gradle| 9 +++ .../parser/generated/FieldSpecifierNotation.g4}| 51 +++-- .../parser/FieldAccessDescriptorParser.java| 87 ++ .../{utils => parser/generated}/package-info.java | 2 +- .../schemas/{utils => parser}/package-info.java| 2 +- .../org/apache/beam/SdkCoreApiSurfaceTest.java | 73 -- .../schemas/FieldAccessDescriptorParserTest.java} | 12 ++- 10 files changed, 156 insertions(+), 106 deletions(-) copy sdks/java/{extensions/protobuf/src/test/proto/proto2_coder_test_messages.proto => core/src/main/antlr/java/org/apache/beam/sdk/schemas/parser/generated/FieldSpecifierNotation.g4} (51%) create mode 100644 sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/parser/FieldAccessDescriptorParser.java copy sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/{utils => parser/generated}/package-info.java (95%) copy sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/{utils => parser}/package-info.java (96%) delete mode 100644 sdks/java/core/src/test/java/org/apache/beam/SdkCoreApiSurfaceTest.java copy sdks/java/core/src/{main/java/org/apache/beam/sdk/schemas/Factory.java => test/java/org/apache/beam/sdk/schemas/FieldAccessDescriptorParserTest.java} (74%)
[beam] 01/01: Merge pull request #7517: [BEAM-6440] Fix leakage of timer de-duplication map
This is an automated email from the ASF dual-hosted git repository. thw pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git commit 40b889e2f633482ccef688ecfee250c2c268a364 Merge: b83b302 85e84e5 Author: Thomas Weise AuthorDate: Tue Jan 15 18:20:57 2019 -0800 Merge pull request #7517: [BEAM-6440] Fix leakage of timer de-duplication map .../wrappers/streaming/DoFnOperator.java | 5 ++- .../wrappers}/streaming/DedupingOperatorTest.java | 4 +- .../wrappers}/streaming/DoFnOperatorTest.java | 5 +-- .../wrappers}/streaming/StreamRecordStripper.java | 2 +- .../streaming/WindowDoFnOperatorTest.java | 48 +++--- 5 files changed, 50 insertions(+), 14 deletions(-)
[beam] branch master updated (b83b302 -> 40b889e)
This is an automated email from the ASF dual-hosted git repository. thw pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from b83b302 Merge pull request #7523: Apply spotless across Beam add 85e84e5 [BEAM-6440] Fix leakage of timer de-duplication map new 40b889e Merge pull request #7517: [BEAM-6440] Fix leakage of timer de-duplication map The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .../wrappers/streaming/DoFnOperator.java | 5 ++- .../wrappers}/streaming/DedupingOperatorTest.java | 4 +- .../wrappers}/streaming/DoFnOperatorTest.java | 5 +-- .../wrappers}/streaming/StreamRecordStripper.java | 2 +- .../streaming/WindowDoFnOperatorTest.java | 48 +++--- 5 files changed, 50 insertions(+), 14 deletions(-) rename runners/flink/src/test/java/org/apache/beam/runners/flink/{ => translation/wrappers}/streaming/DedupingOperatorTest.java (95%) rename runners/flink/src/test/java/org/apache/beam/runners/flink/{ => translation/wrappers}/streaming/DoFnOperatorTest.java (99%) rename runners/flink/src/test/java/org/apache/beam/runners/flink/{ => translation/wrappers}/streaming/StreamRecordStripper.java (96%) rename runners/flink/src/test/java/org/apache/beam/runners/flink/{ => translation/wrappers}/streaming/WindowDoFnOperatorTest.java (83%)
[beam] Diff for: [GitHub] tweise merged pull request #7517: [BEAM-6440] Fix leakage of timer de-duplication map
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java index dcd0f0bd7dfe..647d6ccacb0a 100644 --- a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java +++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java @@ -698,12 +698,14 @@ public void snapshotState(StateSnapshotContext context) throws Exception { public void onEventTime(InternalTimer timer) throws Exception { // We don't have to call checkInvokeStartBundle() because it's already called in // processWatermark*(). +timerInternals.cleanupPendingTimer(timer.getNamespace()); fireTimer(timer); } @Override public void onProcessingTime(InternalTimer timer) throws Exception { checkInvokeStartBundle(); +timerInternals.cleanupPendingTimer(timer.getNamespace()); fireTimer(timer); } @@ -714,7 +716,6 @@ public void fireTimer(InternalTimer timer) { // This is a user timer, so namespace must be WindowNamespace checkArgument(namespace instanceof WindowNamespace); BoundedWindow window = ((WindowNamespace) namespace).getWindow(); -timerInternals.cleanupPendingTimer(timerData); pushbackDoFnRunner.onTimer( timerData.getTimerId(), window, timerData.getTimestamp(), timerData.getDomain()); } @@ -927,7 +928,7 @@ public TimerInternals timerInternals() { * namespace of the timer and the timer's id. Necessary for supporting removal of existing * timers. In Flink removal of timers can only be done by providing id and time of the timer. 
*/ -private final MapState pendingTimersById; +final MapState pendingTimersById; private FlinkTimerInternals() { MapStateDescriptor pendingTimersByIdStateDescriptor = diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/DedupingOperatorTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DedupingOperatorTest.java similarity index 95% rename from runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/DedupingOperatorTest.java rename to runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DedupingOperatorTest.java index a6fa3dba6667..3a2c4a376530 100644 --- a/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/DedupingOperatorTest.java +++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DedupingOperatorTest.java @@ -15,9 +15,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.beam.runners.flink.streaming; +package org.apache.beam.runners.flink.translation.wrappers.streaming; -import static org.apache.beam.runners.flink.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue; +import static org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue; import static org.hamcrest.collection.IsIterableContainingInOrder.contains; import static org.junit.Assert.assertThat; diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/DoFnOperatorTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperatorTest.java similarity index 99% rename from runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/DoFnOperatorTest.java rename to runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperatorTest.java index 416595cb045d..ed1630c0472a 100644 --- a/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/DoFnOperatorTest.java +++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperatorTest.java @@ -15,9 +15,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.beam.runners.flink.streaming; +package org.apache.beam.runners.flink.translation.wrappers.streaming; -import static org.apache.beam.runners.flink.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue; +import static org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue; import static org.hamcrest.Matchers.emptyIterable; import static org.hamcrest.collection.IsIterableContainingInOrder.contains; import static org.junit.Assert.assertEquals; @@ -31,7 +31,6 @@ import org.apache.beam.runners.flink.FlinkPipelineOptions; import org.apache.beam.runners.flink.translation.types.CoderTypeInformation; import org.apache.beam.runners.flink.translation.types.CoderTypeSerializer; -import
[beam] Diff for: [GitHub] kennknowles closed pull request #7528: DO NOT MERGE - dummy PR to see if spotlessCheck on Jenkins succeeds against two different codebases
diff --git a/another-README.md b/another-README.md new file mode 100644 index ..cef141ea4c99 --- /dev/null +++ b/another-README.md @@ -0,0 +1,112 @@ + + +# Apache Beam + +[Apache Beam](http://beam.apache.org/) is a unified model for defining both batch and streaming data-parallel processing pipelines, as well as a set of language-specific SDKs for constructing pipelines and Runners for executing them on distributed processing backends, including [Apache Apex](http://apex.apache.org/), [Apache Flink](http://flink.apache.org/), [Apache Spark](http://spark.apache.org/), and [Google Cloud Dataflow](http://cloud.google.com/dataflow/). + +## Status + +[![Maven Version](https://maven-badges.herokuapp.com/maven-central/org.apache.beam/beam-sdks-java-core/badge.svg)](http://search.maven.org/#search|gav|1|g:"org.apache.beam") +[![PyPI version](https://badge.fury.io/py/apache-beam.svg)](https://badge.fury.io/py/apache-beam) +[![Build Status](https://builds.apache.org/buildStatus/icon?job=beam_PostCommit_Java)](https://builds.apache.org/job/beam_PostCommit_Java) +[![Coverage Status](https://coveralls.io/repos/github/apache/beam/badge.svg?branch=master)](https://coveralls.io/github/apache/beam?branch=master) + +### Post-commit tests status (on master branch) + +Lang | SDK | Apex | Dataflow | Flink | Gearpump | Samza | Spark +--- | --- | --- | --- | --- | --- | --- | --- +Go | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Go/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Go/lastCompletedBuild/) | --- | --- | --- | --- | --- | --- +Java | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex/lastCompletedBuild/) | [![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Batch/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Batch/lastCompletedBuild/)[![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Streaming/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_PVR_Flink_Streaming/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Gearpump/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Gearpump/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark/lastCompletedBuild/) +Python | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Python_Verify/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Python_Verify/lastCompletedBuild/) | --- | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Py_VR_Dataflow/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Py_VR_Dataflow/lastCompletedBuild/) [![Build 
Status](https://builds.apache.org/job/beam_PostCommit_Py_ValCont/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Py_ValCont/lastCompletedBuild/) | [![Build Status](https://builds.apache.org/job/beam_PostCommit_Python_VR_Flink/lastCompletedBuild/badge/icon)](https://builds.apache.org/job/beam_PostCommit_Python_VR_Flink/lastCompletedBuild/) | --- | --- | --- + + +## Overview + +Beam provides a general approach to expressing [embarrassingly parallel](https://en.wikipedia.org/wiki/Embarrassingly_parallel) data processing pipelines and supports three categories of users, each of which have relatively disparate backgrounds and needs. + +1. _End Users_: Writing pipelines with an existing SDK, running it on an existing runner. These users want to focus on writing their application logic and have everything else just work. +2. _SDK Writers_: Developing a Beam SDK targeted at a specific user community (Java, Python, Scala, Go, R, graphical, etc). These users are language geeks, and would prefer
[beam] 01/01: Merge pull request #7523: Apply spotless across Beam
This is an automated email from the ASF dual-hosted git repository. reuvenlax pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git commit b83b302ef97767e4ca245ea24e8bd40a6692e72c Merge: 1891410 2662f2b Author: reuvenlax AuthorDate: Tue Jan 15 18:09:44 2019 -0800 Merge pull request #7523: Apply spotless across Beam .../apache/beam/examples/WindowedWordCount.java| 3 +- .../apache/beam/examples/common/ExampleUtils.java | 6 +- .../beam/examples/cookbook/TriggerExample.java | 25 +++-- .../examples/complete/game/LeaderBoardTest.java| 12 ++- .../apache/beam/runners/apex/TestApexRunner.java | 2 +- .../runners/apex/translation/ParDoTranslator.java | 12 +-- .../translation/operators/ApexParDoOperator.java | 2 +- .../beam/runners/apex/ApexYarnLauncherTest.java| 4 +- .../core/construction/CombineTranslation.java | 3 +- .../CreatePCollectionViewTranslation.java | 3 +- .../construction/ExecutableStageTranslation.java | 5 +- .../core/construction/ParDoTranslation.java| 5 +- .../construction/PipelineOptionsTranslation.java | 2 +- .../core/construction/PipelineResources.java | 3 +- .../core/construction/PipelineTranslation.java | 8 +- .../construction/SplittableParDoNaiveBounded.java | 6 +- .../UnboundedReadFromBoundedSource.java| 3 +- .../core/construction/WriteFilesTranslation.java | 15 ++- .../core/construction/graph/ExecutableStage.java | 23 ++--- .../core/construction/graph/FusedPipeline.java | 3 +- .../graph/GreedyPCollectionFusers.java | 6 +- .../construction/graph/GreedyPipelineFuser.java| 42 ++-- .../graph/ImmutableExecutableStage.java| 3 +- .../runners/core/construction/graph/Networks.java | 9 +- .../construction/graph/OutputDeduplicator.java | 7 +- .../core/construction/graph/QueryablePipeline.java | 53 -- .../ExecutableStageTranslationTest.java| 4 +- .../SingleInputOutputOverrideFactoryTest.java | 6 +- .../construction/graph/ExecutableStageMatcher.java | 9 +- .../graph/GreedyPipelineFuserTest.java | 15 +-- 
.../construction/graph/GreedyStageFuserTest.java | 4 +- .../core/construction/graph/NetworksTest.java | 8 +- .../construction/graph/OutputDeduplicatorTest.java | 24 + .../construction/graph/QueryablePipelineTest.java | 5 +- .../runners/core/LateDataDroppingDoFnRunner.java | 4 +- .../core/SplittableParDoViaKeyedWorkItems.java | 6 +- .../apache/beam/runners/core/ReduceFnTester.java | 3 +- ...DirectGBKIntoKeyedWorkItemsOverrideFactory.java | 6 +- .../direct/DirectGroupByKeyOverrideFactory.java| 6 +- .../direct/ExecutorServiceParallelExecutor.java| 3 +- .../direct/GroupAlsoByWindowEvaluatorFactory.java | 9 +- .../beam/runners/direct/MultiStepCombine.java | 15 ++- .../apache/beam/runners/direct/ParDoEvaluator.java | 4 +- .../runners/direct/ParDoMultiOverrideFactory.java | 9 +- .../SplittableProcessElementsEvaluatorFactory.java | 3 +- .../direct/StatefulParDoEvaluatorFactory.java | 9 +- .../beam/runners/direct/ViewOverrideFactory.java | 6 +- .../runners/direct/WriteWithShardingFactory.java | 6 +- .../portable/ExecutorServiceParallelExecutor.java | 3 +- .../runners/direct/portable/ReferenceRunner.java | 3 +- .../portable/job/ReferenceRunnerJobServer.java | 7 +- .../beam/runners/direct/DirectRunnerTest.java | 3 +- .../direct/StatefulParDoEvaluatorFactoryTest.java | 6 +- .../direct/WriteWithShardingFactoryTest.java | 3 +- .../portable/RemoteStageEvaluatorFactoryTest.java | 8 +- .../extensions/metrics/MetricsGraphiteSink.java| 30 +++--- .../runners/flink/CreateStreamingFlinkView.java| 6 +- .../FlinkBatchPortablePipelineTranslator.java | 2 +- .../flink/FlinkBatchTransformTranslators.java | 6 +- .../flink/FlinkBatchTranslationContext.java| 5 +- .../runners/flink/FlinkExecutionEnvironments.java | 6 +- .../beam/runners/flink/FlinkJobInvocation.java | 3 +- .../beam/runners/flink/FlinkJobServerDriver.java | 33 +++--- .../flink/FlinkStreamingPipelineTranslator.java| 6 +- .../FlinkStreamingPortablePipelineTranslator.java | 5 +- .../flink/FlinkStreamingTransformTranslators.java | 3 
+- .../flink/FlinkStreamingTranslationContext.java| 5 +- .../flink/PipelineTranslationModeOptimizer.java| 5 +- .../FlinkDefaultExecutableStageContext.java| 3 +- .../functions/FlinkExecutableStageFunction.java| 2 +- .../functions/SideInputInitializer.java| 4 +- .../streaming/ExecutableStageDoFnOperator.java | 11 +- .../streaming/io/UnboundedSourceWrapperTest.java | 7 +- .../dataflow/BatchStatefulParDoOverrides.java | 15 ++- .../dataflow/DataflowPipelineTranslator.java | 15 +--
[beam] branch master updated (1891410 -> b83b302)
This is an automated email from the ASF dual-hosted git repository. reuvenlax pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from 1891410 Merge pull request #7519 from tvalentyn/pip_retries add 2662f2b Apply spotless. new b83b302 Merge pull request #7523: Apply spotless across Beam The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .../apache/beam/examples/WindowedWordCount.java| 3 +- .../apache/beam/examples/common/ExampleUtils.java | 6 +- .../beam/examples/cookbook/TriggerExample.java | 25 +++-- .../examples/complete/game/LeaderBoardTest.java| 12 ++- .../apache/beam/runners/apex/TestApexRunner.java | 2 +- .../runners/apex/translation/ParDoTranslator.java | 12 +-- .../translation/operators/ApexParDoOperator.java | 2 +- .../beam/runners/apex/ApexYarnLauncherTest.java| 4 +- .../core/construction/CombineTranslation.java | 3 +- .../CreatePCollectionViewTranslation.java | 3 +- .../construction/ExecutableStageTranslation.java | 5 +- .../core/construction/ParDoTranslation.java| 5 +- .../construction/PipelineOptionsTranslation.java | 2 +- .../core/construction/PipelineResources.java | 3 +- .../core/construction/PipelineTranslation.java | 8 +- .../construction/SplittableParDoNaiveBounded.java | 6 +- .../UnboundedReadFromBoundedSource.java| 3 +- .../core/construction/WriteFilesTranslation.java | 15 ++- .../core/construction/graph/ExecutableStage.java | 23 ++--- .../core/construction/graph/FusedPipeline.java | 3 +- .../graph/GreedyPCollectionFusers.java | 6 +- .../construction/graph/GreedyPipelineFuser.java| 42 ++-- .../graph/ImmutableExecutableStage.java| 3 +- .../runners/core/construction/graph/Networks.java | 9 +- .../construction/graph/OutputDeduplicator.java | 7 +- .../core/construction/graph/QueryablePipeline.java | 53 -- 
.../ExecutableStageTranslationTest.java| 4 +- .../SingleInputOutputOverrideFactoryTest.java | 6 +- .../construction/graph/ExecutableStageMatcher.java | 9 +- .../graph/GreedyPipelineFuserTest.java | 15 +-- .../construction/graph/GreedyStageFuserTest.java | 4 +- .../core/construction/graph/NetworksTest.java | 8 +- .../construction/graph/OutputDeduplicatorTest.java | 24 + .../construction/graph/QueryablePipelineTest.java | 5 +- .../runners/core/LateDataDroppingDoFnRunner.java | 4 +- .../core/SplittableParDoViaKeyedWorkItems.java | 6 +- .../apache/beam/runners/core/ReduceFnTester.java | 3 +- ...DirectGBKIntoKeyedWorkItemsOverrideFactory.java | 6 +- .../direct/DirectGroupByKeyOverrideFactory.java| 6 +- .../direct/ExecutorServiceParallelExecutor.java| 3 +- .../direct/GroupAlsoByWindowEvaluatorFactory.java | 9 +- .../beam/runners/direct/MultiStepCombine.java | 15 ++- .../apache/beam/runners/direct/ParDoEvaluator.java | 4 +- .../runners/direct/ParDoMultiOverrideFactory.java | 9 +- .../SplittableProcessElementsEvaluatorFactory.java | 3 +- .../direct/StatefulParDoEvaluatorFactory.java | 9 +- .../beam/runners/direct/ViewOverrideFactory.java | 6 +- .../runners/direct/WriteWithShardingFactory.java | 6 +- .../portable/ExecutorServiceParallelExecutor.java | 3 +- .../runners/direct/portable/ReferenceRunner.java | 3 +- .../portable/job/ReferenceRunnerJobServer.java | 7 +- .../beam/runners/direct/DirectRunnerTest.java | 3 +- .../direct/StatefulParDoEvaluatorFactoryTest.java | 6 +- .../direct/WriteWithShardingFactoryTest.java | 3 +- .../portable/RemoteStageEvaluatorFactoryTest.java | 8 +- .../extensions/metrics/MetricsGraphiteSink.java| 30 +++--- .../runners/flink/CreateStreamingFlinkView.java| 6 +- .../FlinkBatchPortablePipelineTranslator.java | 2 +- .../flink/FlinkBatchTransformTranslators.java | 6 +- .../flink/FlinkBatchTranslationContext.java| 5 +- .../runners/flink/FlinkExecutionEnvironments.java | 6 +- .../beam/runners/flink/FlinkJobInvocation.java | 3 +- 
.../beam/runners/flink/FlinkJobServerDriver.java | 33 +++--- .../flink/FlinkStreamingPipelineTranslator.java| 6 +- .../FlinkStreamingPortablePipelineTranslator.java | 5 +- .../flink/FlinkStreamingTransformTranslators.java | 3 +- .../flink/FlinkStreamingTranslationContext.java| 5 +- .../flink/PipelineTranslationModeOptimizer.java| 5 +- .../FlinkDefaultExecutableStageContext.java| 3 +- .../functions/FlinkExecutableStageFunction.java| 2 +- .../functions/SideInputInitializer.java| 4 +-
[beam] Diff for: [GitHub] boyuanzz closed pull request #7525: [DO NOT MERGE] Run all PostCommit Tests against Release Branch
diff --git a/empty_file.txt b/empty_file.txt new file mode 100644 index ..e69de29bb2d1 With regards, Apache Git Services
[beam] Diff for: [GitHub] boyuanzz closed pull request #7524: [DO NOT MERGE] Run all PostCommit Tests against Release Branch
diff --git a/empty_file.txt b/empty_file.txt new file mode 100644 index ..e69de29bb2d1 diff --git a/runners/google-cloud-dataflow-java/build.gradle b/runners/google-cloud-dataflow-java/build.gradle index c4c721f28168..f913502ac760 100644 --- a/runners/google-cloud-dataflow-java/build.gradle +++ b/runners/google-cloud-dataflow-java/build.gradle @@ -39,7 +39,7 @@ processResources { filter org.apache.tools.ant.filters.ReplaceTokens, tokens: [ 'dataflow.legacy_environment_major_version' : '7', 'dataflow.fnapi_environment_major_version' : '7', -'dataflow.container_version' : 'beam-master-20190110' +'dataflow.container_version' : 'beam-2.10.0' ] } diff --git a/sdks/python/apache_beam/version.py b/sdks/python/apache_beam/version.py index 57512fe76c96..4e5e6791376c 100644 --- a/sdks/python/apache_beam/version.py +++ b/sdks/python/apache_beam/version.py @@ -18,4 +18,4 @@ """Apache Beam SDK version information and utilities.""" -__version__ = '2.10.0.dev' +__version__ = '2.10.0' With regards, Apache Git Services
[beam] branch master updated (5d00f6b -> 1891410)
This is an automated email from the ASF dual-hosted git repository. altay pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from 5d00f6b Merge pull request #7415 from angoenka/grpc_keep_alive add 3ecda6b Increase the amount of pip download retries to 10 from a default 5 new 1891410 Merge pull request #7519 from tvalentyn/pip_retries The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .../src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy| 4 ++-- sdks/python/tox.ini | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-)
[beam] Diff for: [GitHub] aaltay merged pull request #7519: [BEAM-6318] Increase the amount of pip download retries to 10 from a default 5
diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index 4b45b17b8126..d7c73b4a6e3b 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -1530,7 +1530,7 @@ class BeamModulePlugin implements Plugin { project.exec { commandLine 'virtualenv', "${project.ext.envdir}" } project.exec { executable 'sh' -args '-c', ". ${project.ext.envdir}/bin/activate && pip install --upgrade tox==3.0.0 grpcio-tools==1.3.5" +args '-c', ". ${project.ext.envdir}/bin/activate && pip install --retries 10 --upgrade tox==3.0.0 grpcio-tools==1.3.5" } } // Gradle will delete outputs whenever it thinks they are stale. Putting a @@ -1562,7 +1562,7 @@ class BeamModulePlugin implements Plugin { doLast { project.exec { executable 'sh' -args '-c', ". ${project.ext.envdir}/bin/activate && pip install -e ${project.ext.pythonRootDir}/[gcp,test]" +args '-c', ". ${project.ext.envdir}/bin/activate && pip install --retries 10 -e ${project.ext.pythonRootDir}/[gcp,test]" } } } diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 77ad5e537c77..4f1d7023faec 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -41,7 +41,7 @@ deps = # Otherwise we get "OSError: [Errno 2] No such file or directory" errors. # Source: # https://github.com/tox-dev/tox/issues/123#issuecomment-284714629 -install_command = {envbindir}/python {envbindir}/pip install --process-dependency-links {opts} {packages} +install_command = {envbindir}/python {envbindir}/pip install --retries 10 --process-dependency-links {opts} {packages} list_dependencies_command = {envbindir}/python {envbindir}/pip freeze [testenv:py27] With regards, Apache Git Services
[beam] 01/01: Merge pull request #7519 from tvalentyn/pip_retries
This is an automated email from the ASF dual-hosted git repository. altay pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git commit 189141051eca74e5dc5428a1958092453e754727 Merge: 5d00f6b 3ecda6b Author: Ahmet Altay AuthorDate: Tue Jan 15 16:17:44 2019 -0800 Merge pull request #7519 from tvalentyn/pip_retries [BEAM-6318] Increase the amount of pip download retries to 10 from a default 5 .../src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy| 4 ++-- sdks/python/tox.ini | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-)
[beam] Diff for: [GitHub] angoenka merged pull request #7415: [BEAM-6258] Set grpc keep alive on server creation
diff --git a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/ServerFactory.java b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/ServerFactory.java index 5970f85c1745..38cae631a975 100644 --- a/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/ServerFactory.java +++ b/runners/java-fn-execution/src/main/java/org/apache/beam/runners/fnexecution/ServerFactory.java @@ -25,6 +25,7 @@ import java.net.InetSocketAddress; import java.net.SocketAddress; import java.util.List; +import java.util.concurrent.TimeUnit; import java.util.function.Supplier; import org.apache.beam.model.pipeline.v1.Endpoints; import org.apache.beam.sdk.fn.channel.SocketAddressFactory; @@ -42,6 +43,9 @@ /** A {@link Server gRPC server} factory. */ public abstract class ServerFactory { + + private static final int KEEP_ALIVE_TIME_SEC = 20; + /** Create a default {@link InetSocketAddressServerFactory}. */ public static ServerFactory createDefault() { return new InetSocketAddressServerFactory(UrlFactory.createDefault()); @@ -144,7 +148,8 @@ private static Server createServer(List services, InetSocketAdd NettyServerBuilder.forPort(socket.getPort()) // Set the message size to max value here. The actual size is governed by the // buffer size in the layers above. 
- .maxMessageSize(Integer.MAX_VALUE); + .maxMessageSize(Integer.MAX_VALUE) + .permitKeepAliveTime(KEEP_ALIVE_TIME_SEC, TimeUnit.SECONDS); services .stream() .forEach( @@ -200,7 +205,8 @@ private static Server createServer( .channelType(EpollServerDomainSocketChannel.class) .workerEventLoopGroup(new EpollEventLoopGroup()) .bossEventLoopGroup(new EpollEventLoopGroup()) - .maxMessageSize(Integer.MAX_VALUE); + .maxMessageSize(Integer.MAX_VALUE) + .permitKeepAliveTime(KEEP_ALIVE_TIME_SEC, TimeUnit.SECONDS); for (BindableService service : services) { // Wrap the service to extract headers builder.addService( @@ -249,7 +255,8 @@ private static Server createServer(List services, InetSocketAdd .channelType(EpollServerSocketChannel.class) .workerEventLoopGroup(new EpollEventLoopGroup()) .bossEventLoopGroup(new EpollEventLoopGroup()) - .maxMessageSize(Integer.MAX_VALUE); + .maxMessageSize(Integer.MAX_VALUE) + .permitKeepAliveTime(KEEP_ALIVE_TIME_SEC, TimeUnit.SECONDS); for (BindableService service : services) { // Wrap the service to extract headers builder.addService( diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner.py b/sdks/python/apache_beam/runners/portability/fn_api_runner.py index a39a996478d6..1272b0e31b52 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner.py @@ -61,6 +61,7 @@ from apache_beam.runners.worker import bundle_processor from apache_beam.runners.worker import data_plane from apache_beam.runners.worker import sdk_worker +from apache_beam.runners.worker.channel_factory import GRPCChannelFactory from apache_beam.transforms import trigger from apache_beam.transforms.window import GlobalWindows from apache_beam.utils import profiler @@ -830,7 +831,8 @@ def __init__(self, external_payload, state): def start_worker(self): stub = beam_fn_api_pb2_grpc.BeamFnExternalWorkerPoolStub( -grpc.insecure_channel(self._external_payload.endpoint.url)) 
+GRPCChannelFactory.insecure_channel( +self._external_payload.endpoint.url)) response = stub.NotifyRunnerAvailable( beam_fn_api_pb2.NotifyRunnerAvailableRequest( control_endpoint=endpoints_pb2.ApiServiceDescriptor( diff --git a/sdks/python/apache_beam/runners/portability/portable_runner.py b/sdks/python/apache_beam/runners/portability/portable_runner.py index f6108ed030d0..d2bf31b2c61c 100644 --- a/sdks/python/apache_beam/runners/portability/portable_runner.py +++ b/sdks/python/apache_beam/runners/portability/portable_runner.py @@ -44,6 +44,7 @@ from apache_beam.runners.portability.job_server import DockerizedJobServer from apache_beam.runners.worker import sdk_worker from apache_beam.runners.worker import sdk_worker_main +from apache_beam.runners.worker.channel_factory import GRPCChannelFactory __all__ = ['PortableRunner'] @@ -188,7 +189,7 @@ def run_pipeline(self, pipeline, options): for k, v in options.get_all_options().items() if v is not None} -channel = grpc.insecure_channel(job_endpoint) +channel = GRPCChannelFactory.insecure_channel(job_endpoint) grpc.channel_ready_future(channel).result() job_service = beam_job_api_pb2_grpc.JobServiceStub(channel) @@ -212,7 +213,8 @@ def
[beam] 01/01: Merge pull request #7415 from angoenka/grpc_keep_alive
This is an automated email from the ASF dual-hosted git repository. goenka pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git commit 5d00f6b42304815944dfe46ec7a2d95df510ea44 Merge: 0e416e2 d2543a2 Author: Ankur AuthorDate: Tue Jan 15 16:06:40 2019 -0800 Merge pull request #7415 from angoenka/grpc_keep_alive [BEAM-6258] Set grpc keep alive on server creation .../beam/runners/fnexecution/ServerFactory.java| 13 +-- .../runners/portability/fn_api_runner.py | 4 +- .../runners/portability/portable_runner.py | 6 ++- .../runners/portability/portable_runner_test.py| 3 +- .../apache_beam/runners/worker/channel_factory.py | 44 ++ .../apache_beam/runners/worker/data_plane.py | 6 ++- .../apache_beam/runners/worker/log_handler.py | 3 +- .../apache_beam/runners/worker/sdk_worker.py | 9 +++-- 8 files changed, 75 insertions(+), 13 deletions(-)
[beam] branch master updated (0e416e2 -> 5d00f6b)
This is an automated email from the ASF dual-hosted git repository. goenka pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from 0e416e2 Merge pull request #7503 from RobbeSneyders/tfrecordio add d2543a2 [BEAM-6258] Set keep alive ping to 20sec new 5d00f6b Merge pull request #7415 from angoenka/grpc_keep_alive The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .../beam/runners/fnexecution/ServerFactory.java| 13 -- .../runners/portability/fn_api_runner.py | 4 +- .../runners/portability/portable_runner.py | 6 ++- .../runners/portability/portable_runner_test.py| 3 +- .../worker/channel_factory.py} | 46 +++--- .../apache_beam/runners/worker/data_plane.py | 6 ++- .../apache_beam/runners/worker/log_handler.py | 3 +- .../apache_beam/runners/worker/sdk_worker.py | 9 +++-- 8 files changed, 53 insertions(+), 37 deletions(-) copy sdks/python/apache_beam/{coders/observable.py => runners/worker/channel_factory.py} (55%)
[beam] 01/01: Merge pull request #7503 from RobbeSneyders/tfrecordio
This is an automated email from the ASF dual-hosted git repository. altay pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git commit 0e416e2ff3bdae1932156a5ae2aca9a398a33fa4 Merge: b6e838d 166e98b Author: Ahmet Altay AuthorDate: Tue Jan 15 15:36:38 2019 -0800 Merge pull request #7503 from RobbeSneyders/tfrecordio [BEAM-5315] Python 3 port io.tfrecordio module sdks/python/apache_beam/io/tfrecordio_test.py | 88 +++ sdks/python/tox.ini | 2 +- 2 files changed, 38 insertions(+), 52 deletions(-)
[beam] branch master updated (b6e838d -> 0e416e2)
This is an automated email from the ASF dual-hosted git repository. altay pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from b6e838d Merge pull request #7514: [BEAM-6439] Move Python Validates Runner Flink test to PreCommit add 166e98b Python 3 port io.tfrecordio module new 0e416e2 Merge pull request #7503 from RobbeSneyders/tfrecordio The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: sdks/python/apache_beam/io/tfrecordio_test.py | 88 +++ sdks/python/tox.ini | 2 +- 2 files changed, 38 insertions(+), 52 deletions(-)
[beam] Diff for: [GitHub] aaltay merged pull request #7503: [BEAM-5315] Python 3 port io.tfrecordio module
diff --git a/sdks/python/apache_beam/io/tfrecordio_test.py b/sdks/python/apache_beam/io/tfrecordio_test.py index e85842436b22..49956ea6f3a6 100644 --- a/sdks/python/apache_beam/io/tfrecordio_test.py +++ b/sdks/python/apache_beam/io/tfrecordio_test.py @@ -59,15 +59,15 @@ # >>> import tensorflow as tf # >>> import base64 # >>> writer = tf.python_io.TFRecordWriter('/tmp/python_foo.tfrecord') -# >>> writer.write('foo') +# >>> writer.write(b'foo') # >>> writer.close() # >>> with open('/tmp/python_foo.tfrecord', 'rb') as f: # ... data = base64.b64encode(f.read()) # ... print(data) -FOO_RECORD_BASE64 = 'AwCwmUkOZm9vYYq+/g==' +FOO_RECORD_BASE64 = b'AwCwmUkOZm9vYYq+/g==' -# Same as above but containing two records ['foo', 'bar'] -FOO_BAR_RECORD_BASE64 = 'AwCwmUkOZm9vYYq+/gMAsJlJDmJhckYA5cg=' +# Same as above but containing two records [b'foo', b'bar'] +FOO_BAR_RECORD_BASE64 = b'AwCwmUkOZm9vYYq+/gMAsJlJDmJhckYA5cg=' def _write_file(path, base64_records): @@ -95,42 +95,46 @@ def _as_file_handle(self, contents): def _increment_value_at_index(self, value, index): l = list(value) -l[index] = bytes(ord(l[index]) + 1) -return "".join(l) +if sys.version_info[0] <= 2: + l[index] = bytes(ord(l[index]) + 1) + return b"".join(l) +else: + l[index] = l[index] + 1 + return bytes(l) def _test_error(self, record, error_text): with self.assertRaisesRegexp(ValueError, re.escape(error_text)): _TFRecordUtil.read_record(self._as_file_handle(record)) def test_masked_crc32c(self): -self.assertEqual(0xfd7fffa, _TFRecordUtil._masked_crc32c('\x00' * 32)) -self.assertEqual(0xf909b029, _TFRecordUtil._masked_crc32c('\xff' * 32)) -self.assertEqual(0xfebe8a61, _TFRecordUtil._masked_crc32c('foo')) +self.assertEqual(0xfd7fffa, _TFRecordUtil._masked_crc32c(b'\x00' * 32)) +self.assertEqual(0xf909b029, _TFRecordUtil._masked_crc32c(b'\xff' * 32)) +self.assertEqual(0xfebe8a61, _TFRecordUtil._masked_crc32c(b'foo')) self.assertEqual( 0xe4999b0, -_TFRecordUtil._masked_crc32c('\x03\x00\x00\x00\x00\x00\x00\x00')) 
+_TFRecordUtil._masked_crc32c(b'\x03\x00\x00\x00\x00\x00\x00\x00')) def test_masked_crc32c_crcmod(self): crc32c_fn = crcmod.predefined.mkPredefinedCrcFun('crc-32c') self.assertEqual( 0xfd7fffa, _TFRecordUtil._masked_crc32c( -'\x00' * 32, crc32c_fn=crc32c_fn)) +b'\x00' * 32, crc32c_fn=crc32c_fn)) self.assertEqual( 0xf909b029, _TFRecordUtil._masked_crc32c( -'\xff' * 32, crc32c_fn=crc32c_fn)) +b'\xff' * 32, crc32c_fn=crc32c_fn)) self.assertEqual( 0xfebe8a61, _TFRecordUtil._masked_crc32c( -'foo', crc32c_fn=crc32c_fn)) +b'foo', crc32c_fn=crc32c_fn)) self.assertEqual( 0xe4999b0, _TFRecordUtil._masked_crc32c( -'\x03\x00\x00\x00\x00\x00\x00\x00', crc32c_fn=crc32c_fn)) +b'\x03\x00\x00\x00\x00\x00\x00\x00', crc32c_fn=crc32c_fn)) def test_write_record(self): file_handle = io.BytesIO() -_TFRecordUtil.write_record(file_handle, 'foo') +_TFRecordUtil.write_record(file_handle, b'foo') self.assertEqual(self.record, file_handle.getvalue()) def test_read_record(self): @@ -138,7 +142,7 @@ def test_read_record(self): self.assertEqual(b'foo', actual) def test_read_record_invalid_record(self): -self._test_error('bar', 'Not a valid TFRecord. Fewer than 12 bytes') +self._test_error(b'bar', 'Not a valid TFRecord. 
Fewer than 12 bytes') def test_read_record_invalid_length_mask(self): record = self._increment_value_at_index(self.record, 9) @@ -149,7 +153,7 @@ def test_read_record_invalid_data_mask(self): self._test_error(record, 'Mismatch of data mask') def test_compatibility_read_write(self): -for record in ['', 'blah', 'another blah']: +for record in [b'', b'blah', b'another blah']: file_handle = io.BytesIO() _TFRecordUtil.write_record(file_handle, record) file_handle.seek(0) @@ -176,9 +180,9 @@ def test_write_record_single(self): num_shards=0, shard_name_template=None, compression_type=CompressionTypes.UNCOMPRESSED) - self._write_lines(sink, path, ['foo']) + self._write_lines(sink, path, [b'foo']) - with open(path, 'r') as f: + with open(path, 'rb') as f: self.assertEqual(f.read(), record) def test_write_record_multiple(self): @@ -192,9 +196,9 @@ def test_write_record_multiple(self): num_shards=0, shard_name_template=None, compression_type=CompressionTypes.UNCOMPRESSED) - self._write_lines(sink, path, ['foo', 'bar']) + self._write_lines(sink, path, [b'foo', b'bar']) - with open(path, 'r') as f: + with open(path, 'rb') as f: self.assertEqual(f.read(), record) @@ -247,7 +251,7
[beam] 01/01: Merge pull request #7514: [BEAM-6439] Move Python Validates Runner Flink test to PreCommit
This is an automated email from the ASF dual-hosted git repository. mxm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git commit b6e838d9af07825f09b99e8cbc98b1b3a89e4944 Merge: feece91 4358d07 Author: Maximilian Michels AuthorDate: Tue Jan 15 18:06:16 2019 -0500 Merge pull request #7514: [BEAM-6439] Move Python Validates Runner Flink test to PreCommit ..._PreCommit_Python_ValidatesRunner_Flink.groovy} | 33 +- 1 file changed, 13 insertions(+), 20 deletions(-)
[beam] branch master updated (feece91 -> b6e838d)
This is an automated email from the ASF dual-hosted git repository. mxm pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from feece91 Merge pull request #7491 from akedin/multiple-schemas-support add 4358d07 Move Python Validates Runner Flink test to PreCommit. new b6e838d Merge pull request #7514: [BEAM-6439] Move Python Validates Runner Flink test to PreCommit The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: ..._PostCommit_Python_ValidatesRunner_Flink.groovy | 40 -- ..._PreCommit_Python_ValidatesRunner_Flink.groovy} | 5 +-- 2 files changed, 3 insertions(+), 42 deletions(-) delete mode 100644 .test-infra/jenkins/job_PostCommit_Python_ValidatesRunner_Flink.groovy copy .test-infra/jenkins/{job_PreCommit_Portable_Python.groovy => job_PreCommit_Python_ValidatesRunner_Flink.groovy} (85%)
[beam] Diff for: [GitHub] mxm merged pull request #7514: [BEAM-6439] Move Python Validates Runner Flink test to PreCommit.
diff --git a/.test-infra/jenkins/job_PostCommit_Python_ValidatesRunner_Flink.groovy b/.test-infra/jenkins/job_PreCommit_Python_ValidatesRunner_Flink.groovy similarity index 55% rename from .test-infra/jenkins/job_PostCommit_Python_ValidatesRunner_Flink.groovy rename to .test-infra/jenkins/job_PreCommit_Python_ValidatesRunner_Flink.groovy index f6c42157047b..4ef5d586ca4f 100644 --- a/.test-infra/jenkins/job_PostCommit_Python_ValidatesRunner_Flink.groovy +++ b/.test-infra/jenkins/job_PreCommit_Python_ValidatesRunner_Flink.groovy @@ -16,25 +16,18 @@ * limitations under the License. */ -import CommonJobProperties as commonJobProperties -import PostcommitJobBuilder +import PrecommitJobBuilder // This job runs the suite of ValidatesRunner tests against the Flink runner. -PostcommitJobBuilder.postCommitJob('beam_PostCommit_Python_VR_Flink', - 'Run Python Flink ValidatesRunner', 'Python Flink ValidatesRunner Tests', this) { - description('Runs the Python ValidatesRunner suite on the Flink runner.') - - previousNames('beam_PostCommit_Python_PVR_Flink_Gradle') - - // Set common parameters. - commonJobProperties.setTopLevelMainJobProperties(delegate) - - // Execute gradle task to test Python Flink Portable Runner. - steps { -gradle { - rootBuildScriptDir(commonJobProperties.checkoutDir) - tasks(':beam-sdks-python:flinkValidatesRunner') - commonJobProperties.setGradleSwitches(delegate) -} - } -} +PrecommitJobBuilder builder = new PrecommitJobBuilder( +scope: this, +nameBase: 'Python_ValidatesRunner_Flink', +gradleTask: ':beam-sdks-python:flinkValidatesRunner', +triggerPathPatterns: [ + '^model/.*$', + '^runners/.*$', + '^sdks/python/.*$', + '^release/.*$', +] +) +builder.build {} With regards, Apache Git Services
[beam] branch release-2.10.0 updated (6cd27b0 -> 343e662)
This is an automated email from the ASF dual-hosted git repository. kenn pushed a change to branch release-2.10.0 in repository https://gitbox.apache.org/repos/asf/beam.git. discard 6cd27b0 [BEAM-6418] Execute Flink tests serially to avoid memory issues omit de94544 [BEAM-6294] Ensure input and output coders are equal for reshuffle transforms. omit 629eaf9 Merge pull request #7464: [BEAM-6382] Cherry pick pr #7443 into 2.10.0 release branch omit 57af122 [BEAM-6382]: Fix styling issues omit 19bb897 [BEAM-6248] Add Flink v1.7 build target to Flink Runner (#7300) omit ae38b89 [BEAM-6326] Fix portable stateful processing with side input omit 5e398fe [BEAM-6382] SamzaRunner: add an option to read configs using a user-defined factory omit 93a4cb5 Merge pull request #7419: [BEAM-6337] Cherry-pick #7417 to release-2.10.0: Fix JdbcDriver breaking DriverManager#getConnection omit 500b25f [BEAM-6367] Fix JdbcDriver breaking DriverManager#getConnection add 3b8abca Upgrade vendored gRPC artifact version to 0.2 add 15aa88d Merge pull request #7328: [BEAM-6056] Upgrade vendored gRPC artifact version to 0.2 add 14781c7 [BEAM-6056] Source vendored grpc dependency from Maven central add a25b64d Merge pull request #7388: [BEAM-6056] Source vendored grpc dependency from Maven central add 095870f Python 3 port io.range_trackers add 359ddb9 Add io.restriction_trackers_test to Python 3 test suite add bca5c60 Merge pull request #7358 from RobbeSneyders/trackers add 5ce0933 Updates release validating to run LeaderBoard example using Dataflow Streaming Engine add 5dd597e Merge pull request #7365: [BEAM-6249] Adds an Streaming Engine based test to release validation add 5cdf3a7 [BEAM-5315] Python 3 port io.source* and io.concat_source* modules (#7383) add 4b039e4 [BEAM-5315] Python 3 port io.filebased_* modules (#7386) add fc482f1 [BEAM-5959] Add performance testing for writing many files add 41dd6e1 Merge pull request #7266 from udim/cmek-perf add a24b1af Move 
org.apache.beam.runners.samza.util.Base64Serializer to org.apache.beam.runners.core.serialization.Base64Serializer to be used by other runners add 3b8ae00 Fix visibility of deserialize method add 0783779 Add missing package-info add 4660895 Merge pull request #7384 from echauchot/Base64Serializer add c4590a0 split SerializablePipelineOptions into serialization utils and instance class. add 5130bcb Merge pull request #7385 from echauchot/exposeSerializationSerializablePipelineOptions add a404cee Add paddedcell fix to spotlessJava rules. add c148c35 Merge pull request #7390: [BEAM-6339] Add paddedcell fix to spotlessJava rules. add c028ebc Upgrade html-proofer and dependencies to latest add 07c279a Remove broken links to datatorrent.com add b09e721 Fix pydoc link to GoogleCloudOptions add fd5e321 Remove broken link to atrato.io add a79ef89 Fix link to internal anchor add 5466ac0 Remove stale exclusions from HTML link checker. add a2986cc Merge pull request #7393: [BEAM-5662] Clean up website html-proofer config add b02f79f Disable UsesMetricsPusher tests for direct-runner add f74c979 Fix SplittableDoFnTest#testBoundedness add 459e730 [BEAM-6352] Ignore tests using Watch PTransform add 26c73ef [BEAM-6353] Fix TFRecordIOTest add 92a6c23 [BEAM-6354] Add timeout and ignore hanging tests add 55ffd97 Add :beam-runners-direct-java:needsRunnerTests to javaPreCommit add c591727 Merge pull request #7374: Add :beam-runners-direct-java:needsRunnerTests to javaPreCommit add 3948595 [BEAM-5959] Reorder methods according to convention add 5716dba Merge pull request #7403 from udim/cmek-perf add 5212b71 [BEAM-6030] Split metrics related options out of PipelineOptions add 185cb1a [BEAM-6030] Add Experimental label on MetricsOptions add bd80118 Merge pull request #7400 from echauchot/BEAM-6030-metrics-sinks-pipelineOptions add ea0b8d5 Upgrade Gradle to 4.10.3 add 20abb3e Merge pull request #7401: Upgrade Gradle to 4.10.3 add 8462a8b BEAM-4110 fix and reenable 
ResourceIdTester.runResourceIdBattery add 3670b18 BEAM-4110 fixed deprecation warnings and cleanup add e832e08 BEAM-4143 reenable sickbayed test, cleanup add b4d2d8e Merge pull request #7412: [BEAM-4143] GcsResourceIdTest has had a masked failure add a1d938e [BEAM-6367] Fix JdbcDriver breaking DriverManager#getConnection add 4b5b1a0 Merge pull request #7417 from kanterov/kanterov_fix_beam_driver add 1c76927 BQ Geography support for Java SDK add 3739683 Merge pull request #7389 from pabloem/bq-jeo add d19585a [BEAM-4030] Add compact attribute to combiners (#7405) add 8a7f971 [BEAM-6362] remove --info from gradle invocations add f22d12d Reduce days to keep
[beam] 01/01: Prepare for 2.10.0 release
This is an automated email from the ASF dual-hosted git repository. kenn pushed a commit to branch release-2.10.0 in repository https://gitbox.apache.org/repos/asf/beam.git commit 343e6629b0830509a1123484ac766207fdac4328 Author: Kenneth Knowles AuthorDate: Tue Jan 15 12:08:00 2019 -0800 Prepare for 2.10.0 release --- runners/google-cloud-dataflow-java/build.gradle | 2 +- sdks/python/apache_beam/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/runners/google-cloud-dataflow-java/build.gradle b/runners/google-cloud-dataflow-java/build.gradle index c4c721f..f913502 100644 --- a/runners/google-cloud-dataflow-java/build.gradle +++ b/runners/google-cloud-dataflow-java/build.gradle @@ -39,7 +39,7 @@ processResources { filter org.apache.tools.ant.filters.ReplaceTokens, tokens: [ 'dataflow.legacy_environment_major_version' : '7', 'dataflow.fnapi_environment_major_version' : '7', -'dataflow.container_version' : 'beam-master-20190110' +'dataflow.container_version' : 'beam-2.10.0' ] } diff --git a/sdks/python/apache_beam/version.py b/sdks/python/apache_beam/version.py index 57512fe..4e5e679 100644 --- a/sdks/python/apache_beam/version.py +++ b/sdks/python/apache_beam/version.py @@ -18,4 +18,4 @@ """Apache Beam SDK version information and utilities.""" -__version__ = '2.10.0.dev' +__version__ = '2.10.0'
[beam] Diff for: [GitHub] akedin merged pull request #7491: [SQL] Multiple schemas support
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/SqlTransform.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/SqlTransform.java index 5fd08806c525..f5613c9b56bf 100644 --- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/SqlTransform.java +++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/SqlTransform.java @@ -19,12 +19,15 @@ import com.google.auto.value.AutoValue; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; +import javax.annotation.Nullable; import org.apache.beam.sdk.annotations.Experimental; import org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv; import org.apache.beam.sdk.extensions.sql.impl.rel.BeamSqlRelUtils; import org.apache.beam.sdk.extensions.sql.impl.schema.BeamPCollectionTable; +import org.apache.beam.sdk.extensions.sql.meta.provider.TableProvider; import org.apache.beam.sdk.transforms.Combine; import org.apache.beam.sdk.transforms.PTransform; import org.apache.beam.sdk.transforms.SerializableFunction; @@ -86,9 +89,17 @@ abstract boolean autoUdfUdafLoad(); + abstract Map tableProviderMap(); + + abstract @Nullable String defaultTableProvider(); + @Override public PCollection expand(PInput input) { BeamSqlEnv sqlEnv = BeamSqlEnv.readOnly(PCOLLECTION_NAME, toTableMap(input)); +tableProviderMap().forEach(sqlEnv::addSchema); +if (defaultTableProvider() != null) { + sqlEnv.setCurrentSchema(defaultTableProvider()); +} // TODO: validate duplicate functions. 
sqlEnv.loadBeamBuiltinFunctions(); @@ -154,10 +165,21 @@ public static SqlTransform query(String queryString) { .setQueryString(queryString) .setUdafDefinitions(Collections.emptyList()) .setUdfDefinitions(Collections.emptyList()) +.setTableProviderMap(Collections.emptyMap()) .setAutoUdfUdafLoad(false) .build(); } + public SqlTransform withTableProvider(String name, TableProvider tableProvider) { +Map map = new HashMap<>(tableProviderMap()); +map.put(name, tableProvider); +return toBuilder().setTableProviderMap(ImmutableMap.copyOf(map)).build(); + } + + public SqlTransform withDefaultTableProvider(String name, TableProvider tableProvider) { +return withTableProvider(name, tableProvider).toBuilder().setDefaultTableProvider(name).build(); + } + public SqlTransform withAutoUdfUdafLoad(boolean autoUdfUdafLoad) { return toBuilder().setAutoUdfUdafLoad(autoUdfUdafLoad).build(); } @@ -215,6 +237,10 @@ static Builder builder() { abstract Builder setAutoUdfUdafLoad(boolean autoUdfUdafLoad); +abstract Builder setTableProviderMap(Map tableProviderMap); + +abstract Builder setDefaultTableProvider(@Nullable String defaultTableProvider); + abstract SqlTransform build(); } diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/BeamQueryPlanner.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/BeamQueryPlanner.java index 65b2f1556555..83993600f941 100644 --- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/BeamQueryPlanner.java +++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/BeamQueryPlanner.java @@ -54,9 +54,13 @@ class BeamQueryPlanner { private static final Logger LOG = LoggerFactory.getLogger(BeamQueryPlanner.class); - private final FrameworkConfig config; + private JdbcConnection connection; BeamQueryPlanner(JdbcConnection connection) { +this.connection = connection; + } + + public FrameworkConfig config() { final CalciteConnectionConfig 
config = connection.config(); final SqlParser.ConfigBuilder parserConfig = SqlParser.configBuilder() @@ -85,17 +89,16 @@ final SqlOperatorTable opTab0 = connection.config().fun(SqlOperatorTable.class, SqlStdOperatorTable.instance()); -this.config = -Frameworks.newConfigBuilder() -.parserConfig(parserConfig.build()) -.defaultSchema(defaultSchema) -.traitDefs(traitDefs) -.context(Contexts.of(connection.config())) -.ruleSets(BeamRuleSets.getRuleSets()) -.costFactory(null) -.typeSystem(connection.getTypeFactory().getTypeSystem()) -.operatorTable(ChainedSqlOperatorTable.of(opTab0, catalogReader)) -.build(); +return Frameworks.newConfigBuilder() +.parserConfig(parserConfig.build()) +.defaultSchema(defaultSchema) +.traitDefs(traitDefs) +.context(Contexts.of(connection.config())) +.ruleSets(BeamRuleSets.getRuleSets()) +.costFactory(null) +.typeSystem(connection.getTypeFactory().getTypeSystem()) +.operatorTable(ChainedSqlOperatorTable.of(opTab0, catalogReader)) +.build();
[beam] 01/01: Merge pull request #7491 from akedin/multiple-schemas-support
This is an automated email from the ASF dual-hosted git repository. anton pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git commit feece918d2a938efd3fed22e7ce4e127953f662d Merge: ff70ca1 de08064 Author: Anton Kedin <33067037+ake...@users.noreply.github.com> AuthorDate: Tue Jan 15 12:11:19 2019 -0800 Merge pull request #7491 from akedin/multiple-schemas-support [SQL] Multiple schemas support .../beam/sdk/extensions/sql/SqlTransform.java | 26 +++ .../sdk/extensions/sql/impl/BeamQueryPlanner.java | 29 +-- .../beam/sdk/extensions/sql/impl/BeamSqlEnv.java | 13 ++ .../sdk/extensions/sql/impl/JdbcConnection.java| 1 + .../extensions/sql/BeamSqlMultipleSchemasTest.java | 253 + 5 files changed, 309 insertions(+), 13 deletions(-)
[beam] branch master updated (ff70ca1 -> feece91)
This is an automated email from the ASF dual-hosted git repository. anton pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from ff70ca1 Moving to 2.11.0-SNAPSHOT on master branch add 9ff8001 [SQL] Support multiple top-level schemas in SqlTransform add de08064 [SQL] Support changing the default schema in SqlTransform new feece91 Merge pull request #7491 from akedin/multiple-schemas-support The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .../beam/sdk/extensions/sql/SqlTransform.java | 26 +++ .../sdk/extensions/sql/impl/BeamQueryPlanner.java | 29 +-- .../beam/sdk/extensions/sql/impl/BeamSqlEnv.java | 13 ++ .../sdk/extensions/sql/impl/JdbcConnection.java| 1 + .../extensions/sql/BeamSqlMultipleSchemasTest.java | 253 + 5 files changed, 309 insertions(+), 13 deletions(-) create mode 100644 sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamSqlMultipleSchemasTest.java
[beam] Diff for: [GitHub] kennknowles closed pull request #7516: Update version to 2.11.0-SNAPSHOT
diff --git a/gradle.properties b/gradle.properties index 1dcfd0180407..8d2d1685709e 100644 --- a/gradle.properties +++ b/gradle.properties @@ -23,4 +23,4 @@ offlineRepositoryRoot=offline-repository signing.gnupg.executable=gpg signing.gnupg.useLegacyGpg=true -version=2.10.0-SNAPSHOT +version=2.11.0-SNAPSHOT With regards, Apache Git Services
[beam] branch master updated (317a8b5 -> ff70ca1)
This is an automated email from the ASF dual-hosted git repository. kenn pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from 317a8b5 Merge pull request #7499: [BEAM-6425] - Replace SSLContext.getInstance("SSL") add ff70ca1 Moving to 2.11.0-SNAPSHOT on master branch No new revisions were added by this update. Summary of changes: buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy | 2 +- gradle.properties | 2 +- sdks/python/apache_beam/version.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-)
[beam] Diff for: [GitHub] tvalentyn closed pull request #7058: DO NOT MERGE
diff --git a/sdks/python/apache_beam/typehints/native_type_compatibility.py b/sdks/python/apache_beam/typehints/native_type_compatibility.py index 87fb2c808167..dd397e5ca6f5 100644 --- a/sdks/python/apache_beam/typehints/native_type_compatibility.py +++ b/sdks/python/apache_beam/typehints/native_type_compatibility.py @@ -1,3 +1,4 @@ +# DO NOT MERGE # # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with With regards, Apache Git Services
[beam] branch release-2.10.0 updated: [BEAM-6418] Execute Flink tests serially to avoid memory issues
This is an automated email from the ASF dual-hosted git repository. mxm pushed a commit to branch release-2.10.0 in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/release-2.10.0 by this push: new 6cd27b0 [BEAM-6418] Execute Flink tests serially to avoid memory issues 6cd27b0 is described below commit 6cd27b0928f1cae2489fa9d6bcd18b3b76b7f56d Author: Maximilian Michels AuthorDate: Mon Jan 14 21:30:35 2019 -0500 [BEAM-6418] Execute Flink tests serially to avoid memory issues We had previously disabled Flink tests for 1.6 and 1.7 due to memory issues when they executed in parallel. This lets them execute one after another. --- runners/flink/flink_runner.gradle | 7 +++ 1 file changed, 7 insertions(+) diff --git a/runners/flink/flink_runner.gradle b/runners/flink/flink_runner.gradle index 51f2e3b..d90ef2c 100644 --- a/runners/flink/flink_runner.gradle +++ b/runners/flink/flink_runner.gradle @@ -68,6 +68,13 @@ test { if (System.getProperty("beamSurefireArgline")) { jvmArgs System.getProperty("beamSurefireArgline") } + // TODO Running tests of all Flink versions in parallel can be too harsh on Jenkins memory + // Run them serially for now, to avoid "Exit code 137", i.e. Jenkins host killing the Gradle test process + if (project.name == "beam-runners-flink-1.6") { +dependsOn(":beam-runners-flink_2.11:test") + } else if (project.name == "beam-runners-flink-1.7") { +dependsOn(":beam-runners-flink-1.6:test") + } } configurations {
[beam] branch spark-runner_structured-streaming updated: Cleaning
This is an automated email from the ASF dual-hosted git repository. echauchot pushed a commit to branch spark-runner_structured-streaming in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/spark-runner_structured-streaming by this push: new 0402d6a Cleaning 0402d6a is described below commit 0402d6a0a379b973fba68524ccaf6ab2ea061d2c Author: Etienne Chauchot AuthorDate: Tue Jan 15 17:39:27 2019 +0100 Cleaning --- .../org/apache/beam/runners/spark/structuredstreaming/SparkRunner.java | 1 + .../spark/structuredstreaming/translation/TranslationContext.java | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/SparkRunner.java b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/SparkRunner.java index 934c6d2..72cb524 100644 --- a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/SparkRunner.java +++ b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/SparkRunner.java @@ -114,6 +114,7 @@ public final class SparkRunner extends PipelineRunner { public SparkPipelineResult run(final Pipeline pipeline) { translationContext = translatePipeline(pipeline); //TODO initialise other services: checkpointing, metrics system, listeners, ... 
+//TODO pass testMode using pipelineOptions translationContext.startPipeline(true); return new SparkPipelineResult(); } diff --git a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/TranslationContext.java b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/TranslationContext.java index 0f20663..75b470e 100644 --- a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/TranslationContext.java +++ b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/TranslationContext.java @@ -188,8 +188,7 @@ public class TranslationContext { } } else { // apply a dummy fn just to apply foreach action that will trigger the pipeline run in spark -dataset.foreachPartition(t -> { -}); +dataset.foreachPartition(t -> {}); } } }
[beam] 03/04: Remove bundleSize parameter and always use spark default parallelism
This is an automated email from the ASF dual-hosted git repository. echauchot pushed a commit to branch spark-runner_structured-streaming in repository https://gitbox.apache.org/repos/asf/beam.git commit 7bb19451dadea0259f6658c7ccc7f157fa0cd576 Author: Etienne Chauchot AuthorDate: Tue Jan 15 17:06:51 2019 +0100 Remove bundleSize parameter and always use spark default parallelism --- .../spark/structuredstreaming/SparkPipelineOptions.java| 10 -- .../translation/batch/DatasetSourceBatch.java | 5 + 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/SparkPipelineOptions.java b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/SparkPipelineOptions.java index 2e6653b..442ccf8 100644 --- a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/SparkPipelineOptions.java +++ b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/SparkPipelineOptions.java @@ -73,16 +73,6 @@ public interface SparkPipelineOptions void setCheckpointDurationMillis(Long durationMillis); - @Description( - "If set bundleSize will be used for splitting BoundedSources, otherwise default to " - + "splitting BoundedSources on Spark defaultParallelism. 
Most effective when used with " - + "Spark dynamicAllocation.") - @Default.Long(0) - Long getBundleSize(); - - @Experimental - void setBundleSize(Long value); - @Description("Enable/disable sending aggregator values to Spark's metric sinks") @Default.Boolean(true) Boolean getEnableSparkMetricSinks(); diff --git a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DatasetSourceBatch.java b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DatasetSourceBatch.java index d966efb..3f6f219 100644 --- a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DatasetSourceBatch.java +++ b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DatasetSourceBatch.java @@ -113,10 +113,7 @@ public class DatasetSourceBatch implements DataSourceV2, ReadSupport { List> result = new ArrayList<>(); long desiredSizeBytes; try { -desiredSizeBytes = -(sparkPipelineOptions.getBundleSize() == null) -? source.getEstimatedSizeBytes(sparkPipelineOptions) / numPartitions -: sparkPipelineOptions.getBundleSize(); +desiredSizeBytes = source.getEstimatedSizeBytes(sparkPipelineOptions) / numPartitions; List> splits = source.split(desiredSizeBytes, sparkPipelineOptions); for (BoundedSource split : splits) { result.add(
[beam] 01/04: Fix testMode output to comply with new binary schema
This is an automated email from the ASF dual-hosted git repository. echauchot pushed a commit to branch spark-runner_structured-streaming in repository https://gitbox.apache.org/repos/asf/beam.git commit f19721f35b9249c1db712dc4c72a588105019726 Author: Etienne Chauchot AuthorDate: Tue Jan 15 16:00:55 2019 +0100 Fix testMode output to comply with new binary schema --- .../spark/structuredstreaming/translation/TranslationContext.java | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/TranslationContext.java b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/TranslationContext.java index 33706bd..0f20663 100644 --- a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/TranslationContext.java +++ b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/TranslationContext.java @@ -21,6 +21,7 @@ import com.google.common.collect.Iterables; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import java.util.HashMap; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -180,7 +181,11 @@ public class TranslationContext { dataset.writeStream().foreach(new NoOpForeachWriter<>()).start().awaitTermination(); } else { if (testMode){ -dataset.show(); +// cannot use dataset.show because dataset schema is binary so it will print binary code. +List windowedValues = ((Dataset)dataset).collectAsList(); +for (WindowedValue windowedValue : windowedValues){ + System.out.println(windowedValue); +} } else { // apply a dummy fn just to apply foreach action that will trigger the pipeline run in spark dataset.foreachPartition(t -> {
[beam] 02/04: Cleaning
This is an automated email from the ASF dual-hosted git repository. echauchot pushed a commit to branch spark-runner_structured-streaming in repository https://gitbox.apache.org/repos/asf/beam.git commit ca88d547d54ec9e1f5831894106dce076205acbd Author: Etienne Chauchot AuthorDate: Tue Jan 15 16:42:09 2019 +0100 Cleaning --- .../spark/structuredstreaming/SparkRunner.java | 4 +- .../translation/PipelineTranslator.java| 4 +- .../translation/batch/DatasetSourceBatch.java | 2 +- .../translation/batch/PipelineTranslatorBatch.java | 5 +- .../translation/batch/TranslationContextBatch.java | 40 --- .../batch/mocks/DatasetSourceMockBatch.java| 94 --- .../batch/mocks/ReadSourceTranslatorMockBatch.java | 62 -- .../translation/batch/mocks/package-info.java | 20 .../streaming/DatasetSourceStreaming.java | 133 +++-- ...lator.java => PipelineTranslatorStreaming.java} | 6 +- .../streaming/StreamingTranslationContext.java | 29 - 11 files changed, 27 insertions(+), 372 deletions(-) diff --git a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/SparkRunner.java b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/SparkRunner.java index 97aa4d8..934c6d2 100644 --- a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/SparkRunner.java +++ b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/SparkRunner.java @@ -22,7 +22,7 @@ import static org.apache.beam.runners.core.construction.PipelineResources.detect import org.apache.beam.runners.spark.structuredstreaming.translation.PipelineTranslator; import org.apache.beam.runners.spark.structuredstreaming.translation.TranslationContext; import org.apache.beam.runners.spark.structuredstreaming.translation.batch.PipelineTranslatorBatch; -import org.apache.beam.runners.spark.structuredstreaming.translation.streaming.StreamingPipelineTranslator; +import 
org.apache.beam.runners.spark.structuredstreaming.translation.streaming.PipelineTranslatorStreaming; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.PipelineRunner; import org.apache.beam.sdk.options.PipelineOptions; @@ -124,7 +124,7 @@ public final class SparkRunner extends PipelineRunner { PipelineTranslator.prepareFilesToStageForRemoteClusterExecution(options); PipelineTranslator pipelineTranslator = options.isStreaming() -? new StreamingPipelineTranslator(options) +? new PipelineTranslatorStreaming(options) : new PipelineTranslatorBatch(options); pipelineTranslator.translate(pipeline); return pipelineTranslator.getTranslationContext(); diff --git a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/PipelineTranslator.java b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/PipelineTranslator.java index e0924e3..7fbbfe6 100644 --- a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/PipelineTranslator.java +++ b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/PipelineTranslator.java @@ -21,7 +21,7 @@ import org.apache.beam.runners.core.construction.PTransformTranslation; import org.apache.beam.runners.core.construction.PipelineResources; import org.apache.beam.runners.spark.structuredstreaming.SparkPipelineOptions; import org.apache.beam.runners.spark.structuredstreaming.translation.batch.PipelineTranslatorBatch; -import org.apache.beam.runners.spark.structuredstreaming.translation.streaming.StreamingPipelineTranslator; +import org.apache.beam.runners.spark.structuredstreaming.translation.streaming.PipelineTranslatorStreaming; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.runners.TransformHierarchy; import org.apache.beam.sdk.transforms.PTransform; @@ -33,7 +33,7 @@ import 
org.slf4j.LoggerFactory; * {@link Pipeline.PipelineVisitor} that translates the Beam operators to their Spark counterparts. * It also does the pipeline preparation: mode detection, transforms replacement, classpath * preparation. If we have a streaming job, it is instantiated as a {@link - * StreamingPipelineTranslator}. If we have a batch job, it is instantiated as a {@link + * PipelineTranslatorStreaming}. If we have a batch job, it is instantiated as a {@link * PipelineTranslatorBatch}. */ public abstract class PipelineTranslator extends Pipeline.PipelineVisitor.Defaults { diff --git a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DatasetSourceBatch.java
[beam] 04/04: Fix split bug
This is an automated email from the ASF dual-hosted git repository. echauchot pushed a commit to branch spark-runner_structured-streaming in repository https://gitbox.apache.org/repos/asf/beam.git commit 707470b0469fbcf63efd985faf6185be295e7c6d Author: Etienne Chauchot AuthorDate: Tue Jan 15 17:30:29 2019 +0100 Fix split bug --- .../spark/structuredstreaming/translation/batch/DatasetSourceBatch.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DatasetSourceBatch.java b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DatasetSourceBatch.java index 3f6f219..8f22bc7 100644 --- a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DatasetSourceBatch.java +++ b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DatasetSourceBatch.java @@ -121,7 +121,7 @@ public class DatasetSourceBatch implements DataSourceV2, ReadSupport { @Override public InputPartitionReader createPartitionReader() { - return new DatasetPartitionReader<>(source, serializablePipelineOptions); + return new DatasetPartitionReader<>(split, serializablePipelineOptions); } }); }
[beam] branch spark-runner_structured-streaming updated (f5fd012 -> 707470b)
This is an automated email from the ASF dual-hosted git repository. echauchot pushed a change to branch spark-runner_structured-streaming in repository https://gitbox.apache.org/repos/asf/beam.git. from f5fd012 Fix errorprone new f19721f Fix testMode output to comply with new binary schema new ca88d54 Cleaning new 7bb1945 Remove bundleSize parameter and always use spark default parallelism new 707470b Fix split bug The 4 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .../structuredstreaming/SparkPipelineOptions.java | 10 -- .../spark/structuredstreaming/SparkRunner.java | 4 +- .../translation/PipelineTranslator.java| 4 +- .../translation/TranslationContext.java| 7 +- .../translation/batch/DatasetSourceBatch.java | 9 +- .../translation/batch/PipelineTranslatorBatch.java | 5 +- .../translation/batch/TranslationContextBatch.java | 40 --- .../batch/mocks/DatasetSourceMockBatch.java| 94 --- .../batch/mocks/ReadSourceTranslatorMockBatch.java | 62 -- .../translation/batch/mocks/package-info.java | 20 .../streaming/DatasetSourceStreaming.java | 133 +++-- ...lator.java => PipelineTranslatorStreaming.java} | 6 +- .../streaming/StreamingTranslationContext.java | 29 - 13 files changed, 35 insertions(+), 388 deletions(-) delete mode 100644 runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/TranslationContextBatch.java delete mode 100644 runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/mocks/DatasetSourceMockBatch.java delete mode 100644 runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/mocks/ReadSourceTranslatorMockBatch.java delete mode 100644 
runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/mocks/package-info.java rename runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/streaming/{StreamingPipelineTranslator.java => PipelineTranslatorStreaming.java} (87%) delete mode 100644 runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/streaming/StreamingTranslationContext.java
[beam] 02/03: Comment schema choices
This is an automated email from the ASF dual-hosted git repository. echauchot pushed a commit to branch spark-runner_structured-streaming in repository https://gitbox.apache.org/repos/asf/beam.git commit 1fe87911591940f49d2db95d3cda5a906a04fd96 Author: Etienne Chauchot AuthorDate: Tue Jan 15 13:35:52 2019 +0100 Comment schema choices --- .../structuredstreaming/translation/batch/DatasetSourceBatch.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DatasetSourceBatch.java b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DatasetSourceBatch.java index c4cfeaf..2a13d98 100644 --- a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DatasetSourceBatch.java +++ b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DatasetSourceBatch.java @@ -96,6 +96,9 @@ public class DatasetSourceBatch implements DataSourceV2, ReadSupport { @Override public StructType readSchema() { // TODO: find a way to extend schema with a WindowedValue schema + // we use a binary schema for now because: +// using an empty schema raises an IndexOutOfBoundsException +// using a NullType schema stores null in the elements StructField[] array = new StructField[1]; StructField binaryStructField = StructField .apply("binaryStructField", DataTypes.BinaryType, true, Metadata.empty());
[beam] 01/03: Serialize windowedValue to byte[] in source to be able to specify a binary dataset schema and deserialize windowedValue from Row to get a dataset
This is an automated email from the ASF dual-hosted git repository. echauchot pushed a commit to branch spark-runner_structured-streaming in repository https://gitbox.apache.org/repos/asf/beam.git commit 7ff0a262f2ae4c57ab5e7f5e213ab17317f70a69 Author: Etienne Chauchot AuthorDate: Tue Jan 15 13:24:09 2019 +0100 Serialize windowedValue to byte[] in source to be able to specify a binary dataset schema and deserialize windowedValue from Row to get a dataset --- .../translation/batch/DatasetSourceBatch.java | 29 -- .../batch/ReadSourceTranslatorBatch.java | 9 ++- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DatasetSourceBatch.java b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DatasetSourceBatch.java index d9e1722..c4cfeaf 100644 --- a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DatasetSourceBatch.java +++ b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DatasetSourceBatch.java @@ -20,6 +20,7 @@ package org.apache.beam.runners.spark.structuredstreaming.translation.batch; import static com.google.common.base.Preconditions.checkArgument; import static scala.collection.JavaConversions.asScalaBuffer; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; @@ -30,6 +31,7 @@ import org.apache.beam.runners.core.serialization.Base64Serializer; import org.apache.beam.runners.spark.structuredstreaming.SparkPipelineOptions; import org.apache.beam.sdk.io.BoundedSource; import org.apache.beam.sdk.io.BoundedSource.BoundedReader; +import org.apache.beam.sdk.transforms.windowing.GlobalWindow; import org.apache.beam.sdk.util.WindowedValue; import 
org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.sources.v2.ContinuousReadSupport; @@ -93,10 +95,11 @@ public class DatasetSourceBatch implements DataSourceV2, ReadSupport { @Override public StructType readSchema() { + // TODO: find a way to extend schema with a WindowedValue schema StructField[] array = new StructField[1]; - StructField dummyStructField = StructField - .apply("dummyStructField", DataTypes.NullType, true, Metadata.empty()); - array[0] = dummyStructField; + StructField binaryStructField = StructField + .apply("binaryStructField", DataTypes.BinaryType, true, Metadata.empty()); + array[0] = binaryStructField; return new StructType(array); } @@ -135,11 +138,13 @@ public class DatasetSourceBatch implements DataSourceV2, ReadSupport { private static class DatasetPartitionReader implements InputPartitionReader { private boolean started; private boolean closed; +private BoundedSource source; private BoundedReader reader; DatasetPartitionReader(BoundedSource source, SerializablePipelineOptions serializablePipelineOptions) { this.started = false; this.closed = false; + this.source = source; // reader is not serializable so lazy initialize it try { reader = source @@ -162,10 +167,20 @@ public class DatasetSourceBatch implements DataSourceV2, ReadSupport { @Override public InternalRow get() { List list = new ArrayList<>(); - list.add( - WindowedValue.timestampedValueInGlobalWindow( - reader.getCurrent(), reader.getCurrentTimestamp())); - return InternalRow.apply(asScalaBuffer(list).toList()); + WindowedValue windowedValue = WindowedValue + .timestampedValueInGlobalWindow(reader.getCurrent(), reader.getCurrentTimestamp()); + //serialize the windowedValue to bytes array to comply with dataset binary schema + WindowedValue.FullWindowedValueCoder windowedValueCoder = WindowedValue.FullWindowedValueCoder + .of(source.getOutputCoder(), GlobalWindow.Coder.INSTANCE); + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + 
try { +windowedValueCoder.encode(windowedValue, byteArrayOutputStream); +byte[] bytes = byteArrayOutputStream.toByteArray(); +list.add(bytes); + } catch (IOException e) { +throw new RuntimeException(e); + } +return InternalRow.apply(asScalaBuffer(list).toList()); } @Override diff --git a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/ReadSourceTranslatorBatch.java b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/ReadSourceTranslatorBatch.java index 8810e21..fec0fd3 100644 ---
[beam] 03/03: Fix errorprone
This is an automated email from the ASF dual-hosted git repository. echauchot pushed a commit to branch spark-runner_structured-streaming in repository https://gitbox.apache.org/repos/asf/beam.git commit f5fd0127bd5327373b134e838849c702f10cf045 Author: Etienne Chauchot AuthorDate: Tue Jan 15 13:37:56 2019 +0100 Fix errorprone --- .../structuredstreaming/translation/batch/ParDoTranslatorBatch.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/ParDoTranslatorBatch.java b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/ParDoTranslatorBatch.java index 1ad1e3b..9cbde5a 100644 --- a/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/ParDoTranslatorBatch.java +++ b/runners/spark-structured-streaming/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/ParDoTranslatorBatch.java @@ -184,7 +184,7 @@ class ParDoTranslatorBatch context.putDatasetWildcard(output.getValue(), outputDataset); } - class SparkDoFnFilterFunction implements FilterFunction, WindowedValue>> { + static class SparkDoFnFilterFunction implements FilterFunction, WindowedValue>> { private final TupleTag key;
[beam] branch spark-runner_structured-streaming updated (16c57c3 -> f5fd012)
This is an automated email from the ASF dual-hosted git repository. echauchot pushed a change to branch spark-runner_structured-streaming in repository https://gitbox.apache.org/repos/asf/beam.git. from 16c57c3 First attempt for ParDo primitive implementation new 7ff0a26 Serialize windowedValue to byte[] in source to be able to specify a binary dataset schema and deserialize windowedValue from Row to get a dataset new 1fe8791 Comment schema choices new f5fd012 Fix errorprone The 3 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .../translation/batch/DatasetSourceBatch.java | 32 +- .../translation/batch/ParDoTranslatorBatch.java| 2 +- .../batch/ReadSourceTranslatorBatch.java | 9 +- 3 files changed, 34 insertions(+), 9 deletions(-)
[beam] 01/01: Merge pull request #7499: [BEAM-6425] - Replace SSLContext.getInstance("SSL")
This is an automated email from the ASF dual-hosted git repository. iemejia pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git commit 317a8b50f46c37ce0af50a1b9cce2437253e42a7 Merge: d7a0a3f 6480962 Author: Ismaël Mejía AuthorDate: Tue Jan 15 10:48:36 2019 +0100 Merge pull request #7499: [BEAM-6425] - Replace SSLContext.getInstance("SSL") [BEAM-6425] - Replace SSLContext.getInstance("SSL") .../mongodb/src/main/java/org/apache/beam/sdk/io/mongodb/SSLUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
[beam] branch master updated (d7a0a3f -> 317a8b5)
This is an automated email from the ASF dual-hosted git repository. iemejia pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from d7a0a3f [BEAM-6427]INTERSECT ALL is not compatible with SQL standard. add 6480962 BEAM-6425 - Replace SSLContext.getInstance("SSL") new 317a8b5 Merge pull request #7499: [BEAM-6425] - Replace SSLContext.getInstance("SSL") The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .../mongodb/src/main/java/org/apache/beam/sdk/io/mongodb/SSLUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
[beam] Diff for: [GitHub] iemejia merged pull request #7499: [BEAM-6425] - Replace SSLContext.getInstance("SSL")
diff --git a/sdks/java/io/mongodb/src/main/java/org/apache/beam/sdk/io/mongodb/SSLUtils.java b/sdks/java/io/mongodb/src/main/java/org/apache/beam/sdk/io/mongodb/SSLUtils.java index 5b062ed39579..9a778a349d13 100644 --- a/sdks/java/io/mongodb/src/main/java/org/apache/beam/sdk/io/mongodb/SSLUtils.java +++ b/sdks/java/io/mongodb/src/main/java/org/apache/beam/sdk/io/mongodb/SSLUtils.java @@ -53,7 +53,7 @@ public void checkServerTrusted(X509Certificate[] certs, String authType) {} public static SSLContext ignoreSSLCertificate() { try { // Install the all-trusting trust manager - SSLContext sc = SSLContext.getInstance("SSL"); + SSLContext sc = SSLContext.getInstance("TLS"); sc.init(null, trustAllCerts, new java.security.SecureRandom()); HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory()); With regards, Apache Git Services