[flink-ml] branch master updated: [hotfix] Bump dependency versions
This is an automated email from the ASF dual-hosted git repository. zhangzp pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink-ml.git The following commit(s) were added to refs/heads/master by this push: new a533670 [hotfix] Bump dependency versions a533670 is described below commit a53367031023f280e0514641b90b7f047233c163 Author: Dong Lin AuthorDate: Fri May 6 22:17:49 2022 +0800 [hotfix] Bump dependency versions This closes #94. --- flink-ml-core/pom.xml | 4 flink-ml-iteration/pom.xml | 4 pom.xml| 3 ++- 3 files changed, 2 insertions(+), 9 deletions(-) diff --git a/flink-ml-core/pom.xml b/flink-ml-core/pom.xml index c351270..56dbe4a 100644 --- a/flink-ml-core/pom.xml +++ b/flink-ml-core/pom.xml @@ -31,10 +31,6 @@ under the License. flink-ml-core_${scala.binary.version} Flink ML : Core - -2.4.1 - - org.apache.flink diff --git a/flink-ml-iteration/pom.xml b/flink-ml-iteration/pom.xml index a2ac1e5..8ab408f 100644 --- a/flink-ml-iteration/pom.xml +++ b/flink-ml-iteration/pom.xml @@ -31,10 +31,6 @@ under the License. flink-ml-iteration_${scala.binary.version} Flink ML : Iteration - -2.4.1 - - diff --git a/pom.xml b/pom.xml index c17718f..b4baa5f 100644 --- a/pom.xml +++ b/pom.xml @@ -68,12 +68,13 @@ under the License. 1.8 2.4.2 1.7.15 -2.17.0 +2.17.1 4.13.1 1C true 1.14.4 3.4.14 +2.8.5
[flink-kubernetes-operator] branch main updated: [FLINK-27262] Enrich validator for FlinkSessionJob
This is an automated email from the ASF dual-hosted git repository. gyfora pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/flink-kubernetes-operator.git The following commit(s) were added to refs/heads/main by this push: new 6ff7cf3 [FLINK-27262] Enrich validator for FlinkSessionJob 6ff7cf3 is described below commit 6ff7cf31c61d50a1459b518cfae86fc65e3c5810 Author: Aitozi AuthorDate: Wed Apr 27 14:58:03 2022 +0800 [FLINK-27262] Enrich validator for FlinkSessionJob --- examples/basic-session-job.yaml| 39 ++-- .../kubernetes/operator/crd/CrdConstants.java | 2 + .../operator/validation/DefaultValidator.java | 107 - .../operator/validation/DefaultValidatorTest.java | 83 .../operator/admission/AdmissionHandler.java | 7 +- .../operator/admission/FlinkValidator.java | 41 +++- .../admissioncontroller/AdmissionController.java | 4 +- .../validation/DefaultRequestValidator.java| 2 +- .../admissioncontroller/validation/Validator.java | 4 +- .../operator/admission/AdmissionHandlerTest.java | 6 ++ .../templates/webhook.yaml | 3 +- 11 files changed, 189 insertions(+), 109 deletions(-) diff --git a/examples/basic-session-job.yaml b/examples/basic-session-job.yaml index 97cbb6b..cde55a5 100644 --- a/examples/basic-session-job.yaml +++ b/examples/basic-session-job.yaml @@ -19,16 +19,10 @@ apiVersion: flink.apache.org/v1beta1 kind: FlinkDeployment metadata: - name: basic-session-cluster-with-ha + name: basic-session-cluster spec: image: flink:1.14 flinkVersion: v1_14 - flinkConfiguration: -taskmanager.numberOfTaskSlots: "2" -state.savepoints.dir: file:///flink-data/savepoints -high-availability: org.apache.flink.kubernetes.highavailability.KubernetesHaServicesFactory -high-availability.storageDir: file:///flink-data/ha -#kubernetes.rest-service.exposed.type: LoadBalancer jobManager: replicas: 1 resource: @@ -38,43 +32,28 @@ spec: resource: memory: "2048m" cpu: 1 - podTemplate: -spec: - serviceAccount: flink - containers: -- name: flink-main-container - 
volumeMounts: -- mountPath: /flink-data - name: flink-volume - volumes: -- name: flink-volume - hostPath: -# directory location on host -path: /tmp/flink -# this field is optional -type: Directory + serviceAccount: flink --- apiVersion: flink.apache.org/v1beta1 kind: FlinkSessionJob metadata: - name: basic-session-ha-job-example + name: basic-session-job-example spec: - deploymentName: basic-session-cluster-with-ha + deploymentName: basic-session-cluster job: -jarURI: file:///opt/flink/artifacts/TopSpeedWindowing.jar +jarURI: https://repo1.maven.org/maven2/org/apache/flink/flink-examples-streaming_2.12/1.14.3/flink-examples-streaming_2.12-1.14.3-TopSpeedWindowing.jar parallelism: 4 -upgradeMode: savepoint -savepointTriggerNonce: 1 +upgradeMode: stateless --- apiVersion: flink.apache.org/v1beta1 kind: FlinkSessionJob metadata: - name: basic-session-ha-job-example2 + name: basic-session-job-example2 spec: - deploymentName: basic-session-cluster-with-ha + deploymentName: basic-session-cluster job: -jarURI: file:///opt/flink/artifacts/flink-examples-streaming_2.12-1.14.3.jar +jarURI: https://repo1.maven.org/maven2/org/apache/flink/flink-examples-streaming_2.12/1.14.3/flink-examples-streaming_2.12-1.14.3.jar parallelism: 2 upgradeMode: stateless entryClass: org.apache.flink.streaming.examples.statemachine.StateMachineExample diff --git a/flink-kubernetes-operator/src/main/java/org/apache/flink/kubernetes/operator/crd/CrdConstants.java b/flink-kubernetes-operator/src/main/java/org/apache/flink/kubernetes/operator/crd/CrdConstants.java index bcdad92..b699ad9 100644 --- a/flink-kubernetes-operator/src/main/java/org/apache/flink/kubernetes/operator/crd/CrdConstants.java +++ b/flink-kubernetes-operator/src/main/java/org/apache/flink/kubernetes/operator/crd/CrdConstants.java @@ -21,4 +21,6 @@ package org.apache.flink.kubernetes.operator.crd; public class CrdConstants { public static final String API_GROUP = "flink.apache.org"; public static final String API_VERSION = 
"v1beta1"; +public static final String KIND_SESSION_JOB = "FlinkSessionJob"; +public static final String KIND_FLINK_DEPLOYMENT = "FlinkDeployment"; } diff --git a/flink-kubernetes-operator/src/main/java/org/apache/flink/kubernetes/operator/validation/DefaultValidator.java b/flink-kubernetes-operator/src/main/java/org/apache/flink/kubernetes/operator/validation/DefaultValidator.java index 0150fe6..bd0f25e 100644 --- a/flink-kubernetes-operator/src/main/java/org/apache/flink/kubernetes/operator/validation/DefaultValidator.java
[flink-table-store] branch master updated: [FLINK-27528] Introduce a new configuration option 'compact.rescale-bucket' for FileStore
This is an automated email from the ASF dual-hosted git repository. lzljs3620320 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink-table-store.git The following commit(s) were added to refs/heads/master by this push: new d0576e1 [FLINK-27528] Introduce a new configuration option 'compact.rescale-bucket' for FileStore d0576e1 is described below commit d0576e1bbd7f798d68db398593c8368376743203 Author: Jane Chan <55568005+ladyfor...@users.noreply.github.com> AuthorDate: Fri May 6 21:32:07 2022 +0800 [FLINK-27528] Introduce a new configuration option 'compact.rescale-bucket' for FileStore This closes #109 --- .../table/store/connector/TableStoreFactoryOptions.java | 12 .../org/apache/flink/table/store/file/FileStoreOptions.java | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/TableStoreFactoryOptions.java b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/TableStoreFactoryOptions.java index 4337ec5..28afcfe 100644 --- a/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/TableStoreFactoryOptions.java +++ b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/TableStoreFactoryOptions.java @@ -28,6 +28,17 @@ import java.util.Set; /** Options for {@link TableStoreFactory}. */ public class TableStoreFactoryOptions { +public static final ConfigOption COMPACTION_RESCALE_BUCKET = +ConfigOptions.key("compaction.rescale-bucket") +.booleanType() +.defaultValue(false) +.withDescription( +"Specify the behavior for compaction. Set value to true " ++ "will lead compaction to reorganize data files " ++ "according to the bucket number from table schema. 
" ++ "By default, compaction does not adjust the bucket number " ++ "of a partition/table."); + public static final ConfigOption LOG_SYSTEM = ConfigOptions.key("log.system") .stringType() @@ -47,6 +58,7 @@ public class TableStoreFactoryOptions { public static Set> allOptions() { Set> allOptions = new HashSet<>(); +allOptions.add(COMPACTION_RESCALE_BUCKET); allOptions.add(LOG_SYSTEM); allOptions.add(SINK_PARALLELISM); allOptions.add(SCAN_PARALLELISM); diff --git a/flink-table-store-core/src/main/java/org/apache/flink/table/store/file/FileStoreOptions.java b/flink-table-store-core/src/main/java/org/apache/flink/table/store/file/FileStoreOptions.java index eb8c908..ca5f80c 100644 --- a/flink-table-store-core/src/main/java/org/apache/flink/table/store/file/FileStoreOptions.java +++ b/flink-table-store-core/src/main/java/org/apache/flink/table/store/file/FileStoreOptions.java @@ -122,7 +122,7 @@ public class FileStoreOptions implements Serializable { .defaultValue(MergeEngine.DEDUPLICATE) .withDescription( Description.builder() -.text("Specifies the merge engine for table with primary key.") +.text("Specify the merge engine for table with primary key.") .linebreak() .list( formatEnumOption(MergeEngine.DEDUPLICATE),
[flink-web] branch asf-site updated: [FLINK-24370] Add AsyncSinkBase blog post
This is an automated email from the ASF dual-hosted git repository. chesnay pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/flink-web.git The following commit(s) were added to refs/heads/asf-site by this push: new cf231b81a [FLINK-24370] Add AsyncSinkBase blog post cf231b81a is described below commit cf231b81a7a64826d13735639fba84116bb0a651 Author: Zichen Liu <9928830+crynetlogist...@users.noreply.github.com> AuthorDate: Fri May 6 13:45:35 2022 +0100 [FLINK-24370] Add AsyncSinkBase blog post --- _posts/2022-03-16-async-sink-base.md | 171 +++ 1 file changed, 171 insertions(+) diff --git a/_posts/2022-03-16-async-sink-base.md b/_posts/2022-03-16-async-sink-base.md new file mode 100644 index 0..df687c3af --- /dev/null +++ b/_posts/2022-03-16-async-sink-base.md @@ -0,0 +1,171 @@ +--- +layout: post +title: "The Generic Asynchronous Base Sink" +date: 2022-05-06 16:00:00 +authors: +- CrynetLogistics: + name: "Zichen Liu" +excerpt: An overview of the new AsyncBaseSink and how to use it for building your own concrete sink +--- + +Flink sinks share a lot of similar behavior. Most sinks batch records according to user-defined buffering hints, sign requests, write them to the destination, retry unsuccessful or throttled requests, and participate in checkpointing. + +This is why for Flink 1.15 we have decided to create the [`AsyncSinkBase` (FLIP-171)](https://cwiki.apache.org/confluence/display/FLINK/FLIP-171%3A+Async+Sink), an abstract sink with a number of common functionalities extracted. + +This is a base implementation for asynchronous sinks, which you should use whenever you need to implement a sink that doesn't offer transactional capabilities. Adding support for a new destination now only requires a lightweight shim that implements the specific interfaces of the destination using a client that supports async requests. 
+ +This common abstraction will reduce the effort required to maintain individual sinks that extend from this abstract sink, with bug fixes and improvements to the sink core benefiting all implementations that extend it. The design of `AsyncSinkBase` focuses on extensibility and a broad support of destinations. The core of the sink is kept generic and free of any connector-specific dependencies. + +The sink base is designed to participate in checkpointing to provide at-least-once semantics and can work directly with destinations that provide a client that supports asynchronous requests. + +In this post, we will go over the details of the AsyncSinkBase so that you can start using it to build your own concrete sink. + +{% toc %} + +# Adding the base sink as a dependency + +In order to use the base sink, you will need to add the following dependency to your project. The example below follows the Maven syntax: + +```xml + + org.apache.flink + flink-connector-base + ${flink.version} + +``` + +# The Public Interfaces of AsyncSinkBase + +## Generic Types + +`` – type of elements in a DataStream that should be passed to the sink + +`` – type of a payload containing the element and additional metadata that is required to submit a single element to the destination + + +## Element Converter Interface + +[ElementConverter](https://github.com/apache/flink/blob/release-1.15.0/flink-connectors/flink-connector-base/src/main/java/org/apache/flink/connector/base/sink/writer/ElementConverter.java) + +```java +public interface ElementConverter extends Serializable { +RequestEntryT apply(InputT element, SinkWriter.Context context); +} +``` +The concrete sink implementation should provide a way to convert from an element in the DataStream to the payload type that contains all the additional metadata required to submit that element to the destination by the sink. 
Ideally, this would be encapsulated from the end user since it allows concrete sink implementers to adapt to changes in the destination API without breaking end user code. + +## Sink Writer Interface + +[AsyncSinkWriter](https://github.com/apache/flink/blob/release-1.15.0/flink-connectors/flink-connector-base/src/main/java/org/apache/flink/connector/base/sink/writer/AsyncSinkWriter.java) + +There is a buffer in the sink writer that holds the request entries that have been sent to the sink but not yet written to the destination. An element of the buffer is a `RequestEntryWrapper` consisting of the `RequestEntryT` along with the size of that record. + +```java +public abstract class AsyncSinkWriter +implements StatefulSink.StatefulSinkWriter> { +// ... +protected abstract void submitRequestEntries( +List requestEntries, Consumer> requestResult); +// ... +} +``` + +We will submit the `requestEntries` asynchronously to the destination from here. Sink implementers should use the client libraries of the destination they intend to write to, to perform this. + +Should any elements fail to be persisted,
[flink] branch master updated (a22b977051d -> b904c948fb1)
This is an automated email from the ASF dual-hosted git repository. chesnay pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/flink.git from a22b977051d [hotfix][docs] Fix the inaccessible huawei cloud link. add b904c948fb1 [FLINK-27476][tests] Add dedicated import option for selecting production code No new revisions were added by this update. Summary of changes: .../flink/architecture/common/ImportOptions.java | 10 ++ .../architecture/common/SourcePredicates.java | 39 +- .../architecture/rules/ApiAnnotationRules.java | 4 +-- 3 files changed, 13 insertions(+), 40 deletions(-)
[flink] branch master updated: [hotfix][docs] Fix the inaccessible huawei cloud link.
This is an automated email from the ASF dual-hosted git repository. martijnvisser pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/master by this push: new a22b977051d [hotfix][docs] Fix the inaccessible huawei cloud link. a22b977051d is described below commit a22b977051d160dee4425598d55d47801834bc94 Author: liuzhuang2017 AuthorDate: Fri May 6 14:14:35 2022 +0800 [hotfix][docs] Fix the inaccessible huawei cloud link. --- docs/content.zh/docs/deployment/overview.md | 2 +- docs/content/docs/deployment/overview.md| 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/content.zh/docs/deployment/overview.md b/docs/content.zh/docs/deployment/overview.md index f10dab34e10..308670a44b6 100644 --- a/docs/content.zh/docs/deployment/overview.md +++ b/docs/content.zh/docs/deployment/overview.md @@ -308,7 +308,7 @@ Supported Environment: Huawei Cloud Stream Service -[Website](https://www.huaweicloud.com/en-us/product/cs.html) +[Website](https://www.huaweicloud.com/intl/zh-cn/product/cs.html) Supported Environment: {{< label Huawei Cloud >}} diff --git a/docs/content/docs/deployment/overview.md b/docs/content/docs/deployment/overview.md index 247605ee811..89fb9295d2c 100644 --- a/docs/content/docs/deployment/overview.md +++ b/docs/content/docs/deployment/overview.md @@ -314,7 +314,7 @@ Supported Environment: Huawei Cloud Stream Service -[Website](https://www.huaweicloud.com/en-us/product/cs.html) +[Website](https://www.huaweicloud.com/intl/en-us/product/cs.html) Supported Environment: {{< label Huawei Cloud >}}
[flink] branch master updated (9d174b3d7c8 -> 88a180392b1)
This is an automated email from the ASF dual-hosted git repository. chesnay pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/flink.git from 9d174b3d7c8 [hotfix][docs-zh] Delete redundant English content in Chinese documents. add 88a180392b1 [FLINK-27533][coordination][tests] Remove retry attempt limit No new revisions were added by this update. Summary of changes: .../runtime/scheduler/adaptive/AdaptiveSchedulerSimpleITCase.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
[flink] branch master updated (3f157bbaa29 -> 9d174b3d7c8)
This is an automated email from the ASF dual-hosted git repository. martijnvisser pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/flink.git from 3f157bbaa29 [FLINK-24820][docs] Wrong description in documentation for IS DISTINCT FROM add 9d174b3d7c8 [hotfix][docs-zh] Delete redundant English content in Chinese documents. No new revisions were added by this update. Summary of changes: docs/content.zh/docs/dev/table/concepts/time_attributes.md | 3 --- 1 file changed, 3 deletions(-)
[flink] branch master updated (3e1874bf05d -> 3f157bbaa29)
This is an automated email from the ASF dual-hosted git repository. martijnvisser pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/flink.git from 3e1874bf05d [release] Change the stable doc versions add 3f157bbaa29 [FLINK-24820][docs] Wrong description in documentation for IS DISTINCT FROM No new revisions were added by this update. Summary of changes: docs/data/sql_functions.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
[flink-docker] 01/02: [hotfix] Remove unused variables
This is an automated email from the ASF dual-hosted git repository. chesnay pushed a commit to branch dev-1.14 in repository https://gitbox.apache.org/repos/asf/flink-docker.git commit 55d9e5bc614a34b337b60f4456813a0ec1bb038a Author: Chesnay Schepler AuthorDate: Wed May 4 15:36:32 2022 +0200 [hotfix] Remove unused variables --- testing/run_travis_tests.sh | 6 -- 1 file changed, 6 deletions(-) diff --git a/testing/run_travis_tests.sh b/testing/run_travis_tests.sh index 7311ccf..c87772a 100755 --- a/testing/run_travis_tests.sh +++ b/testing/run_travis_tests.sh @@ -4,12 +4,6 @@ SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) . "${SCRIPT_DIR}/testing_lib.sh" -IS_PULL_REQUEST= -if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then - IS_PULL_REQUEST=1 -fi - -BRANCH="$TRAVIS_BRANCH" ./add-version.sh -r 1.14 -f 1.14.3
[flink-docker] 02/02: [FLINK-27488][ci] Migrate to GHA
This is an automated email from the ASF dual-hosted git repository. chesnay pushed a commit to branch dev-1.14 in repository https://gitbox.apache.org/repos/asf/flink-docker.git commit 56566d7d94958f28d0f6c55e78d9e1b9a1973214 Author: Chesnay Schepler AuthorDate: Wed May 4 15:36:38 2022 +0200 [FLINK-27488][ci] Migrate to GHA --- .github/workflows/ci.yml | 29 +++ .travis.yml | 14 - testing/{run_travis_tests.sh => run_tests.sh} | 2 -- 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 000..f355ae4 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: "CI" + +on: [push, pull_request] + +jobs: + ci: +runs-on: ubuntu-latest +steps: + - uses: actions/checkout@v3 + - name: "Build images" +run: | + ./add-version.sh -r 1.14 -f 1.14.0 + - name: "Test images" +run: testing/run_tests.sh diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 6f58f78..000 --- a/.travis.yml +++ /dev/null @@ -1,14 +0,0 @@ -os: linux -dist: bionic -language: java -jdk: "openjdk8" - -cache: - directories: -- $HOME/.m2 - -services: -- docker - -script: -- testing/run_travis_tests.sh diff --git a/testing/run_travis_tests.sh b/testing/run_tests.sh similarity index 86% rename from testing/run_travis_tests.sh rename to testing/run_tests.sh index c87772a..764eaa0 100755 --- a/testing/run_travis_tests.sh +++ b/testing/run_tests.sh @@ -5,8 +5,6 @@ SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) . "${SCRIPT_DIR}/testing_lib.sh" -./add-version.sh -r 1.14 -f 1.14.3 - test_docker_entrypoint smoke_test_all_images
[flink-docker] branch dev-1.14 updated (e26a46b -> 56566d7)
This is an automated email from the ASF dual-hosted git repository. chesnay pushed a change to branch dev-1.14 in repository https://gitbox.apache.org/repos/asf/flink-docker.git from e26a46b Add GPG key for 1.14.4 release (#108) new 55d9e5b [hotfix] Remove unused variables new 56566d7 [FLINK-27488][ci] Migrate to GHA The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .github/workflows/ci.yml | 29 +++ .travis.yml | 14 - testing/{run_travis_tests.sh => run_tests.sh} | 8 3 files changed, 29 insertions(+), 22 deletions(-) create mode 100644 .github/workflows/ci.yml delete mode 100644 .travis.yml rename testing/{run_travis_tests.sh => run_tests.sh} (60%)
[flink-docker] 02/02: [FLINK-27488][ci] Migrate to GHA
This is an automated email from the ASF dual-hosted git repository. chesnay pushed a commit to branch dev-1.15 in repository https://gitbox.apache.org/repos/asf/flink-docker.git commit 88264cd1a5f6111a1ef5bab8249ba8e1d6b8c62c Author: Chesnay Schepler AuthorDate: Wed May 4 15:36:38 2022 +0200 [FLINK-27488][ci] Migrate to GHA --- .github/workflows/ci.yml| 30 ++ .travis.yml | 14 -- testing/run_tests.sh| 16 testing/run_travis_tests.sh | 21 - 4 files changed, 46 insertions(+), 35 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 000..6aaf266 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: "CI" + +on: [push, pull_request] + +jobs: + ci: +runs-on: ubuntu-latest +steps: + - uses: actions/checkout@v3 + - name: "Build images" +run: | + ./add-custom.sh -u "https://s3.amazonaws.com/flink-nightly/flink-1.15-SNAPSHOT-bin-scala_2.12.tgz; -j 8 -n test-java8 + ./add-custom.sh -u "https://s3.amazonaws.com/flink-nightly/flink-1.15-SNAPSHOT-bin-scala_2.12.tgz; -j 11 -n test-java11 + - name: "Test images" +run: testing/run_tests.sh diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 6f58f78..000 --- a/.travis.yml +++ /dev/null @@ -1,14 +0,0 @@ -os: linux -dist: bionic -language: java -jdk: "openjdk8" - -cache: - directories: -- $HOME/.m2 - -services: -- docker - -script: -- testing/run_travis_tests.sh diff --git a/testing/run_tests.sh b/testing/run_tests.sh new file mode 100755 index 000..764eaa0 --- /dev/null +++ b/testing/run_tests.sh @@ -0,0 +1,16 @@ +#!/bin/bash -e + +SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) + +. "${SCRIPT_DIR}/testing_lib.sh" + + +test_docker_entrypoint + +smoke_test_all_images +smoke_test_one_image_non_root + + +echo "Test successfully finished" + +# vim: et ts=2 sw=2 diff --git a/testing/run_travis_tests.sh b/testing/run_travis_tests.sh deleted file mode 100755 index 7ff7ce6..000 --- a/testing/run_travis_tests.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -e - -SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) - -. "${SCRIPT_DIR}/testing_lib.sh" - - -test_docker_entrypoint - -./add-custom.sh -u "https://s3.amazonaws.com/flink-nightly/flink-1.15-SNAPSHOT-bin-scala_2.12.tgz; -j 8 -n test-java8 - -# test Flink with Java11 image as well -./add-custom.sh -u "https://s3.amazonaws.com/flink-nightly/flink-1.15-SNAPSHOT-bin-scala_2.12.tgz; -j 11 -n test-java11 - -smoke_test_all_images -smoke_test_one_image_non_root - - -echo "Test successfully finished" - -# vim: et ts=2 sw=2
[flink-docker] 01/02: [hotfix] Remove unused variables
This is an automated email from the ASF dual-hosted git repository. chesnay pushed a commit to branch dev-1.15 in repository https://gitbox.apache.org/repos/asf/flink-docker.git commit 8c7e70e98cf9103a9594c727de440215050af746 Author: Chesnay Schepler AuthorDate: Wed May 4 15:36:32 2022 +0200 [hotfix] Remove unused variables --- testing/run_travis_tests.sh | 6 -- 1 file changed, 6 deletions(-) diff --git a/testing/run_travis_tests.sh b/testing/run_travis_tests.sh index 464c822..7ff7ce6 100755 --- a/testing/run_travis_tests.sh +++ b/testing/run_travis_tests.sh @@ -4,12 +4,6 @@ SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) . "${SCRIPT_DIR}/testing_lib.sh" -IS_PULL_REQUEST= -if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then - IS_PULL_REQUEST=1 -fi - -BRANCH="$TRAVIS_BRANCH" test_docker_entrypoint
[flink-docker] branch dev-1.15 updated (17bcbe4 -> 88264cd)
This is an automated email from the ASF dual-hosted git repository. chesnay pushed a change to branch dev-1.15 in repository https://gitbox.apache.org/repos/asf/flink-docker.git from 17bcbe4 Merge adjacent RUN commands to avoid too much levels new 8c7e70e [hotfix] Remove unused variables new 88264cd [FLINK-27488][ci] Migrate to GHA The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .github/workflows/ci.yml| 30 ++ .travis.yml | 14 -- testing/run_tests.sh| 16 testing/run_travis_tests.sh | 27 --- 4 files changed, 46 insertions(+), 41 deletions(-) create mode 100644 .github/workflows/ci.yml delete mode 100644 .travis.yml create mode 100755 testing/run_tests.sh delete mode 100755 testing/run_travis_tests.sh
[flink-ml] branch master updated: [FLINK-27096] Add a script to visualize benchmark results
This is an automated email from the ASF dual-hosted git repository. lindong pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink-ml.git The following commit(s) were added to refs/heads/master by this push: new 2da5d59 [FLINK-27096] Add a script to visualize benchmark results 2da5d59 is described below commit 2da5d59b2aa9e1c654ac129376bbf00fe9489615 Author: yunfengzhou-hub AuthorDate: Wed Apr 20 14:26:25 2022 +0800 [FLINK-27096] Add a script to visualize benchmark results This PR also enriches benchmark result file's content with input parameters This closes #87. --- .../benchmark_results_visualization_example.svg| 1131 flink-ml-benchmark/README.md | 131 ++- .../org/apache/flink/ml/benchmark/Benchmark.java | 88 +- .../apache/flink/ml/benchmark/BenchmarkResult.java | 15 + .../apache/flink/ml/benchmark/BenchmarkUtils.java | 50 +- .../src/main/resources/benchmark-conf.json | 149 +++ .../apache/flink/ml/benchmark/BenchmarkTest.java |4 +- .../org/apache/flink/ml/util/ReadWriteUtils.java |4 +- .../bin/benchmark-results-visualize.py | 68 ++ .../{flink-ml-benchmark.sh => benchmark-run.sh}|0 10 files changed, 1522 insertions(+), 118 deletions(-) diff --git a/docs/static/fig/benchmark_results_visualization_example.svg b/docs/static/fig/benchmark_results_visualization_example.svg new file mode 100644 index 000..a5093ad --- /dev/null +++ b/docs/static/fig/benchmark_results_visualization_example.svg @@ -0,0 +1,1131 @@ + + +http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd;> + +http://www.w3.org/2000/svg; xmlns:xlink="http://www.w3.org/1999/xlink;> + + +*{stroke-linecap:butt;stroke-linejoin:round;} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
[flink-docker] branch dev-master updated (7b715c8 -> 8417e25)
This is an automated email from the ASF dual-hosted git repository. chesnay pushed a change to branch dev-master in repository https://gitbox.apache.org/repos/asf/flink-docker.git from 7b715c8 Merge adjacent RUN commands to avoid too much levels new 682c0db [hotfix] Remove unused variables new 8417e25 [FLINK-27488][ci] Migrate to GHA The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .github/workflows/ci.yml| 30 ++ .travis.yml | 14 -- testing/run_tests.sh| 16 testing/run_travis_tests.sh | 27 --- 4 files changed, 46 insertions(+), 41 deletions(-) create mode 100644 .github/workflows/ci.yml delete mode 100644 .travis.yml create mode 100755 testing/run_tests.sh delete mode 100755 testing/run_travis_tests.sh
[flink-docker] 01/02: [hotfix] Remove unused variables
This is an automated email from the ASF dual-hosted git repository. chesnay pushed a commit to branch dev-master in repository https://gitbox.apache.org/repos/asf/flink-docker.git commit 682c0dbd316bd3633af15c5c2457d080f0f5021f Author: Chesnay Schepler AuthorDate: Wed May 4 15:36:32 2022 +0200 [hotfix] Remove unused variables --- testing/run_travis_tests.sh | 6 -- 1 file changed, 6 deletions(-) diff --git a/testing/run_travis_tests.sh b/testing/run_travis_tests.sh index 7538161..55ffecd 100755 --- a/testing/run_travis_tests.sh +++ b/testing/run_travis_tests.sh @@ -4,12 +4,6 @@ SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) . "${SCRIPT_DIR}/testing_lib.sh" -IS_PULL_REQUEST= -if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then - IS_PULL_REQUEST=1 -fi - -BRANCH="$TRAVIS_BRANCH" test_docker_entrypoint
[flink-docker] 02/02: [FLINK-27488][ci] Migrate to GHA
This is an automated email from the ASF dual-hosted git repository. chesnay pushed a commit to branch dev-master in repository https://gitbox.apache.org/repos/asf/flink-docker.git commit 8417e2587bf16571383ec5280ad74ba04db825e1 Author: Chesnay Schepler AuthorDate: Wed May 4 15:36:38 2022 +0200 [FLINK-27488][ci] Migrate to GHA --- .github/workflows/ci.yml| 30 ++ .travis.yml | 14 -- testing/run_tests.sh| 16 testing/run_travis_tests.sh | 21 - 4 files changed, 46 insertions(+), 35 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 000..e526c3e --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: "CI" + +on: [push, pull_request] + +jobs: + ci: +runs-on: ubuntu-latest +steps: + - uses: actions/checkout@v3 + - name: "Build images" +run: | + ./add-custom.sh -u "https://s3.amazonaws.com/flink-nightly/flink-1.16-SNAPSHOT-bin-scala_2.12.tgz" -j 8 -n test-java8 + ./add-custom.sh -u "https://s3.amazonaws.com/flink-nightly/flink-1.16-SNAPSHOT-bin-scala_2.12.tgz" -j 11 -n test-java11 + - name: "Test images" +run: testing/run_tests.sh diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 6f58f78..000 --- a/.travis.yml +++ /dev/null @@ -1,14 +0,0 @@ -os: linux -dist: bionic -language: java -jdk: "openjdk8" - -cache: - directories: -- $HOME/.m2 - -services: -- docker - -script: -- testing/run_travis_tests.sh diff --git a/testing/run_tests.sh b/testing/run_tests.sh new file mode 100755 index 000..764eaa0 --- /dev/null +++ b/testing/run_tests.sh @@ -0,0 +1,16 @@ +#!/bin/bash -e + +SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) + +. "${SCRIPT_DIR}/testing_lib.sh" + + +test_docker_entrypoint + +smoke_test_all_images +smoke_test_one_image_non_root + + +echo "Test successfully finished" + +# vim: et ts=2 sw=2 diff --git a/testing/run_travis_tests.sh b/testing/run_travis_tests.sh deleted file mode 100755 index 55ffecd..000 --- a/testing/run_travis_tests.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -e - -SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) - -. "${SCRIPT_DIR}/testing_lib.sh" - - -test_docker_entrypoint - -./add-custom.sh -u "https://s3.amazonaws.com/flink-nightly/flink-1.16-SNAPSHOT-bin-scala_2.12.tgz" -j 8 -n test-java8 - -# test Flink with Java11 image as well -./add-custom.sh -u "https://s3.amazonaws.com/flink-nightly/flink-1.16-SNAPSHOT-bin-scala_2.12.tgz" -j 11 -n test-java11 - -smoke_test_all_images -smoke_test_one_image_non_root - - -echo "Test successfully finished" - -# vim: et ts=2 sw=2
[flink] branch master updated (6ebdde3b124 -> 3e1874bf05d)
This is an automated email from the ASF dual-hosted git repository. gaoyunhaii pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/flink.git from 6ebdde3b124 [FLINK-25970][core] Add type of original exception to the detailMessage of SerializedThrowable. add 3e1874bf05d [release] Change the stable doc versions No new revisions were added by this update. Summary of changes: .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
[flink] branch master updated (66917f6d4a2 -> 6ebdde3b124)
This is an automated email from the ASF dual-hosted git repository. xtsong pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/flink.git from 66917f6d4a2 [FLINK-27469][yarn][tests] Drop CliFrontendRunWithYarnTest add 6ebdde3b124 [FLINK-25970][core] Add type of original exception to the detailMessage of SerializedThrowable. No new revisions were added by this update. Summary of changes: .../org/apache/flink/util/SerializedThrowable.java | 11 ++-- .../checkpoint/CheckpointCoordinatorTest.java | 9 -- .../messages/checkpoint/DeclineCheckpointTest.java | 7 - .../json/SerializedThrowableSerializerTest.java| 10 +-- .../runtime/util/SerializedThrowableTest.java | 33 +- 5 files changed, 46 insertions(+), 24 deletions(-)
[flink] branch master updated (f26bfd4798b -> 66917f6d4a2)
This is an automated email from the ASF dual-hosted git repository. chesnay pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/flink.git from f26bfd4798b [FLINK-26052][docs-zh] Document savepoint format add 66917f6d4a2 [FLINK-27469][yarn][tests] Drop CliFrontendRunWithYarnTest No new revisions were added by this update. Summary of changes: flink-yarn-tests/pom.xml | 8 -- .../flink/yarn/CliFrontendRunWithYarnTest.java | 91 -- 2 files changed, 99 deletions(-) delete mode 100644 flink-yarn-tests/src/test/java/org/apache/flink/yarn/CliFrontendRunWithYarnTest.java
[flink-web] branch asf-site updated (666398fe3 -> b7c2c62b5)
This is an automated email from the ASF dual-hosted git repository. guoyangze pushed a change to branch asf-site in repository https://gitbox.apache.org/repos/asf/flink-web.git from 666398fe3 rebuild website add 42f21218f Add Yangze Guo to the committer list add b7c2c62b5 Rebuild website No new revisions were added by this update. Summary of changes: community.md | 6 + community.zh.md | 6 + content/2022/05/06/pyflink-1.15-thread-mode.html | 767 --- content/blog/feed.xml| 618 -- content/blog/index.html | 36 +- content/blog/page10/index.html | 36 +- content/blog/page11/index.html | 36 +- content/blog/page12/index.html | 38 +- content/blog/page13/index.html | 42 +- content/blog/page14/index.html | 42 +- content/blog/page15/index.html | 40 +- content/blog/page16/index.html | 40 +- content/blog/page17/index.html | 40 +- content/blog/page18/index.html | 40 +- content/blog/page19/index.html | 25 - content/blog/page2/index.html| 36 +- content/blog/page3/index.html| 38 +- content/blog/page4/index.html| 38 +- content/blog/page5/index.html| 42 +- content/blog/page6/index.html| 42 +- content/blog/page7/index.html| 38 +- content/blog/page8/index.html| 38 +- content/blog/page9/index.html| 36 +- content/community.html | 6 + content/index.html | 6 +- content/zh/community.html| 6 + content/zh/index.html| 6 +- 27 files changed, 421 insertions(+), 1723 deletions(-) delete mode 100644 content/2022/05/06/pyflink-1.15-thread-mode.html
[flink-ml] branch master updated: [FLINK-27093] Add Transformer and Estimator for LinearRegression
This is an automated email from the ASF dual-hosted git repository. lindong pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink-ml.git The following commit(s) were added to refs/heads/master by this push: new f34dbb7 [FLINK-27093] Add Transformer and Estimator for LinearRegression f34dbb7 is described below commit f34dbb708a0d151caa5afab593d04badeb549224 Author: Zhipeng Zhang AuthorDate: Fri May 6 16:02:55 2022 +0800 [FLINK-27093] Add Transformer and Estimator for LinearRegression This closes #90. --- .../flink/ml/common/datastream/AllReduceImpl.java | 2 + .../ml/common/datastream/DataStreamUtils.java | 83 - .../main/java/org/apache/flink/ml/linalg/BLAS.java | 19 +- .../ml/common/datastream/DataStreamUtilsTest.java | 11 + .../java/org/apache/flink/ml/linalg/BLASTest.java | 16 + .../logisticregression/LogisticGradient.java | 97 - .../logisticregression/LogisticRegression.java | 401 +++-- .../LogisticRegressionModel.java | 80 ++-- .../LogisticRegressionParams.java | 2 + .../ml/common/lossfunc/BinaryLogisticLoss.java | 50 +++ .../flink/ml/common/lossfunc/LeastSquareLoss.java | 50 +++ .../apache/flink/ml/common/lossfunc/LossFunc.java | 51 +++ .../flink/ml/common/optimizer/Optimizer.java | 46 +++ .../ml/common/optimizer/RegularizationUtils.java | 92 + .../org/apache/flink/ml/common/optimizer/SGD.java | 390 .../flink/ml/common/param/HasElasticNet.java | 47 +++ .../linearregression/LinearRegression.java | 122 +++ .../linearregression/LinearRegressionModel.java} | 113 +++--- .../LinearRegressionModelData.java | 111 ++ .../LinearRegressionModelParams.java | 29 ++ .../linearregression/LinearRegressionParams.java} | 12 +- .../ml/classification/LogisticRegressionTest.java | 101 -- .../ml/common/lossfunc/BinaryLogisticLossTest.java | 53 +++ .../ml/common/lossfunc/LeastSquareLossTest.java| 51 +++ .../common/optimizer/RegularizationUtilsTest.java | 47 +++ .../flink/ml/regression/LinearRegressionTest.java | 255 + 26 files changed, 1721 
insertions(+), 610 deletions(-) diff --git a/flink-ml-core/src/main/java/org/apache/flink/ml/common/datastream/AllReduceImpl.java b/flink-ml-core/src/main/java/org/apache/flink/ml/common/datastream/AllReduceImpl.java index 760b5db..167572a 100644 --- a/flink-ml-core/src/main/java/org/apache/flink/ml/common/datastream/AllReduceImpl.java +++ b/flink-ml-core/src/main/java/org/apache/flink/ml/common/datastream/AllReduceImpl.java @@ -18,6 +18,7 @@ package org.apache.flink.ml.common.datastream; +import org.apache.flink.annotation.Internal; import org.apache.flink.annotation.VisibleForTesting; import org.apache.flink.api.common.functions.RichFlatMapFunction; import org.apache.flink.api.common.typeinfo.BasicTypeInfo; @@ -49,6 +50,7 @@ import java.util.Map; * All workers do reduce on all data it received and then broadcast partial results to others. * All workers merge partial results into final result. */ +@Internal class AllReduceImpl { @VisibleForTesting static final int CHUNK_SIZE = 1024 * 4; diff --git a/flink-ml-core/src/main/java/org/apache/flink/ml/common/datastream/DataStreamUtils.java b/flink-ml-core/src/main/java/org/apache/flink/ml/common/datastream/DataStreamUtils.java index 7eea6b0..10073b8 100644 --- a/flink-ml-core/src/main/java/org/apache/flink/ml/common/datastream/DataStreamUtils.java +++ b/flink-ml-core/src/main/java/org/apache/flink/ml/common/datastream/DataStreamUtils.java @@ -18,12 +18,16 @@ package org.apache.flink.ml.common.datastream; +import org.apache.flink.annotation.Internal; import org.apache.flink.api.common.functions.MapPartitionFunction; +import org.apache.flink.api.common.functions.ReduceFunction; import org.apache.flink.api.common.state.ListState; import org.apache.flink.api.common.state.ListStateDescriptor; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.java.typeutils.TypeExtractor; +import org.apache.flink.iteration.operator.OperatorStateUtils; import 
org.apache.flink.runtime.state.StateInitializationContext; +import org.apache.flink.runtime.state.StateSnapshotContext; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.operators.AbstractUdfStreamOperator; import org.apache.flink.streaming.api.operators.BoundedOneInput; @@ -32,6 +36,7 @@ import org.apache.flink.streaming.api.operators.TimestampedCollector; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; /** Provides utility functions for {@link DataStream}. */ +@Internal public class DataStreamUtils { /** * Applies allReduceSum on the input data stream. The input data stream is supposed to contain @@ -55,8 +60,8 @@ public class
[flink] branch release-1.15 updated: [FLINK-26052][docs-zh] Document savepoint format
This is an automated email from the ASF dual-hosted git repository. tangyun pushed a commit to branch release-1.15 in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/release-1.15 by this push: new 5530d635979 [FLINK-26052][docs-zh] Document savepoint format 5530d635979 is described below commit 5530d635979ef149514fda7b860a9286928d82ca Author: wangfeifan AuthorDate: Sun May 1 21:19:38 2022 +0800 [FLINK-26052][docs-zh] Document savepoint format --- docs/content.zh/docs/ops/state/savepoints.md | 27 +++ 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/docs/content.zh/docs/ops/state/savepoints.md b/docs/content.zh/docs/ops/state/savepoints.md index 11f336898d9..3d5cb7c52fe 100644 --- a/docs/content.zh/docs/ops/state/savepoints.md +++ b/docs/content.zh/docs/ops/state/savepoints.md @@ -116,6 +116,19 @@ mapper-id | State of StatefulMapper **注意:** 不建议移动或删除正在运行作业的最后一个 Savepoint ,因为这可能会干扰故障恢复。因此,Savepoint 对精确一次的接收器有副作用,为了确保精确一次的语义,如果在最后一个 Savepoint 之后没有 Checkpoint ,那么将使用 Savepoint 进行恢复。 {{< /hint >}} + + + Savepoint 格式 + +你可以在 savepoint 的两种二进制格式之间进行选择: + +* 标准格式 - 一种在所有 state backends 间统一的格式,允许你使用一种状态后端创建 savepoint 后,使用另一种状态后端恢复这个 savepoint。这是最稳定的格式,旨在与之前的版本、模式、修改等保持最大兼容性。 + +* 原生格式 - 标准格式的缺点是它的创建和恢复速度通常很慢。原生格式以特定于使用的状态后端的格式创建快照(例如 RocksDB 的 SST 文件)。 + +{{< hint info >}} +以原生格式创建 savepoint 的能力在 Flink 1.15 中引入,在那之前 savepoint 都是以标准格式创建的。 +{{< /hint >}} 触发 Savepoint @@ -123,7 +136,11 @@ mapper-id | State of StatefulMapper $ bin/flink savepoint :jobId [:targetDirectory] ``` -这将触发 ID 为 `:jobId` 的作业的 Savepoint,并返回创建的 Savepoint 路径。 你需要此路径来还原和删除 Savepoint 。 +这将触发 ID 为 `:jobId` 的作业的 Savepoint,并返回创建的 Savepoint 路径。 你需要此路径来恢复和删除 Savepoint 。你也可以指定创建 Savepoint 的格式。如果没有指定,会采用标准格式创建 Savepoint。 + +```shell +$ bin/flink savepoint --type [native/canonical] :jobId [:targetDirectory] +``` 使用 YARN 触发 Savepoint @@ -133,13 +150,15 @@ $ bin/flink savepoint :jobId [:targetDirectory] -yid :yarnAppId 这将触发 ID 为 `:jobId` 和 YARN 应用程序 ID 
`:yarnAppId` 的作业的 Savepoint,并返回创建的 Savepoint 的路径。 - 使用 Savepoint 取消作业 + + + 使用 Savepoint 停止作业 ```shell -$ bin/flink cancel -s [:targetDirectory] :jobId +$ bin/flink stop --type [native/canonical] --savepointPath [:targetDirectory] :jobId ``` -这将自动触发 ID 为 `:jobid` 的作业的 Savepoint,并取消该作业。此外,你可以指定一个目标文件系统目录来存储 Savepoint 。该目录需要能被 JobManager(s) 和 TaskManager(s) 访问。 +这将自动触发 ID 为 `:jobid` 的作业的 Savepoint,并停止该作业。此外,你可以指定一个目标文件系统目录来存储 Savepoint 。该目录需要能被 JobManager(s) 和 TaskManager(s) 访问。你也可以指定创建 Savepoint 的格式。如果没有指定,会采用标准格式创建 Savepoint。 ### 从 Savepoint 恢复
[flink] branch master updated: [FLINK-26052][docs-zh] Document savepoint format
This is an automated email from the ASF dual-hosted git repository. tangyun pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/master by this push: new f26bfd4798b [FLINK-26052][docs-zh] Document savepoint format f26bfd4798b is described below commit f26bfd4798bdaf56a7d615afaa5660bda7ef78f9 Author: wangfeifan AuthorDate: Sun May 1 21:19:38 2022 +0800 [FLINK-26052][docs-zh] Document savepoint format --- docs/content.zh/docs/ops/state/savepoints.md | 27 +++ 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/docs/content.zh/docs/ops/state/savepoints.md b/docs/content.zh/docs/ops/state/savepoints.md index 11f336898d9..3d5cb7c52fe 100644 --- a/docs/content.zh/docs/ops/state/savepoints.md +++ b/docs/content.zh/docs/ops/state/savepoints.md @@ -116,6 +116,19 @@ mapper-id | State of StatefulMapper **注意:** 不建议移动或删除正在运行作业的最后一个 Savepoint ,因为这可能会干扰故障恢复。因此,Savepoint 对精确一次的接收器有副作用,为了确保精确一次的语义,如果在最后一个 Savepoint 之后没有 Checkpoint ,那么将使用 Savepoint 进行恢复。 {{< /hint >}} + + + Savepoint 格式 + +你可以在 savepoint 的两种二进制格式之间进行选择: + +* 标准格式 - 一种在所有 state backends 间统一的格式,允许你使用一种状态后端创建 savepoint 后,使用另一种状态后端恢复这个 savepoint。这是最稳定的格式,旨在与之前的版本、模式、修改等保持最大兼容性。 + +* 原生格式 - 标准格式的缺点是它的创建和恢复速度通常很慢。原生格式以特定于使用的状态后端的格式创建快照(例如 RocksDB 的 SST 文件)。 + +{{< hint info >}} +以原生格式创建 savepoint 的能力在 Flink 1.15 中引入,在那之前 savepoint 都是以标准格式创建的。 +{{< /hint >}} 触发 Savepoint @@ -123,7 +136,11 @@ mapper-id | State of StatefulMapper $ bin/flink savepoint :jobId [:targetDirectory] ``` -这将触发 ID 为 `:jobId` 的作业的 Savepoint,并返回创建的 Savepoint 路径。 你需要此路径来还原和删除 Savepoint 。 +这将触发 ID 为 `:jobId` 的作业的 Savepoint,并返回创建的 Savepoint 路径。 你需要此路径来恢复和删除 Savepoint 。你也可以指定创建 Savepoint 的格式。如果没有指定,会采用标准格式创建 Savepoint。 + +```shell +$ bin/flink savepoint --type [native/canonical] :jobId [:targetDirectory] +``` 使用 YARN 触发 Savepoint @@ -133,13 +150,15 @@ $ bin/flink savepoint :jobId [:targetDirectory] -yid :yarnAppId 这将触发 ID 为 `:jobId` 和 YARN 应用程序 ID 
`:yarnAppId` 的作业的 Savepoint,并返回创建的 Savepoint 的路径。 - 使用 Savepoint 取消作业 + + + 使用 Savepoint 停止作业 ```shell -$ bin/flink cancel -s [:targetDirectory] :jobId +$ bin/flink stop --type [native/canonical] --savepointPath [:targetDirectory] :jobId ``` -这将自动触发 ID 为 `:jobid` 的作业的 Savepoint,并取消该作业。此外,你可以指定一个目标文件系统目录来存储 Savepoint 。该目录需要能被 JobManager(s) 和 TaskManager(s) 访问。 +这将自动触发 ID 为 `:jobid` 的作业的 Savepoint,并停止该作业。此外,你可以指定一个目标文件系统目录来存储 Savepoint 。该目录需要能被 JobManager(s) 和 TaskManager(s) 访问。你也可以指定创建 Savepoint 的格式。如果没有指定,会采用标准格式创建 Savepoint。 ### 从 Savepoint 恢复
[flink-web] 01/02: Improvements to Flink operations: Snapshots Ownership and Savepoint Formats
This is an automated email from the ASF dual-hosted git repository. dwysakowicz pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/flink-web.git commit 93bd43090eae792e9b2ff205d03b7af740f84c8d Author: Dawid Wysakowicz AuthorDate: Tue Mar 15 16:06:13 2022 +0100 Improvements to Flink operations: Snapshots Ownership and Savepoint Formats Co-authored-by: Daisy T --- _posts/2022-05-06-restore-modes.md | 221 + .../restore-mode-claim.svg | 21 ++ .../restore-mode-legacy.svg| 21 ++ .../restore-mode-no_claim.svg | 21 ++ .../restore-mode-claim.svg | 21 ++ .../restore-mode-legacy.svg| 21 ++ .../restore-mode-no_claim.svg | 21 ++ 7 files changed, 347 insertions(+) diff --git a/_posts/2022-05-06-restore-modes.md b/_posts/2022-05-06-restore-modes.md new file mode 100644 index 0..013282beb --- /dev/null +++ b/_posts/2022-05-06-restore-modes.md @@ -0,0 +1,221 @@ +--- +layout: post +title: "Improvements to Flink operations: Snapshots Ownership and Savepoint Formats" +date: 2022-05-06T00:00:00.000Z +authors: +- dwysakowicz: + name: "Dawid Wysakowicz" + twitter: "dwysakowicz" +- Daisy Tsang: + name: "Daisy Tsang" + +excerpt: This post will outline the journey of improving snapshotting in past releases and the upcoming improvements in Flink 1.15, which includes making it possible to take savepoints in the native state backend specific format as well as clarifying snapshots ownership. + +--- + +Flink has become a well established data streaming engine and a +mature project requires some shifting of priorities from thinking purely about new features +towards improving stability and operational simplicity. In the last couple of releases, the Flink community has tried to address +some known friction points, which includes improvements to the +snapshotting process. Snapshotting takes a global, consistent image of the state of a Flink job and is integral to fault-tolerance and exactly-once processing. Snapshots include savepoints and checkpoints. 
+ +This post will outline the journey of improving snapshotting in past releases and the upcoming improvements in Flink 1.15, which includes making it possible to take savepoints in the native state backend specific format as well as clarifying snapshots ownership. + +{% toc %} + +# Past improvements to the snapshotting process + +Flink 1.13 was the first release where we announced [unaligned checkpoints]({{site.DOCS_BASE_URL}}flink-docs-release-1.15/docs/concepts/stateful-stream-processing/#unaligned-checkpointing) to be production-ready. We +encouraged people to use them if their jobs are backpressured to a point where it causes issues for +checkpoints. We also [unified the binary format of savepoints](/news/2021/05/03/release-1.13.0.html#switching-state-backend-with-savepoints) across all +different state backends, which enables stateful switching of savepoints. + +Flink 1.14 also brought additional improvements. As an alternative and as a complement +to unaligned checkpoints, we introduced a feature called ["buffer debloating"](/news/2021/09/29/release-1.14.0.html#buffer-debloating). This is built +around the concept of automatically adjusting the amount of in-flight data that needs to be aligned +while snapshotting. We also fixed another long-standing problem and made it +possible to [continue checkpointing even if there are finished tasks](/news/2021/09/29/release-1.14.0.html#checkpointing-and-bounded-streams) in a JobGraph. + + + + +# New improvements to the snapshotting process + +You can expect more improvements in Flink 1.15! We continue to be invested in making it easy +to operate Flink clusters and have tackled the following problems. :) + +Savepoints can be expensive +to take and restore from if taken for a very large state stored in the RocksDB state backend. In +order to circumvent this issue, we have seen users leveraging the externalized incremental checkpoints +instead of savepoints in order to benefit from the native RocksDB format. 
However, checkpoints and savepoints +serve different operational purposes. Thus, we now made it possible to take savepoints in the +native state backend specific format, while still maintaining some characteristics of savepoints (i.e. making them relocatable). + +Another issue reported with externalized checkpoints is that it is not clear who owns the +checkpoint files (Flink or the user?). This is especially problematic when it comes to incremental RocksDB checkpoints +where you can easily end up in a situation where you do not know which checkpoints depend on which files +which makes it tough to clean those files up. To solve this issue, we added explicit restore +modes (CLAIM, NO_CLAIM, and LEGACY) which clearly define whether Flink should take +care
[flink-web] branch asf-site updated (be6961584 -> 666398fe3)
This is an automated email from the ASF dual-hosted git repository. dwysakowicz pushed a change to branch asf-site in repository https://gitbox.apache.org/repos/asf/flink-web.git from be6961584 Rebuild website new 93bd43090 Improvements to Flink operations: Snapshots Ownership and Savepoint Formats new 666398fe3 rebuild website The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: _posts/2022-05-06-restore-modes.md | 221 + content/2022/05/06/pyflink-1.15-thread-mode.html | 767 +++ content/2022/05/06/restore-modes.html | 572 +++ content/blog/feed.xml | 1034 +++- content/blog/index.html| 74 +- content/blog/page10/index.html | 76 +- content/blog/page11/index.html | 76 +- content/blog/page12/index.html | 80 +- content/blog/page13/index.html | 87 +- content/blog/page14/index.html | 85 +- content/blog/page15/index.html | 80 +- content/blog/page16/index.html | 82 +- content/blog/page17/index.html | 80 +- content/blog/page18/index.html | 85 +- content/blog/{page12 => page19}/index.html | 167 +--- content/blog/page2/index.html | 76 +- content/blog/page3/index.html | 78 +- content/blog/page4/index.html | 76 +- content/blog/page5/index.html | 82 +- content/blog/page6/index.html | 82 +- content/blog/page7/index.html | 76 +- content/blog/page8/index.html | 76 +- content/blog/page9/index.html | 76 +- .../restore-mode-claim.svg | 21 + .../restore-mode-legacy.svg| 21 + .../restore-mode-no_claim.svg | 21 + content/index.html | 12 +- content/zh/index.html | 12 +- .../restore-mode-claim.svg | 21 + .../restore-mode-legacy.svg| 21 + .../restore-mode-no_claim.svg | 21 + 31 files changed, 3413 insertions(+), 925 deletions(-) create mode 100644 _posts/2022-05-06-restore-modes.md create mode 100644 content/2022/05/06/pyflink-1.15-thread-mode.html create mode 100644 
content/2022/05/06/restore-modes.html copy content/blog/{page12 => page19}/index.html (90%) create mode 100644 content/img/blog/2022-05-06-restore-modes/restore-mode-claim.svg create mode 100644 content/img/blog/2022-05-06-restore-modes/restore-mode-legacy.svg create mode 100644 content/img/blog/2022-05-06-restore-modes/restore-mode-no_claim.svg create mode 100644 img/blog/2022-05-06-restore-modes/restore-mode-claim.svg create mode 100644 img/blog/2022-05-06-restore-modes/restore-mode-legacy.svg create mode 100644 img/blog/2022-05-06-restore-modes/restore-mode-no_claim.svg
[flink] branch master updated: [FLINK-27477][yarn][tests] Drop flink-yarn test-jar
This is an automated email from the ASF dual-hosted git repository. chesnay pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/master by this push: new 98a6a5432b6 [FLINK-27477][yarn][tests] Drop flink-yarn test-jar 98a6a5432b6 is described below commit 98a6a5432b642aa647f6edcd60dae49ef9093786 Author: Chesnay Schepler AuthorDate: Mon May 2 11:59:00 2022 +0200 [FLINK-27477][yarn][tests] Drop flink-yarn test-jar --- flink-yarn-tests/pom.xml| 8 .../org/apache/flink/yarn/YarnConfigurationITCase.java | 5 ++--- .../apache/flink/yarn/YarnPrioritySchedulingITCase.java | 6 -- .../test/java/org/apache/flink/yarn/YarnTestBase.java | 8 +--- flink-yarn/pom.xml | 17 - 5 files changed, 7 insertions(+), 37 deletions(-) diff --git a/flink-yarn-tests/pom.xml b/flink-yarn-tests/pom.xml index 6a4ac79ae58..253f2ede2b1 100644 --- a/flink-yarn-tests/pom.xml +++ b/flink-yarn-tests/pom.xml @@ -91,14 +91,6 @@ under the License. 
test - - org.apache.flink - flink-yarn - ${project.version} - test-jar - test - - org.apache.flink flink-examples-batch_${scala.binary.version} diff --git a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnConfigurationITCase.java b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnConfigurationITCase.java index 73f6e8fef3c..f5473fab912 100644 --- a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnConfigurationITCase.java +++ b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnConfigurationITCase.java @@ -19,7 +19,6 @@ package org.apache.flink.yarn; import org.apache.flink.api.common.time.Time; -import org.apache.flink.client.cli.CliFrontend; import org.apache.flink.client.deployment.ClusterSpecification; import org.apache.flink.client.program.ClusterClient; import org.apache.flink.client.program.PackagedProgram; @@ -88,11 +87,11 @@ class YarnConfigurationITCase extends YarnTestBase { final YarnConfiguration yarnConfiguration = getYarnConfiguration(); final YarnClusterDescriptor clusterDescriptor = -YarnTestUtils.createClusterDescriptorWithLogging( - CliFrontend.getConfigurationDirectoryFromEnv(), +new YarnClusterDescriptor( configuration, yarnConfiguration, yarnClient, + YarnClientYarnClusterInformationRetriever.create(yarnClient), true); clusterDescriptor.setLocalJarPath(new Path(flinkUberjar.getAbsolutePath())); diff --git a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnPrioritySchedulingITCase.java b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnPrioritySchedulingITCase.java index cf610593e6f..5673aee1e7f 100644 --- a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnPrioritySchedulingITCase.java +++ b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnPrioritySchedulingITCase.java @@ -18,25 +18,19 @@ package org.apache.flink.yarn; -import org.apache.hadoop.util.VersionInfo; import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.junit.jupiter.api.BeforeAll; import 
org.junit.jupiter.api.Test; import java.lang.reflect.Method; -import static org.apache.flink.yarn.YarnTestUtils.isHadoopVersionGreaterThanOrEquals; import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.jupiter.api.Assumptions.assumeTrue; /** Tests to Yarn's priority scheduling. */ class YarnPrioritySchedulingITCase extends YarnTestBase { @BeforeAll public static void setup() { -assumeTrue( -isHadoopVersionGreaterThanOrEquals(2, 8), -"Priority scheduling is not supported by Hadoop: " + VersionInfo.getVersion()); YARN_CONFIGURATION.setStrings("yarn.cluster.max-application-priority", "10"); startYARNWithConfig(YARN_CONFIGURATION); diff --git a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java index f049b54462c..6e25d4378ce 100644 --- a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java +++ b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java @@ -29,6 +29,7 @@ import org.apache.flink.util.ExceptionUtils; import org.apache.flink.util.Preconditions; import