svn commit: r72258 - /release/flink/KEYS
Author: jqin Date: Fri Oct 11 06:57:01 2024 New Revision: 72258 Log: [flink] Add Becket's public key Modified: release/flink/KEYS Modified: release/flink/KEYS == --- release/flink/KEYS (original) +++ release/flink/KEYS Fri Oct 11 06:57:01 2024 @@ -3931,3 +3931,62 @@ CQWjmoAACgkQU4tJ6bzwty+BmQEA7tp61tPgq9Wr 8sLoOEoBAOCcCJiOv7KiplIac1xv8WHA5eacb5Bm2HGsX7zOTHMN =iyaV -END PGP PUBLIC KEY BLOCK- +pub rsa4096 2024-10-11 [SC] + A9AD7F73911D747F7ED3128E15E0F69F4424F39A +uid [ultimate] Jiangjie (Becket) Qin +sig 315E0F69F4424F39A 2024-10-11 [self-signature] +sub rsa4096 2024-10-11 [E] +sig 15E0F69F4424F39A 2024-10-11 [self-signature] + +-BEGIN PGP PUBLIC KEY BLOCK- + +mQINBGcIv9oBEADdjqFEOv7aWGo12DZfeOh2C5pumMUJAG5rTiJpO5cl/a8sSwKw +EhtkrEST+qQaMPutX4p2zE7SZinCFuyjfGFAz3pNlFtURWSHEz2wiPI3EArdrooe +usAbASRil/bcj4LQIkDaLgiq6LnWXq5AQfttayOszMK/TmU2gnHMyQdjnMddNY1E +0+GkmeTgRyQKy8GY2S7hS5+v0y8YJEvh1D34hBpZiF0rSSXPiA/LNanLTRSGYd0x +zmXZkNKZDl51r4IdpoAnOCLvSWEZy2fq9khDIAEbuRfHZKYVaZ0rAXvmxCmgiMob +j+QHeWHzAb3kbGLAJNaongIazZugtMlL5j0vEqCTTMPWuBk7jCd0ZSuxU8U+ksdY +Hkac0flsIoEW8RacGFWY6w8r7Xj68f0dzYrsBk38S8GO4KIbTpT0dGQNxX1jN0pu +d7nrAmQAJXvfBpufs0AtZtR7I9nNkS8ELvSCPim3sVpuk5YIJE9ccjwgrn+AetMx +AMeMZg6NNNhlB9D9pZOAh7UqksQBqijMY1OB2d4UQ6LNMAHpFbIUhVDlwHREBlWl +PVKW9J1OzT0cd1xObf9OdbnMQp4GlC5alP3g3VNMk/3fCgLlxWDt7UsJS/r6LkbV +q9QSS8TuvdY96A1dDr9upvxGoqE7GaMgGgEphLwfysI+nCvp4EjBzs+37QARAQAB +tCdKaWFuZ2ppZSAoQmVja2V0KSBRaW4gPGpxaW5AYXBhY2hlLm9yZz6JAlEEEwEI +ADsWIQSprX9zkR10f37TEo4V4PafRCTzmgUCZwi/2gIbAwULCQgHAgIiAgYVCgkI +CwIEFgIDAQIeBwIXgAAKCRAV4PafRCTzmqtREADbSX1mBhPMTjNB//+bRfkfUaUI +2FYr22hsYxeIiO+oJTRMsMAB1rP4MoYbUGQXshp21i+8wJDu49XBUs96ZnL/qPZk +KUst3wXao8B/b9gpIWgz1piuBQP+ERLpRLFBGx1vtbSbT950l5AqlaC2KfKduYZg +ndEYafpxElv8bsmms+GNVkfAXfQZhYoxZK/PyHFu6pY3gBL+nzvHNWmbXeGGrxLL +J2mb6ustAFZEupN8zLkBfcD9F2c/Ozgn42dQUsIJkkajKF36j5nkz1F4bjKGG3DA +YHwwHFRtuMdtgDg9Qc8ycOlPxtc2DJIcMMneKFVi2DeTPixviMtg10sFDa11t1cl +sDtZk9tpSLWCcv0MDtux7qxZdNOutDUNStheEf0ywG1mtA59l01s3hmkKZe2trtY 
+D2bCXvzr2n0+rTRgeoO31cozuprLHlxH/Hj9CnE9bDv23LCD1nl56QV+R6GVxsol +BtJy+yxzPQRBeI5d3ss1/vOV3QiNfuBA+4c8Mm0kYcBE360TUgsCM6sMerWZQ6HW +mZYVjU/XZq87x+Vh6G/O7ZohA7DLJkANxlzMvCDD2cH1d8d7/HuH+l/C80Kzs16s +z1aj4fgfI3TuQiGvapDAZDfL3SDJUydOWvQNUCrrLFMy6xJitq3X09gJwdD8bg55 +dhoUW7pWAYBP8XAUqLkCDQRnCL/aARAArPWssV0jG95sLeMd+ydzVSLymw0eY0ZG +njmqO86ymswRbnEZR/Y8NRgV1r9ZVhyB23796wtZ7dv8UjGxPZRiP5E0YAvY7cQS +KMHKIfALzGzMEOKHTXlymgO4FK2iUT4z1juG0y9FB9OEyPrgoD68pjU8KRJDKrQp +rBn80ZXOWd/lnFQCOipP26lTiq7EV1b/v0GBmPbKp4XNQBtnUyMndBEXXTeRmfJ5 +yf6h/9tGfa60Dy2PhTk4+X0CPFWe0X+K22eEwKYPn8K4UhtntbqQCNaj9xUVDkmN +SEMaHwWxHluSwf1WK+qbFIYuVsu5q6UkbkXEuPRix/pjtl0kbuAg2ncpGYhQ3j1n +/V2EeaXSj9fsmFVKBiQaPU53kQb56NII0ZS2mMkm0s1PoNZ7LBVpk4gVTq5YG7Fc +osMLp3X7WaVFOXwbopgOcE8jW8JdnmGbMfM4S7Q4h9yKr3mSos9V3sI6QBmSzWcC +3D4mgH7a+W3Cvz4E7MgUmA7Tg4uRgLWuiCM3VTfc9l7OvhOmxOhY3V/poyxS7VOL +LXYGw0SWMAW7oEv88sV6cLiAXK+P4g7a94cYEt2jA27C1BCXNsvvkGv1RhrmH2gT +ccyK9viSFBi90G2jXHHpd9b+MUfFIZt8ObvWx7Ola5Du+JblS1eD30x84dvxxUfs +gYxzNkFY20cAEQEAAYkCNgQYAQgAIBYhBKmtf3ORHXR/ftMSjhXg9p9EJPOaBQJn +CL/aAhsMAAoJEBXg9p9EJPOa4IkP/1n3blONpP+Efba/6b0+SlzGp6FkxtpH0yJm +4OlwiPJ7IY5J06AP3ZAl5MaDd7p23FogN/wwaZ+szWdpbdUF5EX/oisppXYyCEdj +3ELcTELGEFlcd/1LDtIHRKql4Y5vm9DbQessybthWWQoxtWWOAI6IRDrmFRobA51 +36IFMWQdfA2RdGAPrC3B+OmhI8emInDGEX+jzamLbqxQUL+YPytCpad3KtTp69B3 +mzJn89idzH6FT+NxucyxMsrixcZZeSPsNLR6BEW4WceHERwz2QZ6SqwV6Ss9H+89 ++88Dg9THvO1kLGT3C42EZHPgDZzxCPCOH6OAk0/73vIObtgOwnEmkUj5v3hviHY9 +MqSabHgSow/xZoXPhJVMJd3qzgqgODO0GgupyG1sGTHnf3YoOHCsVr/EJtAe0+vH +HytjUBghh71wJA0mvajs2Jqbz5pw6WQ60HeH7dfabyE6FnXBTxPAvErqyDkLnGaj +qefAHR6TgCN8kLYikDOVcFG8Ti8ZpFiuOdf6Ivyq4SlHVQWVnBU+TiWe6bSevNCt +302DaARiwKZCzSi8ROCNjhYlF/xA71oXA4u733cPXLVeISxeT9uHpvMmhJ3onJu4 +gn0az213T6Xzycl4sEWSiF2qGQ5RifH8nPlpr4SQ2FzvE7fseLVonxt6KIJHA270 ++aNJBX6t +=diOL +-END PGP PUBLIC KEY BLOCK-
(flink) branch master updated: [FLINK-36292][Connectors/Common][test]: remove timeout to avoid timeout exception (#25371)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/master by this push: new cc0a8b3f69b [FLINK-36292][Connectors/Common][test]: remove timeout to avoid timeout exception (#25371) cc0a8b3f69b is described below commit cc0a8b3f69b8036534a5f0132bbe8f470d945ca0 Author: Luke Chen AuthorDate: Fri Sep 27 03:25:07 2024 +0900 [FLINK-36292][Connectors/Common][test]: remove timeout to avoid timeout exception (#25371) --- .../connector/base/source/reader/fetcher/SplitFetcherManagerTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flink-connectors/flink-connector-base/src/test/java/org/apache/flink/connector/base/source/reader/fetcher/SplitFetcherManagerTest.java b/flink-connectors/flink-connector-base/src/test/java/org/apache/flink/connector/base/source/reader/fetcher/SplitFetcherManagerTest.java index 21572b1c84d..c225ea8e718 100644 --- a/flink-connectors/flink-connector-base/src/test/java/org/apache/flink/connector/base/source/reader/fetcher/SplitFetcherManagerTest.java +++ b/flink-connectors/flink-connector-base/src/test/java/org/apache/flink/connector/base/source/reader/fetcher/SplitFetcherManagerTest.java @@ -71,7 +71,7 @@ public class SplitFetcherManagerTest { .hasRootCauseMessage("Artificial exception on closing the split reader."); } -@Test(timeout = 3) +@Test public void testCloseCleansUpPreviouslyClosedFetcher() throws Exception { final String splitId = "testSplit"; // Set the queue capacity to 1 to make sure in this case the @@ -94,7 +94,7 @@ public class SplitFetcherManagerTest { }, "The idle fetcher should have been removed."); // Now close the fetcher manager. The fetcher manager closing should not block. -fetcherManager.close(60_000); +fetcherManager.close(Long.MAX_VALUE); } @Test
(flink) branch master updated: [FLINK-33681][Runtime/Metrics] Reuse input/output metrics of SourceOperator/SinkWriterOperator for task (#23998)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/master by this push: new 0309f13e8af [FLINK-33681][Runtime/Metrics] Reuse input/output metrics of SourceOperator/SinkWriterOperator for task (#23998) 0309f13e8af is described below commit 0309f13e8af62f9b523e227a3a66ff59e838a1b4 Author: Zhanghao Chen AuthorDate: Thu Aug 22 06:31:44 2024 +0800 [FLINK-33681][Runtime/Metrics] Reuse input/output metrics of SourceOperator/SinkWriterOperator for task (#23998) --- .../base/source/reader/SourceMetricsITCase.java| 21 + .../groups/InternalOperatorIOMetricGroup.java | 12 .../runtime/metrics/groups/TaskIOMetricGroup.java | 17 + .../flink/runtime/testutils/InMemoryReporter.java | 22 +- .../streaming/api/operators/SourceOperator.java| 5 + .../runtime/operators/sink/SinkWriterOperator.java | 15 +++ .../test/streaming/runtime/SinkMetricsITCase.java | 21 + .../streaming/runtime/SinkV2MetricsITCase.java | 20 8 files changed, 128 insertions(+), 5 deletions(-) diff --git a/flink-connectors/flink-connector-base/src/test/java/org/apache/flink/connector/base/source/reader/SourceMetricsITCase.java b/flink-connectors/flink-connector-base/src/test/java/org/apache/flink/connector/base/source/reader/SourceMetricsITCase.java index b7631ef0311..3c315f694b9 100644 --- a/flink-connectors/flink-connector-base/src/test/java/org/apache/flink/connector/base/source/reader/SourceMetricsITCase.java +++ b/flink-connectors/flink-connector-base/src/test/java/org/apache/flink/connector/base/source/reader/SourceMetricsITCase.java @@ -34,6 +34,7 @@ import org.apache.flink.metrics.Metric; import org.apache.flink.metrics.groups.OperatorMetricGroup; import org.apache.flink.runtime.metrics.MetricNames; import org.apache.flink.runtime.metrics.groups.InternalSourceReaderMetricGroup; +import org.apache.flink.runtime.metrics.groups.TaskMetricGroup; import 
org.apache.flink.runtime.testutils.InMemoryReporter; import org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration; import org.apache.flink.streaming.api.datastream.DataStream; @@ -223,6 +224,26 @@ public class SourceMetricsITCase extends TestLogger { assertThatGauge(metrics.get(MetricNames.SOURCE_IDLE_TIME)).isEqualTo(0L); } assertThat(subtaskWithMetrics).isEqualTo(numSplits); + +// Test operator I/O metrics are reused by task metrics +List taskMetricGroups = +reporter.findTaskMetricGroups(jobId, "MetricTestingSource"); +assertThat(taskMetricGroups).hasSize(parallelism); + +int subtaskWithTaskMetrics = 0; +for (TaskMetricGroup taskMetricGroup : taskMetricGroups) { +// there are only 2 splits assigned; so two groups will not update metrics +if (taskMetricGroup.getIOMetricGroup().getNumRecordsInCounter().getCount() == 0) { +continue; +} + +subtaskWithTaskMetrics++; + assertThatCounter(taskMetricGroup.getIOMetricGroup().getNumRecordsInCounter()) +.isEqualTo(processedRecordsPerSubtask); + assertThatCounter(taskMetricGroup.getIOMetricGroup().getNumBytesInCounter()) +.isEqualTo(processedRecordsPerSubtask * MockRecordEmitter.RECORD_SIZE_IN_BYTES); +} +assertThat(subtaskWithTaskMetrics).isEqualTo(numSplits); } private static class LaggingTimestampAssigner diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/metrics/groups/InternalOperatorIOMetricGroup.java b/flink-runtime/src/main/java/org/apache/flink/runtime/metrics/groups/InternalOperatorIOMetricGroup.java index 31cf560ce78..0405b2d6e07 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/metrics/groups/InternalOperatorIOMetricGroup.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/metrics/groups/InternalOperatorIOMetricGroup.java @@ -97,4 +97,16 @@ public class InternalOperatorIOMetricGroup extends ProxyMetricGroup { private final Clock clock; -private final Counter numBytesIn; -private final Counter numBytesOut; +private final SumCounter numBytesIn; +private final 
SumCounter numBytesOut; private final SumCounter numRecordsIn; private final SumCounter numRecordsOut; private final Counter numBuffersOut; @@ -95,8 +95,8 @@ public class TaskIOMetricGroup extends ProxyMetricGroup { public TaskIOMetricGroup(TaskMetricGroup parent, Clock clock) { super(parent); this.clock = clock; -this.numBytesIn = counter(MetricNames.IO_NUM_BYTES_IN); -this.numBytesOut = counter(MetricNames.IO_NUM_BYTES_OUT); +this.numBytesIn
(flink) branch master updated: [FLINK-35924][Connectors / Common] delay the SplitReader closure to until all the emitted records are processed. (#25130)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/master by this push: new a15bf58da54 [FLINK-35924][Connectors / Common] delay the SplitReader closure to until all the emitted records are processed. (#25130) a15bf58da54 is described below commit a15bf58da5442deeb07ac2a1795a961a0ec75561 Author: Jiangjie (Becket) Qin AuthorDate: Tue Aug 6 08:31:59 2024 -0700 [FLINK-35924][Connectors / Common] delay the SplitReader closure to until all the emitted records are processed. (#25130) This patch delays the SplitReader closure to wait until all the previously emitted records are processed. This is needed for some of the SplitReader implementation which stores the returned records in internal buffer to save a data copy. In that case, closing the SplitReader will result in corruption of the emitted but not yet processed records. --- .../base/source/reader/fetcher/SplitFetcher.java | 45 .../source/reader/fetcher/SplitFetcherManager.java | 29 +++- .../reader/fetcher/SplitFetcherManagerTest.java| 79 +- .../source/reader/fetcher/SplitFetcherTest.java| 36 ++ .../java/org/apache/flink/test/util/TestUtils.java | 38 +++ 5 files changed, 223 insertions(+), 4 deletions(-) diff --git a/flink-connectors/flink-connector-base/src/main/java/org/apache/flink/connector/base/source/reader/fetcher/SplitFetcher.java b/flink-connectors/flink-connector-base/src/main/java/org/apache/flink/connector/base/source/reader/fetcher/SplitFetcher.java index 339686415ee..f05d7d16a45 100644 --- a/flink-connectors/flink-connector-base/src/main/java/org/apache/flink/connector/base/source/reader/fetcher/SplitFetcher.java +++ b/flink-connectors/flink-connector-base/src/main/java/org/apache/flink/connector/base/source/reader/fetcher/SplitFetcher.java @@ -20,6 +20,7 @@ package org.apache.flink.connector.base.source.reader.fetcher; import 
org.apache.flink.annotation.PublicEvolving; import org.apache.flink.api.connector.source.SourceSplit; +import org.apache.flink.connector.base.source.reader.RecordsBySplits; import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds; import org.apache.flink.connector.base.source.reader.splitreader.SplitReader; import org.apache.flink.connector.base.source.reader.synchronization.FutureCompletingBlockingQueue; @@ -32,10 +33,12 @@ import javax.annotation.concurrent.GuardedBy; import java.util.ArrayDeque; import java.util.Collection; +import java.util.Collections; import java.util.Deque; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.ReentrantLock; import java.util.function.Consumer; @@ -82,6 +85,14 @@ public class SplitFetcher implements Runnable { private final Consumer> splitFinishedHook; +/** + * A shutdown latch to help make sure the SplitReader is only closed after all the emitted + * records have been processed by the main reader thread. This is needed because in some cases, + * the records in the RecordsWithSplitIds may have not been processed when the split + * fetcher shuts down. + */ +private final CountDownLatch recordsProcessedLatch; + SplitFetcher( int id, FutureCompletingBlockingQueue> elementsQueue, @@ -97,6 +108,7 @@ public class SplitFetcher implements Runnable { this.shutdownHook = checkNotNull(shutdownHook); this.allowUnalignedSourceSplits = allowUnalignedSourceSplits; this.splitFinishedHook = splitFinishedHook; +this.recordsProcessedLatch = new CountDownLatch(1); this.fetchTask = new FetchTask<>( @@ -117,10 +129,25 @@ public class SplitFetcher implements Runnable { while (runOnce()) { // nothing to do, everything is inside #runOnce. } +if (recordsProcessedLatch.getCount() > 0) { +// Put an empty synchronization batch to the element queue. 
+// When this batch is recycled, all the records emitted earlier +// must have already been processed. +elementsQueue.put( +fetcherId(), +new RecordsBySplits(Collections.emptyMap(), Collections.emptySet()) { +@Override +public void recycle() { +super.recycle(); +recordsProcessedLatch.countDown(); +} +}); +} } catch (Throwable t) { errorHandler.accept(t); } finally {
(flink) branch master updated: [FLIP-321] Update the docs to add migration periods for deprecated APIs. (#23865)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/master by this push: new d4a3687aacd [FLIP-321] Update the docs to add migration periods for deprecated APIs. (#23865) d4a3687aacd is described below commit d4a3687aacdea61920098dd7814776655fde19db Author: Jiangjie (Becket) Qin AuthorDate: Fri Dec 15 16:11:42 2023 +0800 [FLIP-321] Update the docs to add migration periods for deprecated APIs. (#23865) --- docs/content/docs/ops/upgrading.md | 38 +++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/docs/content/docs/ops/upgrading.md b/docs/content/docs/ops/upgrading.md index ce58a7848e5..cc7d5e28cd8 100644 --- a/docs/content/docs/ops/upgrading.md +++ b/docs/content/docs/ops/upgrading.md @@ -55,11 +55,43 @@ This table lists the `source` / `binary` compatibility guarantees for each annot | `Experimental` |{{< xmark >}}/{{< xmark >}} |{{< xmark >}}/{{< xmark >}} |{{< xmark >}}/{{< xmark >}} | {{< hint info >}} -{{< label Example >}} -Code written against a `PublicEvolving` API in 1.15.2 will continue to run in 1.15.3, without having to recompile the code. -That same code would have to be recompiled when upgrading to 1.16.0 though. +{{< label Example >}} +Consider the code written against a `Public` API in 1.15.2: +* The code can continue to run when upgrading to Flink 1.15.3 without recompiling, because patch version upgrades for `Public` APIs guarantee `binary` compatibility. +* The same code may have to be recompiled when upgrading from 1.15.x to 1.16.0, because minor version upgrades for `Public` APIs only provide `source` compatibility, not `binary` compatibility. +* Code change may be required when upgrading from 1.x to 2.x because major version upgrades for `Public` APIs provide neither `source` nor `binary` compatibility. 
+ +Consider the code written against a `PublicEvolving` API in 1.15.2: +* The code can continue to run when upgrading to Flink 1.15.3 without recompiling, because patch version upgrades for `PublicEvolving` APIs guarantee `binary` compatibility. +* A code change may be required when upgrading from 1.15.x to Flink 1.16.0, because minor version upgrades for `PublicEvolving` APIs provide neither `source` nor binary compatibility. {{< /hint >}} +### Deprecated API Migration Period +When an API is deprecated, it is marked with the `@Deprecated` annotation and a deprecation message is added to the Javadoc. +According to [FLIP-321](https://cwiki.apache.org/confluence/display/FLINK/FLIP-321%3A+Introduce+an+API+deprecation+process), +starting from release 1.18, each deprecated API will have a guaranteed migration period depending on the API stability level: + +|Annotation| Guaranteed Migration Period |Could be removed after the migration period| +|::|:--:|:-:| +| `Public` |2 minor releases| Next major version | +| `PublicEvolving` |1 minor release | Next minor version | +| `Experimental` | 1 patch release for the affected minor release | Next patch version | + +The source code of a deprecated API will be kept for at least the guaranteed migration period, +and may be removed at any point after the migration period has passed. + +{{< hint info >}} +{{< label Example >}} +Assuming a release sequence of 1.18, 1.19, 1.20, 2.0, 2.1, ..., 3.0, +- if a `Public` API is deprecated in 1.18, it will not be removed until 2.0. +- if a `Public` API is deprecated in 1.20, the source code will be kept in 2.0 because the migration period is 2 minor releases. Also, because a `Public` API must maintain source compatibility throughout a major version, the source code will be kept for all the 2.x versions and removed in 3.0 at the earliest. +- if a `PublicEvolving` API is deprecated in 1.18, it will be removed in 1.20 at the earliest. 
+- if a `PublicEvolving` API is deprecated in 1.20, the source code will be kept in 2.0 because the migration period is 1 minor release. The source code may be removed in 2.1 at the earliest. +- if an `Experimental` API is deprecated in 1.18.0, the source code will be kept for 1.18.1 and removed in 1.18.2 at the earliest. Also, the source code can be removed in 1.19.0. +{{< /hint >}} + +Please check the [FLIP-321](https://cwiki.apache.org/confluence/display/FLINK/FLIP-321%3A+Introduce+an+API+deprecation+process) wiki for more details. + ## Restarting Streaming Applications The line of action for upgrading a streaming application or m
[flink] branch master updated (b1a111c85c1 -> 5be4688e8b8)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/flink.git from b1a111c85c1 [hotfix][tests] Decrease the network memory size to make HybridShuffleITCase more stable add 5be4688e8b8 [FLINK-20767][table planner] Support filter push down on nested fields (#23313) No new revisions were added by this update. Summary of changes: .../utils/ApiExpressionDefaultVisitor.java | 6 ++ .../utils/ResolvedExpressionDefaultVisitor.java| 6 ++ .../expressions/ExpressionDefaultVisitor.java | 5 ++ .../flink/table/expressions/ExpressionVisitor.java | 4 + ...on.java => NestedFieldReferenceExpression.java} | 73 - .../expressions/converter/ExpressionConverter.java | 12 +++ .../plan/abilities/source/FilterPushDownSpec.java | 7 +- .../table/planner/plan/utils/FlinkRexUtil.scala| 3 +- .../planner/plan/utils/RexNodeExtractor.scala | 48 ++- .../table/planner/factories/TestValuesCatalog.java | 2 +- .../planner/factories/TestValuesTableFactory.java | 15 +++- .../PushFilterIntoTableSourceScanRuleTest.java | 63 +++ .../flink/table/planner/utils/FilterUtils.java | 45 +-- .../PushFilterIntoTableSourceScanRuleTest.xml | 92 ++ .../runtime/batch/sql/TableSourceITCase.scala | 38 + .../runtime/stream/sql/TableSourceITCase.scala | 54 + 16 files changed, 421 insertions(+), 52 deletions(-) copy flink-table/flink-table-common/src/main/java/org/apache/flink/table/expressions/{FieldReferenceExpression.java => NestedFieldReferenceExpression.java} (53%)
[flink] branch master updated: [FLINK-30424][DataStream API] Add source operator addSplits log when restore from state
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/master by this push: new 528186b62da [FLINK-30424][DataStream API] Add source operator addSplits log when restore from state 528186b62da is described below commit 528186b62da92ecde0fa308f1df0cc6f95495f4d Author: Ran Tao AuthorDate: Thu Dec 15 13:18:32 2022 +0800 [FLINK-30424][DataStream API] Add source operator addSplits log when restore from state --- .../java/org/apache/flink/streaming/api/operators/SourceOperator.java| 1 + 1 file changed, 1 insertion(+) diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/SourceOperator.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/SourceOperator.java index 817d19dd795..3e962596e66 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/SourceOperator.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/SourceOperator.java @@ -333,6 +333,7 @@ public class SourceOperator extends AbstractStr // restore the state if necessary. final List splits = CollectionUtil.iterableToList(readerState.get()); if (!splits.isEmpty()) { +LOG.info("Restoring state for {} split(s) to reader.", splits.size()); sourceReader.addSplits(splits); }
[flink] branch release-1.15 updated: Revert "[FLINK-26394][checkpoint] Cancel the checkpoint completable future when checkpoint is aborting."
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch release-1.15 in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/release-1.15 by this push: new 0c718666476 Revert "[FLINK-26394][checkpoint] Cancel the checkpoint completable future when checkpoint is aborting." 0c718666476 is described below commit 0c718666476ae469fb825e8fdf362470d7af2488 Author: Jiangjie (Becket) Qin AuthorDate: Thu Apr 14 17:56:59 2022 +0800 Revert "[FLINK-26394][checkpoint] Cancel the checkpoint completable future when checkpoint is aborting." This reverts commit 9fc89a05f128ab645b73687f240fb14b57790fc6 due to a FLINK-27148. --- .../runtime/checkpoint/CheckpointCoordinator.java | 17 ++--- .../runtime/checkpoint/PendingCheckpoint.java | 7 +--- .../checkpoint/CheckpointCoordinatorTest.java | 44 -- .../runtime/checkpoint/PendingCheckpointTest.java | 3 +- 4 files changed, 6 insertions(+), 65 deletions(-) diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java index 2d47d79f063..72a6b7032de 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java @@ -536,8 +536,6 @@ public class CheckpointCoordinator { boolean initializeBaseLocations = !baseLocationsForCheckpointInitialized; baseLocationsForCheckpointInitialized = true; -CompletableFuture masterTriggerCompletionPromise = new CompletableFuture<>(); - final CompletableFuture pendingCheckpointCompletableFuture = checkpointPlanFuture .thenApplyAsync( @@ -562,8 +560,7 @@ public class CheckpointCoordinator { checkpointInfo.f0, request.isPeriodic, checkpointInfo.f1, - request.getOnCompletionFuture(), - masterTriggerCompletionPromise), + request.getOnCompletionFuture()), 
timer); final CompletableFuture coordinatorCheckpointsComplete = @@ -618,12 +615,8 @@ public class CheckpointCoordinator { }, timer); -FutureUtils.forward( -CompletableFuture.allOf(masterStatesComplete, coordinatorCheckpointsComplete), -masterTriggerCompletionPromise); - FutureUtils.assertNoException( -masterTriggerCompletionPromise +CompletableFuture.allOf(masterStatesComplete, coordinatorCheckpointsComplete) .handleAsync( (ignored, throwable) -> { final PendingCheckpoint checkpoint = @@ -785,8 +778,7 @@ public class CheckpointCoordinator { CheckpointPlan checkpointPlan, boolean isPeriodic, long checkpointID, -CompletableFuture onCompletionPromise, -CompletableFuture masterTriggerCompletionPromise) { +CompletableFuture onCompletionPromise) { synchronized (lock) { try { @@ -811,8 +803,7 @@ public class CheckpointCoordinator { masterHooks.keySet(), props, onCompletionPromise, -pendingCheckpointStats, -masterTriggerCompletionPromise); +pendingCheckpointStats); synchronized (lock) { pendingCheckpoints.put(checkpointID, checkpoint); diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/PendingCheckpoint.java b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/PendingCheckpoint.java index 8ca6e22020e..b4bd8eacc5e 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/PendingCheckpoint.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/PendingCheckpoint.java @@ -112,8 +112,6 @@ public class PendingCheckpoint implements Checkpoint { @Nullable private final PendingCheckpointStats pendingCheckpointStats; -private final CompletableFuture masterTriggerCompletionPromise; - /** Target storage location to persist the checkpoint metadata to. */ @Nullable private CheckpointStorageLocation targetLocation; @@ -138,8 +136,7 @@ public class PendingChe
[flink] branch release-1.15 updated: [FLINK-26394][checkpoint] Cancel the checkpoint completable future when checkpoint is aborting.
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch release-1.15 in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/release-1.15 by this push: new 9fc89a05f12 [FLINK-26394][checkpoint] Cancel the checkpoint completable future when checkpoint is aborting. 9fc89a05f12 is described below commit 9fc89a05f128ab645b73687f240fb14b57790fc6 Author: Gen Luo AuthorDate: Wed Apr 6 16:38:39 2022 +0800 [FLINK-26394][checkpoint] Cancel the checkpoint completable future when checkpoint is aborting. --- .../runtime/checkpoint/CheckpointCoordinator.java | 17 +++-- .../runtime/checkpoint/PendingCheckpoint.java | 7 +++- .../checkpoint/CheckpointCoordinatorTest.java | 44 ++ .../runtime/checkpoint/PendingCheckpointTest.java | 3 +- 4 files changed, 65 insertions(+), 6 deletions(-) diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java index 72a6b7032de..2d47d79f063 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java @@ -536,6 +536,8 @@ public class CheckpointCoordinator { boolean initializeBaseLocations = !baseLocationsForCheckpointInitialized; baseLocationsForCheckpointInitialized = true; +CompletableFuture masterTriggerCompletionPromise = new CompletableFuture<>(); + final CompletableFuture pendingCheckpointCompletableFuture = checkpointPlanFuture .thenApplyAsync( @@ -560,7 +562,8 @@ public class CheckpointCoordinator { checkpointInfo.f0, request.isPeriodic, checkpointInfo.f1, - request.getOnCompletionFuture()), + request.getOnCompletionFuture(), + masterTriggerCompletionPromise), timer); final CompletableFuture coordinatorCheckpointsComplete = @@ -615,8 +618,12 @@ public class 
CheckpointCoordinator { }, timer); +FutureUtils.forward( +CompletableFuture.allOf(masterStatesComplete, coordinatorCheckpointsComplete), +masterTriggerCompletionPromise); + FutureUtils.assertNoException( -CompletableFuture.allOf(masterStatesComplete, coordinatorCheckpointsComplete) +masterTriggerCompletionPromise .handleAsync( (ignored, throwable) -> { final PendingCheckpoint checkpoint = @@ -778,7 +785,8 @@ public class CheckpointCoordinator { CheckpointPlan checkpointPlan, boolean isPeriodic, long checkpointID, -CompletableFuture onCompletionPromise) { +CompletableFuture onCompletionPromise, +CompletableFuture masterTriggerCompletionPromise) { synchronized (lock) { try { @@ -803,7 +811,8 @@ public class CheckpointCoordinator { masterHooks.keySet(), props, onCompletionPromise, -pendingCheckpointStats); +pendingCheckpointStats, +masterTriggerCompletionPromise); synchronized (lock) { pendingCheckpoints.put(checkpointID, checkpoint); diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/PendingCheckpoint.java b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/PendingCheckpoint.java index b4bd8eacc5e..8ca6e22020e 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/PendingCheckpoint.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/PendingCheckpoint.java @@ -112,6 +112,8 @@ public class PendingCheckpoint implements Checkpoint { @Nullable private final PendingCheckpointStats pendingCheckpointStats; +private final CompletableFuture masterTriggerCompletionPromise; + /** Target storage location to persist the checkpoint metadata to. */ @Nullable private CheckpointStorageLocation targetLocation; @@ -136,7 +138,8 @@ public class PendingCheckpoint implements Checkpoint { Collection masterStateIdentifiers, CheckpointProperties props, Completa
[flink] branch master updated: [FLINK-26394][checkpoint] Cancel the checkpoint completable future when checkpoint is aborting.
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/master by this push: new 31222b9adf1 [FLINK-26394][checkpoint] Cancel the checkpoint completable future when checkpoint is aborting. 31222b9adf1 is described below commit 31222b9adf1c354b22fd50c587efc16734b18d40 Author: Gen Luo AuthorDate: Wed Apr 6 16:38:39 2022 +0800 [FLINK-26394][checkpoint] Cancel the checkpoint completable future when checkpoint is aborting. --- .../runtime/checkpoint/CheckpointCoordinator.java | 17 +++-- .../runtime/checkpoint/PendingCheckpoint.java | 7 +++- .../checkpoint/CheckpointCoordinatorTest.java | 44 ++ .../runtime/checkpoint/PendingCheckpointTest.java | 3 +- 4 files changed, 65 insertions(+), 6 deletions(-) diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java index 2efc034ca90..6fd6ad19fea 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java @@ -536,6 +536,8 @@ public class CheckpointCoordinator { boolean initializeBaseLocations = !baseLocationsForCheckpointInitialized; baseLocationsForCheckpointInitialized = true; +CompletableFuture masterTriggerCompletionPromise = new CompletableFuture<>(); + final CompletableFuture pendingCheckpointCompletableFuture = checkpointPlanFuture .thenApplyAsync( @@ -560,7 +562,8 @@ public class CheckpointCoordinator { checkpointInfo.f0, request.isPeriodic, checkpointInfo.f1, - request.getOnCompletionFuture()), + request.getOnCompletionFuture(), + masterTriggerCompletionPromise), timer); final CompletableFuture coordinatorCheckpointsComplete = @@ -615,8 +618,12 @@ public class CheckpointCoordinator { }, 
timer); +FutureUtils.forward( +CompletableFuture.allOf(masterStatesComplete, coordinatorCheckpointsComplete), +masterTriggerCompletionPromise); + FutureUtils.assertNoException( -CompletableFuture.allOf(masterStatesComplete, coordinatorCheckpointsComplete) +masterTriggerCompletionPromise .handleAsync( (ignored, throwable) -> { final PendingCheckpoint checkpoint = @@ -778,7 +785,8 @@ public class CheckpointCoordinator { CheckpointPlan checkpointPlan, boolean isPeriodic, long checkpointID, -CompletableFuture onCompletionPromise) { +CompletableFuture onCompletionPromise, +CompletableFuture masterTriggerCompletionPromise) { synchronized (lock) { try { @@ -803,7 +811,8 @@ public class CheckpointCoordinator { masterHooks.keySet(), props, onCompletionPromise, -pendingCheckpointStats); +pendingCheckpointStats, +masterTriggerCompletionPromise); synchronized (lock) { pendingCheckpoints.put(checkpointID, checkpoint); diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/PendingCheckpoint.java b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/PendingCheckpoint.java index b4bd8eacc5e..8ca6e22020e 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/PendingCheckpoint.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/PendingCheckpoint.java @@ -112,6 +112,8 @@ public class PendingCheckpoint implements Checkpoint { @Nullable private final PendingCheckpointStats pendingCheckpointStats; +private final CompletableFuture masterTriggerCompletionPromise; + /** Target storage location to persist the checkpoint metadata to. */ @Nullable private CheckpointStorageLocation targetLocation; @@ -136,7 +138,8 @@ public class PendingCheckpoint implements Checkpoint { Collection masterStateIdentifiers, CheckpointProperties props, CompletableFuture onCompleti
[flink] branch release-1.14 updated: [FLINK-26723][runtime]fix the error message thrown by SourceCoordinatorContext
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch release-1.14 in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/release-1.14 by this push: new 36136bc [FLINK-26723][runtime]fix the error message thrown by SourceCoordinatorContext 36136bc is described below commit 36136bc2bc33f0dc0add1303af949f681a7e42cd Author: zoucao AuthorDate: Sat Mar 19 00:45:19 2022 +0800 [FLINK-26723][runtime]fix the error message thrown by SourceCoordinatorContext --- .../source/coordinator/SourceCoordinatorContext.java | 19 +++ .../coordinator/SourceCoordinatorContextTest.java | 2 +- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java index c07f62d..3c05fe4 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java @@ -178,14 +178,17 @@ public class SourceCoordinatorContext callInCoordinatorThread( () -> { // Ensure all the subtasks in the assignment have registered. 
-for (Integer subtaskId : assignment.assignment().keySet()) { -if (!registeredReaders.containsKey(subtaskId)) { -throw new IllegalArgumentException( -String.format( -"Cannot assign splits %s to subtask %d because the subtask is not registered.", -registeredReaders.get(subtaskId), subtaskId)); -} -} +assignment +.assignment() +.forEach( +(id, splits) -> { +if (!registeredReaders.containsKey(id)) { +throw new IllegalArgumentException( +String.format( +"Cannot assign splits %s to subtask %d because the subtask is not registered.", +splits, id)); +} +}); assignmentTracker.recordSplitAssignment(assignment); assignment diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContextTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContextTest.java index d15f28e..380e65c 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContextTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContextTest.java @@ -153,7 +153,7 @@ public class SourceCoordinatorContextTest extends SourceCoordinatorTestBase { } }, "assignSplits() should fail to assign the splits to a reader that is not registered.", -"Cannot assign splits"); +"Cannot assign splits " + splitsAssignment.assignment().get(0)); } @Test
[flink] branch release-1.15 updated: [FLINK-26723][runtime]fix the error message thrown by SourceCoordinatorContext
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch release-1.15 in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/release-1.15 by this push: new e3992ab [FLINK-26723][runtime]fix the error message thrown by SourceCoordinatorContext e3992ab is described below commit e3992ab17a4515076c287b8d12003e1449718df1 Author: zoucao AuthorDate: Sat Mar 19 00:45:19 2022 +0800 [FLINK-26723][runtime]fix the error message thrown by SourceCoordinatorContext --- .../source/coordinator/SourceCoordinatorContext.java | 19 +++ .../coordinator/SourceCoordinatorContextTest.java | 2 +- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java index 5e560fa..4434a18 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java @@ -195,14 +195,17 @@ public class SourceCoordinatorContext callInCoordinatorThread( () -> { // Ensure all the subtasks in the assignment have registered. 
-for (Integer subtaskId : assignment.assignment().keySet()) { -if (!registeredReaders.containsKey(subtaskId)) { -throw new IllegalArgumentException( -String.format( -"Cannot assign splits %s to subtask %d because the subtask is not registered.", -registeredReaders.get(subtaskId), subtaskId)); -} -} +assignment +.assignment() +.forEach( +(id, splits) -> { +if (!registeredReaders.containsKey(id)) { +throw new IllegalArgumentException( +String.format( +"Cannot assign splits %s to subtask %d because the subtask is not registered.", +splits, id)); +} +}); assignmentTracker.recordSplitAssignment(assignment); assignment diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContextTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContextTest.java index d15f28e..380e65c 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContextTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContextTest.java @@ -153,7 +153,7 @@ public class SourceCoordinatorContextTest extends SourceCoordinatorTestBase { } }, "assignSplits() should fail to assign the splits to a reader that is not registered.", -"Cannot assign splits"); +"Cannot assign splits " + splitsAssignment.assignment().get(0)); } @Test
[flink] branch master updated: [FLINK-26723][runtime]fix the error message thrown by SourceCoordinatorContext
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/master by this push: new da79677 [FLINK-26723][runtime]fix the error message thrown by SourceCoordinatorContext da79677 is described below commit da79677c9afcd6703ee5cd513740981530916f2f Author: zoucao AuthorDate: Sat Mar 19 00:45:19 2022 +0800 [FLINK-26723][runtime]fix the error message thrown by SourceCoordinatorContext --- .../source/coordinator/SourceCoordinatorContext.java | 19 +++ .../coordinator/SourceCoordinatorContextTest.java | 2 +- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java index 5e560fa..4434a18 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java @@ -195,14 +195,17 @@ public class SourceCoordinatorContext callInCoordinatorThread( () -> { // Ensure all the subtasks in the assignment have registered. 
-for (Integer subtaskId : assignment.assignment().keySet()) { -if (!registeredReaders.containsKey(subtaskId)) { -throw new IllegalArgumentException( -String.format( -"Cannot assign splits %s to subtask %d because the subtask is not registered.", -registeredReaders.get(subtaskId), subtaskId)); -} -} +assignment +.assignment() +.forEach( +(id, splits) -> { +if (!registeredReaders.containsKey(id)) { +throw new IllegalArgumentException( +String.format( +"Cannot assign splits %s to subtask %d because the subtask is not registered.", +splits, id)); +} +}); assignmentTracker.recordSplitAssignment(assignment); assignment diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContextTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContextTest.java index d15f28e..380e65c 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContextTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContextTest.java @@ -153,7 +153,7 @@ public class SourceCoordinatorContextTest extends SourceCoordinatorTestBase { } }, "assignSplits() should fail to assign the splits to a reader that is not registered.", -"Cannot assign splits"); +"Cannot assign splits " + splitsAssignment.assignment().get(0)); } @Test
[flink] branch release-1.13 updated (965774c -> 6fb7807)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch release-1.13 in repository https://gitbox.apache.org/repos/asf/flink.git. from 965774c [FLINK-25851][cassandra][tests] Inject dynamic table name into Pojos add e2a8990 [FLINK-24607] Let Deadline handle duration overflow. add 0774fed [FLINK-24607] Add util methods to shutdown executor services. add 6fb7807 [FLINK-24607] Make OperatorCoordinator closure more robust. No new revisions were added by this update. Summary of changes: .../org/apache/flink/api/common/time/Deadline.java | 27 +++- .../coordination/ComponentClosingUtils.java| 95 +++- .../RecreateOnResetOperatorCoordinator.java| 12 +- .../source/coordinator/ExecutorNotifier.java | 21 +-- .../source/coordinator/SourceCoordinator.java | 23 +-- .../coordinator/SourceCoordinatorContext.java | 20 +-- .../coordinator/SourceCoordinatorProvider.java | 13 +- .../coordination/ComponentClosingUtilsTest.java| 172 + .../source/coordinator/ExecutorNotifierTest.java | 14 +- .../source/coordinator/SourceCoordinatorTest.java | 62 +++- .../coordinator/SourceCoordinatorTestBase.java | 9 +- .../ManuallyTriggeredScheduledExecutorService.java | 2 +- 12 files changed, 382 insertions(+), 88 deletions(-) create mode 100644 flink-runtime/src/test/java/org/apache/flink/runtime/operators/coordination/ComponentClosingUtilsTest.java
[flink] 03/03: [FLINK-24607] Make OperatorCoordinator closure more robust.
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch release-1.14 in repository https://gitbox.apache.org/repos/asf/flink.git commit 0a76d632f33d9a69df87457a63043bd7f609ed40 Author: Jiangjie (Becket) Qin AuthorDate: Mon Feb 21 17:43:19 2022 +0800 [FLINK-24607] Make OperatorCoordinator closure more robust. --- .../RecreateOnResetOperatorCoordinator.java| 12 - .../source/coordinator/ExecutorNotifier.java | 21 +--- .../source/coordinator/SourceCoordinator.java | 23 ++-- .../coordinator/SourceCoordinatorContext.java | 20 +++ .../coordinator/SourceCoordinatorProvider.java | 13 + .../coordination/ComponentClosingUtilsTest.java| 3 +- .../source/coordinator/ExecutorNotifierTest.java | 14 ++--- .../source/coordinator/SourceCoordinatorTest.java | 62 -- .../coordinator/SourceCoordinatorTestBase.java | 9 ++-- 9 files changed, 96 insertions(+), 81 deletions(-) diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/RecreateOnResetOperatorCoordinator.java b/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/RecreateOnResetOperatorCoordinator.java index 0d3d5f8..7d3d3ff 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/RecreateOnResetOperatorCoordinator.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/RecreateOnResetOperatorCoordinator.java @@ -128,8 +128,16 @@ public class RecreateOnResetOperatorCoordinator implements OperatorCoordinator { // capture the status whether the coordinator was started when this method was called final boolean wasStarted = this.started; -closingFuture.thenRun( -() -> { +closingFuture.whenComplete( +(ignored, e) -> { +if (e != null) { +LOG.warn( +String.format( +"Received exception when closing " ++ "operator coordinator for %s.", +oldCoordinator.operatorId), +e); +} if (!closed) { // The previous coordinator has closed. Create a new one. 
newCoordinator.createNewInternalCoordinator(context, provider); diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/ExecutorNotifier.java b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/ExecutorNotifier.java index e52f6cd..fe4cf8a 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/ExecutorNotifier.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/ExecutorNotifier.java @@ -25,23 +25,20 @@ import java.util.concurrent.Callable; import java.util.concurrent.Executor; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.BiConsumer; /** * This class is used to coordinate between two components, where one component has an executor * following the mailbox model and the other component notifies it when needed. */ -public class ExecutorNotifier implements AutoCloseable { +public class ExecutorNotifier { private static final Logger LOG = LoggerFactory.getLogger(ExecutorNotifier.class); private final ScheduledExecutorService workerExecutor; private final Executor executorToNotify; -private final AtomicBoolean closed; public ExecutorNotifier(ScheduledExecutorService workerExecutor, Executor executorToNotify) { this.executorToNotify = executorToNotify; this.workerExecutor = workerExecutor; -this.closed = new AtomicBoolean(false); } /** @@ -140,20 +137,4 @@ public class ExecutorNotifier implements AutoCloseable { periodMs, TimeUnit.MILLISECONDS); } - -/** - * Close the executor notifier. This is a blocking call which waits for all the async calls to - * finish before it returns. - * - * @throws InterruptedException when interrupted during closure. 
- */ -public void close() throws InterruptedException { -if (!closed.compareAndSet(false, true)) { -LOG.debug("The executor notifier has been closed."); -return; -} -// Shutdown the worker executor, so no more worker tasks can run. -workerExecutor.shutdownNow(); -workerExecutor.awaitTermination(Long.MAX_VALUE, TimeUnit.SECONDS); -} } diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinator.java b/flink-runtime/src/main/
[flink] 02/03: [FLINK-24607] Add util methods to shutdown executor services.
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch release-1.14 in repository https://gitbox.apache.org/repos/asf/flink.git commit 6936ce6848e0544cb3275aa841ce40cac308540d Author: Jiangjie (Becket) Qin AuthorDate: Fri Feb 11 16:13:50 2022 +0800 [FLINK-24607] Add util methods to shutdown executor services. --- .../coordination/ComponentClosingUtils.java| 95 ++- .../coordination/ComponentClosingUtilsTest.java| 173 + .../ManuallyTriggeredScheduledExecutorService.java | 2 +- 3 files changed, 266 insertions(+), 4 deletions(-) diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/ComponentClosingUtils.java b/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/ComponentClosingUtils.java index deed49e..4bfe302 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/ComponentClosingUtils.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/ComponentClosingUtils.java @@ -18,16 +18,22 @@ limitations under the License. package org.apache.flink.runtime.operators.coordination; +import org.apache.flink.annotation.VisibleForTesting; +import org.apache.flink.api.common.time.Deadline; +import org.apache.flink.util.clock.Clock; +import org.apache.flink.util.clock.SystemClock; import org.apache.flink.util.concurrent.FutureUtils; import org.apache.flink.util.function.ThrowingRunnable; import java.time.Duration; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; /** A util class to help with a clean component shutdown. */ public class ComponentClosingUtils { +private static Clock clock = SystemClock.getInstance(); /** Utility class, not meant to be instantiated. 
*/ private ComponentClosingUtils() {} @@ -95,8 +101,91 @@ public class ComponentClosingUtils { return future; } -static void abortThread(Thread t) { -// the abortion strategy is pretty simple here... -t.interrupt(); +/** + * A util method that tries to shut down an {@link ExecutorService} elegantly within the given + * timeout. If the executor has not been shut down before it hits timeout or the thread is + * interrupted when waiting for the termination, a forceful shutdown will be attempted on the + * executor. + * + * @param executor the {@link ExecutorService} to shut down. + * @param timeout the timeout duration. + * @return true if the given executor has been successfully closed, false otherwise. + */ +@SuppressWarnings("ResultOfMethodCallIgnored") +public static boolean tryShutdownExecutorElegantly(ExecutorService executor, Duration timeout) { +try { +executor.shutdown(); +executor.awaitTermination(timeout.toMillis(), TimeUnit.MILLISECONDS); +} catch (InterruptedException ie) { +// Let it go. +} +if (!executor.isTerminated()) { +shutdownExecutorForcefully(executor, Duration.ZERO, false); +} +return executor.isTerminated(); +} + +/** + * Shutdown the given executor forcefully within the given timeout. The method returns if it is + * interrupted. + * + * @param executor the executor to shut down. + * @param timeout the timeout duration. + * @return true if the given executor is terminated, false otherwise. + */ +public static boolean shutdownExecutorForcefully(ExecutorService executor, Duration timeout) { +return shutdownExecutorForcefully(executor, timeout, true); +} + +/** + * Shutdown the given executor forcefully within the given timeout. + * + * @param executor the executor to shut down. + * @param timeout the timeout duration. + * @param interruptable when set to true, the method can be interrupted. Each interruption to + * the thread results in another {@code ExecutorService.shutdownNow()} call to the shutting + * down executor. 
+ * @return true if the given executor is terminated, false otherwise. + */ +public static boolean shutdownExecutorForcefully( +ExecutorService executor, Duration timeout, boolean interruptable) { +Deadline deadline = Deadline.fromNowWithClock(timeout, clock); +boolean isInterrupted = false; +do { +executor.shutdownNow(); +try { +executor.awaitTermination(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS); +} catch (InterruptedException e) { +isInterrupted = interruptable; +} +} while (!isInterrupted && deadline.hasTimeLeft() && !executor.isTerminated()); +return executor.isTerminated(); +}
[flink] branch release-1.14 updated (1ff23ac -> 0a76d63)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch release-1.14 in repository https://gitbox.apache.org/repos/asf/flink.git. from 1ff23ac [FLINK-25851][cassandra][tests] Inject dynamic table name into Pojos new 89046bc [FLINK-24607] Let Deadline handle duration overflow. new 6936ce6 [FLINK-24607] Add util methods to shutdown executor services. new 0a76d63 [FLINK-24607] Make OperatorCoordinator closure more robust. The 3 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .../org/apache/flink/api/common/time/Deadline.java | 27 +++- .../coordination/ComponentClosingUtils.java| 95 +++- .../RecreateOnResetOperatorCoordinator.java| 12 +- .../source/coordinator/ExecutorNotifier.java | 21 +-- .../source/coordinator/SourceCoordinator.java | 23 +-- .../coordinator/SourceCoordinatorContext.java | 20 +-- .../coordinator/SourceCoordinatorProvider.java | 13 +- .../coordination/ComponentClosingUtilsTest.java| 172 + .../source/coordinator/ExecutorNotifierTest.java | 14 +- .../source/coordinator/SourceCoordinatorTest.java | 62 +++- .../coordinator/SourceCoordinatorTestBase.java | 9 +- .../ManuallyTriggeredScheduledExecutorService.java | 2 +- 12 files changed, 382 insertions(+), 88 deletions(-) create mode 100644 flink-runtime/src/test/java/org/apache/flink/runtime/operators/coordination/ComponentClosingUtilsTest.java
[flink] 01/03: [FLINK-24607] Let Deadline handle duration overflow.
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch release-1.14 in repository https://gitbox.apache.org/repos/asf/flink.git commit 89046bc690d502e42212e01cfe28c737c0b2d3c9 Author: Jiangjie (Becket) Qin AuthorDate: Thu Feb 17 20:48:58 2022 +0800 [FLINK-24607] Let Deadline handle duration overflow. --- .../org/apache/flink/api/common/time/Deadline.java | 27 ++ 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/flink-core/src/main/java/org/apache/flink/api/common/time/Deadline.java b/flink-core/src/main/java/org/apache/flink/api/common/time/Deadline.java index 641a46b..bf7dba2 100644 --- a/flink-core/src/main/java/org/apache/flink/api/common/time/Deadline.java +++ b/flink-core/src/main/java/org/apache/flink/api/common/time/Deadline.java @@ -42,7 +42,7 @@ public class Deadline { } public Deadline plus(Duration other) { -return new Deadline(Math.addExact(timeNanos, other.toNanos()), this.clock); +return new Deadline(addHandlingOverflow(timeNanos, other.toNanos()), this.clock); } /** @@ -72,9 +72,12 @@ public class Deadline { return !isOverdue(); } -/** Determines whether the deadline is in the past, i.e. whether the time left is negative. */ +/** + * Determines whether the deadline is in the past, i.e. whether the time left is zero or + * negative. + */ public boolean isOverdue() { -return timeNanos < clock.relativeTimeNanos(); +return timeNanos <= clock.relativeTimeNanos(); } // @@ -92,7 +95,8 @@ public class Deadline { /** Constructs a Deadline that is a given duration after now. */ public static Deadline fromNow(Duration duration) { return new Deadline( -Math.addExact(System.nanoTime(), duration.toNanos()), SystemClock.getInstance()); +addHandlingOverflow(System.nanoTime(), duration.toNanos()), +SystemClock.getInstance()); } /** @@ -103,11 +107,24 @@ public class Deadline { * @param clock Time provider for this deadline. 
*/ public static Deadline fromNowWithClock(Duration duration, Clock clock) { -return new Deadline(Math.addExact(clock.relativeTimeNanos(), duration.toNanos()), clock); +return new Deadline( +addHandlingOverflow(clock.relativeTimeNanos(), duration.toNanos()), clock); } @Override public String toString() { return LocalDateTime.now().plus(timeLeft()).toString(); } + +// private helper methods + +private static long addHandlingOverflow(long x, long y) { +// The logic is copied over from Math.addExact() in order to handle overflows. +long r = x + y; +if (((x ^ r) & (y ^ r)) < 0) { +return Long.MAX_VALUE; +} else { +return x + y; +} +} }
[flink] 02/03: [FLINK-24607] Add util methods to shutdown executor services.
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git commit a17655ebe7e3b2870b7616f1c2b640fcb3154187 Author: Jiangjie (Becket) Qin AuthorDate: Fri Feb 11 16:13:50 2022 +0800 [FLINK-24607] Add util methods to shutdown executor services. --- .../coordination/ComponentClosingUtils.java| 95 ++- .../coordination/ComponentClosingUtilsTest.java| 173 + .../ManuallyTriggeredScheduledExecutorService.java | 2 +- 3 files changed, 266 insertions(+), 4 deletions(-) diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/ComponentClosingUtils.java b/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/ComponentClosingUtils.java index deed49e..4bfe302 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/ComponentClosingUtils.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/ComponentClosingUtils.java @@ -18,16 +18,22 @@ limitations under the License. package org.apache.flink.runtime.operators.coordination; +import org.apache.flink.annotation.VisibleForTesting; +import org.apache.flink.api.common.time.Deadline; +import org.apache.flink.util.clock.Clock; +import org.apache.flink.util.clock.SystemClock; import org.apache.flink.util.concurrent.FutureUtils; import org.apache.flink.util.function.ThrowingRunnable; import java.time.Duration; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; /** A util class to help with a clean component shutdown. */ public class ComponentClosingUtils { +private static Clock clock = SystemClock.getInstance(); /** Utility class, not meant to be instantiated. 
*/ private ComponentClosingUtils() {} @@ -95,8 +101,91 @@ public class ComponentClosingUtils { return future; } -static void abortThread(Thread t) { -// the abortion strategy is pretty simple here... -t.interrupt(); +/** + * A util method that tries to shut down an {@link ExecutorService} elegantly within the given + * timeout. If the executor has not been shut down before it hits timeout or the thread is + * interrupted when waiting for the termination, a forceful shutdown will be attempted on the + * executor. + * + * @param executor the {@link ExecutorService} to shut down. + * @param timeout the timeout duration. + * @return true if the given executor has been successfully closed, false otherwise. + */ +@SuppressWarnings("ResultOfMethodCallIgnored") +public static boolean tryShutdownExecutorElegantly(ExecutorService executor, Duration timeout) { +try { +executor.shutdown(); +executor.awaitTermination(timeout.toMillis(), TimeUnit.MILLISECONDS); +} catch (InterruptedException ie) { +// Let it go. +} +if (!executor.isTerminated()) { +shutdownExecutorForcefully(executor, Duration.ZERO, false); +} +return executor.isTerminated(); +} + +/** + * Shutdown the given executor forcefully within the given timeout. The method returns if it is + * interrupted. + * + * @param executor the executor to shut down. + * @param timeout the timeout duration. + * @return true if the given executor is terminated, false otherwise. + */ +public static boolean shutdownExecutorForcefully(ExecutorService executor, Duration timeout) { +return shutdownExecutorForcefully(executor, timeout, true); +} + +/** + * Shutdown the given executor forcefully within the given timeout. + * + * @param executor the executor to shut down. + * @param timeout the timeout duration. + * @param interruptable when set to true, the method can be interrupted. Each interruption to + * the thread results in another {@code ExecutorService.shutdownNow()} call to the shutting + * down executor. 
+ * @return true if the given executor is terminated, false otherwise. + */ +public static boolean shutdownExecutorForcefully( +ExecutorService executor, Duration timeout, boolean interruptable) { +Deadline deadline = Deadline.fromNowWithClock(timeout, clock); +boolean isInterrupted = false; +do { +executor.shutdownNow(); +try { +executor.awaitTermination(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS); +} catch (InterruptedException e) { +isInterrupted = interruptable; +} +} while (!isInterrupted && deadline.hasTimeLeft() && !executor.isTerminated()); +return executor.isTerminated(); +}
[flink] 01/03: [FLINK-24607] Let Deadline handle duration overflow.
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git commit 10c666be7000e4a370a449f14d07e9b2f7a6c71b Author: Jiangjie (Becket) Qin AuthorDate: Thu Feb 17 20:48:58 2022 +0800 [FLINK-24607] Let Deadline handle duration overflow. --- .../org/apache/flink/api/common/time/Deadline.java | 27 ++ 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/flink-core/src/main/java/org/apache/flink/api/common/time/Deadline.java b/flink-core/src/main/java/org/apache/flink/api/common/time/Deadline.java index 641a46b..bf7dba2 100644 --- a/flink-core/src/main/java/org/apache/flink/api/common/time/Deadline.java +++ b/flink-core/src/main/java/org/apache/flink/api/common/time/Deadline.java @@ -42,7 +42,7 @@ public class Deadline { } public Deadline plus(Duration other) { -return new Deadline(Math.addExact(timeNanos, other.toNanos()), this.clock); +return new Deadline(addHandlingOverflow(timeNanos, other.toNanos()), this.clock); } /** @@ -72,9 +72,12 @@ public class Deadline { return !isOverdue(); } -/** Determines whether the deadline is in the past, i.e. whether the time left is negative. */ +/** + * Determines whether the deadline is in the past, i.e. whether the time left is zero or + * negative. + */ public boolean isOverdue() { -return timeNanos < clock.relativeTimeNanos(); +return timeNanos <= clock.relativeTimeNanos(); } // @@ -92,7 +95,8 @@ public class Deadline { /** Constructs a Deadline that is a given duration after now. */ public static Deadline fromNow(Duration duration) { return new Deadline( -Math.addExact(System.nanoTime(), duration.toNanos()), SystemClock.getInstance()); +addHandlingOverflow(System.nanoTime(), duration.toNanos()), +SystemClock.getInstance()); } /** @@ -103,11 +107,24 @@ public class Deadline { * @param clock Time provider for this deadline. 
*/ public static Deadline fromNowWithClock(Duration duration, Clock clock) { -return new Deadline(Math.addExact(clock.relativeTimeNanos(), duration.toNanos()), clock); +return new Deadline( +addHandlingOverflow(clock.relativeTimeNanos(), duration.toNanos()), clock); } @Override public String toString() { return LocalDateTime.now().plus(timeLeft()).toString(); } + +// private helper methods + +private static long addHandlingOverflow(long x, long y) { +// The logic is copied over from Math.addExact() in order to handle overflows. +long r = x + y; +if (((x ^ r) & (y ^ r)) < 0) { +return Long.MAX_VALUE; +} else { +return x + y; +} +} }
[flink] 03/03: [FLINK-24607] Make OperatorCoordinator closure more robust.
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git commit 0f19c2472c54aac97e4067f5398731ab90036d1a Author: Jiangjie (Becket) Qin AuthorDate: Thu Feb 10 15:13:55 2022 +0800 [FLINK-24607] Make OperatorCoordinator closure more robust. --- .../RecreateOnResetOperatorCoordinator.java| 12 - .../source/coordinator/ExecutorNotifier.java | 21 +--- .../source/coordinator/SourceCoordinator.java | 36 - .../coordinator/SourceCoordinatorContext.java | 24 + .../coordinator/SourceCoordinatorProvider.java | 11 +--- .../source/coordinator/ExecutorNotifierTest.java | 14 ++--- .../source/coordinator/SourceCoordinatorTest.java | 63 -- .../coordinator/SourceCoordinatorTestBase.java | 1 - 8 files changed, 102 insertions(+), 80 deletions(-) diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/RecreateOnResetOperatorCoordinator.java b/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/RecreateOnResetOperatorCoordinator.java index 5c660d0..ffab3ff 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/RecreateOnResetOperatorCoordinator.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/RecreateOnResetOperatorCoordinator.java @@ -128,8 +128,16 @@ public class RecreateOnResetOperatorCoordinator implements OperatorCoordinator { // capture the status whether the coordinator was started when this method was called final boolean wasStarted = this.started; -closingFuture.thenRun( -() -> { +closingFuture.whenComplete( +(ignored, e) -> { +if (e != null) { +LOG.warn( +String.format( +"Received exception when closing " ++ "operator coordinator for %s.", +oldCoordinator.operatorId), +e); +} if (!closed) { // The previous coordinator has closed. Create a new one. 
newCoordinator.createNewInternalCoordinator(context, provider); diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/ExecutorNotifier.java b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/ExecutorNotifier.java index e52f6cd..fe4cf8a 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/ExecutorNotifier.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/ExecutorNotifier.java @@ -25,23 +25,20 @@ import java.util.concurrent.Callable; import java.util.concurrent.Executor; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.BiConsumer; /** * This class is used to coordinate between two components, where one component has an executor * following the mailbox model and the other component notifies it when needed. */ -public class ExecutorNotifier implements AutoCloseable { +public class ExecutorNotifier { private static final Logger LOG = LoggerFactory.getLogger(ExecutorNotifier.class); private final ScheduledExecutorService workerExecutor; private final Executor executorToNotify; -private final AtomicBoolean closed; public ExecutorNotifier(ScheduledExecutorService workerExecutor, Executor executorToNotify) { this.executorToNotify = executorToNotify; this.workerExecutor = workerExecutor; -this.closed = new AtomicBoolean(false); } /** @@ -140,20 +137,4 @@ public class ExecutorNotifier implements AutoCloseable { periodMs, TimeUnit.MILLISECONDS); } - -/** - * Close the executor notifier. This is a blocking call which waits for all the async calls to - * finish before it returns. - * - * @throws InterruptedException when interrupted during closure. 
- */ -public void close() throws InterruptedException { -if (!closed.compareAndSet(false, true)) { -LOG.debug("The executor notifier has been closed."); -return; -} -// Shutdown the worker executor, so no more worker tasks can run. -workerExecutor.shutdownNow(); -workerExecutor.awaitTermination(Long.MAX_VALUE, TimeUnit.SECONDS); -} } diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinator.java b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoo
[flink] branch master updated (faf9a8a -> 0f19c24)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/flink.git. from faf9a8a [FLINK-26016][hive] Fix FileSystemLookupFunction does not produce correct results when hive table uses columnar storage new 10c666b [FLINK-24607] Let Deadline handle duration overflow. new a17655e [FLINK-24607] Add util methods to shutdown executor services. new 0f19c24 [FLINK-24607] Make OperatorCoordinator closure more robust. The 3 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .../org/apache/flink/api/common/time/Deadline.java | 27 +++- .../coordination/ComponentClosingUtils.java| 95 ++- .../RecreateOnResetOperatorCoordinator.java| 12 +- .../source/coordinator/ExecutorNotifier.java | 21 +-- .../source/coordinator/SourceCoordinator.java | 36 ++--- .../coordinator/SourceCoordinatorContext.java | 24 +-- .../coordinator/SourceCoordinatorProvider.java | 11 +- .../coordination/ComponentClosingUtilsTest.java| 173 + .../source/coordinator/ExecutorNotifierTest.java | 14 +- .../source/coordinator/SourceCoordinatorTest.java | 63 +++- .../coordinator/SourceCoordinatorTestBase.java | 1 - .../ManuallyTriggeredScheduledExecutorService.java | 2 +- 12 files changed, 390 insertions(+), 89 deletions(-) create mode 100644 flink-runtime/src/test/java/org/apache/flink/runtime/operators/coordination/ComponentClosingUtilsTest.java
[flink-web] branch asf-site updated: Make minor change to the 'Apache Flink ML 2.0.0 Release Announcement' (#498)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/flink-web.git The following commit(s) were added to refs/heads/asf-site by this push: new f9d8660 Make minor change to the 'Apache Flink ML 2.0.0 Release Announcement' (#498) f9d8660 is described below commit f9d8660204aaebea2b34874360d2aa803fe7f88b Author: Dong Lin AuthorDate: Thu Jan 13 10:57:38 2022 +0800 Make minor change to the 'Apache Flink ML 2.0.0 Release Announcement' (#498) --- _posts/2022-01-07-release-ml-2.0.0.md | 14 -- 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/_posts/2022-01-07-release-ml-2.0.0.md b/_posts/2022-01-07-release-ml-2.0.0.md index 4cbea98..1468d19 100644 --- a/_posts/2022-01-07-release-ml-2.0.0.md +++ b/_posts/2022-01-07-release-ml-2.0.0.md @@ -150,12 +150,14 @@ TensorFlow program). ## Algorithm Library Now that the Flink ML API re-design is done, we started the initiative to add -off-the-shelf algorithms in Flink ML. As part of this initiative, we borrowed -ideas from the [Alink](https://github.com/alibaba/alink) project, and worked -closely with developers of the Alink project to design the new Flink ML APIs, -refactor, optimize and migrate algorithms from Alink to Flink. Our long-term -goal is to provide a library of performant algorithms that are easy to use, -debug and customize for your needs. +off-the-shelf algorithms in Flink ML. The release of Flink-ML 2.0.0 is closely +related to project Alink - an Apache Flink ecosystem project open sourced by +Alibaba. The connection between the Flink community and developers of the Alink +project dates back to 2017. The project Alink developers have a significant +contribution in designing the new Flink ML APIs, refactoring, optimizing and +migrating algorithms from Alink to Flink. Our long-term goal is to provide a +library of performant algorithms that are easy to use, debug and customize for +your needs. 
We have implemented five algorithms in this release, i.e. logistic regression, k-means, k-nearest neighbors, naive bayes and one-hot encoder. For now these
[flink-table-store] 01/01: Initial Commit. Add .asf.yaml
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink-table-store.git commit a249ebac66d00f664d257551029da9b1beddaf1f Author: Jiangjie (Becket) Qin AuthorDate: Wed Jan 12 11:33:21 2022 +0800 Initial Commit. Add .asf.yaml --- .asf.yaml | 5 + 1 file changed, 5 insertions(+) diff --git a/.asf.yaml b/.asf.yaml new file mode 100644 index 000..82eef0b --- /dev/null +++ b/.asf.yaml @@ -0,0 +1,5 @@ +notifications: + commits: commits@flink.apache.org + issues: iss...@flink.apache.org + pullrequests: iss...@flink.apache.org + jira_options: link label
[flink-table-store] branch master created (now a249eba)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/flink-table-store.git. at a249eba Initial Commit. Add .asf.yaml This branch includes the following new commits: new a249eba Initial Commit. Add .asf.yaml The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference.
[flink] branch release-1.14 updated: [FLINK-25132][connector/kafka] Move record deserializing from SplitFetcher to RecordEmitter to support object-reusing deserializer
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch release-1.14 in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/release-1.14 by this push: new ebbf772 [FLINK-25132][connector/kafka] Move record deserializing from SplitFetcher to RecordEmitter to support object-reusing deserializer ebbf772 is described below commit ebbf772ea287ee987f5eb628ad2e395895b312aa Author: Qingsheng Ren AuthorDate: Thu Dec 2 08:51:26 2021 +0800 [FLINK-25132][connector/kafka] Move record deserializing from SplitFetcher to RecordEmitter to support object-reusing deserializer --- .../flink/connector/kafka/source/KafkaSource.java | 20 +-- .../source/reader/KafkaPartitionSplitReader.java | 186 +++-- .../kafka/source/reader/KafkaRecordEmitter.java| 50 +- .../kafka/source/reader/KafkaSourceReader.java | 14 +- .../reader/fetcher/KafkaSourceFetcherManager.java | 21 ++- .../connector/kafka/source/KafkaSourceITCase.java | 65 --- .../reader/KafkaPartitionSplitReaderTest.java | 53 +++--- 7 files changed, 201 insertions(+), 208 deletions(-) diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java index 6df7d2f..400e803 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java @@ -27,7 +27,6 @@ import org.apache.flink.api.connector.source.SourceReader; import org.apache.flink.api.connector.source.SourceReaderContext; import org.apache.flink.api.connector.source.SplitEnumerator; import org.apache.flink.api.connector.source.SplitEnumeratorContext; -import org.apache.flink.api.java.tuple.Tuple3; import 
org.apache.flink.api.java.typeutils.ResultTypeQueryable; import org.apache.flink.configuration.Configuration; import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds; @@ -49,6 +48,8 @@ import org.apache.flink.core.io.SimpleVersionedSerializer; import org.apache.flink.metrics.MetricGroup; import org.apache.flink.util.UserCodeClassLoader; +import org.apache.kafka.clients.consumer.ConsumerRecord; + import javax.annotation.Nullable; import java.io.IOException; @@ -131,8 +132,8 @@ public class KafkaSource SourceReader createReader( SourceReaderContext readerContext, Consumer> splitFinishedHook) throws Exception { -FutureCompletingBlockingQueue>> elementsQueue = -new FutureCompletingBlockingQueue<>(); + FutureCompletingBlockingQueue>> +elementsQueue = new FutureCompletingBlockingQueue<>(); deserializationSchema.open( new DeserializationSchema.InitializationContext() { @Override @@ -148,18 +149,13 @@ public class KafkaSource final KafkaSourceReaderMetrics kafkaSourceReaderMetrics = new KafkaSourceReaderMetrics(readerContext.metricGroup()); -Supplier> splitReaderSupplier = -() -> -new KafkaPartitionSplitReader<>( -props, -deserializationSchema, -readerContext, -kafkaSourceReaderMetrics); -KafkaRecordEmitter recordEmitter = new KafkaRecordEmitter<>(); +Supplier splitReaderSupplier = +() -> new KafkaPartitionSplitReader(props, readerContext, kafkaSourceReaderMetrics); +KafkaRecordEmitter recordEmitter = new KafkaRecordEmitter<>(deserializationSchema); return new KafkaSourceReader<>( elementsQueue, -new KafkaSourceFetcherManager<>( +new KafkaSourceFetcherManager( elementsQueue, splitReaderSupplier::get, splitFinishedHook), recordEmitter, toConfiguration(props), diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReader.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReader.java index d048230..ebadef3 
100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReader.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReader.java @@ -19,16 +19,13 @@ package org.apache.flink.connector.kafka.source.reader; import
[flink] branch master updated (f191bec -> 2b1a9de)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/flink.git. from f191bec [hotfix][checkpoint] Fix the wrong parameters due to base code change in StreamTaskFinalCheckpointsTest add 2b1a9de [FLINK-25132][connector/kafka] Move record deserializing from SplitFetcher to RecordEmitter to support object-reusing deserializer No new revisions were added by this update. Summary of changes: .../flink/connector/kafka/source/KafkaSource.java | 20 +-- .../source/reader/KafkaPartitionSplitReader.java | 186 +++-- .../kafka/source/reader/KafkaRecordEmitter.java| 50 +- .../kafka/source/reader/KafkaSourceReader.java | 14 +- .../reader/fetcher/KafkaSourceFetcherManager.java | 21 ++- .../connector/kafka/source/KafkaSourceITCase.java | 65 --- .../reader/KafkaPartitionSplitReaderTest.java | 53 +++--- 7 files changed, 201 insertions(+), 208 deletions(-)
[flink-ml] 02/02: [FLINK-24354][FLIP-174] Improve the WithParams interface
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink-ml.git commit 9c44eef25970338fe32dcf77ce45efac74c4324f Author: Dong Lin AuthorDate: Sun Sep 26 21:40:59 2021 +0800 [FLINK-24354][FLIP-174] Improve the WithParams interface --- flink-ml-api/pom.xml | 15 + .../org/apache/flink/ml/api/core/Pipeline.java | 23 +- .../apache/flink/ml/api/core/PipelineModel.java| 23 +- .../java/org/apache/flink/ml/api/core/Stage.java | 2 +- .../org/apache/flink/ml/param/BooleanParam.java| 35 ++ .../apache/flink/ml/param/DoubleArrayParam.java| 35 ++ .../org/apache/flink/ml/param/DoubleParam.java | 35 ++ .../org/apache/flink/ml/param/FloatArrayParam.java | 35 ++ .../java/org/apache/flink/ml/param/FloatParam.java | 32 ++ .../org/apache/flink/ml/param/IntArrayParam.java | 35 ++ .../java/org/apache/flink/ml/param/IntParam.java | 35 ++ .../org/apache/flink/ml/param/LongArrayParam.java | 35 ++ .../java/org/apache/flink/ml/param/LongParam.java | 32 ++ .../main/java/org/apache/flink/ml/param/Param.java | 98 ++ .../org/apache/flink/ml/param/ParamValidator.java | 40 +++ .../org/apache/flink/ml/param/ParamValidators.java | 98 ++ .../apache/flink/ml/param/StringArrayParam.java| 35 ++ .../org/apache/flink/ml/param/StringParam.java | 35 ++ .../java/org/apache/flink/ml/param/WithParams.java | 135 .../java/org/apache/flink/ml/util/ParamUtils.java | 89 + .../org/apache/flink/ml/util/ReadWriteUtils.java | 279 +++ .../apache/flink/ml/api/core/ExampleStages.java| 244 ++ .../org/apache/flink/ml/api/core/PipelineTest.java | 202 +-- .../org/apache/flink/ml/api/core/StageTest.java| 375 + pom.xml| 2 - 25 files changed, 1863 insertions(+), 141 deletions(-) diff --git a/flink-ml-api/pom.xml b/flink-ml-api/pom.xml index 81fdcc7..ddfc659 100644 --- a/flink-ml-api/pom.xml +++ b/flink-ml-api/pom.xml @@ -38,6 +38,21 @@ under the License. 
${flink.version} provided + + + org.apache.flink + flink-table-planner_${scala.binary.version} + ${flink.version} + test + + + + org.apache.flink + flink-test-utils_${scala.binary.version} + ${flink.version} + test + + org.apache.flink flink-shaded-jackson diff --git a/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/Pipeline.java b/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/Pipeline.java index a5fed01..f1e5d0c 100644 --- a/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/Pipeline.java +++ b/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/Pipeline.java @@ -20,13 +20,17 @@ package org.apache.flink.ml.api.core; import org.apache.flink.annotation.PublicEvolving; import org.apache.flink.annotation.VisibleForTesting; -import org.apache.flink.ml.api.misc.param.Params; +import org.apache.flink.ml.param.Param; +import org.apache.flink.ml.util.ParamUtils; +import org.apache.flink.ml.util.ReadWriteUtils; import org.apache.flink.table.api.Table; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; /** * A Pipeline acts as an Estimator. 
It consists of an ordered list of stages, each of which could be @@ -36,10 +40,11 @@ import java.util.List; public final class Pipeline implements Estimator { private static final long serialVersionUID = 6384850154817512318L; private final List> stages; -private final Params params = new Params(); +private final Map, Object> paramMap = new HashMap<>(); public Pipeline(List> stages) { this.stages = stages; +ParamUtils.initializeMapWithDefaultValues(paramMap, this); } /** @@ -97,17 +102,17 @@ public final class Pipeline implements Estimator { } @Override -public void save(String path) throws IOException { -throw new UnsupportedOperationException(); +public Map, Object> getParamMap() { +return paramMap; } -public static Pipeline load(String path) throws IOException { -throw new UnsupportedOperationException(); +@Override +public void save(String path) throws IOException { +ReadWriteUtils.savePipeline(this, stages, path); } -@Override -public Params getParams() { -return params; +public static Pipeline load(String path) throws IOException { +return new Pipeline(ReadWriteUtils.loadPipeline(path, Pipeline.class.getName())); } /** diff --git a/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/PipelineModel.java b/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/PipelineModel.java index 704fa8e..45bb757
[flink-ml] branch master updated (81cd74a -> 9c44eef)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/flink-ml.git. from 81cd74a [hotfix] Remove those library infra classes that need to be revisited new 1f0fe56 [FLINK-24354][FLIP-174] Remove old param-related classes new 9c44eef [FLINK-24354][FLIP-174] Improve the WithParams interface The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: flink-ml-api/pom.xml | 15 + .../org/apache/flink/ml/api/core/Pipeline.java | 23 +- .../apache/flink/ml/api/core/PipelineModel.java| 23 +- .../java/org/apache/flink/ml/api/core/Stage.java | 2 +- .../apache/flink/ml/api/misc/param/ParamInfo.java | 151 - .../flink/ml/api/misc/param/ParamInfoFactory.java | 134 .../org/apache/flink/ml/api/misc/param/Params.java | 277 --- .../apache/flink/ml/api/misc/param/WithParams.java | 60 .../org/apache/flink/ml/param/BooleanParam.java| 24 +- .../apache/flink/ml/param/DoubleArrayParam.java| 24 +- .../org/apache/flink/ml/param/DoubleParam.java | 24 +- .../org/apache/flink/ml/param/FloatArrayParam.java | 24 +- .../java/org/apache/flink/ml/param/FloatParam.java | 21 +- .../org/apache/flink/ml/param/IntArrayParam.java | 24 +- .../java/org/apache/flink/ml/param/IntParam.java | 24 +- .../org/apache/flink/ml/param/LongArrayParam.java | 24 +- .../java/org/apache/flink/ml/param/LongParam.java | 21 +- .../main/java/org/apache/flink/ml/param/Param.java | 98 ++ .../ml/{api/misc => }/param/ParamValidator.java| 17 +- .../org/apache/flink/ml/param/ParamValidators.java | 98 ++ .../apache/flink/ml/param/StringArrayParam.java| 24 +- .../org/apache/flink/ml/param/StringParam.java | 24 +- .../java/org/apache/flink/ml/param/WithParams.java | 135 .../java/org/apache/flink/ml/util/ParamUtils.java | 89 + 
.../org/apache/flink/ml/util/ReadWriteUtils.java | 279 +++ .../flink/ml/util/param/ExtractParamInfosUtil.java | 71 .../apache/flink/ml/api/core/ExampleStages.java| 244 ++ .../org/apache/flink/ml/api/core/PipelineTest.java | 202 +-- .../org/apache/flink/ml/api/core/StageTest.java| 375 + .../org/apache/flink/ml/api/misc/ParamsTest.java | 179 -- .../ml/util/param/ExtractParamInfosUtilTest.java | 109 -- pom.xml| 2 - 32 files changed, 1568 insertions(+), 1273 deletions(-) delete mode 100644 flink-ml-api/src/main/java/org/apache/flink/ml/api/misc/param/ParamInfo.java delete mode 100644 flink-ml-api/src/main/java/org/apache/flink/ml/api/misc/param/ParamInfoFactory.java delete mode 100644 flink-ml-api/src/main/java/org/apache/flink/ml/api/misc/param/Params.java delete mode 100644 flink-ml-api/src/main/java/org/apache/flink/ml/api/misc/param/WithParams.java copy flink-ml-iteration/src/main/java/org/apache/flink/iteration/proxy/state/StateNamePrefix.java => flink-ml-api/src/main/java/org/apache/flink/ml/param/BooleanParam.java (61%) copy flink-ml-iteration/src/main/java/org/apache/flink/iteration/proxy/state/StateNamePrefix.java => flink-ml-api/src/main/java/org/apache/flink/ml/param/DoubleArrayParam.java (60%) copy flink-ml-iteration/src/main/java/org/apache/flink/iteration/proxy/state/StateNamePrefix.java => flink-ml-api/src/main/java/org/apache/flink/ml/param/DoubleParam.java (61%) copy flink-ml-iteration/src/main/java/org/apache/flink/iteration/proxy/state/StateNamePrefix.java => flink-ml-api/src/main/java/org/apache/flink/ml/param/FloatArrayParam.java (60%) copy flink-ml-iteration/src/main/java/org/apache/flink/iteration/proxy/state/StateNamePrefix.java => flink-ml-api/src/main/java/org/apache/flink/ml/param/FloatParam.java (63%) copy flink-ml-iteration/src/main/java/org/apache/flink/iteration/proxy/state/StateNamePrefix.java => flink-ml-api/src/main/java/org/apache/flink/ml/param/IntArrayParam.java (60%) copy 
flink-ml-iteration/src/main/java/org/apache/flink/iteration/proxy/state/StateNamePrefix.java => flink-ml-api/src/main/java/org/apache/flink/ml/param/IntParam.java (61%) copy flink-ml-iteration/src/main/java/org/apache/flink/iteration/proxy/state/StateNamePrefix.java => flink-ml-api/src/main/java/org/apache/flink/ml/param/LongArrayParam.java (60%) copy flink-ml-iteration/src/main/java/org/apache/flink/iteration/proxy/state/StateNamePrefix.java => flink-ml-api/src/main/java/org/apache/flink/ml/param/LongParam.java (64%) create mode 100644 flink-ml-api/src/main/java/org/apache/flink/ml/param/Param.java rename flink-ml-api/src/main/java/org/apache/
[flink-ml] 01/02: [FLINK-24354][FLIP-174] Remove old param-related classes
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink-ml.git commit 1f0fe565f8f79ac084d7763f5f0da3fdb36bfa00 Author: Dong Lin AuthorDate: Sun Sep 26 21:37:32 2021 +0800 [FLINK-24354][FLIP-174] Remove old param-related classes --- .../apache/flink/ml/api/misc/param/ParamInfo.java | 151 --- .../flink/ml/api/misc/param/ParamInfoFactory.java | 134 -- .../flink/ml/api/misc/param/ParamValidator.java| 39 --- .../org/apache/flink/ml/api/misc/param/Params.java | 277 - .../apache/flink/ml/api/misc/param/WithParams.java | 60 - .../flink/ml/util/param/ExtractParamInfosUtil.java | 71 -- .../org/apache/flink/ml/api/misc/ParamsTest.java | 179 - .../ml/util/param/ExtractParamInfosUtilTest.java | 109 8 files changed, 1020 deletions(-) diff --git a/flink-ml-api/src/main/java/org/apache/flink/ml/api/misc/param/ParamInfo.java b/flink-ml-api/src/main/java/org/apache/flink/ml/api/misc/param/ParamInfo.java deleted file mode 100644 index b0f7ce9..000 --- a/flink-ml-api/src/main/java/org/apache/flink/ml/api/misc/param/ParamInfo.java +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.flink.ml.api.misc.param; - -import org.apache.flink.annotation.PublicEvolving; -import org.apache.flink.util.Preconditions; - -/** - * Definition of a parameter, including name, type, default value, validator and so on. - * - * A parameter can either be optional or non-optional. - * - * - * A non-optional parameter should not have a default value. Instead, its value must be - * provided by the users. - * An optional parameter may or may not have a default value. - * - * - * Please see {@link Params#get(ParamInfo)} and {@link Params#contains(ParamInfo)} for more - * details about the behavior. - * - * A parameter may have aliases in addition to the parameter name for convenience and - * compatibility purposes. One should not set values for both parameter name and an alias. One and - * only one value should be set either under the parameter name or one of the alias. - * - * @param the type of the param value - */ -@PublicEvolving -public class ParamInfo { -private final String name; -private final String[] alias; -private final String description; -private final boolean isOptional; -private final boolean hasDefaultValue; -private final V defaultValue; -private final ParamValidator validator; -private final Class valueClass; - -ParamInfo( -String name, -String[] alias, -String description, -boolean isOptional, -boolean hasDefaultValue, -V defaultValue, -ParamValidator validator, -Class valueClass) { -this.name = name; -this.alias = alias; -this.description = description; -this.isOptional = isOptional; -this.hasDefaultValue = hasDefaultValue; -this.defaultValue = defaultValue; -this.validator = validator; -this.valueClass = valueClass; -} - -/** - * Returns the name of the parameter. The name must be unique in the stage the ParamInfo belongs - * to. - * - * @return the name of the parameter - */ -public String getName() { -return name; -} - -/** - * Returns the aliases of the parameter. The alias will be an empty string array by default. 
- * - * @return the aliases of the parameter - */ -public String[] getAlias() { -Preconditions.checkNotNull(alias); -return alias; -} - -/** - * Returns the description of the parameter. - * - * @return the description of the parameter - */ -public String getDescription() { -return description; -} - -/** - * Returns whether the parameter is optional. - * - * @return {@code true} if the param is optional, {@code false} otherwise - */ -public boolean isOptional() { -return isOptional; -} - -/** - * Returns w
[flink] branch master updated: [FLINK-24308][docs] Translate Kafka DataStream connector documentation to Chinese
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/master by this push: new aa0eb91 [FLINK-24308][docs] Translate Kafka DataStream connector documentation to Chinese aa0eb91 is described below commit aa0eb91714dbf11d0e01b63baaf910110c0ae991 Author: Qingsheng Ren AuthorDate: Wed Sep 22 10:59:53 2021 +0800 [FLINK-24308][docs] Translate Kafka DataStream connector documentation to Chinese --- .../content.zh/docs/connectors/datastream/kafka.md | 709 + docs/content/docs/connectors/datastream/kafka.md | 2 +- 2 files changed, 319 insertions(+), 392 deletions(-) diff --git a/docs/content.zh/docs/connectors/datastream/kafka.md b/docs/content.zh/docs/connectors/datastream/kafka.md index a9ae396..1329970b 100644 --- a/docs/content.zh/docs/connectors/datastream/kafka.md +++ b/docs/content.zh/docs/connectors/datastream/kafka.md @@ -27,463 +27,390 @@ under the License. 
# Apache Kafka 连接器 -Flink 提供了 [Apache Kafka](https://kafka.apache.org) 连接器,用于从 Kafka topic 中读取或者向其中写入数据,可提供精确一次的处理语义。 - - +Flink 提供了 [Apache Kafka](https://kafka.apache.org) 连接器使用精确一次(Exactly-once)的语义在 Kafka topic 中读取和写入数据。 ## 依赖 -Apache Flink 集成了通用的 Kafka 连接器,它会尽力与 Kafka client 的最新版本保持同步。该连接器使用的 Kafka client 版本可能会在 Flink 版本之间发生变化。 +Apache Flink 集成了通用的 Kafka 连接器,它会尽力与 Kafka client 的最新版本保持同步。 +该连接器使用的 Kafka client 版本可能会在 Flink 版本之间发生变化。 当前 Kafka client 向后兼容 0.10.0 或更高版本的 Kafka broker。 -有关 Kafka 兼容性的更多细节,请参考 [Kafka 官方文档](https://kafka.apache.org/protocol.html#protocol_compatibility)。 +有关 Kafka 兼容性的更多细节,请参考 [Kafka 官方文档](https://kafka.apache.org/protocol.html#protocol_compatibility)。 {{< artifact flink-connector-kafka withScalaVersion >}} -Flink 目前的流连接器还不是二进制发行版的一部分。 -[在此处]({{< ref "docs/dev/datastream/project-configuration" >}})可以了解到如何链接它们,从而在集群中运行。 - - - -## Kafka Consumer +如果使用 Kafka source,```flink-connector-base``` 也需要包含在依赖中: -Flink 的 Kafka consumer 称为 `FlinkKafkaConsumer`。它提供对一个或多个 Kafka topics 的访问。 +{{< artifact flink-connector-base >}} -构造函数接受以下参数: +Flink 目前的流连接器还不是二进制发行版的一部分。 +[在此处]({{< ref "docs/dev/datastream/project-configuration" >}})可以了解到如何链接它们,从而在集群中运行。 -1. Topic 名称或者名称列表 -2. 用于反序列化 Kafka 数据的 DeserializationSchema 或者 KafkaDeserializationSchema -3. 
Kafka 消费者的属性。需要以下属性: - - "bootstrap.servers"(以逗号分隔的 Kafka broker 列表) - - "group.id" 消费组 ID +## Kafka Source +{{< hint info >}} +该文档描述的是基于[新数据源 API]({{< ref "docs/dev/datastream/sources.md" >}}) 的 Kafka Source。 +{{< /hint >}} -{{< tabs "fdf41307-604d-426f-9863-666250ce0cdc" >}} -{{< tab "Java" >}} +### 使用方法 +Kafka Source 提供了构建类来创建 ```KafkaSource``` 的实例。以下代码片段展示了如何构建 ```KafkaSource``` +来消费 “input-topic” 最早位点的数据, 使用消费组 “my-group”,并且将 Kafka 消息体反序列化为字符串: ```java -Properties properties = new Properties(); -properties.setProperty("bootstrap.servers", "localhost:9092"); -properties.setProperty("group.id", "test"); -DataStream stream = env -.addSource(new FlinkKafkaConsumer<>("topic", new SimpleStringSchema(), properties)); -``` -{{< /tab >}} -{{< tab "Scala" >}} -```scala -val properties = new Properties() -properties.setProperty("bootstrap.servers", "localhost:9092") -properties.setProperty("group.id", "test") -val stream = env -.addSource(new FlinkKafkaConsumer[String]("topic", new SimpleStringSchema(), properties)) -``` -{{< /tab >}} -{{< /tabs >}} - - - -### `DeserializationSchema` - -Flink Kafka Consumer 需要知道如何将 Kafka 中的二进制数据转换为 Java 或者 Scala 对象。`KafkaDeserializationSchema` 允许用户指定这样的 schema,每条 Kafka 中的消息会调用 `T deserialize(ConsumerRecord record)` 反序列化。 - -为了方便使用,Flink 提供了以下几种 schemas: - -1. `TypeInformationSerializationSchema`(和 `TypeInformationKeyValueSerializationSchema`) 基于 Flink 的 `TypeInformation` 创建 `schema`。 -如果该数据的读和写都发生在 Flink 中,那么这将是非常有用的。此 schema 是其他通用序列化方法的高性能 Flink 替代方案。 - -2. `JsonDeserializationSchema`(和 `JSONKeyValueDeserializationSchema`)将序列化的 JSON 转化为 ObjectNode 对象,可以使用 `objectNode.get("field").as(Int/String/...)()` 来访问某个字段。 -KeyValue objectNode 包含一个含所有字段的 key 和 values 字段,以及一个可选的"metadata"字段,可以访问到消息的 offset、partition、topic 等信息。 - -3. 
`GlueSchemaRegistryJsonDeserializationSchema` 可以在[AWS Glue Schema Registry](https://docs.aws.amazon.com/glue/latest/dg/schema-registry.html) -查找编写器的 schema(用于编写记录的 schema)。使用这些反序列化 schema 记录将读取从 AWS Glue Schema Registry 检索到的 schema 转换为代表通用记录的`com.amazonaws.services.schemaregistry.serializers.json.JsonDataWithSchema` - 或者由[mbknor-jackson-jsonSchema](https://github.com/mbknor/mbknor-jackson-jsonSchema)生成的 Java POJO. - -要使用此反序列化 schema 必须添加以下依赖: - -{{< tabs "8c6721c7-4a48-496e-
[flink] branch release-1.14 updated: [FLINK-24376][runtime] Use operator name for constructing OperatorCoordinatorProvider instead of chained name
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch release-1.14 in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/release-1.14 by this push: new b0a593e [FLINK-24376][runtime] Use operator name for constructing OperatorCoordinatorProvider instead of chained name b0a593e is described below commit b0a593eaa1a039df751e36ad8fbffd61e6431ddf Author: Qingsheng Ren AuthorDate: Sun Sep 26 15:55:36 2021 +0800 [FLINK-24376][runtime] Use operator name for constructing OperatorCoordinatorProvider instead of chained name --- .../apache/flink/streaming/api/graph/StreamingJobGraphGenerator.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/graph/StreamingJobGraphGenerator.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/graph/StreamingJobGraphGenerator.java index 536577b..75a2c27 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/graph/StreamingJobGraphGenerator.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/graph/StreamingJobGraphGenerator.java @@ -442,7 +442,9 @@ public class StreamingJobGraphGenerator { createChainedPreferredResources(currentNodeId, chainableOutputs)); OperatorID currentOperatorId = -chainInfo.addNodeToChain(currentNodeId, chainedNames.get(currentNodeId)); +chainInfo.addNodeToChain( +currentNodeId, + streamGraph.getStreamNode(currentNodeId).getOperatorName()); if (currentNode.getInputFormat() != null) { getOrCreateFormatContainer(startNodeId)
[flink] branch master updated: [FLINK-24376][runtime] Use operator name for constructing OperatorCoordinatorProvider instead of chained name
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/master by this push: new d05c385 [FLINK-24376][runtime] Use operator name for constructing OperatorCoordinatorProvider instead of chained name d05c385 is described below commit d05c38577df04e937ff80dfbc486c60f34e8e108 Author: Qingsheng Ren AuthorDate: Sun Sep 26 15:55:36 2021 +0800 [FLINK-24376][runtime] Use operator name for constructing OperatorCoordinatorProvider instead of chained name --- .../apache/flink/streaming/api/graph/StreamingJobGraphGenerator.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/graph/StreamingJobGraphGenerator.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/graph/StreamingJobGraphGenerator.java index 536577b..75a2c27 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/graph/StreamingJobGraphGenerator.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/graph/StreamingJobGraphGenerator.java @@ -442,7 +442,9 @@ public class StreamingJobGraphGenerator { createChainedPreferredResources(currentNodeId, chainableOutputs)); OperatorID currentOperatorId = -chainInfo.addNodeToChain(currentNodeId, chainedNames.get(currentNodeId)); +chainInfo.addNodeToChain( +currentNodeId, + streamGraph.getStreamNode(currentNodeId).getOperatorName()); if (currentNode.getInputFormat() != null) { getOrCreateFormatContainer(startNodeId)
[flink-ml] branch master updated: [FLINK-22915][FLIP-173] Update Flink ML API to support AlgoOperator with multiple input tables and multiple output tables
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink-ml.git The following commit(s) were added to refs/heads/master by this push: new 5ff346e [FLINK-22915][FLIP-173] Update Flink ML API to support AlgoOperator with multiple input tables and multiple output tables 5ff346e is described below commit 5ff346ea1a508a00b89759492f09e7330e69baef Author: Dong Lin AuthorDate: Wed Sep 22 13:47:39 2021 +0800 [FLINK-22915][FLIP-173] Update Flink ML API to support AlgoOperator with multiple input tables and multiple output tables --- .../core/{PipelineStage.java => AlgoOperator.java} | 35 ++- .../org/apache/flink/ml/api/core/Estimator.java| 24 +- .../java/org/apache/flink/ml/api/core/Model.java | 34 ++- .../org/apache/flink/ml/api/core/Pipeline.java | 257 + .../apache/flink/ml/api/core/PipelineModel.java| 83 +++ .../java/org/apache/flink/ml/api/core/Stage.java | 44 .../org/apache/flink/ml/api/core/Transformer.java | 22 +- .../org/apache/flink/ml/api/core/PipelineTest.java | 69 +++--- 8 files changed, 269 insertions(+), 299 deletions(-) diff --git a/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/PipelineStage.java b/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/AlgoOperator.java similarity index 50% rename from flink-ml-api/src/main/java/org/apache/flink/ml/api/core/PipelineStage.java rename to flink-ml-api/src/main/java/org/apache/flink/ml/api/core/AlgoOperator.java index 0a3dd23..7f2d4b4 100644 --- a/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/PipelineStage.java +++ b/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/AlgoOperator.java @@ -18,29 +18,22 @@ package org.apache.flink.ml.api.core; -import org.apache.flink.ml.api.misc.param.WithParams; - -import java.io.Serializable; +import org.apache.flink.annotation.PublicEvolving; +import org.apache.flink.table.api.Table; /** - * Base class for a stage in a pipeline. 
The interface is only a concept, and does not have any - * actual functionality. Its subclasses must be either Estimator or Transformer. No other classes - * should inherit this interface directly. - * - * Each pipeline stage is with parameters, and requires a public empty constructor for - * restoration in Pipeline. + * An AlgoOperator takes a list of tables as inputs and produces a list of tables as results. It can + * be used to encode generic multi-input multi-output computation logic. * - * @param The class type of the PipelineStage implementation itself, used by {@link - * org.apache.flink.ml.api.misc.param.WithParams} - * @see WithParams + * @param The class type of the AlgoOperator implementation itself. */ -interface PipelineStage> extends WithParams, Serializable { - -default String toJson() { -return getParams().toJson(); -} - -default void loadJson(String json) { -getParams().loadJson(json); -} +@PublicEvolving +public interface AlgoOperator> extends Stage { +/** + * Applies the AlgoOperator on the given input tables and returns the result tables. + * + * @param inputs a list of tables + * @return a list of tables + */ +Table[] transform(Table... inputs); } diff --git a/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/Estimator.java b/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/Estimator.java index 24c8349..bab9c7d 100644 --- a/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/Estimator.java +++ b/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/Estimator.java @@ -20,28 +20,20 @@ package org.apache.flink.ml.api.core; import org.apache.flink.annotation.PublicEvolving; import org.apache.flink.table.api.Table; -import org.apache.flink.table.api.TableEnvironment; /** - * Estimators are {@link PipelineStage}s responsible for training and generating machine learning - * models. + * Estimators are responsible for training and generating Models. 
* - * The implementations are expected to take an input table as training samples and generate a - * {@link Model} which fits these samples. - * - * @param class type of the Estimator implementation itself, used by {@link - * org.apache.flink.ml.api.misc.param.WithParams}. - * @param class type of the {@link Model} this Estimator produces. + * @param class type of the Estimator implementation itself. + * @param class type of the Model this Estimator produces. */ @PublicEvolving -public interface Estimator, M extends Model> extends PipelineStage { - +public interface Estimator, M extends Model> extends Stage { /** - * Train and produce a {@link Model} which fits the records in the given {@link Table}. + * Trains on the given inputs and produces a Model. * - * @param tEnv the table environment to which the input table is bound. - * @param input the table
[flink] branch release-1.12 updated (b842230 -> 7249e0d)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch release-1.12 in repository https://gitbox.apache.org/repos/asf/flink.git. from b842230 [FLINK-23949][runtime][checkpoint] fix first incremental checkpoint after a savepoint will degenerate into a full checkpoint new bf3df16 [FLINK-24277][connector/kafka] Add configuration for committing offset on checkpoint and disable it if group ID is not specified new 7249e0d [FLINK-24277][connector/kafka] Add OffsetsInitializerValidator interface for validating offset initializer in KafkaSourceBuilder The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .../flink/connector/kafka/source/KafkaSource.java | 6 + .../connector/kafka/source/KafkaSourceBuilder.java | 43 +- .../connector/kafka/source/KafkaSourceOptions.java | 6 + .../initializer/OffsetsInitializerValidator.java | 19 ++- .../ReaderHandledOffsetsInitializer.java | 17 ++- .../initializer/SpecifiedOffsetsInitializer.java | 22 ++- .../source/reader/KafkaPartitionSplitReader.java | 24 ++-- .../kafka/source/reader/KafkaSourceReader.java | 17 +++ .../kafka/source/KafkaSourceBuilderTest.java | 156 - .../connector/kafka/source/KafkaSourceITCase.java | 19 +++ .../kafka/source/reader/KafkaSourceReaderTest.java | 36 - 11 files changed, 332 insertions(+), 33 deletions(-) copy flink-queryable-state/flink-queryable-state-client-java/src/main/java/org/apache/flink/queryablestate/network/messages/MessageDeserializer.java => flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerValidator.java (61%)
[flink] 01/02: [FLINK-24277][connector/kafka] Add configuration for committing offset on checkpoint and disable it if group ID is not specified
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch release-1.12 in repository https://gitbox.apache.org/repos/asf/flink.git commit bf3df16e3cc9a3bede3f5dabc8d08c9369e02485 Author: Qingsheng Ren AuthorDate: Thu Sep 16 15:20:08 2021 +0800 [FLINK-24277][connector/kafka] Add configuration for committing offset on checkpoint and disable it if group ID is not specified --- .../flink/connector/kafka/source/KafkaSource.java | 6 ++ .../connector/kafka/source/KafkaSourceBuilder.java | 35 ++- .../connector/kafka/source/KafkaSourceOptions.java | 6 ++ .../source/reader/KafkaPartitionSplitReader.java | 24 ++--- .../kafka/source/reader/KafkaSourceReader.java | 17 .../kafka/source/KafkaSourceBuilderTest.java | 103 - .../connector/kafka/source/KafkaSourceITCase.java | 19 .../kafka/source/reader/KafkaSourceReaderTest.java | 36 ++- 8 files changed, 225 insertions(+), 21 deletions(-) diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java index ea7ad6c..477a4d6 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java @@ -18,6 +18,7 @@ package org.apache.flink.connector.kafka.source; +import org.apache.flink.annotation.VisibleForTesting; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.connector.source.Boundedness; import org.apache.flink.api.connector.source.Source; @@ -178,4 +179,9 @@ public class KafkaSource props.stringPropertyNames().forEach(key -> config.setString(key, props.getProperty(key))); return config; } + +@VisibleForTesting +Configuration getConfiguration() { +return toConfiguration(props); +} } diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java index 0099df3..ddcf2c7 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java @@ -40,6 +40,7 @@ import java.util.Set; import java.util.regex.Pattern; import static org.apache.flink.util.Preconditions.checkNotNull; +import static org.apache.flink.util.Preconditions.checkState; /** * The @builder class for {@link KafkaSource} to make it easier for the users to construct a {@link @@ -412,8 +413,12 @@ public class KafkaSourceBuilder { ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName(), true); -maybeOverride( -ConsumerConfig.GROUP_ID_CONFIG, "KafkaSource-" + new Random().nextLong(), false); +if (!props.containsKey(ConsumerConfig.GROUP_ID_CONFIG)) { +LOG.warn( +"Offset commit on checkpoint is disabled because {} is not specified", +ConsumerConfig.GROUP_ID_CONFIG); + maybeOverride(KafkaSourceOptions.COMMIT_OFFSETS_ON_CHECKPOINT.key(), "false", false); +} maybeOverride(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false", false); maybeOverride( ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, @@ -426,10 +431,13 @@ public class KafkaSourceBuilder { "-1", boundedness == Boundedness.BOUNDED); -// If the client id prefix is not set, reuse the consumer group id as the client id prefix. +// If the client id prefix is not set, reuse the consumer group id as the client id prefix, +// or generate a random string if consumer group id is not specified. maybeOverride( KafkaSourceOptions.CLIENT_ID_PREFIX.key(), -props.getProperty(ConsumerConfig.GROUP_ID_CONFIG), +props.containsKey(ConsumerConfig.GROUP_ID_CONFIG) +? 
props.getProperty(ConsumerConfig.GROUP_ID_CONFIG) +: "KafkaSource-" + new Random().nextLong(), false); } @@ -464,5 +472,24 @@ public class KafkaSourceBuilder { "No subscribe mode is specified, " + "should be one of topics, topic pattern and partition set."); checkNotNull(deserializationSchema, "Deserialization schema is required but not provided.")
[flink] 02/02: [FLINK-24277][connector/kafka] Add OffsetsInitializerValidator interface for validating offset initializer in KafkaSourceBuilder
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch release-1.12 in repository https://gitbox.apache.org/repos/asf/flink.git commit 7249e0d439206595de24c25c654ff204f4a2fde8 Author: Qingsheng Ren AuthorDate: Tue Sep 14 18:17:56 2021 +0800 [FLINK-24277][connector/kafka] Add OffsetsInitializerValidator interface for validating offset initializer in KafkaSourceBuilder (cherry picked from commit 2da73edba95685537040305f30ee9d6dfd8d6c02) --- .../connector/kafka/source/KafkaSourceBuilder.java | 8 .../initializer/OffsetsInitializerValidator.java | 39 .../ReaderHandledOffsetsInitializer.java | 17 ++- .../initializer/SpecifiedOffsetsInitializer.java | 22 - .../kafka/source/KafkaSourceBuilderTest.java | 53 ++ 5 files changed, 137 insertions(+), 2 deletions(-) diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java index ddcf2c7..f3ab1cd 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java @@ -21,6 +21,7 @@ package org.apache.flink.connector.kafka.source; import org.apache.flink.api.connector.source.Boundedness; import org.apache.flink.connector.kafka.source.enumerator.initializer.NoStoppingOffsetsInitializer; import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer; +import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializerValidator; import org.apache.flink.connector.kafka.source.enumerator.subscriber.KafkaSubscriber; import org.apache.flink.connector.kafka.source.reader.deserializer.KafkaRecordDeserializer; @@ -478,6 +479,13 @@ public class KafkaSourceBuilder { String.format( 
"Property %s is required when offset commit is enabled", ConsumerConfig.GROUP_ID_CONFIG)); +// Check offsets initializers +if (startingOffsetsInitializer instanceof OffsetsInitializerValidator) { +((OffsetsInitializerValidator) startingOffsetsInitializer).validate(props); +} +if (stoppingOffsetsInitializer instanceof OffsetsInitializerValidator) { +((OffsetsInitializerValidator) stoppingOffsetsInitializer).validate(props); +} } private boolean offsetCommitEnabledManually() { diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerValidator.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerValidator.java new file mode 100644 index 000..c198107 --- /dev/null +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerValidator.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.connector.kafka.source.enumerator.initializer; + +import org.apache.flink.annotation.Internal; + +import java.util.Properties; + +/** + * Interface for validating {@link OffsetsInitializer} with properties from {@link + * org.apache.flink.connector.kafka.source.KafkaSource}. + */ +@Internal +public interface OffsetsInitializerValidator { + +/** + * Validate offsets initializer with properties of Kafka source. + * + * @param kafkaSourceProperties Properties of Kafka source + * @throws IllegalStateException if validation fails + */ +void validate(Properties kafkaSourceProperties) throws IllegalStateException; +} diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/ReaderHandledOffsetsInitializer.java b/flink-connec
[flink] 02/02: [FLINK-24277][connector/kafka] Add OffsetsInitializerValidator interface for validating offset initializer in KafkaSourceBuilder
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch release-1.13 in repository https://gitbox.apache.org/repos/asf/flink.git commit c7c34ed414dcd069bde0d8de36ff049d39f2a618 Author: Qingsheng Ren AuthorDate: Tue Sep 14 18:17:56 2021 +0800 [FLINK-24277][connector/kafka] Add OffsetsInitializerValidator interface for validating offset initializer in KafkaSourceBuilder (cherry picked from commit 2da73edba95685537040305f30ee9d6dfd8d6c02) --- .../connector/kafka/source/KafkaSourceBuilder.java | 8 .../initializer/OffsetsInitializerValidator.java | 39 .../ReaderHandledOffsetsInitializer.java | 17 ++- .../initializer/SpecifiedOffsetsInitializer.java | 22 - .../kafka/source/KafkaSourceBuilderTest.java | 53 ++ 5 files changed, 137 insertions(+), 2 deletions(-) diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java index eb93683..d105cd8 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java @@ -22,6 +22,7 @@ import org.apache.flink.api.common.serialization.DeserializationSchema; import org.apache.flink.api.connector.source.Boundedness; import org.apache.flink.connector.kafka.source.enumerator.initializer.NoStoppingOffsetsInitializer; import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer; +import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializerValidator; import org.apache.flink.connector.kafka.source.enumerator.subscriber.KafkaSubscriber; import org.apache.flink.connector.kafka.source.reader.deserializer.KafkaRecordDeserializationSchema; @@ -495,6 +496,13 @@ public class 
KafkaSourceBuilder { String.format( "Property %s is required when offset commit is enabled", ConsumerConfig.GROUP_ID_CONFIG)); +// Check offsets initializers +if (startingOffsetsInitializer instanceof OffsetsInitializerValidator) { +((OffsetsInitializerValidator) startingOffsetsInitializer).validate(props); +} +if (stoppingOffsetsInitializer instanceof OffsetsInitializerValidator) { +((OffsetsInitializerValidator) stoppingOffsetsInitializer).validate(props); +} } private boolean offsetCommitEnabledManually() { diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerValidator.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerValidator.java new file mode 100644 index 000..c198107 --- /dev/null +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerValidator.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.connector.kafka.source.enumerator.initializer; + +import org.apache.flink.annotation.Internal; + +import java.util.Properties; + +/** + * Interface for validating {@link OffsetsInitializer} with properties from {@link + * org.apache.flink.connector.kafka.source.KafkaSource}. + */ +@Internal +public interface OffsetsInitializerValidator { + +/** + * Validate offsets initializer with properties of Kafka source. + * + * @param kafkaSourceProperties Properties of Kafka source + * @throws IllegalStateException if validation fails + */ +void validate(Properties kafkaSourceProperties) throws IllegalStateException; +} diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/ReaderHandledOff
[flink] branch release-1.13 updated (c9995a9 -> c7c34ed)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch release-1.13 in repository https://gitbox.apache.org/repos/asf/flink.git. from c9995a9 [FLINK-24317][python][tests] Optimize the implementation of Top2 in test_flat_aggregate new f0bd873 [FLINK-24277][connector/kafka] Add configuration for committing offset on checkpoint and disable it if group ID is not specified new c7c34ed [FLINK-24277][connector/kafka] Add OffsetsInitializerValidator interface for validating offset initializer in KafkaSourceBuilder The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: docs/content/docs/connectors/datastream/kafka.md | 1 + .../flink/connector/kafka/source/KafkaSource.java | 5 + .../connector/kafka/source/KafkaSourceBuilder.java | 43 +- .../connector/kafka/source/KafkaSourceOptions.java | 6 + .../initializer/OffsetsInitializerValidator.java | 19 ++- .../ReaderHandledOffsetsInitializer.java | 17 ++- .../initializer/SpecifiedOffsetsInitializer.java | 22 ++- .../source/reader/KafkaPartitionSplitReader.java | 24 ++-- .../kafka/source/reader/KafkaSourceReader.java | 17 +++ .../kafka/source/KafkaSourceBuilderTest.java | 156 - .../connector/kafka/source/KafkaSourceITCase.java | 19 +++ .../kafka/source/reader/KafkaSourceReaderTest.java | 44 +- 12 files changed, 340 insertions(+), 33 deletions(-) copy flink-queryable-state/flink-queryable-state-client-java/src/main/java/org/apache/flink/queryablestate/network/messages/MessageDeserializer.java => flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerValidator.java (61%)
[flink] 01/02: [FLINK-24277][connector/kafka] Add configuration for committing offset on checkpoint and disable it if group ID is not specified
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch release-1.13 in repository https://gitbox.apache.org/repos/asf/flink.git commit f0bd873309c3b0a13edb5354912ffdb1169de5b4 Author: Qingsheng Ren AuthorDate: Tue Sep 14 15:22:00 2021 +0800 [FLINK-24277][connector/kafka] Add configuration for committing offset on checkpoint and disable it if group ID is not specified (cherry picked from commit ca8bff231aed2412f579d0a4e446e9a6bee42581) --- docs/content/docs/connectors/datastream/kafka.md | 1 + .../flink/connector/kafka/source/KafkaSource.java | 5 + .../connector/kafka/source/KafkaSourceBuilder.java | 35 ++- .../connector/kafka/source/KafkaSourceOptions.java | 6 ++ .../source/reader/KafkaPartitionSplitReader.java | 24 ++--- .../kafka/source/reader/KafkaSourceReader.java | 17 .../kafka/source/KafkaSourceBuilderTest.java | 103 - .../connector/kafka/source/KafkaSourceITCase.java | 19 .../kafka/source/reader/KafkaSourceReaderTest.java | 44 - 9 files changed, 233 insertions(+), 21 deletions(-) diff --git a/docs/content/docs/connectors/datastream/kafka.md b/docs/content/docs/connectors/datastream/kafka.md index 8fcd023..cab25dc 100644 --- a/docs/content/docs/connectors/datastream/kafka.md +++ b/docs/content/docs/connectors/datastream/kafka.md @@ -154,6 +154,7 @@ KafkaSource has following options for configuration: below for more details. 
- ```register.consumer.metrics``` specifies whether to register metrics of KafkaConsumer in Flink metric group +- ```commit.offsets.on.checkpoint``` specifies whether to commit consuming offsets to Kafka brokers on checkpoint For configurations of KafkaConsumer, you can refer to http://kafka.apache.org/documentation/#consumerconfigs";>Apache Kafka documentation diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java index af0013b..d1219c0 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java @@ -214,4 +214,9 @@ public class KafkaSource props.stringPropertyNames().forEach(key -> config.setString(key, props.getProperty(key))); return config; } + +@VisibleForTesting +Configuration getConfiguration() { +return toConfiguration(props); +} } diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java index cd286ed..eb93683 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java @@ -41,6 +41,7 @@ import java.util.Set; import java.util.regex.Pattern; import static org.apache.flink.util.Preconditions.checkNotNull; +import static org.apache.flink.util.Preconditions.checkState; /** * The @builder class for {@link KafkaSource} to make it easier for the users to construct a {@link @@ -429,8 +430,12 @@ public class KafkaSourceBuilder { 
ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName(), true); -maybeOverride( -ConsumerConfig.GROUP_ID_CONFIG, "KafkaSource-" + new Random().nextLong(), false); +if (!props.containsKey(ConsumerConfig.GROUP_ID_CONFIG)) { +LOG.warn( +"Offset commit on checkpoint is disabled because {} is not specified", +ConsumerConfig.GROUP_ID_CONFIG); + maybeOverride(KafkaSourceOptions.COMMIT_OFFSETS_ON_CHECKPOINT.key(), "false", false); +} maybeOverride(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false", false); maybeOverride( ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, @@ -443,10 +448,13 @@ public class KafkaSourceBuilder { "-1", boundedness == Boundedness.BOUNDED); -// If the client id prefix is not set, reuse the consumer group id as the client id prefix. +// If the client id prefix is not set, reuse the consumer group id as the client id prefix, +// or generate a random string if consumer group id is not specified. maybeOverride( KafkaSourceOptions.CLIENT_ID_PREFIX.key(), -props.getProperty(ConsumerConfig.GROUP_ID_CONFIG), +
[flink] 02/03: [FLINK-24277][connector/kafka] Remove auto-generated group id in Kafka table source
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch release-1.14 in repository https://gitbox.apache.org/repos/asf/flink.git commit 984cfe9797562d578d54329e8660758d777d80ec Author: Qingsheng Ren AuthorDate: Tue Sep 14 15:24:10 2021 +0800 [FLINK-24277][connector/kafka] Remove auto-generated group id in Kafka table source --- .../connectors/kafka/table/KafkaDynamicSource.java | 16 -- .../kafka/source/KafkaSourceTestUtils.java | 6 .../kafka/table/KafkaDynamicTableFactoryTest.java | 34 +- 3 files changed, 39 insertions(+), 17 deletions(-) diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicSource.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicSource.java index 8e12124..ab0fa13 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicSource.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicSource.java @@ -49,12 +49,9 @@ import org.apache.flink.table.types.DataType; import org.apache.flink.table.types.utils.DataTypeUtils; import org.apache.flink.util.Preconditions; -import org.apache.kafka.clients.consumer.ConsumerConfig; import org.apache.kafka.clients.consumer.ConsumerRecord; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.header.Header; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import javax.annotation.Nullable; @@ -77,8 +74,6 @@ import java.util.stream.Stream; public class KafkaDynamicSource implements ScanTableSource, SupportsReadingMetadata, SupportsWatermarkPushDown { -private static final Logger LOG = LoggerFactory.getLogger(KafkaDynamicSource.class); - // // Mutable attributes // @@ -389,17 +384,6 @@ public class KafkaDynamicSource 
kafkaSourceBuilder.setTopicPattern(topicPattern); } -// For compatibility with legacy source that is not validating group id -if (!properties.containsKey(ConsumerConfig.GROUP_ID_CONFIG)) { -String generatedGroupId = "KafkaSource-" + tableIdentifier; -LOG.warn( -"Property \"{}\" is required for offset commit but not set in table options. " -+ "Assigning \"{}\" as consumer group id", -ConsumerConfig.GROUP_ID_CONFIG, -generatedGroupId); -kafkaSourceBuilder.setGroupId(generatedGroupId); -} - switch (startupMode) { case EARLIEST: kafkaSourceBuilder.setStartingOffsets(OffsetsInitializer.earliest()); diff --git a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/KafkaSourceTestUtils.java b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/KafkaSourceTestUtils.java index fce9591..572b77d 100644 --- a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/KafkaSourceTestUtils.java +++ b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/KafkaSourceTestUtils.java @@ -19,6 +19,7 @@ package org.apache.flink.connector.kafka.source; import org.apache.flink.api.connector.source.SourceReaderContext; +import org.apache.flink.configuration.Configuration; import org.apache.flink.connector.kafka.source.reader.KafkaSourceReader; import java.util.Collection; @@ -44,4 +45,9 @@ public class KafkaSourceTestUtils { return ((KafkaSourceReader) kafkaSource.createReader(sourceReaderContext, splitFinishedHook)); } + +/** Get configuration of KafkaSource. 
*/ +public static Configuration getKafkaSourceConfiguration(KafkaSource kafkaSource) { +return kafkaSource.getConfiguration(); +} } diff --git a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicTableFactoryTest.java b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicTableFactoryTest.java index 01af4b0..a0cc3cf 100644 --- a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicTableFactoryTest.java +++ b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicTableFactoryTest.java @@ -22,9 +22,13 @@ import org.apache.flink.api.c
[flink] branch release-1.14 updated (791c1b9 -> cc19997)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch release-1.14 in repository https://gitbox.apache.org/repos/asf/flink.git. from 791c1b9 [FLINK-24300] SourceOperator#getAvailableFuture reuses future new 677caa8 [FLINK-24277][connector/kafka] Add configuration for committing offset on checkpoint and disable it if group ID is not specified new 984cfe9 [FLINK-24277][connector/kafka] Remove auto-generated group id in Kafka table source new cc19997 [FLINK-24277][connector/kafka] Add OffsetsInitializerValidator interface for validating offset initializer in KafkaSourceBuilder The 3 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: docs/content/docs/connectors/datastream/kafka.md | 1 + .../flink/connector/kafka/source/KafkaSource.java | 5 + .../connector/kafka/source/KafkaSourceBuilder.java | 43 ++- .../connector/kafka/source/KafkaSourceOptions.java | 6 + .../initializer/OffsetsInitializerValidator.java | 19 ++- .../ReaderHandledOffsetsInitializer.java | 17 ++- .../initializer/SpecifiedOffsetsInitializer.java | 22 +++- .../source/reader/KafkaPartitionSplitReader.java | 24 ++-- .../kafka/source/reader/KafkaSourceReader.java | 17 +++ .../connectors/kafka/table/KafkaDynamicSource.java | 16 --- .../kafka/source/KafkaSourceBuilderTest.java | 140 - .../connector/kafka/source/KafkaSourceITCase.java | 21 .../kafka/source/KafkaSourceTestUtils.java | 6 + .../kafka/source/reader/KafkaSourceReaderTest.java | 44 ++- .../kafka/table/KafkaDynamicTableFactoryTest.java | 34 - 15 files changed, 365 insertions(+), 50 deletions(-) copy flink-queryable-state/flink-queryable-state-client-java/src/main/java/org/apache/flink/queryablestate/network/messages/MessageDeserializer.java => 
flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerValidator.java (61%)
[flink] 01/03: [FLINK-24277][connector/kafka] Add configuration for committing offset on checkpoint and disable it if group ID is not specified
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch release-1.14 in repository https://gitbox.apache.org/repos/asf/flink.git commit 677caa8d97161403b9b090fad8fe91d576db7069 Author: Qingsheng Ren AuthorDate: Tue Sep 14 15:22:00 2021 +0800 [FLINK-24277][connector/kafka] Add configuration for committing offset on checkpoint and disable it if group ID is not specified --- docs/content/docs/connectors/datastream/kafka.md | 1 + .../flink/connector/kafka/source/KafkaSource.java | 5 ++ .../connector/kafka/source/KafkaSourceBuilder.java | 35 - .../connector/kafka/source/KafkaSourceOptions.java | 6 ++ .../source/reader/KafkaPartitionSplitReader.java | 24 +++--- .../kafka/source/reader/KafkaSourceReader.java | 17 + .../kafka/source/KafkaSourceBuilderTest.java | 87 +- .../connector/kafka/source/KafkaSourceITCase.java | 21 ++ .../kafka/source/reader/KafkaSourceReaderTest.java | 44 ++- 9 files changed, 219 insertions(+), 21 deletions(-) diff --git a/docs/content/docs/connectors/datastream/kafka.md b/docs/content/docs/connectors/datastream/kafka.md index a94d7bd..b614c10 100644 --- a/docs/content/docs/connectors/datastream/kafka.md +++ b/docs/content/docs/connectors/datastream/kafka.md @@ -154,6 +154,7 @@ KafkaSource has following options for configuration: below for more details. 
- ```register.consumer.metrics``` specifies whether to register metrics of KafkaConsumer in Flink metric group +- ```commit.offsets.on.checkpoint``` specifies whether to commit consuming offsets to Kafka brokers on checkpoint For configurations of KafkaConsumer, you can refer to the Apache Kafka documentation (http://kafka.apache.org/documentation/#consumerconfigs) diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java index a5d89b9..9a05089 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java @@ -214,4 +214,9 @@ public class KafkaSource props.stringPropertyNames().forEach(key -> config.setString(key, props.getProperty(key))); return config; } + +@VisibleForTesting +Configuration getConfiguration() { +return toConfiguration(props); +} } diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java index cd286ed..eb93683 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java @@ -41,6 +41,7 @@ import java.util.Set; import java.util.regex.Pattern; import static org.apache.flink.util.Preconditions.checkNotNull; +import static org.apache.flink.util.Preconditions.checkState; /** * The @builder class for {@link KafkaSource} to make it easier for the users to construct a {@link @@ -429,8 +430,12 @@ public class KafkaSourceBuilder { 
ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName(), true); -maybeOverride( -ConsumerConfig.GROUP_ID_CONFIG, "KafkaSource-" + new Random().nextLong(), false); +if (!props.containsKey(ConsumerConfig.GROUP_ID_CONFIG)) { +LOG.warn( +"Offset commit on checkpoint is disabled because {} is not specified", +ConsumerConfig.GROUP_ID_CONFIG); + maybeOverride(KafkaSourceOptions.COMMIT_OFFSETS_ON_CHECKPOINT.key(), "false", false); +} maybeOverride(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false", false); maybeOverride( ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, @@ -443,10 +448,13 @@ public class KafkaSourceBuilder { "-1", boundedness == Boundedness.BOUNDED); -// If the client id prefix is not set, reuse the consumer group id as the client id prefix. +// If the client id prefix is not set, reuse the consumer group id as the client id prefix, +// or generate a random string if consumer group id is not specified. maybeOverride( KafkaSourceOptions.CLIENT_ID_PREFIX.key(), -props.getProperty(ConsumerConfig.GROUP_ID_CONFIG), +props.containsKey(ConsumerConfig.GROUP_ID_CONFIG) +
[flink] 03/03: [FLINK-24277][connector/kafka] Add OffsetsInitializerValidator interface for validating offset initializer in KafkaSourceBuilder
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch release-1.14 in repository https://gitbox.apache.org/repos/asf/flink.git commit cc19997d6124e0b4f8c905601a3c98b328014f1d Author: Qingsheng Ren AuthorDate: Tue Sep 14 18:17:56 2021 +0800 [FLINK-24277][connector/kafka] Add OffsetsInitializerValidator interface for validating offset initializer in KafkaSourceBuilder --- .../connector/kafka/source/KafkaSourceBuilder.java | 8 .../initializer/OffsetsInitializerValidator.java | 39 +++ .../ReaderHandledOffsetsInitializer.java | 17 ++- .../initializer/SpecifiedOffsetsInitializer.java | 22 - .../kafka/source/KafkaSourceBuilderTest.java | 55 +- 5 files changed, 138 insertions(+), 3 deletions(-) diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java index eb93683..d105cd8 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java @@ -22,6 +22,7 @@ import org.apache.flink.api.common.serialization.DeserializationSchema; import org.apache.flink.api.connector.source.Boundedness; import org.apache.flink.connector.kafka.source.enumerator.initializer.NoStoppingOffsetsInitializer; import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer; +import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializerValidator; import org.apache.flink.connector.kafka.source.enumerator.subscriber.KafkaSubscriber; import org.apache.flink.connector.kafka.source.reader.deserializer.KafkaRecordDeserializationSchema; @@ -495,6 +496,13 @@ public class KafkaSourceBuilder { String.format( "Property %s is required when offset 
commit is enabled", ConsumerConfig.GROUP_ID_CONFIG)); +// Check offsets initializers +if (startingOffsetsInitializer instanceof OffsetsInitializerValidator) { +((OffsetsInitializerValidator) startingOffsetsInitializer).validate(props); +} +if (stoppingOffsetsInitializer instanceof OffsetsInitializerValidator) { +((OffsetsInitializerValidator) stoppingOffsetsInitializer).validate(props); +} } private boolean offsetCommitEnabledManually() { diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerValidator.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerValidator.java new file mode 100644 index 000..c198107 --- /dev/null +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerValidator.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.connector.kafka.source.enumerator.initializer; + +import org.apache.flink.annotation.Internal; + +import java.util.Properties; + +/** + * Interface for validating {@link OffsetsInitializer} with properties from {@link + * org.apache.flink.connector.kafka.source.KafkaSource}. + */ +@Internal +public interface OffsetsInitializerValidator { + +/** + * Validate offsets initializer with properties of Kafka source. + * + * @param kafkaSourceProperties Properties of Kafka source + * @throws IllegalStateException if validation fails + */ +void validate(Properties kafkaSourceProperties) throws IllegalStateException; +} diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/ReaderHandledOffsetsInitializer.java b/flink-connectors/flink-connector-kafka/src/main/java/or
[flink] branch master updated (6f07196 -> 2da73ed)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/flink.git. from 6f07196 [FLINK-24217][docs-zh] Translate "LOAD Statements" page of "SQL" into Chinese (#17221) add ca8bff2 [FLINK-24277][connector/kafka] Add configuration for committing offset on checkpoint and disable it if group ID is not specified add f3ef860 [FLINK-24277][connector/kafka] Remove auto-generated group id in Kafka table source add 2da73ed [FLINK-24277][connector/kafka] Add OffsetsInitializerValidator interface for validating offset initializer in KafkaSourceBuilder No new revisions were added by this update. Summary of changes: docs/content/docs/connectors/datastream/kafka.md | 1 + .../flink/connector/kafka/source/KafkaSource.java | 5 + .../connector/kafka/source/KafkaSourceBuilder.java | 43 ++- .../connector/kafka/source/KafkaSourceOptions.java | 6 + .../initializer/OffsetsInitializerValidator.java | 19 ++- .../ReaderHandledOffsetsInitializer.java | 17 ++- .../initializer/SpecifiedOffsetsInitializer.java | 22 +++- .../source/reader/KafkaPartitionSplitReader.java | 24 ++-- .../kafka/source/reader/KafkaSourceReader.java | 17 +++ .../connectors/kafka/table/KafkaDynamicSource.java | 16 --- .../kafka/source/KafkaSourceBuilderTest.java | 140 - .../connector/kafka/source/KafkaSourceITCase.java | 21 .../kafka/source/KafkaSourceTestUtils.java | 6 + .../kafka/source/reader/KafkaSourceReaderTest.java | 44 ++- .../kafka/table/KafkaDynamicTableFactoryTest.java | 34 - 15 files changed, 365 insertions(+), 50 deletions(-) copy flink-queryable-state/flink-queryable-state-client-java/src/main/java/org/apache/flink/queryablestate/network/messages/MessageDeserializer.java => flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerValidator.java (61%)
[flink] branch release-1.14 updated: [FLINK-24059][Connectors/Common][test] Allow SourceReaderTestBase.NUM_SPLITS to be overridden (#17064)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch release-1.14 in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/release-1.14 by this push: new 8d148a8 [FLINK-24059][Connectors/Common][test] Allow SourceReaderTestBase.NUM_SPLITS to be overridden (#17064) 8d148a8 is described below commit 8d148a8b7832fcefefa4818de8e700562f0ffd26 Author: Brian Zhou AuthorDate: Thu Sep 9 11:33:41 2021 +0800 [FLINK-24059][Connectors/Common][test] Allow SourceReaderTestBase.NUM_SPLITS to be overridden (#17064) --- .../kafka/source/reader/KafkaSourceReaderTest.java | 15 +--- .../source/reader/SourceReaderTestBase.java| 43 +- 2 files changed, 36 insertions(+), 22 deletions(-) diff --git a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java index 58b3fdd..f4e3fbd 100644 --- a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java +++ b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java @@ -76,6 +76,7 @@ import static org.apache.flink.connector.kafka.source.metrics.KafkaSourceReaderM import static org.apache.flink.connector.kafka.source.metrics.KafkaSourceReaderMetrics.KAFKA_SOURCE_READER_METRIC_GROUP; import static org.apache.flink.connector.kafka.source.metrics.KafkaSourceReaderMetrics.PARTITION_GROUP; import static org.apache.flink.connector.kafka.source.metrics.KafkaSourceReaderMetrics.TOPIC_GROUP; +import static org.apache.flink.connector.kafka.source.testutils.KafkaSourceTestEnv.NUM_PARTITIONS; import static org.apache.flink.core.testutils.CommonTestUtils.waitUtil; import static org.junit.Assert.assertEquals; import static 
org.junit.Assert.assertTrue; @@ -89,7 +90,7 @@ public class KafkaSourceReaderTest extends SourceReaderTestBase) createReader(Boundedness.CONTINUOUS_UNBOUNDED, groupId)) { reader.addSplits( -getSplits(NUM_SPLITS, NUM_RECORDS_PER_SPLIT, Boundedness.CONTINUOUS_UNBOUNDED)); +getSplits(numSplits, NUM_RECORDS_PER_SPLIT, Boundedness.CONTINUOUS_UNBOUNDED)); ValidatingSourceOutput output = new ValidatingSourceOutput(); long checkpointId = 0; do { @@ -204,7 +209,7 @@ public class KafkaSourceReaderTest extends SourceReaderTestBase assertEquals(NUM_RECORDS_PER_SPLIT, offsetAndMetadata.offset())); @@ -480,7 +485,7 @@ public class KafkaSourceReaderTest extends SourceReaderTestBase> getRecords() { List> records = new ArrayList<>(); -for (int part = 0; part < NUM_SPLITS; part++) { +for (int part = 0; part < NUM_PARTITIONS; part++) { for (int i = 0; i < NUM_RECORDS_PER_SPLIT; i++) { records.add( new ProducerRecord<>( diff --git a/flink-test-utils-parent/flink-connector-test-utils/src/main/java/org/apache/flink/connector/testutils/source/reader/SourceReaderTestBase.java b/flink-test-utils-parent/flink-connector-test-utils/src/main/java/org/apache/flink/connector/testutils/source/reader/SourceReaderTestBase.java index c109aae..462c5b2 100644 --- a/flink-test-utils-parent/flink-connector-test-utils/src/main/java/org/apache/flink/connector/testutils/source/reader/SourceReaderTestBase.java +++ b/flink-test-utils-parent/flink-connector-test-utils/src/main/java/org/apache/flink/connector/testutils/source/reader/SourceReaderTestBase.java @@ -49,9 +49,18 @@ import static org.junit.Assert.assertFalse; */ public abstract class SourceReaderTestBase extends TestLogger { -protected static final int NUM_SPLITS = 10; +protected final int numSplits; +protected final int totalNumRecords; protected static final int NUM_RECORDS_PER_SPLIT = 10; -protected static final int TOTAL_NUM_RECORDS = NUM_RECORDS_PER_SPLIT * NUM_SPLITS; + +public SourceReaderTestBase() { +this.numSplits = getNumSplits(); 
+this.totalNumRecords = this.numSplits * NUM_RECORDS_PER_SPLIT; +} + +protected int getNumSplits() { +return 10; +} @Rule public ExpectedException expectedException = ExpectedException.none(); @@ -68,9 +77,9 @@ public abstract class SourceReaderTestBase extends T @Test public void testRead() throws Exception { try (SourceReader reader = createReader()) { -reader.addSplits(getSplits(NUM_SPLITS, NUM_RECORDS_PER_SPLIT, Boundedness.BOUNDED)); +reader.addSplits(getSplits(numSplits, NUM_RECORDS_PER_SPLIT, Boundedness.BOUNDED)); Validati
[flink] branch release-1.14 updated (da82cb1 -> b26f7e7)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch release-1.14 in repository https://gitbox.apache.org/repos/asf/flink.git. from da82cb1 [FLINK-24155][documentation] Sync Chinese version of documentation to configure CheckpointFailureManager (#17173) (#17252) add 52a26c8 [hotfix][connector/common] Expose splitFinishedHook in SplitFetcher for fine-grained testing add b26f7e7 [FLINK-23773][connector/kafka] Mark empty splits as finished to cleanup states in SplitFetcher No new revisions were added by this update. Summary of changes: .../SingleThreadMultiplexSourceReaderBase.java | 3 +- .../reader/fetcher/SingleThreadFetcherManager.java | 21 +++ .../base/source/reader/fetcher/SplitFetcher.java | 6 +- .../source/reader/fetcher/SplitFetcherManager.java | 26 - .../source/reader/fetcher/SplitFetcherTest.java| 6 +- .../flink/connector/kafka/source/KafkaSource.java | 14 - .../source/reader/KafkaPartitionSplitReader.java | 31 +-- .../kafka/source/reader/KafkaSourceReader.java | 10 +--- .../reader/fetcher/KafkaSourceFetcherManager.java | 8 ++- .../kafka/source/KafkaSourceTestUtils.java | 47 .../reader/KafkaPartitionSplitReaderTest.java | 33 +++ .../kafka/source/reader/KafkaSourceReaderTest.java | 64 +++--- 12 files changed, 237 insertions(+), 32 deletions(-) create mode 100644 flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/KafkaSourceTestUtils.java
[flink] 02/02: [FLINK-23773][connector/kafka] Mark empty splits as finished to cleanup states in SplitFetcher
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch release-1.13 in repository https://gitbox.apache.org/repos/asf/flink.git commit 763ac52092ba70dfef989d18b711400b437e6e09 Author: Qingsheng Ren AuthorDate: Mon Aug 23 11:26:44 2021 +0800 [FLINK-23773][connector/kafka] Mark empty splits as finished to cleanup states in SplitFetcher (cherry picked from commit fe17ca6042c570ce603bf4308775f61db1d515c9) --- .../source/reader/KafkaPartitionSplitReader.java | 31 ++--- .../reader/KafkaPartitionSplitReaderTest.java | 34 +++ .../kafka/source/reader/KafkaSourceReaderTest.java | 39 ++ 3 files changed, 99 insertions(+), 5 deletions(-) diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReader.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReader.java index e5e1425..e13e10e 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReader.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReader.java @@ -56,6 +56,7 @@ import java.util.Map; import java.util.Properties; import java.util.Set; import java.util.StringJoiner; +import java.util.stream.Collectors; /** * A {@link SplitReader} implementation that reads records from Kafka partitions. 
@@ -77,6 +78,9 @@ public class KafkaPartitionSplitReader private final int subtaskId; private final KafkaSourceReaderMetrics kafkaSourceReaderMetrics; +// Tracking empty splits that has not been added to finished splits in fetch() +private final Set emptySplits = new HashSet<>(); + public KafkaPartitionSplitReader( Properties props, KafkaRecordDeserializationSchema deserializationSchema, @@ -174,6 +178,14 @@ public class KafkaPartitionSplitReader tp, recordsFromPartition.get(recordsFromPartition.size() - 1).offset()); } } + +// Some splits are discovered as empty when handling split additions. These splits should be +// added to finished splits to clean up states in split fetcher and source reader. +if (!emptySplits.isEmpty()) { +recordsBySplits.finishedSplits.addAll(emptySplits); +emptySplits.clear(); +} + // Unassign the partitions that has finished. if (!finishedPartitions.isEmpty()) { unassignPartitions(finishedPartitions); @@ -342,15 +354,24 @@ public class KafkaPartitionSplitReader } private void removeEmptySplits() { -List emptySplits = new ArrayList<>(); +List emptyPartitions = new ArrayList<>(); // If none of the partitions have any records, for (TopicPartition tp : consumer.assignment()) { if (consumer.position(tp) >= getStoppingOffset(tp)) { -emptySplits.add(tp); +emptyPartitions.add(tp); } } -if (!emptySplits.isEmpty()) { -unassignPartitions(emptySplits); +if (!emptyPartitions.isEmpty()) { +LOG.debug( +"These assigning splits are empty and will be marked as finished in later fetch: {}", +emptyPartitions); +// Add empty partitions to empty split set for later cleanup in fetch() +emptySplits.addAll( +emptyPartitions.stream() +.map(KafkaPartitionSplit::toSplitId) +.collect(Collectors.toSet())); +// Un-assign partitions from Kafka consumer +unassignPartitions(emptyPartitions); } } @@ -366,7 +387,7 @@ public class KafkaPartitionSplitReader "[%s, start:%d, stop: %d]", split.getTopicPartition(), startingOffset, stoppingOffset)); } -LOG.debug("SplitsChange 
handling result: {}", splitsInfo.toString()); +LOG.debug("SplitsChange handling result: {}", splitsInfo); } } diff --git a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReaderTest.java b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReaderTest.java index 2a477dd..d64225f 100644 --- a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReaderTest.java +++ b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reade
[flink] branch release-1.13 updated (1af7731 -> 763ac52)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch release-1.13 in repository https://gitbox.apache.org/repos/asf/flink.git. from 1af7731 [FLINK-22971][tests] Bump testcontainers to 1.16.0 new fdc2fb2 [hotfix][connector/common] Expose splitFinishedHook in SplitFetcher for fine-grained testing new 763ac52 [FLINK-23773][connector/kafka] Mark empty splits as finished to cleanup states in SplitFetcher The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .../SingleThreadMultiplexSourceReaderBase.java | 3 +- .../reader/fetcher/SingleThreadFetcherManager.java | 21 +++ .../base/source/reader/fetcher/SplitFetcher.java | 6 +- .../source/reader/fetcher/SplitFetcherManager.java | 26 - .../source/reader/fetcher/SplitFetcherTest.java| 6 +- .../flink/connector/kafka/source/KafkaSource.java | 14 - .../source/reader/KafkaPartitionSplitReader.java | 31 -- .../kafka/source/reader/KafkaSourceReader.java | 10 +--- .../reader/fetcher/KafkaSourceFetcherManager.java | 8 ++- .../kafka/source/KafkaSourceTestUtils.java | 47 +++ .../reader/KafkaPartitionSplitReaderTest.java | 34 +++ .../kafka/source/reader/KafkaSourceReaderTest.java | 66 +++--- 12 files changed, 240 insertions(+), 32 deletions(-) create mode 100644 flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/KafkaSourceTestUtils.java
[flink] 01/02: [hotfix][connector/common] Expose splitFinishedHook in SplitFetcher for fine-grained testing
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch release-1.13 in repository https://gitbox.apache.org/repos/asf/flink.git commit fdc2fb26622254b57f37fcfd405db2cbe0c71213 Author: Qingsheng Ren AuthorDate: Mon Aug 23 12:13:32 2021 +0800 [hotfix][connector/common] Expose splitFinishedHook in SplitFetcher for fine-grained testing (cherry picked from commit 754b744c2a3d4a15c2197a2df4f92c19bd3e33b7) --- .../SingleThreadMultiplexSourceReaderBase.java | 3 +- .../reader/fetcher/SingleThreadFetcherManager.java | 21 ++ .../base/source/reader/fetcher/SplitFetcher.java | 6 ++- .../source/reader/fetcher/SplitFetcherManager.java | 26 +++- .../source/reader/fetcher/SplitFetcherTest.java| 6 ++- .../flink/connector/kafka/source/KafkaSource.java | 14 ++- .../kafka/source/reader/KafkaSourceReader.java | 10 + .../reader/fetcher/KafkaSourceFetcherManager.java | 8 +++- .../kafka/source/KafkaSourceTestUtils.java | 47 ++ .../kafka/source/reader/KafkaSourceReaderTest.java | 27 - 10 files changed, 141 insertions(+), 27 deletions(-) diff --git a/flink-connectors/flink-connector-base/src/main/java/org/apache/flink/connector/base/source/reader/SingleThreadMultiplexSourceReaderBase.java b/flink-connectors/flink-connector-base/src/main/java/org/apache/flink/connector/base/source/reader/SingleThreadMultiplexSourceReaderBase.java index 377a72d..e3b8d43 100644 --- a/flink-connectors/flink-connector-base/src/main/java/org/apache/flink/connector/base/source/reader/SingleThreadMultiplexSourceReaderBase.java +++ b/flink-connectors/flink-connector-base/src/main/java/org/apache/flink/connector/base/source/reader/SingleThreadMultiplexSourceReaderBase.java @@ -26,7 +26,6 @@ import org.apache.flink.connector.base.source.reader.fetcher.SingleThreadFetcher import org.apache.flink.connector.base.source.reader.splitreader.SplitReader; import org.apache.flink.connector.base.source.reader.synchronization.FutureCompletingBlockingQueue; -import java.util.Collection; 
import java.util.function.Supplier; /** @@ -47,7 +46,7 @@ import java.util.function.Supplier; * The class must override the methods to convert back and forth between the immutable splits * ({@code SplitT}) and the mutable split state representation ({@code SplitStateT}). * Finally, the reader must decide what to do when it starts ({@link #start()}) or when a - * split is finished ({@link #onSplitFinished(Collection)}). + * split is finished ({@link #onSplitFinished(java.util.Map)}). * * * @param The type of the records (the raw type that typically contains checkpointing diff --git a/flink-connectors/flink-connector-base/src/main/java/org/apache/flink/connector/base/source/reader/fetcher/SingleThreadFetcherManager.java b/flink-connectors/flink-connector-base/src/main/java/org/apache/flink/connector/base/source/reader/fetcher/SingleThreadFetcherManager.java index 91d0d4d..2abed2e 100644 --- a/flink-connectors/flink-connector-base/src/main/java/org/apache/flink/connector/base/source/reader/fetcher/SingleThreadFetcherManager.java +++ b/flink-connectors/flink-connector-base/src/main/java/org/apache/flink/connector/base/source/reader/fetcher/SingleThreadFetcherManager.java @@ -18,13 +18,16 @@ package org.apache.flink.connector.base.source.reader.fetcher; +import org.apache.flink.annotation.VisibleForTesting; import org.apache.flink.api.connector.source.SourceSplit; import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds; import org.apache.flink.connector.base.source.reader.SourceReaderBase; import org.apache.flink.connector.base.source.reader.splitreader.SplitReader; import org.apache.flink.connector.base.source.reader.synchronization.FutureCompletingBlockingQueue; +import java.util.Collection; import java.util.List; +import java.util.function.Consumer; import java.util.function.Supplier; /** @@ -54,6 +57,24 @@ public class SingleThreadFetcherManager super(elementsQueue, splitReaderSupplier); } +/** + * Creates a new SplitFetcherManager with a single I/O 
threads. + * + * @param elementsQueue The queue that is used to hand over data from the I/O thread (the + * fetchers) to the reader (which emits the records and book-keeps the state. This must be + * the same queue instance that is also passed to the {@link SourceReaderBase}. + * @param splitReaderSupplier The factory for the split reader that connects to the source + * system. + * @param splitFinishedHook Hook for handling finished splits in split fetchers + */ +@VisibleForTesting +public SingleThreadFetcherManager( +FutureCompletingBlockingQueue> elementsQueue, +Supplier> splitReaderSupplier, +Consumer> splitFinishedHook) { +super(elementsQueue, splitReade
[flink] branch master updated (125cb70 -> d4c483f)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/flink.git. from 125cb70 [FLINK-24196][docs-zh] Translate "EXPLAIN Statements" page of "SQL" into Chinese (#17195) add d4c483f [FLINK-24059][Connectors/Common][test] Allow SourceReaderTestBase.NUM_SPLITS to be overridden (#17064) No new revisions were added by this update. Summary of changes: .../kafka/source/reader/KafkaSourceReaderTest.java | 15 +--- .../source/reader/SourceReaderTestBase.java| 43 +- 2 files changed, 36 insertions(+), 22 deletions(-)
[flink] branch master updated (5b47a81 -> fe17ca6)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/flink.git. from 5b47a81 [FLINK-24090][docs] Added Troubleshooting section with ignoring in-flight data explanation into unaligned checkpoints page add 754b744 [hotfix][connector/common] Expose splitFinishedHook in SplitFetcher for fine-grained testing add fe17ca6 [FLINK-23773][connector/kafka] Mark empty splits as finished to cleanup states in SplitFetcher No new revisions were added by this update. Summary of changes: .../SingleThreadMultiplexSourceReaderBase.java | 3 +- .../reader/fetcher/SingleThreadFetcherManager.java | 21 +++ .../base/source/reader/fetcher/SplitFetcher.java | 6 +- .../source/reader/fetcher/SplitFetcherManager.java | 26 - .../source/reader/fetcher/SplitFetcherTest.java| 6 +- .../flink/connector/kafka/source/KafkaSource.java | 14 - .../source/reader/KafkaPartitionSplitReader.java | 31 +-- .../kafka/source/reader/KafkaSourceReader.java | 10 +--- .../reader/fetcher/KafkaSourceFetcherManager.java | 8 ++- .../kafka/source/KafkaSourceTestUtils.java | 47 .../reader/KafkaPartitionSplitReaderTest.java | 33 +++ .../kafka/source/reader/KafkaSourceReaderTest.java | 64 +++--- 12 files changed, 237 insertions(+), 32 deletions(-) create mode 100644 flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/KafkaSourceTestUtils.java
[flink] branch release-1.14 updated: [FLINK-23971][tests] fix connector testing framework error when comparing records in different splits
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch release-1.14 in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/release-1.14 by this push: new 267b863 [FLINK-23971][tests] fix connector testing framework error when compare records in different splits 267b863 is described below commit 267b863683b23b8b3df29bee55ac58a25ca1fcd0 Author: Hang Ruan AuthorDate: Tue Aug 31 15:53:28 2021 +0800 [FLINK-23971][tests] fix connector testing framework error when compare records in different splits Add split index parameter to generate test data, make sure T.equals(object) return false when records come from differernt splits. --- .../testutils/KafkaSingleTopicExternalContext.java | 8 .../pulsar/testutils/PulsarTestContext.java | 4 ++-- .../cases/MultipleTopicConsumingContext.java| 4 ++-- .../cases/SingleTopicConsumingContext.java | 4 ++-- .../test/common/external/ExternalContext.java | 6 +- .../test/common/testsuites/SourceTestSuiteBase.java | 21 - 6 files changed, 27 insertions(+), 20 deletions(-) diff --git a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/testutils/KafkaSingleTopicExternalContext.java b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/testutils/KafkaSingleTopicExternalContext.java index 81240cf..ad5e31d 100644 --- a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/testutils/KafkaSingleTopicExternalContext.java +++ b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/testutils/KafkaSingleTopicExternalContext.java @@ -155,7 +155,7 @@ public class KafkaSingleTopicExternalContext implements ExternalContext } @Override -public Collection generateTestData(long seed) { +public Collection generateTestData(int splitIndex, long seed) { Random random = new Random(seed); List 
randomStringRecords = new ArrayList<>(); int recordNum = @@ -163,15 +163,15 @@ public class KafkaSingleTopicExternalContext implements ExternalContext + NUM_RECORDS_LOWER_BOUND; for (int i = 0; i < recordNum; i++) { int stringLength = random.nextInt(50) + 1; -randomStringRecords.add(generateRandomString(stringLength, random)); +randomStringRecords.add(generateRandomString(splitIndex, stringLength, random)); } return randomStringRecords; } -private String generateRandomString(int length, Random random) { +private String generateRandomString(int splitIndex, int length, Random random) { String alphaNumericString = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789"; -StringBuilder sb = new StringBuilder(); +StringBuilder sb = new StringBuilder().append(splitIndex).append("-"); for (int i = 0; i < length; ++i) { sb.append(alphaNumericString.charAt(random.nextInt(alphaNumericString.length(; } diff --git a/flink-connectors/flink-connector-pulsar/src/test/java/org/apache/flink/connector/pulsar/testutils/PulsarTestContext.java b/flink-connectors/flink-connector-pulsar/src/test/java/org/apache/flink/connector/pulsar/testutils/PulsarTestContext.java index 6733439..a80d721 100644 --- a/flink-connectors/flink-connector-pulsar/src/test/java/org/apache/flink/connector/pulsar/testutils/PulsarTestContext.java +++ b/flink-connectors/flink-connector-pulsar/src/test/java/org/apache/flink/connector/pulsar/testutils/PulsarTestContext.java @@ -43,7 +43,7 @@ public abstract class PulsarTestContext implements ExternalContext { // Helper methods for generating data. 
-protected List generateStringTestData(long seed) { +protected List generateStringTestData(int splitIndex, long seed) { Random random = new Random(seed); int recordNum = random.nextInt(NUM_RECORDS_UPPER_BOUND - NUM_RECORDS_LOWER_BOUND) @@ -52,7 +52,7 @@ public abstract class PulsarTestContext implements ExternalContext { for (int i = 0; i < recordNum; i++) { int stringLength = random.nextInt(50) + 1; -records.add(randomAlphanumeric(stringLength)); +records.add(splitIndex + "-" + randomAlphanumeric(stringLength)); } return records; diff --git a/flink-connectors/flink-connector-pulsar/src/test/java/org/apache/flink/connector/pulsar/testutils/cases/MultipleTopicConsumingContext.java b/flink-connectors/flink-connector-pulsar/src/test/java/org/apache/flink/connector/pulsar/testutils/cases/MultipleTopicConsumingContext.java index 60a0bfba..7ce676c 100644 --- a/flink-connector
[flink] branch master updated: [FLINK-23971][tests] fix connector testing framework error when comparing records in different splits
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/master by this push: new ad052cc [FLINK-23971][tests] fix connector testing framework error when compare records in different splits ad052cc is described below commit ad052cc056c7d6e63d8356dbd22d6a98b54743c3 Author: Hang Ruan AuthorDate: Tue Aug 31 15:53:28 2021 +0800 [FLINK-23971][tests] fix connector testing framework error when compare records in different splits Add split index parameter to generate test data, make sure T.equals(object) return false when records come from differernt splits. --- .../testutils/KafkaSingleTopicExternalContext.java | 8 .../pulsar/testutils/PulsarTestContext.java | 4 ++-- .../cases/MultipleTopicConsumingContext.java| 4 ++-- .../cases/SingleTopicConsumingContext.java | 4 ++-- .../test/common/external/ExternalContext.java | 6 +- .../test/common/testsuites/SourceTestSuiteBase.java | 21 - 6 files changed, 27 insertions(+), 20 deletions(-) diff --git a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/testutils/KafkaSingleTopicExternalContext.java b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/testutils/KafkaSingleTopicExternalContext.java index 81240cf..ad5e31d 100644 --- a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/testutils/KafkaSingleTopicExternalContext.java +++ b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/testutils/KafkaSingleTopicExternalContext.java @@ -155,7 +155,7 @@ public class KafkaSingleTopicExternalContext implements ExternalContext } @Override -public Collection generateTestData(long seed) { +public Collection generateTestData(int splitIndex, long seed) { Random random = new Random(seed); List randomStringRecords = new 
ArrayList<>(); int recordNum = @@ -163,15 +163,15 @@ public class KafkaSingleTopicExternalContext implements ExternalContext + NUM_RECORDS_LOWER_BOUND; for (int i = 0; i < recordNum; i++) { int stringLength = random.nextInt(50) + 1; -randomStringRecords.add(generateRandomString(stringLength, random)); +randomStringRecords.add(generateRandomString(splitIndex, stringLength, random)); } return randomStringRecords; } -private String generateRandomString(int length, Random random) { +private String generateRandomString(int splitIndex, int length, Random random) { String alphaNumericString = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789"; -StringBuilder sb = new StringBuilder(); +StringBuilder sb = new StringBuilder().append(splitIndex).append("-"); for (int i = 0; i < length; ++i) { sb.append(alphaNumericString.charAt(random.nextInt(alphaNumericString.length(; } diff --git a/flink-connectors/flink-connector-pulsar/src/test/java/org/apache/flink/connector/pulsar/testutils/PulsarTestContext.java b/flink-connectors/flink-connector-pulsar/src/test/java/org/apache/flink/connector/pulsar/testutils/PulsarTestContext.java index 6733439..a80d721 100644 --- a/flink-connectors/flink-connector-pulsar/src/test/java/org/apache/flink/connector/pulsar/testutils/PulsarTestContext.java +++ b/flink-connectors/flink-connector-pulsar/src/test/java/org/apache/flink/connector/pulsar/testutils/PulsarTestContext.java @@ -43,7 +43,7 @@ public abstract class PulsarTestContext implements ExternalContext { // Helper methods for generating data. 
-protected List generateStringTestData(long seed) { +protected List generateStringTestData(int splitIndex, long seed) { Random random = new Random(seed); int recordNum = random.nextInt(NUM_RECORDS_UPPER_BOUND - NUM_RECORDS_LOWER_BOUND) @@ -52,7 +52,7 @@ public abstract class PulsarTestContext implements ExternalContext { for (int i = 0; i < recordNum; i++) { int stringLength = random.nextInt(50) + 1; -records.add(randomAlphanumeric(stringLength)); +records.add(splitIndex + "-" + randomAlphanumeric(stringLength)); } return records; diff --git a/flink-connectors/flink-connector-pulsar/src/test/java/org/apache/flink/connector/pulsar/testutils/cases/MultipleTopicConsumingContext.java b/flink-connectors/flink-connector-pulsar/src/test/java/org/apache/flink/connector/pulsar/testutils/cases/MultipleTopicConsumingContext.java index 60a0bfba..7ce676c 100644 --- a/flink-connectors/flink
[flink] branch release-1.13 updated: [FLINK-23686][connector/kafka] Increase counter "commitsSucceeded" per commit instead of per partition
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch release-1.13 in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/release-1.13 by this push: new 5ce61a3 [FLINK-23686][connector/kafka] Increase counter "commitsSucceeded" per commit instead of per partition 5ce61a3 is described below commit 5ce61a31ff8a184ce3f8457471ffc6f5f4439b5d Author: Qingsheng Ren AuthorDate: Sat Aug 14 19:03:24 2021 +0800 [FLINK-23686][connector/kafka] Increase counter "commitsSucceeded" per commit instead of per partition --- .../source/metrics/KafkaSourceReaderMetrics.java | 6 - .../kafka/source/reader/KafkaSourceReader.java | 1 + .../metrics/KafkaSourceReaderMetricsTest.java | 12 +- .../kafka/source/reader/KafkaSourceReaderTest.java | 27 ++ 4 files changed, 34 insertions(+), 12 deletions(-) diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetrics.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetrics.java index df1abe5..356409f 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetrics.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetrics.java @@ -137,10 +137,14 @@ public class KafkaSourceReaderMetrics { */ public void recordCommittedOffset(TopicPartition tp, long offset) { checkTopicPartitionTracked(tp); -commitsSucceeded.inc(); offsets.get(tp).committedOffset = offset; } +/** Mark a successful commit. */ +public void recordSucceededCommit() { +commitsSucceeded.inc(); +} + /** Mark a failure commit. 
*/ public void recordFailedCommit() { commitsFailed.inc(); diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java index 3a00be5..287dadf 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java @@ -130,6 +130,7 @@ public class KafkaSourceReader LOG.debug( "Successfully committed offsets for checkpoint {}", checkpointId); + kafkaSourceReaderMetrics.recordSucceededCommit(); // If the finished topic partition has been committed, we remove it // from the offsets of the finished splits map. Map committedPartitions = diff --git a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetricsTest.java b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetricsTest.java index c5dfdf3..1e66747 100644 --- a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetricsTest.java +++ b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetricsTest.java @@ -82,7 +82,17 @@ public class KafkaSourceReaderMetricsTest { assertCommittedOffset(BAR_1, 15513L, metricListener); assertEquals( -4L, +0L, +metricListener +.getCounter( + KafkaSourceReaderMetrics.KAFKA_SOURCE_READER_METRIC_GROUP, + KafkaSourceReaderMetrics.COMMITS_SUCCEEDED_METRIC_COUNTER) +.getCount()); + +kafkaSourceReaderMetrics.recordSucceededCommit(); + +assertEquals( +1L, metricListener .getCounter( KafkaSourceReaderMetrics.KAFKA_SOURCE_READER_METRIC_GROUP, 
diff --git a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java index 18a024b..53f61e4 100644 --- a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java
[flink] branch master updated (5289b0e -> 83b9ee8)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/flink.git. from 5289b0e [FLINK-23818][python][docs] Add documentation about tgz files for python archives add 83b9ee8 [FLINK-22702][tests] Add test data supplier which provide null timestamp field to kafka connector tests No new revisions were added by this update. Summary of changes: .../kafka/source/split/KafkaPartitionSplit.java| 4 +-- .../connector/kafka/source/KafkaSourceITCase.java | 18 +++ .../source/enumerator/KafkaEnumeratorTest.java | 4 +-- .../initializer/OffsetsInitializerTest.java| 4 +-- .../reader/KafkaPartitionSplitReaderTest.java | 4 +-- .../kafka/source/testutils/KafkaSourceTestEnv.java | 37 -- 6 files changed, 55 insertions(+), 16 deletions(-)
[flink] branch master updated: [FLINK-23686][connector/kafka] Increase counter "commitsSucceeded" per commit instead of per partition
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/master by this push: new 62931a1 [FLINK-23686][connector/kafka] Increase counter "commitsSucceeded" per commit instead of per partition 62931a1 is described below commit 62931a1665e6a6976d088ed49375f9fdf00229d9 Author: Qingsheng Ren AuthorDate: Sat Aug 14 19:03:24 2021 +0800 [FLINK-23686][connector/kafka] Increase counter "commitsSucceeded" per commit instead of per partition --- .../source/metrics/KafkaSourceReaderMetrics.java | 6 - .../kafka/source/reader/KafkaSourceReader.java | 1 + .../metrics/KafkaSourceReaderMetricsTest.java | 12 +- .../kafka/source/reader/KafkaSourceReaderTest.java | 27 ++ 4 files changed, 34 insertions(+), 12 deletions(-) diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetrics.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetrics.java index df1abe5..356409f 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetrics.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetrics.java @@ -137,10 +137,14 @@ public class KafkaSourceReaderMetrics { */ public void recordCommittedOffset(TopicPartition tp, long offset) { checkTopicPartitionTracked(tp); -commitsSucceeded.inc(); offsets.get(tp).committedOffset = offset; } +/** Mark a successful commit. */ +public void recordSucceededCommit() { +commitsSucceeded.inc(); +} + /** Mark a failure commit. 
*/ public void recordFailedCommit() { commitsFailed.inc(); diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java index 3a00be5..287dadf 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java @@ -130,6 +130,7 @@ public class KafkaSourceReader LOG.debug( "Successfully committed offsets for checkpoint {}", checkpointId); + kafkaSourceReaderMetrics.recordSucceededCommit(); // If the finished topic partition has been committed, we remove it // from the offsets of the finished splits map. Map committedPartitions = diff --git a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetricsTest.java b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetricsTest.java index b16a835..c7df9b6 100644 --- a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetricsTest.java +++ b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetricsTest.java @@ -83,7 +83,17 @@ public class KafkaSourceReaderMetricsTest { assertCommittedOffset(BAR_1, 15513L, metricListener); assertEquals( -4L, +0L, +metricListener +.getCounter( + KafkaSourceReaderMetrics.KAFKA_SOURCE_READER_METRIC_GROUP, + KafkaSourceReaderMetrics.COMMITS_SUCCEEDED_METRIC_COUNTER) +.getCount()); + +kafkaSourceReaderMetrics.recordSucceededCommit(); + +assertEquals( +1L, metricListener .getCounter( KafkaSourceReaderMetrics.KAFKA_SOURCE_READER_METRIC_GROUP, 
diff --git a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java index 96f0bed..16e3d6a 100644 --- a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java +++ b/flink-co
[flink] 03/03: [FLINK-22766][connector/kafka] Report offsets and Kafka consumer metrics in Flink metric group
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch release-1.13 in repository https://gitbox.apache.org/repos/asf/flink.git commit 2c455f324b9ec7ef053253cf4904413b1e5f7a98 Author: Qingsheng Ren AuthorDate: Tue Jun 8 11:57:52 2021 +0800 [FLINK-22766][connector/kafka] Report offsets and Kafka consumer metrics in Flink metric group (cherry picked from commit b094a932845db5539fc07b032d49d0bcefd15df2) --- docs/content/docs/connectors/datastream/kafka.md | 36 .../flink/connector/kafka/source/KafkaSource.java | 11 +- .../connector/kafka/source/KafkaSourceOptions.java | 9 +- .../source/metrics/KafkaSourceReaderMetrics.java | 183 + .../source/reader/KafkaPartitionSplitReader.java | 33 +++- .../kafka/source/reader/KafkaSourceReader.java | 11 +- .../metrics/KafkaSourceReaderMetricsTest.java | 166 +++ .../reader/KafkaPartitionSplitReaderTest.java | 8 +- .../kafka/source/reader/KafkaSourceReaderTest.java | 122 +- .../source/reader/TestingReaderContext.java| 7 +- 10 files changed, 575 insertions(+), 11 deletions(-) diff --git a/docs/content/docs/connectors/datastream/kafka.md b/docs/content/docs/connectors/datastream/kafka.md index 1f21e7d..d540004 100644 --- a/docs/content/docs/connectors/datastream/kafka.md +++ b/docs/content/docs/connectors/datastream/kafka.md @@ -153,6 +153,8 @@ KafkaSource has following options for configuration: - ```partition.discovery.interval.ms``` defines the interval im milliseconds for Kafka source to discover new partitions. See Dynamic Partition Discovery below for more details. +- ```register.consumer.metrics``` specifies whether to register metrics of KafkaConsumer in Flink +metric group For configurations of KafkaConsumer, you can refer to http://kafka.apache.org/documentation/#consumerconfigs";>Apache Kafka documentation @@ -210,6 +212,40 @@ the properties of Kafka consumer. Note that Kafka source does **NOT** rely on committed offsets for fault tolerance. 
Committing offset is only for exposing the progress of consumer and consuming group for monitoring. +### Monitoring +Kafka source exposes metrics in Flink's metric group for monitoring and diagnosing. + Scope of Metric +All metrics of Kafka source reader are registered under group ```KafkaSourceReader```, which is a +child group of operator metric group. Metrics related to a specific topic partition will be registered +in the group ```KafkaSourceReader.topic..partition.```. + +For example, current consuming offset of topic "my-topic" and partition 1 will be reported in metric: +```.operator.KafkaSourceReader.topic.my-topic.partition.1.currentOffset``` , + +and number of successful commits will be reported in metric: +```.operator.KafkaSourceReader.commitsSucceeded``` . + + List of Metrics + +|Metric Name | Description | Scope | +|::|:---:|:-:| +| currentOffset | Current consuming offset of the topic partition | TopicPartition | +| committedOffset | Committed offset of the topic partition | TopicPartition | +| commitsSucceeded | Number of successful commits| KafkaSourceReader | +| commitsFailed | Number of failed commits| KafkaSourceReader | + + Kafka Consumer Metrics +All metrics of Kafka consumer are also registered under group ```KafkaSourceReader.KafkaConsumer```. +For example, Kafka consumer metric "records-consumed-total" will be reported in metric: +```.operator.KafkaSourceReader.KafkaConsumer.records-consumed-total``` . + +You can configure whether to register Kafka consumer's metric by configuring option +```register.consumer.metrics```. This option will be set as true by default. + +For metrics of Kafka consumer, you can refer to +http://kafka.apache.org/documentation/#consumer_monitoring";>Apache Kafka Documentation +for more details. 
+ ### Behind the Scene {{< hint info >}} If you are interested in how Kafka source works under the design of new data source API, you may diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java index dd1c3bf..35fd954 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java @@ -36,6 +36,7 @@ import org.apache.flink.connector.kafka.source.enumerator.KafkaSourceEnumStateSe import org.apache.flink.connector.ka
[flink] 02/03: [hotfix][testutil] Add test utilization for listening metric registration
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch release-1.13 in repository https://gitbox.apache.org/repos/asf/flink.git commit 1e8619e0d5f9d82d987af41ba897370920cb9310 Author: Qingsheng Ren AuthorDate: Tue Jun 29 11:53:50 2021 +0800 [hotfix][testutil] Add test utilization for listening metric registration (cherry picked from commit 8bb629460ae45b841034be660aaace3851f141fe) --- .../flink/metrics/testutils/MetricListener.java| 130 .../flink/metric/testutils/MetricListenerTest.java | 131 + 2 files changed, 261 insertions(+) diff --git a/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/metrics/testutils/MetricListener.java b/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/metrics/testutils/MetricListener.java new file mode 100644 index 000..cbada82 --- /dev/null +++ b/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/metrics/testutils/MetricListener.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.metrics.testutils; + +import org.apache.flink.metrics.Counter; +import org.apache.flink.metrics.Gauge; +import org.apache.flink.metrics.Histogram; +import org.apache.flink.metrics.Meter; +import org.apache.flink.metrics.Metric; +import org.apache.flink.metrics.MetricGroup; +import org.apache.flink.runtime.metrics.groups.GenericMetricGroup; +import org.apache.flink.runtime.metrics.util.TestingMetricRegistry; + +import java.util.HashMap; +import java.util.Map; + +/** + * A MetricListener listens metric and group registration under the provided root metric group, and + * stores them in an internal HashMap for fetching. + */ +public class MetricListener { + +// Constants +public static final String DELIMITER = "."; +public static final String ROOT_METRIC_GROUP_NAME = "rootMetricGroup"; + +// Root metric group +private final MetricGroup rootMetricGroup; + +// Map for storing registered metrics +private final Map metrics = new HashMap<>(); + +public MetricListener() { +TestingMetricRegistry registry = +TestingMetricRegistry.builder() +.setDelimiter(DELIMITER.charAt(0)) +.setRegisterConsumer( +(metric, name, group) -> + this.metrics.put(group.getMetricIdentifier(name), metric)) +.build(); + +this.rootMetricGroup = new GenericMetricGroup(registry, null, ROOT_METRIC_GROUP_NAME); +} + +/** + * Get the root metric group of this listener. Note that only metrics and groups registered + * under this group will be listened. + * + * @return Root metric group + */ +public MetricGroup getMetricGroup() { +return this.rootMetricGroup; +} + +/** + * Get registered {@link Metric} with identifier relative to the root metric group. + * + * For example, identifier of metric "myMetric" registered in group "myGroup" under root + * metric group can be reached by identifier ("myGroup", "myMetric") + * + * @param identifier identifier relative to the root metric group + * @return Registered metric + */ +public T getMetric(Class metricType, String... 
identifier) { +String actualIdentifier = +ROOT_METRIC_GROUP_NAME + DELIMITER + String.join(DELIMITER, identifier); +if (!metrics.containsKey(actualIdentifier)) { +throw new IllegalArgumentException( +String.format("Metric '%s' is not registered", actualIdentifier)); +} +return metricType.cast(metrics.get(actualIdentifier)); +} + +/** + * Get registered {@link Meter} with identifier relative to the root metric group. + * + * @param identifier identifier relative to the root metric group + * @return Registered meter + */ +public Meter getMeter(String... identifier) { +return
[flink] 01/03: [FLINK-22722][docs/kafka] Add documentation for Kafka new source (#15974)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch release-1.13 in repository https://gitbox.apache.org/repos/asf/flink.git commit ef21d506bebee0ac6466d4abe2f844ef7c15cde5 Author: Qingsheng Ren AuthorDate: Tue Jun 1 08:37:57 2021 +0800 [FLINK-22722][docs/kafka] Add documentation for Kafka new source (#15974) (cherry picked from commit b582991b8b2b8dadb89e71d5002c4a9cc2055e34) --- docs/content/docs/connectors/datastream/kafka.md | 215 ++- 1 file changed, 212 insertions(+), 3 deletions(-) diff --git a/docs/content/docs/connectors/datastream/kafka.md b/docs/content/docs/connectors/datastream/kafka.md index 192d185..1f21e7d 100644 --- a/docs/content/docs/connectors/datastream/kafka.md +++ b/docs/content/docs/connectors/datastream/kafka.md @@ -38,10 +38,219 @@ For details on Kafka compatibility, please refer to the official [Kafka document {{< artifact flink-connector-kafka withScalaVersion >}} +if you are using Kafka source, ```flink-connector-base``` is also required as dependency: + +{{< artifact flink-connector-base >}} + Flink's streaming connectors are not currently part of the binary distribution. See how to link with them for cluster execution [here]({{< ref "docs/dev/datastream/project-configuration" >}}). -## Kafka Consumer +## Kafka Source +{{< hint info >}} +This part describes the Kafka source based on the new +[data source]({{< ref "docs/dev/datastream/sources.md" >}}) API. +{{< /hint >}} + +### Usage +Kafka source provides a builder class for constructing instance of KafkaSource. The code snippet +below shows how to build a KafkaSource to consume messages from the earliest offset of topic +"input-topic", with consumer group "my-group" and deserialize only the value of message as string. 
+```java +KafkaSource source = KafkaSource.builder() +.setBootstrapServers(brokers) +.setTopics("input-topic") +.setGroupId("my-group") +.setStartingOffsets(OffsetsInitializer.earliest()) +.setValueOnlyDeserializer(new SimpleStringSchema()) +.build(); + +env.fromSource(source, WatermarkStrategy.noWatermarks(), "Kafka Source"); +``` +The following properties are **required** for building a KafkaSource: +- Bootstrap servers, configured by ```setBootstrapServers(String)``` +- Consumer group ID, configured by ```setGroupId(String)``` +- Topics / partitions to subscribe, see the following + Topic-partition subscription for more details. +- Deserializer to parse Kafka messages, see the following + Deserializer for more details. + +### Topic-partition Subscription +Kafka source provide 3 ways of topic-partition subscription: +- Topic list, subscribing messages from all partitions in a list of topics. For example: + ```java + KafkaSource.builder().setTopics("topic-a", "topic-b") + ``` +- Topic pattern, subscribing messages from all topics whose name matches the provided regular + expression. For example: + ```java + KafkaSource.builder().setTopicPattern("topic.*") + ``` +- Partition set, subscribing partitions in the provided partition set. For example: + ```java + final HashSet partitionSet = new HashSet<>(Arrays.asList( + new TopicPartition("topic-a", 0),// Partition 0 of topic "topic-a" + new TopicPartition("topic-b", 5))); // Partition 5 of topic "topic-b" + KafkaSource.builder().setPartitions(partitionSet) + ``` +### Deserializer +A deserializer is required for parsing Kafka messages. Deserializer (Deserialization schema) can be +configured by ```setDeserializer(KakfaRecordDeserializationSchema)```, where +```KafkaRecordDeserializationSchema``` defines how to deserialize a Kafka ```ConsumerRecord```. 
+ +If only the value of Kafka ```ConsumerRecord``` is needed, you can use +```setValueOnlyDeserializer(DeserializationSchema)``` in the builder, where +```DeserializationSchema``` defines how to deserialize binaries of Kafka message value. + +You can also use a https://kafka.apache.org/24/javadoc/org/apache/kafka/common/serialization/Deserializer.html";>```Kafka Deserializer``` +for deserializing Kafka message value. For example using ```StringDeserializer``` for deserializing +Kafka message value as string: +```java +import org.apache.kafka.common.serialization.StringDeserializer; + +KafkaSource.builder() + .setDeserializer(KafkaRecordDeserializationSchema.valueOnly(StringSerializer.class)); +``` + +### Starting Offset +Kafka source is able to consume messages starting from different offsets by specifying +```OffsetsInitializer```. Built-in initializers include: + +```java +KafkaSource.builder() +// Start from committed offset of the consuming group, without reset strategy +.setStartingOffsets(OffsetsInitializer.committedOffsets()) +
[flink] branch release-1.13 updated (4d86534 -> 2c455f3)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch release-1.13 in repository https://gitbox.apache.org/repos/asf/flink.git. from 4d86534 [FLINK-23359][test] Fix the number of available slots in testResourceCanBeAllocatedForDifferentJobAfterFree new ef21d50 [FLINK-22722][docs/kafka] Add documentation for Kafka new source (#15974) new 1e8619e [hotfix][testutil] Add test utilization for listening metric registration new 2c455f3 [FLINK-22766][connector/kafka] Report offsets and Kafka consumer metrics in Flink metric group The 3 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: docs/content/docs/connectors/datastream/kafka.md | 251 - .../flink/connector/kafka/source/KafkaSource.java | 11 +- .../connector/kafka/source/KafkaSourceOptions.java | 9 +- .../source/metrics/KafkaSourceReaderMetrics.java | 183 +++ .../source/reader/KafkaPartitionSplitReader.java | 33 ++- .../kafka/source/reader/KafkaSourceReader.java | 11 +- .../metrics/KafkaSourceReaderMetricsTest.java | 166 ++ .../reader/KafkaPartitionSplitReaderTest.java | 8 +- .../kafka/source/reader/KafkaSourceReaderTest.java | 122 +- .../source/reader/TestingReaderContext.java| 7 +- .../flink/metrics/testutils/MetricListener.java| 130 +++ .../flink/metric/testutils/MetricListenerTest.java | 131 +++ 12 files changed, 1048 insertions(+), 14 deletions(-) create mode 100644 flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetrics.java create mode 100644 flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetricsTest.java create mode 100644 flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/metrics/testutils/MetricListener.java create 
mode 100644 flink-test-utils-parent/flink-test-utils/src/test/java/org/apache/flink/metric/testutils/MetricListenerTest.java
[flink] 02/02: [FLINK-22766][connector/kafka] Report offsets and Kafka consumer metrics in Flink metric group
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git commit b094a932845db5539fc07b032d49d0bcefd15df2 Author: Qingsheng Ren AuthorDate: Tue Jun 8 11:57:52 2021 +0800 [FLINK-22766][connector/kafka] Report offsets and Kafka consumer metrics in Flink metric group --- docs/content/docs/connectors/datastream/kafka.md | 36 .../flink/connector/kafka/source/KafkaSource.java | 11 +- .../connector/kafka/source/KafkaSourceOptions.java | 9 +- .../source/metrics/KafkaSourceReaderMetrics.java | 183 + .../source/reader/KafkaPartitionSplitReader.java | 33 +++- .../kafka/source/reader/KafkaSourceReader.java | 11 +- .../metrics/KafkaSourceReaderMetricsTest.java | 156 ++ .../reader/KafkaPartitionSplitReaderTest.java | 8 +- .../kafka/source/reader/KafkaSourceReaderTest.java | 122 +- .../source/reader/TestingReaderContext.java| 7 +- 10 files changed, 565 insertions(+), 11 deletions(-) diff --git a/docs/content/docs/connectors/datastream/kafka.md b/docs/content/docs/connectors/datastream/kafka.md index c52a6f0..afed481 100644 --- a/docs/content/docs/connectors/datastream/kafka.md +++ b/docs/content/docs/connectors/datastream/kafka.md @@ -153,6 +153,8 @@ KafkaSource has following options for configuration: - ```partition.discovery.interval.ms``` defines the interval im milliseconds for Kafka source to discover new partitions. See Dynamic Partition Discovery below for more details. +- ```register.consumer.metrics``` specifies whether to register metrics of KafkaConsumer in Flink +metric group For configurations of KafkaConsumer, you can refer to http://kafka.apache.org/documentation/#consumerconfigs";>Apache Kafka documentation @@ -210,6 +212,40 @@ the properties of Kafka consumer. Note that Kafka source does **NOT** rely on committed offsets for fault tolerance. Committing offset is only for exposing the progress of consumer and consuming group for monitoring. 
+### Monitoring +Kafka source exposes metrics in Flink's metric group for monitoring and diagnosing. + Scope of Metric +All metrics of Kafka source reader are registered under group ```KafkaSourceReader```, which is a +child group of operator metric group. Metrics related to a specific topic partition will be registered +in the group ```KafkaSourceReader.topic..partition.```. + +For example, current consuming offset of topic "my-topic" and partition 1 will be reported in metric: +```.operator.KafkaSourceReader.topic.my-topic.partition.1.currentOffset``` , + +and number of successful commits will be reported in metric: +```.operator.KafkaSourceReader.commitsSucceeded``` . + + List of Metrics + +|Metric Name | Description | Scope | +|::|:---:|:-:| +| currentOffset | Current consuming offset of the topic partition | TopicPartition | +| committedOffset | Committed offset of the topic partition | TopicPartition | +| commitsSucceeded | Number of successful commits| KafkaSourceReader | +| commitsFailed | Number of failed commits| KafkaSourceReader | + + Kafka Consumer Metrics +All metrics of Kafka consumer are also registered under group ```KafkaSourceReader.KafkaConsumer```. +For example, Kafka consumer metric "records-consumed-total" will be reported in metric: +```.operator.KafkaSourceReader.KafkaConsumer.records-consumed-total``` . + +You can configure whether to register Kafka consumer's metric by configuring option +```register.consumer.metrics```. This option will be set as true by default. + +For metrics of Kafka consumer, you can refer to +http://kafka.apache.org/documentation/#consumer_monitoring";>Apache Kafka Documentation +for more details. 
+ ### Behind the Scene {{< hint info >}} If you are interested in how Kafka source works under the design of new data source API, you may diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java index dd1c3bf..35fd954 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java @@ -36,6 +36,7 @@ import org.apache.flink.connector.kafka.source.enumerator.KafkaSourceEnumStateSe import org.apache.flink.connector.kafka.source.enumerator.KafkaSourceEnumerator; import org.apache.flink.connector.kafka.sourc
[flink] branch master updated (284f484 -> b094a93)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/flink.git. from 284f484 [FLINK-23232][python] Use pickle.loads defined in pyflink to avoid race condition of the default pickle new 8bb6294 [hotfix][testutil] Add test utilization for listening metric registration new b094a93 [FLINK-22766][connector/kafka] Report offsets and Kafka consumer metrics in Flink metric group The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: docs/content/docs/connectors/datastream/kafka.md | 36 .../flink/connector/kafka/source/KafkaSource.java | 11 +- .../connector/kafka/source/KafkaSourceOptions.java | 9 +- .../source/metrics/KafkaSourceReaderMetrics.java | 183 + .../source/reader/KafkaPartitionSplitReader.java | 33 +++- .../kafka/source/reader/KafkaSourceReader.java | 11 +- .../metrics/KafkaSourceReaderMetricsTest.java | 156 ++ .../reader/KafkaPartitionSplitReaderTest.java | 8 +- .../kafka/source/reader/KafkaSourceReaderTest.java | 122 +- .../source/reader/TestingReaderContext.java| 7 +- .../flink/metrics/testutils/MetricListener.java| 130 +++ .../flink/metric/testutils/MetricListenerTest.java | 131 +++ 12 files changed, 826 insertions(+), 11 deletions(-) create mode 100644 flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetrics.java create mode 100644 flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetricsTest.java create mode 100644 flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/metrics/testutils/MetricListener.java create mode 100644 
flink-test-utils-parent/flink-test-utils/src/test/java/org/apache/flink/metric/testutils/MetricListenerTest.java
[flink] 01/02: [hotfix][testutil] Add test utilization for listening metric registration
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git commit 8bb629460ae45b841034be660aaace3851f141fe Author: Qingsheng Ren AuthorDate: Tue Jun 29 11:53:50 2021 +0800 [hotfix][testutil] Add test utilization for listening metric registration --- .../flink/metrics/testutils/MetricListener.java| 130 .../flink/metric/testutils/MetricListenerTest.java | 131 + 2 files changed, 261 insertions(+) diff --git a/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/metrics/testutils/MetricListener.java b/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/metrics/testutils/MetricListener.java new file mode 100644 index 000..cbada82 --- /dev/null +++ b/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/metrics/testutils/MetricListener.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.metrics.testutils; + +import org.apache.flink.metrics.Counter; +import org.apache.flink.metrics.Gauge; +import org.apache.flink.metrics.Histogram; +import org.apache.flink.metrics.Meter; +import org.apache.flink.metrics.Metric; +import org.apache.flink.metrics.MetricGroup; +import org.apache.flink.runtime.metrics.groups.GenericMetricGroup; +import org.apache.flink.runtime.metrics.util.TestingMetricRegistry; + +import java.util.HashMap; +import java.util.Map; + +/** + * A MetricListener listens metric and group registration under the provided root metric group, and + * stores them in an internal HashMap for fetching. + */ +public class MetricListener { + +// Constants +public static final String DELIMITER = "."; +public static final String ROOT_METRIC_GROUP_NAME = "rootMetricGroup"; + +// Root metric group +private final MetricGroup rootMetricGroup; + +// Map for storing registered metrics +private final Map metrics = new HashMap<>(); + +public MetricListener() { +TestingMetricRegistry registry = +TestingMetricRegistry.builder() +.setDelimiter(DELIMITER.charAt(0)) +.setRegisterConsumer( +(metric, name, group) -> + this.metrics.put(group.getMetricIdentifier(name), metric)) +.build(); + +this.rootMetricGroup = new GenericMetricGroup(registry, null, ROOT_METRIC_GROUP_NAME); +} + +/** + * Get the root metric group of this listener. Note that only metrics and groups registered + * under this group will be listened. + * + * @return Root metric group + */ +public MetricGroup getMetricGroup() { +return this.rootMetricGroup; +} + +/** + * Get registered {@link Metric} with identifier relative to the root metric group. + * + * For example, identifier of metric "myMetric" registered in group "myGroup" under root + * metric group can be reached by identifier ("myGroup", "myMetric") + * + * @param identifier identifier relative to the root metric group + * @return Registered metric + */ +public T getMetric(Class metricType, String... 
identifier) { +String actualIdentifier = +ROOT_METRIC_GROUP_NAME + DELIMITER + String.join(DELIMITER, identifier); +if (!metrics.containsKey(actualIdentifier)) { +throw new IllegalArgumentException( +String.format("Metric '%s' is not registered", actualIdentifier)); +} +return metricType.cast(metrics.get(actualIdentifier)); +} + +/** + * Get registered {@link Meter} with identifier relative to the root metric group. + * + * @param identifier identifier relative to the root metric group + * @return Registered meter + */ +public Meter getMeter(String... identifier) { +return getMetric(Meter.class, identifier); +} + +/** + * Get registered {@link Count
[flink] branch master updated (53034ea -> 1418a1d)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/flink.git. from 53034ea [FLINK-23010][hive] HivePartitionFetcherContextBase shouldn't list folders to discover new partitions add 07f2705 [FLINK-22147][connector/kafka] Refactor partition discovery logic in Kafka source enumerator add 1418a1d [hotfix][connector/test] Make MockSplitEnumeratorContext implement AutoClosable and shutdown executors at closing No new revisions were added by this update. Summary of changes: .../source/enumerator/KafkaSourceEnumerator.java | 167 + .../enumerator/subscriber/KafkaSubscriber.java | 32 +--- .../subscriber/KafkaSubscriberUtils.java | 43 ++--- .../subscriber/PartitionSetSubscriber.java | 48 +++-- .../enumerator/subscriber/TopicListSubscriber.java | 34 ++-- .../subscriber/TopicPatternSubscriber.java | 39 ++-- .../source/enumerator/KafkaEnumeratorTest.java | 200 + .../enumerator/subscriber/KafkaSubscriberTest.java | 93 ++ .../source/mocks/MockSplitEnumeratorContext.java | 10 +- .../source/coordinator/SourceCoordinatorTest.java | 94 +- 10 files changed, 456 insertions(+), 304 deletions(-)
[flink] branch master updated (884ff61 -> b582991)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/flink.git. from 884ff61 [FLINK-22782][docs] Remove legacy planner from Chinese docs add b582991 [FLINK-22722][docs/kafka] Add documentation for Kafka new source (#15974) No new revisions were added by this update. Summary of changes: docs/content/docs/connectors/datastream/kafka.md | 215 ++- 1 file changed, 212 insertions(+), 3 deletions(-)
[flink] branch release-1.12 updated (19b3683 -> 0913824)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch release-1.12 in repository https://gitbox.apache.org/repos/asf/flink.git. from 19b3683 [FLINK-22208][build] Bump snappy-java to 1.1.8.3 add 0913824 [FLINK-21133][connector/checkpoint] Fix the stop-with-savepoint case in FLIP-27 source by stopping the mailbox loop in SourceOperatorStreamTask#finishTask(). No new revisions were added by this update. Summary of changes: .../runtime/tasks/SourceOperatorStreamTask.java| 5 ++ .../flink/test/checkpointing/SavepointITCase.java | 54 ++ 2 files changed, 59 insertions(+)
[flink-ml] 02/02: [FLINK-21976] Add StreamingExamplesITCase
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink-ml.git commit 195e489cd8114a9cba9ac6857e2d08f4403bfc5b Author: Dong Lin AuthorDate: Wed Mar 31 17:53:59 2021 +0800 [FLINK-21976] Add StreamingExamplesITCase --- .../streaming/test/StreamingExamplesITCase.java| 36 + .../scala/examples/StreamingExamplesITCase.scala | 37 ++ 2 files changed, 73 insertions(+) diff --git a/flink-ml-examples/examples-streaming/src/test/java/org/apache/flink/streaming/test/StreamingExamplesITCase.java b/flink-ml-examples/examples-streaming/src/test/java/org/apache/flink/streaming/test/StreamingExamplesITCase.java new file mode 100644 index 000..6f7bf5d --- /dev/null +++ b/flink-ml-examples/examples-streaming/src/test/java/org/apache/flink/streaming/test/StreamingExamplesITCase.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.test; + +import org.apache.flink.streaming.examples.ml.util.IncrementalLearningSkeletonData; +import org.apache.flink.test.util.AbstractTestBase; + +import org.junit.Test; + +/** Integration test for streaming programs in Java examples. 
*/ +public class StreamingExamplesITCase extends AbstractTestBase { + +@Test +public void testIncrementalLearningSkeleton() throws Exception { +final String resultPath = getTempDirPath("result"); + org.apache.flink.streaming.examples.ml.IncrementalLearningSkeleton.main( +new String[] {"--output", resultPath}); +compareResultsByLinesInMemory(IncrementalLearningSkeletonData.RESULTS, resultPath); +} +} diff --git a/flink-ml-examples/examples-streaming/src/test/scala/org/apache/flink/streaming/scala/examples/StreamingExamplesITCase.scala b/flink-ml-examples/examples-streaming/src/test/scala/org/apache/flink/streaming/scala/examples/StreamingExamplesITCase.scala new file mode 100644 index 000..7fcbe9d --- /dev/null +++ b/flink-ml-examples/examples-streaming/src/test/scala/org/apache/flink/streaming/scala/examples/StreamingExamplesITCase.scala @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.streaming.scala.examples + +import org.apache.flink.streaming.examples.ml.util.IncrementalLearningSkeletonData +import org.apache.flink.streaming.scala.examples.ml.IncrementalLearningSkeleton +import org.apache.flink.test.util.{AbstractTestBase, TestBaseUtils} +import org.junit.Test + +/** + * Integration test for streaming programs in Scala examples. + */ +class StreamingExamplesITCase extends AbstractTestBase { + + @Test + def testIncrementalLearningSkeleton(): Unit = { +val resultPath = getTempDirPath("result") +IncrementalLearningSkeleton.main(Array("--output", resultPath)) + TestBaseUtils.compareResultsByLinesInMemory(IncrementalLearningSkeletonData.RESULTS, resultPath) + } +}
[flink-ml] branch master updated (492f5a1 -> 195e489)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/flink-ml.git. from 492f5a1 [FLINK-22013] Add Github Actions to flink-ml for every push and pull request new ebe84a3 [FLINK-21976] Move ML examples from flink/flink-examples to the flink-ml repo. new 195e489 [FLINK-21976] Add StreamingExamplesITCase The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: flink-ml-examples/examples-batch/pom.xml | 126 ++ .../flink/examples/java/ml/LinearRegression.java | 257 + .../java/ml/util/LinearRegressionData.java | 69 ++ .../ml/util/LinearRegressionDataGenerator.java | 115 + .../flink/examples/scala/ml/LinearRegression.scala | 159 + flink-ml-examples/examples-streaming/pom.xml | 190 +++ .../examples/ml/IncrementalLearningSkeleton.java | 212 + .../ml/util/IncrementalLearningSkeletonData.java | 33 +++ .../examples/ml/IncrementalLearningSkeleton.scala | 184 +++ .../streaming/test/StreamingExamplesITCase.java| 31 ++- .../scala/examples/StreamingExamplesITCase.scala | 30 ++- flink-ml-examples/pom.xml | 111 + pom.xml| 134 +++ tools/maven/scalastyle-config.xml | 146 14 files changed, 1764 insertions(+), 33 deletions(-) create mode 100644 flink-ml-examples/examples-batch/pom.xml create mode 100644 flink-ml-examples/examples-batch/src/main/java/org/apache/flink/examples/java/ml/LinearRegression.java create mode 100644 flink-ml-examples/examples-batch/src/main/java/org/apache/flink/examples/java/ml/util/LinearRegressionData.java create mode 100644 flink-ml-examples/examples-batch/src/main/java/org/apache/flink/examples/java/ml/util/LinearRegressionDataGenerator.java create mode 100644 flink-ml-examples/examples-batch/src/main/scala/org/apache/flink/examples/scala/ml/LinearRegression.scala create 
mode 100644 flink-ml-examples/examples-streaming/pom.xml create mode 100644 flink-ml-examples/examples-streaming/src/main/java/org/apache/flink/streaming/examples/ml/IncrementalLearningSkeleton.java create mode 100644 flink-ml-examples/examples-streaming/src/main/java/org/apache/flink/streaming/examples/ml/util/IncrementalLearningSkeletonData.java create mode 100644 flink-ml-examples/examples-streaming/src/main/scala/org/apache/flink/streaming/scala/examples/ml/IncrementalLearningSkeleton.scala copy flink-ml-api/src/main/java/org/apache/flink/ml/api/misc/param/ParamValidator.java => flink-ml-examples/examples-streaming/src/test/java/org/apache/flink/streaming/test/StreamingExamplesITCase.java (53%) copy flink-ml-api/src/main/java/org/apache/flink/ml/api/misc/param/ParamValidator.java => flink-ml-examples/examples-streaming/src/test/scala/org/apache/flink/streaming/scala/examples/StreamingExamplesITCase.scala (53%) create mode 100644 flink-ml-examples/pom.xml create mode 100644 tools/maven/scalastyle-config.xml
[flink-ml] 01/02: [FLINK-21976] Move ML examples from flink/flink-examples to the flink-ml repo.
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink-ml.git commit ebe84a336f691d611f34c30a2c43370457fdbc3f Author: Dong Lin AuthorDate: Wed Mar 31 15:04:54 2021 +0800 [FLINK-21976] Move ML examples from flink/flink-examples to the flink-ml repo. --- flink-ml-examples/examples-batch/pom.xml | 126 ++ .../flink/examples/java/ml/LinearRegression.java | 257 + .../java/ml/util/LinearRegressionData.java | 69 ++ .../ml/util/LinearRegressionDataGenerator.java | 115 + .../flink/examples/scala/ml/LinearRegression.scala | 159 + flink-ml-examples/examples-streaming/pom.xml | 190 +++ .../examples/ml/IncrementalLearningSkeleton.java | 212 + .../ml/util/IncrementalLearningSkeletonData.java | 33 +++ .../examples/ml/IncrementalLearningSkeleton.scala | 184 +++ flink-ml-examples/pom.xml | 111 + pom.xml| 134 +++ tools/maven/scalastyle-config.xml | 146 12 files changed, 1736 insertions(+) diff --git a/flink-ml-examples/examples-batch/pom.xml b/flink-ml-examples/examples-batch/pom.xml new file mode 100644 index 000..599f839 --- /dev/null +++ b/flink-ml-examples/examples-batch/pom.xml @@ -0,0 +1,126 @@ + + +http://maven.apache.org/POM/4.0.0"; xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"; xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd";> + + 4.0.0 + + +org.apache.flink +flink-ml-examples +0.1-SNAPSHOT + + + flink-ml-examples-batch_${scala.binary.version} + Flink ML : Examples : Batch + + jar + + + + org.apache.flink + flink-java + ${flink.version} + + + + org.apache.flink + flink-scala_${scala.binary.version} + ${flink.version} + + + + org.apache.flink + flink-clients_${scala.binary.version} + ${flink.version} + + + + + + + + +net.alchim31.maven +scala-maven-plugin + + + +scala-compile-first +process-resources + + compile + + + + + +-Xms128m +-Xmx512m + + + + + + +org.codehaus.mojo +build-helper-maven-plugin + + + +add-source 
+generate-sources + + add-source + + + +src/main/scala + + + + + +add-test-source +generate-test-sources + + add-test-source + + + +src/test/scala + + + + + + + + +org.scalastyle +scalastyle-maven-plugin + + ${project.basedir}/../../tools/maven/scalastyle-config.xml + + + + + + diff --git a/flink-ml-examples/examples-batch/src/main/java/org/apache/flink/examples/java/ml/LinearRegression.java b/flink-ml-examples/examples-batch/src/main/java/org/apache/flink/examples/java/ml/LinearRegression.java new file mode 100644 index 000..4f2f528 --- /dev/null +++ b/flink-ml-examples/examples-batch/src/main/java/org/apache/flink/examples/java/ml/LinearRegression.java @@ -0,0 +1,257 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.examples.java.ml; + +import org.apache.flink.api.common.functions.MapFunction; +import org.apache.flink.api.common.functions.ReduceFunction; +import org.apache.flink.api.common.functions.RichMapFunction; +import org.apache.flink.api.java.DataSet; +import org.apache.flink.api.java.ExecutionEnvironment; +import org.apache.flink.api.java.operators.IterativeDataSet; +import
[flink] branch release-1.13 updated: [FLINK-21133][connector/checkpoint] Fix the stop-with-savepoint case in FLIP-27 source by stopping the mailbox loop in SourceOperatorStreamTask#finishTask().
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch release-1.13 in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/release-1.13 by this push: new e129817 [FLINK-21133][connector/checkpoint] Fix the stop-with-savepoint case in FLIP-27 source by stopping the mailbox loop in SourceOperatorStreamTask#finishTask(). e129817 is described below commit e129817f290442c6fd7e891a6d28d848ec090050 Author: Jiangjie (Becket) Qin AuthorDate: Mon Mar 29 16:06:31 2021 +0800 [FLINK-21133][connector/checkpoint] Fix the stop-with-savepoint case in FLIP-27 source by stopping the mailbox loop in SourceOperatorStreamTask#finishTask(). --- .../runtime/tasks/SourceOperatorStreamTask.java| 5 ++ .../flink/test/checkpointing/SavepointITCase.java | 54 ++ 2 files changed, 59 insertions(+) diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java index 1c8589f..7b3b06b 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java @@ -108,6 +108,11 @@ public class SourceOperatorStreamTask extends StreamTask triggerCheckpointAsync( CheckpointMetaData checkpointMetaData, CheckpointOptions checkpointOptions) { if (!isExternallyInducedSource) { diff --git a/flink-tests/src/test/java/org/apache/flink/test/checkpointing/SavepointITCase.java b/flink-tests/src/test/java/org/apache/flink/test/checkpointing/SavepointITCase.java index 44b206f..fa3709d 100644 --- a/flink-tests/src/test/java/org/apache/flink/test/checkpointing/SavepointITCase.java +++ b/flink-tests/src/test/java/org/apache/flink/test/checkpointing/SavepointITCase.java @@ -154,6 +154,60 @@ public class 
SavepointITCase extends TestLogger { } } +@Test +public void testStopWithSavepointForFlip27SourceWithDrain() throws Exception { +testStopWithSavepointForFlip27Source(true); +} + +@Test +public void testStopWithSavepointForFlip27SourceWithoutDrain() throws Exception { +testStopWithSavepointForFlip27Source(false); +} + +private void testStopWithSavepointForFlip27Source(boolean drain) throws Exception { +final int numTaskManagers = 2; +final int numSlotsPerTaskManager = 2; + +final MiniClusterResourceFactory clusterFactory = +new MiniClusterResourceFactory( +numTaskManagers, numSlotsPerTaskManager, getFileBasedCheckpointsConfig()); + +StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); +env.setParallelism(1); + +BoundedPassThroughOperator operator = +new BoundedPassThroughOperator<>(ChainingStrategy.ALWAYS); +DataStream stream = +env.fromSequence(0, Long.MAX_VALUE) +.transform("pass-through", BasicTypeInfo.LONG_TYPE_INFO, operator); +stream.addSink(new DiscardingSink<>()); + +final JobGraph jobGraph = env.getStreamGraph().getJobGraph(); +final JobID jobId = jobGraph.getJobID(); + +MiniClusterWithClientResource cluster = clusterFactory.get(); +cluster.before(); +ClusterClient client = cluster.getClusterClient(); + +try { +BoundedPassThroughOperator.resetForTest(1, true); + +client.submitJob(jobGraph).get(); + +BoundedPassThroughOperator.getProgressLatch().await(); + +client.stopWithSavepoint(jobId, drain, null).get(); + +if (drain) { +Assert.assertTrue(BoundedPassThroughOperator.inputEnded); +} else { +Assert.assertFalse(BoundedPassThroughOperator.inputEnded); +} +} finally { +cluster.after(); +} +} + /** * Triggers a savepoint for a job that uses the FsStateBackend. We expect that all checkpoint * files are written to a new savepoint directory.
[flink] branch master updated: [FLINK-21133][connector/checkpoint] Fix the stop-with-savepoint case in FLIP-27 source by stopping the mailbox loop in SourceOperatorStreamTask#finishTask().
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/master by this push: new a9cf18b [FLINK-21133][connector/checkpoint] Fix the stop-with-savepoint case in FLIP-27 source by stopping the mailbox loop in SourceOperatorStreamTask#finishTask(). a9cf18b is described below commit a9cf18b4d25f130e0bd24d51b128bbcf71892b45 Author: Jiangjie (Becket) Qin AuthorDate: Mon Mar 29 16:06:31 2021 +0800 [FLINK-21133][connector/checkpoint] Fix the stop-with-savepoint case in FLIP-27 source by stopping the mailbox loop in SourceOperatorStreamTask#finishTask(). --- .../runtime/tasks/SourceOperatorStreamTask.java| 5 ++ .../flink/test/checkpointing/SavepointITCase.java | 54 ++ 2 files changed, 59 insertions(+) diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java index 1c8589f..7b3b06b 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java @@ -108,6 +108,11 @@ public class SourceOperatorStreamTask extends StreamTask triggerCheckpointAsync( CheckpointMetaData checkpointMetaData, CheckpointOptions checkpointOptions) { if (!isExternallyInducedSource) { diff --git a/flink-tests/src/test/java/org/apache/flink/test/checkpointing/SavepointITCase.java b/flink-tests/src/test/java/org/apache/flink/test/checkpointing/SavepointITCase.java index 44b206f..fa3709d 100644 --- a/flink-tests/src/test/java/org/apache/flink/test/checkpointing/SavepointITCase.java +++ b/flink-tests/src/test/java/org/apache/flink/test/checkpointing/SavepointITCase.java @@ -154,6 +154,60 @@ public class SavepointITCase 
extends TestLogger { } } +@Test +public void testStopWithSavepointForFlip27SourceWithDrain() throws Exception { +testStopWithSavepointForFlip27Source(true); +} + +@Test +public void testStopWithSavepointForFlip27SourceWithoutDrain() throws Exception { +testStopWithSavepointForFlip27Source(false); +} + +private void testStopWithSavepointForFlip27Source(boolean drain) throws Exception { +final int numTaskManagers = 2; +final int numSlotsPerTaskManager = 2; + +final MiniClusterResourceFactory clusterFactory = +new MiniClusterResourceFactory( +numTaskManagers, numSlotsPerTaskManager, getFileBasedCheckpointsConfig()); + +StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); +env.setParallelism(1); + +BoundedPassThroughOperator operator = +new BoundedPassThroughOperator<>(ChainingStrategy.ALWAYS); +DataStream stream = +env.fromSequence(0, Long.MAX_VALUE) +.transform("pass-through", BasicTypeInfo.LONG_TYPE_INFO, operator); +stream.addSink(new DiscardingSink<>()); + +final JobGraph jobGraph = env.getStreamGraph().getJobGraph(); +final JobID jobId = jobGraph.getJobID(); + +MiniClusterWithClientResource cluster = clusterFactory.get(); +cluster.before(); +ClusterClient client = cluster.getClusterClient(); + +try { +BoundedPassThroughOperator.resetForTest(1, true); + +client.submitJob(jobGraph).get(); + +BoundedPassThroughOperator.getProgressLatch().await(); + +client.stopWithSavepoint(jobId, drain, null).get(); + +if (drain) { +Assert.assertTrue(BoundedPassThroughOperator.inputEnded); +} else { +Assert.assertFalse(BoundedPassThroughOperator.inputEnded); +} +} finally { +cluster.after(); +} +} + /** * Triggers a savepoint for a job that uses the FsStateBackend. We expect that all checkpoint * files are written to a new savepoint directory.
[flink-ml] branch master updated: [FLINK-22013] Add Github Actions to flink-ml for every push and pull request
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink-ml.git The following commit(s) were added to refs/heads/master by this push: new 492f5a1 [FLINK-22013] Add Github Actions to flink-ml for every push and pull request 492f5a1 is described below commit 492f5a12d2cbfa9346d4772306a469bd1a5163f5 Author: Dong Lin AuthorDate: Wed Mar 31 10:00:07 2021 +0800 [FLINK-22013] Add Github Actions to flink-ml for every push and pull request --- .github/workflows/java8-build.yml | 18 ++ 1 file changed, 18 insertions(+) diff --git a/.github/workflows/java8-build.yml b/.github/workflows/java8-build.yml new file mode 100644 index 000..d68d465 --- /dev/null +++ b/.github/workflows/java8-build.yml @@ -0,0 +1,18 @@ +name: Java 8 Build + +on: [push, pull_request] + +jobs: + build: +runs-on: ubuntu-latest + +steps: + - name: Checkout code +uses: actions/checkout@v2 + - name: Set up JDK 1.8 +uses: actions/setup-java@v1 +with: + java-version: 1.8 + - name: Build +run: mvn clean install +
[flink-ml] branch master updated: [hotfix] Change the notification list of GitHub activities to iss...@flink.apache.org
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink-ml.git The following commit(s) were added to refs/heads/master by this push: new fb67074 [hotfix] Change the notification list of GitHub activities to iss...@flink.apache.org fb67074 is described below commit fb6707434bda77b1bb529f223a77f5316eeb04f6 Author: Jiangjie (Becket) Qin AuthorDate: Wed Mar 31 12:04:36 2021 +0800 [hotfix] Change the notification list of GitHub activities to iss...@flink.apache.org --- .asf.yaml | 5 + 1 file changed, 5 insertions(+) diff --git a/.asf.yaml b/.asf.yaml new file mode 100644 index 000..82eef0b --- /dev/null +++ b/.asf.yaml @@ -0,0 +1,5 @@ +notifications: + commits: commits@flink.apache.org + issues: iss...@flink.apache.org + pullrequests: iss...@flink.apache.org + jira_options: link label
[flink] 03/12: [hotfix][connector/kafka] Reduce the offset commit logging verbosity from INFO to DEBUG.
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git commit b5b9682827f0698d9b4d24215b4dd2daaf25ec30 Author: Dong Lin AuthorDate: Sat Mar 6 12:51:52 2021 +0800 [hotfix][connector/kafka] Reduce the offset commit logging verbosity from INFO to DEBUG. --- .../flink/connector/kafka/source/reader/KafkaSourceReader.java | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java index 8ca0337..bf3d42e 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java @@ -109,11 +109,13 @@ public class KafkaSourceReader @Override public void notifyCheckpointComplete(long checkpointId) throws Exception { -LOG.info("Committing offsets for checkpoint {}", checkpointId); +LOG.debug("Committing offsets for checkpoint {}", checkpointId); ((KafkaSourceFetcherManager) splitFetcherManager) .commitOffsets( offsetsToCommit.get(checkpointId), (ignored, e) -> { +// The offset commit here is needed by the external monitoring. It won't +// break Flink job's correctness if we fail to commit the offset here. if (e != null) { LOG.warn( "Failed to commit consumer offsets for checkpoint {}", @@ -124,7 +126,7 @@ public class KafkaSourceReader "Successfully committed offsets for checkpoint {}", checkpointId); // If the finished topic partition has been committed, we remove it -// from the offsets of finsihed splits map. +// from the offsets of the finished splits map. 
Map committedPartitions = offsetsToCommit.get(checkpointId); offsetsOfFinishedSplits
[flink] 08/12: [FLINK-20114][connector/kafka] SourceOperatorStreamTask should check the committed offset first before using OffsetResetStrategy.
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git commit 8de17841d144c780a46389f5ef69b8c484c0747b Author: Dong Lin AuthorDate: Sun Mar 7 19:33:58 2021 +0800 [FLINK-20114][connector/kafka] SourceOperatorStreamTask should check the committed offset first before using OffsetResetStrategy. This is necessary to keep the same behavior as the legacy FlinkKafkaConsumer. --- .../initializer/SpecifiedOffsetsInitializer.java| 6 ++ .../initializer/OffsetsInitializerTest.java | 21 - 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/SpecifiedOffsetsInitializer.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/SpecifiedOffsetsInitializer.java index 186e2d5..d3335de 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/SpecifiedOffsetsInitializer.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/SpecifiedOffsetsInitializer.java @@ -60,6 +60,12 @@ class SpecifiedOffsetsInitializer implements OffsetsInitializer { } } if (!toLookup.isEmpty()) { +// First check the committed offsets. 
+Map committedOffsets = +partitionOffsetsRetriever.committedOffsets(toLookup); +offsets.putAll(committedOffsets); +toLookup.removeAll(committedOffsets.keySet()); + switch (offsetResetStrategy) { case EARLIEST: offsets.putAll(partitionOffsetsRetriever.beginningOffsets(toLookup)); diff --git a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerTest.java b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerTest.java index 94d0c30..0e84882 100644 --- a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerTest.java +++ b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerTest.java @@ -41,12 +41,14 @@ import static org.junit.Assert.assertTrue; /** Unit tests for {@link OffsetsInitializer}. */ public class OffsetsInitializerTest { private static final String TOPIC = "topic"; +private static final String TOPIC2 = "topic2"; private static KafkaSourceEnumerator.PartitionOffsetsRetrieverImpl retriever; @BeforeClass public static void setup() throws Throwable { KafkaSourceTestEnv.setup(); KafkaSourceTestEnv.setupTopic(TOPIC, true, true); +KafkaSourceTestEnv.setupTopic(TOPIC2, false, false); retriever = new KafkaSourceEnumerator.PartitionOffsetsRetrieverImpl( KafkaSourceTestEnv.getConsumer(), @@ -116,19 +118,28 @@ public class OffsetsInitializerTest { List partitions = KafkaSourceTestEnv.getPartitionsForTopic(TOPIC); Map committedOffsets = KafkaSourceTestEnv.getCommittedOffsets(partitions); -committedOffsets.forEach((tp, oam) -> specifiedOffsets.put(tp, oam.offset())); +partitions.forEach(tp -> specifiedOffsets.put(tp, (long) tp.partition())); // Remove the specified offsets for partition 0. 
-TopicPartition missingPartition = new TopicPartition(TOPIC, 0); -specifiedOffsets.remove(missingPartition); +TopicPartition partitionSetToCommitted = new TopicPartition(TOPIC, 0); +specifiedOffsets.remove(partitionSetToCommitted); OffsetsInitializer initializer = OffsetsInitializer.offsets(specifiedOffsets); assertEquals(OffsetResetStrategy.EARLIEST, initializer.getAutoOffsetResetStrategy()); +// The partition without committed offset should fallback to offset reset strategy. +TopicPartition partitionSetToEarliest = new TopicPartition(TOPIC2, 0); +partitions.add(partitionSetToEarliest); Map offsets = initializer.getPartitionOffsets(partitions, retriever); for (TopicPartition tp : partitions) { Long offset = offsets.get(tp); -long expectedOffset = -tp.equals(missingPartition) ? 0L : committedOffsets.get(tp).offset(); +long expectedOffset; +if (tp.equals(partitionSetToCommitted)) { +expectedOffset = committedOffsets.get(tp).offset(); +} else if (tp.equ
[flink] 06/12: [hotfix][connector/kafka] Remove the unused close.timeout.ms config.
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git commit b3471f2a55df9d10248c41c6d4efedec188b93d6 Author: Dong Lin AuthorDate: Sun Mar 7 17:44:59 2021 +0800 [hotfix][connector/kafka] Remove the unused close.timeout.ms config. --- .../org/apache/flink/connector/kafka/source/KafkaSourceOptions.java | 6 -- 1 file changed, 6 deletions(-) diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceOptions.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceOptions.java index 03d417f..db09fa5 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceOptions.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceOptions.java @@ -41,12 +41,6 @@ public class KafkaSourceOptions { "The interval in milliseconds for the Kafka source to discover " + "the new partitions. A non-positive value disables the partition discovery."); -public static final ConfigOption CLOSE_TIMEOUT_MS = -ConfigOptions.key("close.timeout.ms") -.longType() -.defaultValue(1L) -.withDescription("The max time to wait when closing components."); - @SuppressWarnings("unchecked") public static T getOption( Properties props, ConfigOption configOption, Function parser) {
[flink] 12/12: [FLINK-20114][connector/kafka] Remove duplicated warning and remove redundant default value for partition.discovery.interval.ms
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git commit 8d2fa1bb81eff790bdc21a69d15a17aa074010ae Author: Dong Lin AuthorDate: Wed Mar 24 15:26:23 2021 +0800 [FLINK-20114][connector/kafka] Remove duplicated warning and remove redundant default value for partition.discovery.interval.ms --- .../apache/flink/connector/kafka/source/KafkaSourceBuilder.java | 8 ++-- .../apache/flink/connector/kafka/source/KafkaSourceOptions.java | 2 +- .../streaming/runtime/io/StreamMultipleInputProcessorFactory.java | 6 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java index 8ede378..77f844f 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java @@ -442,14 +442,10 @@ public class KafkaSourceBuilder { true); // If the source is bounded, do not run periodic partition discovery. -if (maybeOverride( +maybeOverride( KafkaSourceOptions.PARTITION_DISCOVERY_INTERVAL_MS.key(), "-1", -boundedness == Boundedness.BOUNDED)) { -LOG.warn( -"{} property is overridden to -1 because the source is bounded.", -KafkaSourceOptions.PARTITION_DISCOVERY_INTERVAL_MS); -} +boundedness == Boundedness.BOUNDED); // If the client id prefix is not set, reuse the consumer group id as the client id prefix. 
maybeOverride( diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceOptions.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceOptions.java index db09fa5..98c735f 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceOptions.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceOptions.java @@ -36,7 +36,7 @@ public class KafkaSourceOptions { public static final ConfigOption PARTITION_DISCOVERY_INTERVAL_MS = ConfigOptions.key("partition.discovery.interval.ms") .longType() -.defaultValue(3L) +.noDefaultValue() .withDescription( "The interval in milliseconds for the Kafka source to discover " + "the new partitions. A non-positive value disables the partition discovery."); diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/StreamMultipleInputProcessorFactory.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/StreamMultipleInputProcessorFactory.java index a70b112..db351bc 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/StreamMultipleInputProcessorFactory.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/StreamMultipleInputProcessorFactory.java @@ -351,7 +351,11 @@ public class StreamMultipleInputProcessorFactory { WatermarkGauge inputWatermarkGauge, MultiStreamStreamStatusTracker streamStatusTracker, int inputIndex) { -super(chainedSourceOutput, streamStatusMaintainer, new SimpleCounter(), inputWatermarkGauge); +super( +chainedSourceOutput, +streamStatusMaintainer, +new SimpleCounter(), +inputWatermarkGauge); this.streamStatusTracker = streamStatusTracker; this.inputIndex = inputIndex; }
[flink] 11/12: [hotfix][examples] Update StateMachineExample to use KafkaSource
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git commit e079aef0efcd544322a028cc15a24bdaa456af4f Author: Dong Lin AuthorDate: Sun Mar 21 17:33:39 2021 +0800 [hotfix][examples] Update StateMachineExample to use KafkaSource --- .../examples/statemachine/StateMachineExample.java | 69 +++--- 1 file changed, 36 insertions(+), 33 deletions(-) diff --git a/flink-examples/flink-examples-streaming/src/main/java/org/apache/flink/streaming/examples/statemachine/StateMachineExample.java b/flink-examples/flink-examples-streaming/src/main/java/org/apache/flink/streaming/examples/statemachine/StateMachineExample.java index 3dfd131..1bb76ad 100644 --- a/flink-examples/flink-examples-streaming/src/main/java/org/apache/flink/streaming/examples/statemachine/StateMachineExample.java +++ b/flink-examples/flink-examples-streaming/src/main/java/org/apache/flink/streaming/examples/statemachine/StateMachineExample.java @@ -18,18 +18,20 @@ package org.apache.flink.streaming.examples.statemachine; +import org.apache.flink.api.common.eventtime.WatermarkStrategy; import org.apache.flink.api.common.functions.RichFlatMapFunction; import org.apache.flink.api.common.state.ValueState; import org.apache.flink.api.common.state.ValueStateDescriptor; import org.apache.flink.api.java.utils.ParameterTool; import org.apache.flink.configuration.Configuration; +import org.apache.flink.connector.kafka.source.KafkaSource; +import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer; +import org.apache.flink.connector.kafka.source.reader.deserializer.KafkaRecordDeserializationSchema; import org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend; import org.apache.flink.core.fs.FileSystem; import org.apache.flink.runtime.state.hashmap.HashMapStateBackend; import org.apache.flink.streaming.api.datastream.DataStream; import 
org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import org.apache.flink.streaming.api.functions.source.SourceFunction; -import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; import org.apache.flink.streaming.examples.statemachine.dfa.State; import org.apache.flink.streaming.examples.statemachine.event.Alert; import org.apache.flink.streaming.examples.statemachine.event.Event; @@ -37,8 +39,6 @@ import org.apache.flink.streaming.examples.statemachine.generator.EventsGenerato import org.apache.flink.streaming.examples.statemachine.kafka.EventDeSerializer; import org.apache.flink.util.Collector; -import java.util.Properties; - /** * Main class of the state machine example. This class implements the streaming application that * receives the stream of events and evaluates a state machine (per originating address) to validate @@ -69,9 +69,26 @@ public class StateMachineExample { // determine whether to use the built-in source, or read from Kafka -final SourceFunction source; +final DataStream events; final ParameterTool params = ParameterTool.fromArgs(args); +// create the environment to create streams and configure execution +final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); +env.enableCheckpointing(2000L); + +final String stateBackend = params.get("backend", "memory"); +if ("hashmap".equals(stateBackend)) { +final String checkpointDir = params.get("checkpoint-dir"); +boolean asyncCheckpoints = params.getBoolean("async-checkpoints", false); +env.setStateBackend(new HashMapStateBackend(asyncCheckpoints)); +env.getCheckpointConfig().setCheckpointStorage(checkpointDir); +} else if ("rocks".equals(stateBackend)) { +final String checkpointDir = params.get("checkpoint-dir"); +boolean incrementalCheckpoints = params.getBoolean("incremental-checkpoints", false); +env.setStateBackend(new EmbeddedRocksDBStateBackend(incrementalCheckpoints)); 
+env.getCheckpointConfig().setCheckpointStorage(checkpointDir); +} + if (params.has("kafka-topic")) { // set up the Kafka reader String kafkaTopic = params.get("kafka-topic"); @@ -80,14 +97,19 @@ public class StateMachineExample { System.out.printf("Reading from kafka topic %s @ %s\n", kafkaTopic, brokers); System.out.println(); -Properties kafkaProps = new Properties(); -kafkaProps.setProperty("bootstrap.servers", brokers); - -FlinkKafkaConsumer kafka = -new FlinkKafkaConsumer<>(kafkaTopic, new EventDeSerializer(), kafkaProps); -kafka.setStartFromL
[flink] 10/12: [FLINK-20114][connector/kafka] Add IT cases for KafkaSource by migrating IT cases from FlinkKafkaConsumer.
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git commit 3abf5550a11ac4733799187bf49122417a177b6a Author: Dong Lin AuthorDate: Mon Mar 8 09:35:30 2021 +0800 [FLINK-20114][connector/kafka] Add IT cases for KafkaSource by migrating IT cases from FlinkKafkaConsumer. --- .../connector/kafka/source/KafkaSourceITCase.java | 63 + .../kafka/source/KafkaSourceLegacyITCase.java | 162 + .../connectors/kafka/KafkaConsumerTestBase.java| 260 +++-- .../connectors/kafka/KafkaTestEnvironment.java | 20 ++ .../connectors/kafka/KafkaTestEnvironmentImpl.java | 19 ++ 5 files changed, 453 insertions(+), 71 deletions(-) diff --git a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/KafkaSourceITCase.java b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/KafkaSourceITCase.java index 25acf75..6f7c66d 100644 --- a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/KafkaSourceITCase.java +++ b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/KafkaSourceITCase.java @@ -18,19 +18,25 @@ package org.apache.flink.connector.kafka.source; +import org.apache.flink.api.common.JobExecutionResult; import org.apache.flink.api.common.accumulators.ListAccumulator; import org.apache.flink.api.common.eventtime.WatermarkStrategy; +import org.apache.flink.api.common.functions.MapFunction; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.configuration.Configuration; import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer; import org.apache.flink.connector.kafka.source.reader.deserializer.KafkaRecordDeserializationSchema; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 
+import org.apache.flink.streaming.api.functions.sink.DiscardingSink; import org.apache.flink.streaming.api.functions.sink.RichSinkFunction; +import org.apache.flink.streaming.api.operators.StreamMap; +import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; import org.apache.flink.util.CloseableIterator; import org.apache.flink.util.Collector; import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.clients.producer.ProducerRecord; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.serialization.Deserializer; import org.apache.kafka.common.serialization.IntegerDeserializer; @@ -67,6 +73,42 @@ public class KafkaSourceITCase { } @Test +public void testTimestamp() throws Throwable { +final String topic = "testTimestamp"; +KafkaSourceTestEnv.createTestTopic(topic, 1, 1); +KafkaSourceTestEnv.produceToKafka( +Arrays.asList( +new ProducerRecord<>(topic, 0, 1L, "key0", 0), +new ProducerRecord<>(topic, 0, 2L, "key1", 1), +new ProducerRecord<>(topic, 0, 3L, "key2", 2))); + +KafkaSource source = +KafkaSource.builder() + .setBootstrapServers(KafkaSourceTestEnv.brokerConnectionStrings) +.setGroupId("testTimestampAndWatermark") +.setTopics(topic) +.setDeserializer(new TestingKafkaRecordDeserializationSchema()) +.setStartingOffsets(OffsetsInitializer.earliest()) +.setBounded(OffsetsInitializer.latest()) +.build(); + +StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); +env.setParallelism(1); +DataStream stream = +env.fromSource(source, WatermarkStrategy.noWatermarks(), "testTimestamp"); + +// Verify that the timestamp and watermark are working fine. 
+stream.transform( +"timestampVerifier", +TypeInformation.of(PartitionAndValue.class), +new WatermarkVerifyingOperator(v -> v)); +stream.addSink(new DiscardingSink<>()); +JobExecutionResult result = env.execute(); + +assertEquals(Arrays.asList(1L, 2L, 3L), result.getAccumulatorResult("timestamp")); +} + +@Test public void testBasicRead() throws Exception { KafkaSource source = KafkaSource.builder() @@ -167,6 +209,27 @@ public class KafkaSourceITCase { } } +private static class WatermarkVerifyingOperator +extends StreamMap { + +public WatermarkVerifyingOperator( +MapFunc
[flink] 07/12: [FLINK-20114][connector/kafka] PartitionOffsetsRetrieverImpl.committedOffsets() should handle the case without committed offsets.
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git commit b831b853b908d26a2b7890478fc9f8446b2eb724 Author: Dong Lin AuthorDate: Sun Mar 7 17:53:27 2021 +0800 [FLINK-20114][connector/kafka] PartitionOffsetsRetrieverImpl.committedOffsets() should handle the case without committed offsets. --- .../connector/kafka/source/enumerator/KafkaSourceEnumerator.java | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/KafkaSourceEnumerator.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/KafkaSourceEnumerator.java index 92ac770..1f3f3c6 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/KafkaSourceEnumerator.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/KafkaSourceEnumerator.java @@ -407,7 +407,12 @@ public class KafkaSourceEnumerator .thenApply( result -> { Map offsets = new HashMap<>(); -result.forEach((tp, oam) -> offsets.put(tp, oam.offset())); +result.forEach( +(tp, oam) -> { +if (oam != null) { +offsets.put(tp, oam.offset()); +} +}); return offsets; }) .get();
[flink] 01/12: [FLINK-20114][connector/kafka] KafkaSourceReader should not commit offsets for partitions whose offsets have not been initialized.
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git commit 7a36623dc081a555d4e6e9d2b1f7bfb908640fee Author: Dong Lin AuthorDate: Wed Mar 3 22:37:25 2021 +0800 [FLINK-20114][connector/kafka] KafkaSourceReader should not commit offsets for partitions whose offsets have not been initialized. --- .../kafka/source/reader/KafkaSourceReader.java | 18 -- .../kafka/source/reader/KafkaSourceReaderTest.java | 14 ++ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java index 9fd3a70..8ca0337 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java @@ -75,9 +75,11 @@ public class KafkaSourceReader protected void onSplitFinished(Map finishedSplitIds) { finishedSplitIds.forEach( (ignored, splitState) -> { -offsetsOfFinishedSplits.put( -splitState.getTopicPartition(), -new OffsetAndMetadata(splitState.getCurrentOffset())); +if (splitState.getCurrentOffset() >= 0) { +offsetsOfFinishedSplits.put( +splitState.getTopicPartition(), +new OffsetAndMetadata(splitState.getCurrentOffset())); +} }); } @@ -91,9 +93,13 @@ public class KafkaSourceReader offsetsToCommit.computeIfAbsent(checkpointId, id -> new HashMap<>()); // Put the offsets of the active splits. 
for (KafkaPartitionSplit split : splits) { -offsetsMap.put( -split.getTopicPartition(), -new OffsetAndMetadata(split.getStartingOffset(), null)); +// If the checkpoint is triggered before the partition starting offsets +// is retrieved, do not commit the offsets for those partitions. +if (split.getStartingOffset() >= 0) { +offsetsMap.put( +split.getTopicPartition(), +new OffsetAndMetadata(split.getStartingOffset())); +} } // Put offsets of all the finished splits. offsetsMap.putAll(offsetsOfFinishedSplits); diff --git a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java index 6366d40..579bab3 100644 --- a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java +++ b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java @@ -207,6 +207,20 @@ public class KafkaSourceReaderTest extends SourceReaderTestBase reader = (KafkaSourceReader) createReader()) { +KafkaPartitionSplit split = +new KafkaPartitionSplit( +new TopicPartition(TOPIC, 0), KafkaPartitionSplit.EARLIEST_OFFSET); +reader.addSplits(Collections.singletonList(split)); +reader.snapshotState(checkpointId); +assertEquals(1, reader.getOffsetsToCommit().size()); + assertTrue(reader.getOffsetsToCommit().get(checkpointId).isEmpty()); +} +} + // -- @Override
[flink] 04/12: [FLINK-20114][connector/common] SourceOperatorStreamTask should update the numRecordsOutCount metric
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git commit b23f65f17b4de16cf0fd91b225c3c3c61c849450 Author: Dong Lin AuthorDate: Sat Mar 6 13:07:31 2021 +0800 [FLINK-20114][connector/common] SourceOperatorStreamTask should update the numRecordsOutCount metric --- .../io/StreamMultipleInputProcessorFactory.java| 2 +- .../runtime/tasks/SourceOperatorStreamTask.java| 18 -- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/StreamMultipleInputProcessorFactory.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/StreamMultipleInputProcessorFactory.java index fdea270..a70b112 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/StreamMultipleInputProcessorFactory.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/StreamMultipleInputProcessorFactory.java @@ -351,7 +351,7 @@ public class StreamMultipleInputProcessorFactory { WatermarkGauge inputWatermarkGauge, MultiStreamStreamStatusTracker streamStatusTracker, int inputIndex) { -super(chainedSourceOutput, streamStatusMaintainer, inputWatermarkGauge); +super(chainedSourceOutput, streamStatusMaintainer, new SimpleCounter(), inputWatermarkGauge); this.streamStatusTracker = streamStatusTracker; this.inputIndex = inputIndex; } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java index b922dba..1c8589f 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java @@ -21,10 +21,12 @@ package 
org.apache.flink.streaming.runtime.tasks; import org.apache.flink.annotation.Internal; import org.apache.flink.api.connector.source.ExternallyInducedSourceReader; import org.apache.flink.api.connector.source.SourceReader; +import org.apache.flink.metrics.Counter; import org.apache.flink.runtime.checkpoint.CheckpointMetaData; import org.apache.flink.runtime.checkpoint.CheckpointOptions; import org.apache.flink.runtime.checkpoint.CheckpointType; import org.apache.flink.runtime.execution.Environment; +import org.apache.flink.runtime.metrics.groups.OperatorMetricGroup; import org.apache.flink.runtime.state.CheckpointStorageLocationReference; import org.apache.flink.streaming.api.operators.Output; import org.apache.flink.streaming.api.operators.SourceOperator; @@ -72,7 +74,7 @@ public class SourceOperatorStreamTask extends StreamTask sourceReader = mainOperator.getSourceReader(); +final SourceReader sourceReader = sourceOperator.getSourceReader(); final StreamTaskInput input; if (sourceReader instanceof ExternallyInducedSourceReader) { @@ -88,11 +90,19 @@ public class SourceOperatorStreamTask extends StreamTask(sourceOperator, 0, 0); } +Counter numRecordsOut = +((OperatorMetricGroup) sourceOperator.getMetricGroup()) +.getIOMetricGroup() +.getNumRecordsOutCounter(); + // The SourceOperatorStreamTask doesn't have any inputs, so there is no need for // a WatermarkGauge on the input. 
output = new AsyncDataOutputToOutput<>( -operatorChain.getMainOperatorOutput(), getStreamStatusMaintainer(), null); +operatorChain.getMainOperatorOutput(), +getStreamStatusMaintainer(), +numRecordsOut, +null); inputProcessor = new StreamOneInputProcessor<>(input, output, operatorChain); } @@ -144,20 +154,24 @@ public class SourceOperatorStreamTask extends StreamTask extends AbstractDataOutput { private final Output> output; +private final Counter numRecordsOut; @Nullable private final WatermarkGauge inputWatermarkGauge; public AsyncDataOutputToOutput( Output> output, StreamStatusMaintainer streamStatusMaintainer, +Counter numRecordsOut, @Nullable WatermarkGauge inputWatermarkGauge) { super(streamStatusMaintainer); this.output = checkNotNull(output); +this.numRecordsOut = numRecordsOut; this.inputWatermarkGauge = inputWatermarkGauge; } @Override public void emitRecord(StreamRecord streamRecord) { +
[flink] branch master updated (5f0c76f -> 8d2fa1b)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/flink.git. from 5f0c76f [FLINK-21330] Optimize the performance of PipelinedRegionSchedulingStrategy new 7a36623 [FLINK-20114][connector/kafka] KafkaSourceReader should not commit offsets for partitions whose offsets have not been initialized. new b0077c4 [FLINK-20114][connector/common] SourceCoordinatorContext should not log and fail job again if it receives InterruptedException after it is closed. new b5b9682 [hotfix][connector/kafka] Reduce the offset commit logging verbosity from INFO to DEBUG. new b23f65f [FLINK-20114][connector/common] SourceOperatorStreamTask should update the numRecordsOutCount metric new 4c6c423 [FLINK-20114][connector/kafka] KafkaSourceEnumerator should close the admin client early if periodic partition discovery is disabled. new b3471f2 [hotfix][connector/kafka] Remove the unused close.timeout.ms config. new b831b85 [FLINK-20114][connector/kafka] PartitionOffsetsRetrieverImpl.committedOffsets() should handle the case without committed offsets. new 8de1784 [FLINK-20114][connector/kafka] SourceOperatorStreamTask should check the committed offset first before using OffsetResetStrategy. new a4360c7 [FLINK-20114][connector/kafka] Auto offset commit should be disabled by default. new 3abf555 [FLINK-20114][connector/kafka] Add IT cases for KafkaSource by migrating IT cases from FlinkKafkaConsumer. new e079aef [hotfix][examples] Update StateMachineExample to use KafkaSource new 8d2fa1b [FLINK-20114][connector/kafka] Remove duplicated warning and remove redundant default value for partition.discovery.interval.ms The 12 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. 
Summary of changes: .../connector/kafka/source/KafkaSourceBuilder.java | 9 +- .../connector/kafka/source/KafkaSourceOptions.java | 8 +- .../source/enumerator/KafkaSourceEnumerator.java | 17 +- .../initializer/SpecifiedOffsetsInitializer.java | 6 + .../kafka/source/reader/KafkaSourceReader.java | 24 +- .../connector/kafka/source/KafkaSourceITCase.java | 63 + .../kafka/source/KafkaSourceLegacyITCase.java | 162 + .../initializer/OffsetsInitializerTest.java| 21 +- .../kafka/source/reader/KafkaSourceReaderTest.java | 14 ++ .../connectors/kafka/KafkaConsumerTestBase.java| 260 +++-- .../connectors/kafka/KafkaTestEnvironment.java | 20 ++ .../connectors/kafka/KafkaTestEnvironmentImpl.java | 19 ++ .../connector/source/SplitEnumeratorContext.java | 2 +- .../examples/statemachine/StateMachineExample.java | 69 +++--- .../coordinator/SourceCoordinatorContext.java | 23 +- .../coordinator/SourceCoordinatorContextTest.java | 49 +++- .../coordinator/SourceCoordinatorTestBase.java | 7 +- .../io/StreamMultipleInputProcessorFactory.java| 6 +- .../runtime/tasks/SourceOperatorStreamTask.java| 18 +- 19 files changed, 651 insertions(+), 146 deletions(-) create mode 100644 flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/KafkaSourceLegacyITCase.java
[flink] 09/12: [FLINK-20114][connector/kafka] Auto offset commit should be disabled by default.
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git commit a4360c7f4128eb7ec97c9707b184ab121e829312 Author: Dong Lin AuthorDate: Sun Mar 7 23:05:02 2021 +0800 [FLINK-20114][connector/kafka] Auto offset commit should be disabled by default. --- .../java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java | 1 + 1 file changed, 1 insertion(+) diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java index 73ba675..8ede378 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java @@ -435,6 +435,7 @@ public class KafkaSourceBuilder { true); maybeOverride( ConsumerConfig.GROUP_ID_CONFIG, "KafkaSource-" + new Random().nextLong(), false); +maybeOverride(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false", false); maybeOverride( ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, startingOffsetsInitializer.getAutoOffsetResetStrategy().name().toLowerCase(),
[flink] 05/12: [FLINK-20114][connector/kafka] KafkaSourceEnumerator should close the admin client early if periodic partition discovery is disabled.
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git commit 4c6c42392e7d116dee572fefb2e4e0e02abacefb Author: Dong Lin AuthorDate: Sun Mar 7 13:10:08 2021 +0800 [FLINK-20114][connector/kafka] KafkaSourceEnumerator should close the admin client early if periodic partition discovery is disabled. --- .../kafka/source/enumerator/KafkaSourceEnumerator.java | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/KafkaSourceEnumerator.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/KafkaSourceEnumerator.java index bc34630..92ac770 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/KafkaSourceEnumerator.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/KafkaSourceEnumerator.java @@ -149,7 +149,15 @@ public class KafkaSourceEnumerator + "without periodic partition discovery.", consumerGroupId); context.callAsync( -this::discoverAndInitializePartitionSplit, this::handlePartitionSplitChanges); +() -> { +try { +return discoverAndInitializePartitionSplit(); +} finally { +// Close the admin client early because we won't use it anymore. +adminClient.close(); +} +}, +this::handlePartitionSplitChanges); } }
[flink] 02/12: [FLINK-20114][connector/common] SourceCoordinatorContext should not log and fail job again if it receives InterruptedException after it is closed.
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git commit b0077c4a4a0f867069e428b1fa13ee87fc5a5556 Author: Dong Lin AuthorDate: Sat Mar 6 12:45:23 2021 +0800 [FLINK-20114][connector/common] SourceCoordinatorContext should not log and fail job again if it receives InterruptedException after it is closed. --- .../connector/source/SplitEnumeratorContext.java | 2 +- .../coordinator/SourceCoordinatorContext.java | 23 ++ .../coordinator/SourceCoordinatorContextTest.java | 49 +- .../coordinator/SourceCoordinatorTestBase.java | 7 +++- 4 files changed, 70 insertions(+), 11 deletions(-) diff --git a/flink-core/src/main/java/org/apache/flink/api/connector/source/SplitEnumeratorContext.java b/flink-core/src/main/java/org/apache/flink/api/connector/source/SplitEnumeratorContext.java index bef1666..66b3ef4 100644 --- a/flink-core/src/main/java/org/apache/flink/api/connector/source/SplitEnumeratorContext.java +++ b/flink-core/src/main/java/org/apache/flink/api/connector/source/SplitEnumeratorContext.java @@ -110,7 +110,7 @@ public interface SplitEnumeratorContext { /** * Invoke the given callable periodically and handover the return value to the handler which * will be executed by the source coordinator. When this method is invoked multiple times, The - * Coallbles may be executed in a thread pool concurrently. + * Callables may be executed in a thread pool concurrently. * * It is important to make sure that the callable does not modify any shared state, * especially the states that will be a part of the {@link SplitEnumerator#snapshotState()}. 
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java index a262807..6c0dcd9 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java @@ -19,6 +19,7 @@ limitations under the License. package org.apache.flink.runtime.source.coordinator; import org.apache.flink.annotation.Internal; +import org.apache.flink.annotation.VisibleForTesting; import org.apache.flink.api.connector.source.ReaderInfo; import org.apache.flink.api.connector.source.SourceEvent; import org.apache.flink.api.connector.source.SourceSplit; @@ -54,6 +55,7 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.Executor; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import java.util.function.BiConsumer; @@ -94,6 +96,7 @@ public class SourceCoordinatorContext private final SourceCoordinatorProvider.CoordinatorExecutorThreadFactory coordinatorThreadFactory; private final String coordinatorThreadName; +private volatile boolean closed; public SourceCoordinatorContext( ExecutorService coordinatorExecutor, @@ -103,18 +106,22 @@ public class SourceCoordinatorContext SimpleVersionedSerializer splitSerializer) { this( coordinatorExecutor, +Executors.newScheduledThreadPool( +numWorkerThreads, +new ExecutorThreadFactory( + coordinatorThreadFactory.getCoordinatorThreadName() + "-worker")), coordinatorThreadFactory, -numWorkerThreads, operatorCoordinatorContext, splitSerializer, new SplitAssignmentTracker<>()); } // Package private method for unit test. 
+@VisibleForTesting SourceCoordinatorContext( ExecutorService coordinatorExecutor, +ScheduledExecutorService workerExecutor, SourceCoordinatorProvider.CoordinatorExecutorThreadFactory coordinatorThreadFactory, -int numWorkerThreads, OperatorCoordinator.Context operatorCoordinatorContext, SimpleVersionedSerializer splitSerializer, SplitAssignmentTracker splitAssignmentTracker) { @@ -132,12 +139,7 @@ public class SourceCoordinatorContext new ThrowableCatchingRunnable( this::handleUncaughtExceptionFromAsyncCall, runnable)); -this.notifier = -new ExecutorNotifier( -Executors.newScheduledThreadPool( -numWorkerThreads, -new ExecutorThreadFactory(coordinatorThrea
[flink-ml] 05/05: [FLINK-21976] Update README.md to include overview, build, contributing and license sections
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink-ml.git commit 08d058046f34b711128e0646ffbdc7e384c22064 Author: Dong Lin AuthorDate: Mon Mar 29 17:00:54 2021 +0800 [FLINK-21976] Update README.md to include overview, build, contributing and license sections --- README.md | 18 +- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c2031eb..e5d4eaf 100644 --- a/README.md +++ b/README.md @@ -1 +1,17 @@ -# flink-ml +Flink ML is a library which provides machine learning (ML) APIs and libraries that simplify the building of machine learning pipelines. It provides a set of standard ML APIs for MLlib developers to implement ML algorithms, as well as libraries of ML algorithms that can be used to build ML pipelines for both training and inference jobs. + +Flink ML is developed under the umbrella of [Apache Flink](https://flink.apache.org/). + +## Building the Project + +Run the `mvn clean package` command. + +Then You will find a JAR file that contains your application, plus any libraries that you may have added as dependencies to the application: `target/-.jar`. + +## Contributing + +You can learn more about how to contribute in the [Apache Flink website](https://flink.apache.org/contributing/how-to-contribute.html). For code contributions, please read carefully the [Contributing Code](https://flink.apache.org/contributing/contribute-code.html) section for an overview of ongoing community work. + +## License + +The code in this repository is licensed under the [Apache Software License 2](LICENSE).
[flink-ml] 02/05: [FLINK-21976] Add CODE_OF_CONDUCT.md, LICENSE and .gitignore
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink-ml.git commit f8b84cc202ecbe7de2ca26651191b3991319 Author: Dong Lin AuthorDate: Sun Mar 28 19:28:53 2021 +0800 [FLINK-21976] Add CODE_OF_CONDUCT.md, LICENSE and .gitignore --- .gitignore | 18 + CODE_OF_CONDUCT.md | 3 + LICENSE| 201 + 3 files changed, 222 insertions(+) diff --git a/.gitignore b/.gitignore new file mode 100644 index 000..afd1e95 --- /dev/null +++ b/.gitignore @@ -0,0 +1,18 @@ +.cache +.classpath +.idea +.metadata +.settings +.project +target +.version.properties +*.class +*.iml +*.swp +*.jar +*.zip +*.log +*.pyc +.DS_Store +*.ipr +*.iws diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 000..8821b1e --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,3 @@ +# Code of Conduct + +Apache Flink and all its associated repositories follow the [Code of Conduct of the Apache Software Foundation](https://www.apache.org/foundation/policies/conduct). diff --git a/LICENSE b/LICENSE new file mode 100644 index 000..261eeb9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 +http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +
[flink-ml] 04/05: [FLINK-21976] Update pom.xml
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink-ml.git commit 8e2415bfb2ddb00074dc5a44ffd9c828d63af9c7 Author: Dong Lin AuthorDate: Sun Mar 28 16:51:46 2021 +0800 [FLINK-21976] Update pom.xml --- flink-ml-api/pom.xml | 6 +- flink-ml-lib/pom.xml | 14 +- flink-ml-uber/.pom.xml.swo | Bin 12288 -> 0 bytes flink-ml-uber/pom.xml | 8 +- pom.xml| 578 - 5 files changed, 586 insertions(+), 20 deletions(-) diff --git a/flink-ml-api/pom.xml b/flink-ml-api/pom.xml index dd7863d..81fdcc7 100644 --- a/flink-ml-api/pom.xml +++ b/flink-ml-api/pom.xml @@ -25,17 +25,17 @@ under the License. org.apache.flink flink-ml-parent -1.13-SNAPSHOT +0.1-SNAPSHOT flink-ml-api - Flink : ML : API + Flink ML : API org.apache.flink flink-table-api-java - ${project.version} + ${flink.version} provided diff --git a/flink-ml-lib/pom.xml b/flink-ml-lib/pom.xml index d6ca639..bd5f3ac 100644 --- a/flink-ml-lib/pom.xml +++ b/flink-ml-lib/pom.xml @@ -23,41 +23,41 @@ under the License. 
org.apache.flink flink-ml-parent -1.13-SNAPSHOT +0.1-SNAPSHOT flink-ml-lib_${scala.binary.version} - Flink : ML : Lib + Flink ML : Lib org.apache.flink flink-ml-api - ${project.version} + ${flink.version} provided org.apache.flink flink-table-api-java - ${project.version} + ${flink.version} provided org.apache.flink flink-table-api-java-bridge_${scala.binary.version} - ${project.version} + ${flink.version} provided org.apache.flink flink-table-planner_${scala.binary.version} - ${project.version} + ${flink.version} test org.apache.flink flink-clients_${scala.binary.version} - ${project.version} + ${flink.version} provided diff --git a/flink-ml-uber/.pom.xml.swo b/flink-ml-uber/.pom.xml.swo deleted file mode 100644 index 6d87542..000 Binary files a/flink-ml-uber/.pom.xml.swo and /dev/null differ diff --git a/flink-ml-uber/pom.xml b/flink-ml-uber/pom.xml index f28a27f..ae9850b 100644 --- a/flink-ml-uber/pom.xml +++ b/flink-ml-uber/pom.xml @@ -23,11 +23,11 @@ under the License. org.apache.flink flink-ml-parent -1.13-SNAPSHOT +0.1-SNAPSHOT flink-ml-uber_${scala.binary.version} - Flink : ML : Uber + Flink ML : Uber This module contains both the api and libraries for writing Flink ML programs. @@ -38,12 +38,12 @@ under the License. org.apache.flink flink-ml-api - ${project.version} + ${flink.version} org.apache.flink flink-ml-lib_${scala.binary.version} - ${project.version} + ${flink.version} diff --git a/pom.xml b/pom.xml index d5a5018..0e9479a 100644 --- a/pom.xml +++ b/pom.xml @@ -23,20 +23,586 @@ under the License. 4.0.0 -org.apache.flink -flink-parent -1.13-SNAPSHOT -.. 
+org.apache +apache +23 flink-ml-parent - Flink : ML : - + org.apache.flink + Flink ML + 0.1-SNAPSHOT pom + http://flink.apache.org + 2019 + + + + The Apache Software License, Version 2.0 + https://www.apache.org/licenses/LICENSE-2.0.txt + repo + + + + +https://github.com/apache/flink-ml +g...@github.com:apache/flink-ml.git + scm:git:https://gitbox.apache.org/repos/asf/flink-ml.git + + flink-ml-api flink-ml-lib flink-ml-uber + + +12.0 +2.11 +2.10.1 +1.8 +2.4.2 +4.12 +1C +true +1.12.1 + + + +**/*Test.* + + + + + + + + junit + junit + jar + test + + + + + + +org.apache.flink +flink-shaded-jackson +${jackson.version}-${flink.shaded.version} + + + +junit +junit +${junit.version} + + + + + + + + release + + + release + + + +1.8 + + + + +org.apache.maven.plugins +maven-gpg-plugin +1.4 + + +sign-artifacts +verify + + sign + + + + + +org.apache.maven.plugins +maven-enforcer-plugin + + +enforce-maven + + enforce + + + + + 1.
[flink-ml] 03/05: [FLINK-21976] Add files needed for checkstyle under tools/maven
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink-ml.git commit bdb1b93130254dcdb868cc48b3873db825312c72 Author: Dong Lin AuthorDate: Sun Mar 28 22:07:10 2021 +0800 [FLINK-21976] Add files needed for checkstyle under tools/maven --- tools/maven/checkstyle.xml | 562 +++ tools/maven/suppressions.xml | 85 +++ 2 files changed, 647 insertions(+) diff --git a/tools/maven/checkstyle.xml b/tools/maven/checkstyle.xml new file mode 100644 index 000..2048fd1 --- /dev/null +++ b/tools/maven/checkstyle.xml @@ -0,0 +1,562 @@ + + +http://www.puppycrawl.com/dtds/configuration_1_3.dtd
[flink-ml] branch master updated (ea541d7 -> 08d0580)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/flink-ml.git. from ea541d7 Initialize repository. new 8f86ecf [FLINK-21976] Move files under flink/flink-ml-parent to flink-ml repo new f8b84cc [FLINK-21976] Add CODE_OF_CONDUCT.md, LICENSE and .gitignore new bdb1b93 [FLINK-21976] Add files needed for checkstyle under tools/maven new 8e2415b [FLINK-21976] Update pom.xml new 08d0580 [FLINK-21976] Update README.md to include overview, build, contributing and license sections The 5 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .gitignore | 18 + CODE_OF_CONDUCT.md | 3 + LICENSE| 201 +++ README.md | 18 +- flink-ml-api/pom.xml | 47 ++ .../org/apache/flink/ml/api/core/Estimator.java| 47 ++ .../java/org/apache/flink/ml/api/core/Model.java | 37 ++ .../org/apache/flink/ml/api/core/Pipeline.java | 259 + .../apache/flink/ml/api/core/PipelineStage.java| 46 ++ .../org/apache/flink/ml/api/core/Transformer.java | 42 ++ .../apache/flink/ml/api/misc/param/ParamInfo.java | 151 + .../flink/ml/api/misc/param/ParamInfoFactory.java | 134 + .../flink/ml/api/misc/param/ParamValidator.java| 39 ++ .../org/apache/flink/ml/api/misc/param/Params.java | 277 ++ .../apache/flink/ml/api/misc/param/WithParams.java | 60 ++ .../flink/ml/util/param/ExtractParamInfosUtil.java | 71 +++ .../org/apache/flink/ml/api/core/PipelineTest.java | 167 ++ .../org/apache/flink/ml/api/misc/ParamsTest.java | 179 ++ .../ml/util/param/ExtractParamInfosUtilTest.java | 109 flink-ml-lib/pom.xml | 86 +++ .../org/apache/flink/ml/common/MLEnvironment.java | 151 + .../flink/ml/common/MLEnvironmentFactory.java | 116 .../org/apache/flink/ml/common/linalg/BLAS.java| 234 .../apache/flink/ml/common/linalg/DenseMatrix.java | 577 +++ 
.../apache/flink/ml/common/linalg/DenseVector.java | 379 + .../apache/flink/ml/common/linalg/MatVecOp.java| 307 +++ .../flink/ml/common/linalg/SparseVector.java | 574 +++ .../org/apache/flink/ml/common/linalg/Vector.java | 89 +++ .../flink/ml/common/linalg/VectorIterator.java | 73 +++ .../apache/flink/ml/common/linalg/VectorUtil.java | 240 .../org/apache/flink/ml/common/mapper/Mapper.java | 79 +++ .../flink/ml/common/mapper/MapperAdapter.java | 46 ++ .../apache/flink/ml/common/mapper/ModelMapper.java | 66 +++ .../flink/ml/common/mapper/ModelMapperAdapter.java | 62 +++ .../common/model/BroadcastVariableModelSource.java | 47 ++ .../apache/flink/ml/common/model/ModelSource.java | 40 ++ .../flink/ml/common/model/RowsModelSource.java | 46 ++ .../basicstatistic/MultivariateGaussian.java | 137 + .../ml/common/utils/DataSetConversionUtil.java | 167 ++ .../ml/common/utils/DataStreamConversionUtil.java | 167 ++ .../flink/ml/common/utils/OutputColsHelper.java| 211 +++ .../apache/flink/ml/common/utils/TableUtil.java| 439 +++ .../apache/flink/ml/common/utils/VectorTypes.java | 43 ++ .../org/apache/flink/ml/operator/AlgoOperator.java | 186 +++ .../flink/ml/operator/batch/BatchOperator.java | 113 .../operator/batch/source/TableSourceBatchOp.java | 40 ++ .../flink/ml/operator/stream/StreamOperator.java | 114 .../stream/source/TableSourceStreamOp.java | 40 ++ .../flink/ml/params/shared/HasMLEnvironmentId.java | 43 ++ .../ml/params/shared/colname/HasOutputCol.java | 48 ++ .../shared/colname/HasOutputColDefaultAsNull.java | 49 ++ .../ml/params/shared/colname/HasOutputCols.java| 48 ++ .../shared/colname/HasOutputColsDefaultAsNull.java | 49 ++ .../ml/params/shared/colname/HasPredictionCol.java | 42 ++ .../shared/colname/HasPredictionDetailCol.java | 47 ++ .../ml/params/shared/colname/HasReservedCols.java | 45 ++ .../ml/params/shared/colname/HasSelectedCol.java | 48 ++ .../colname/HasSelectedColDefaultAsNull.java | 49 ++ .../ml/params/shared/colname/HasSelectedCols.java | 48 ++ 
.../colname/HasSelectedColsDefaultAsNull.java | 49 ++ .../apache/flink/ml/pipeline/EstimatorBase.java| 103 .../org/apache/flink/ml/pipeline/ModelBase.java| 68 +++ .../flink/ml/pipeline/PipelineStageBase.java | 72 +++ .../apache/flink/ml/pipeline/TransformerBase.java | 99 .../apache/flink/ml/common/MLEnviron
[flink-ml] 01/01: Initialize repository.
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink-ml.git commit ea541d70e4a31006ba0a06d8465e77dda81528f7 Author: Jiangjie (Becket) Qin AuthorDate: Mon Mar 29 14:19:04 2021 +0800 Initialize repository. --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md new file mode 100644 index 000..c2031eb --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# flink-ml
[flink-ml] branch master created (now ea541d7)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/flink-ml.git. at ea541d7 Initialize repository. This branch includes the following new commits: new ea541d7 Initialize repository. The 1 revision listed above as "new" is entirely new to this repository and will be described in a separate email. The revisions listed as "add" were already present in the repository and have only been added to this reference.
[flink] branch master updated (3582bb2 -> 844601b)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/flink.git. from 3582bb2 [FLINK-21811][blink-table-planner] Support StreamExecJoin json serialization/deserialization add 844601b [FLINK-21160][connector/kafka] Use deserializer class instance instead of class name to avoid NPE when invoking getProducedType (#14784) No new revisions were added by this update. Summary of changes: .../KafkaValueOnlyDeserializerWrapper.java | 9 ++--- .../connector/kafka/source/KafkaSourceITCase.java | 47 ++ 2 files changed, 51 insertions(+), 5 deletions(-)
[flink] branch master updated (cb987a1 -> 2339616)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/flink.git. from cb987a1 [hotfix][connectors/jdbc] Use full checkpoint ID in XA global transaction ID add 133385e [FLINK-20379][connector/kafka] Rename KafkaRecordDeserializer to KafkaRecordDeserializationSchema to follow the naming convention. add 0f89bc5 [FLINK-20379][connector/common] Add a method of getUserCodeClassLoader() method to the SourceReaderContext. add 1b7939b [FLINK-20379][connector/kafka] Added methods valueOnly(...) and open(..) in the KafkaRecordDeserializationSchema interface to enable the reuse of the DeserializationSchema and KafkaDeserializationSchema. add 3102bcf [FLINK-20379][connector/kafka] Add a convenient method setValueOnlyDeserializer(DeserializationSchema) to KafkaSourceBuilder. add 2339616 [FLINK-20379][connector/common][test] Add TestingDeserializationContext and KafkaRecordDeserializationSchemaTest No new revisions were added by this update. 
Summary of changes: .../file/src/FileSourceHeavyThroughputTest.java| 7 ++ .../flink/connector/kafka/source/KafkaSource.java | 27 - .../connector/kafka/source/KafkaSourceBuilder.java | 29 - .../source/reader/KafkaPartitionSplitReader.java | 8 +- .../KafkaDeserializationSchemaWrapper.java | 65 ++ .../KafkaRecordDeserializationSchema.java | 131 + .../deserializer/KafkaRecordDeserializer.java | 72 --- ...afkaValueOnlyDeserializationSchemaWrapper.java} | 35 +++--- ...java => KafkaValueOnlyDeserializerWrapper.java} | 30 - .../connector/kafka/source/KafkaSourceITCase.java | 11 +- .../reader/KafkaPartitionSplitReaderTest.java | 15 ++- .../kafka/source/reader/KafkaSourceReaderTest.java | 11 +- .../KafkaRecordDeserializationSchemaTest.java | 123 +++ .../api/connector/source/SourceReaderContext.java | 9 ++ .../source/lib/NumberSequenceSourceTest.java | 7 ++ .../streaming/api/operators/SourceOperator.java| 19 +++ .../TestingDeserializationContext.java | 31 ++--- .../source/reader/SourceReaderTestBase.java| 2 +- .../source/reader/TestingReaderContext.java| 7 ++ 19 files changed, 491 insertions(+), 148 deletions(-) create mode 100644 flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/deserializer/KafkaDeserializationSchemaWrapper.java create mode 100644 flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/deserializer/KafkaRecordDeserializationSchema.java delete mode 100644 flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/deserializer/KafkaRecordDeserializer.java copy flink-connectors/{flink-connector-gcp-pubsub/src/main/java/org/apache/flink/streaming/connectors/gcp/pubsub/DeserializationSchemaWrapper.java => flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/deserializer/KafkaValueOnlyDeserializationSchemaWrapper.java} (58%) rename 
flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/deserializer/{ValueDeserializerWrapper.java => KafkaValueOnlyDeserializerWrapper.java} (72%) create mode 100644 flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/deserializer/KafkaRecordDeserializationSchemaTest.java copy flink-runtime/src/test/java/org/apache/flink/runtime/metrics/util/TestReporter.java => flink-test-utils-parent/flink-connector-test-utils/src/main/java/org/apache/flink/connector/testutils/source/deserialization/TestingDeserializationContext.java (54%)
[flink] branch release-1.12 updated (9f7c7be -> 4b1212d)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch release-1.12 in repository https://gitbox.apache.org/repos/asf/flink.git. from 9f7c7be [hotfix][connectors/kafka] Correctly check required configs in KafkaSourceBuilder add 4b1212d [FLINK-21178][Runtime/Checkpointing] Task failure should trigger master hook's reset() (#14890) No new revisions were added by this update. Summary of changes: .../runtime/checkpoint/CheckpointCoordinator.java | 6 +-- .../checkpoint/CheckpointCoordinatorTest.java | 58 ++ 2 files changed, 61 insertions(+), 3 deletions(-)
[flink] branch master updated (01794fe -> a08ba48)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/flink.git. from 01794fe [FLINK-21542][docs][table] Add documentation for supporting INSERT INTO specific columns add a08ba48 [FLINK-21178][runtime/checkpoint] Task failure will not trigger master hook's reset() (re-merge after rebase). (#15067) No new revisions were added by this update. Summary of changes: .../runtime/checkpoint/CheckpointCoordinator.java | 6 +-- .../checkpoint/CheckpointCoordinatorTest.java | 53 ++ 2 files changed, 56 insertions(+), 3 deletions(-)
[flink] branch master updated: [FLINK-21178][Runtime/Checkpointing] Task failure should trigger master hook's reset() (#14890)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/master by this push: new 816ce96 [FLINK-21178][Runtime/Checkpointing] Task failure should trigger master hook's reset() (#14890) 816ce96 is described below commit 816ce969df408dcaff52c6341be9299ffaa61805 Author: Brian Zhou AuthorDate: Wed Mar 3 09:03:16 2021 +0800 [FLINK-21178][Runtime/Checkpointing] Task failure should trigger master hook's reset() (#14890) --- .../runtime/checkpoint/CheckpointCoordinator.java | 6 +-- .../checkpoint/CheckpointCoordinatorTest.java | 58 ++ 2 files changed, 61 insertions(+), 3 deletions(-) diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java index 38ddee5..c84ca91 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java @@ -1536,13 +1536,13 @@ public class CheckpointCoordinator { throw new IllegalStateException("No completed checkpoint available"); } +LOG.debug("Resetting the master hooks."); +MasterHooks.reset(masterHooks.values(), LOG); + if (operatorCoordinatorRestoreBehavior == OperatorCoordinatorRestoreBehavior.RESTORE_OR_RESET) { // we let the JobManager-side components know that there was a recovery, // even if there was no checkpoint to recover from, yet -LOG.debug("Resetting the master hooks."); -MasterHooks.reset(masterHooks.values(), LOG); - LOG.info("Resetting the Operator Coordinators to an empty state."); restoreStateToCoordinators( OperatorCoordinator.NO_CHECKPOINT, Collections.emptyMap()); diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinatorTest.java 
b/flink-runtime/src/test/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinatorTest.java index f8cbf4d..bb9bb15 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinatorTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinatorTest.java @@ -3351,6 +3351,26 @@ public class CheckpointCoordinatorTest extends TestLogger { } @Test +public void testResetCalledInRegionRecovery() throws Exception { +final JobID jobId = new JobID(); + +// set up the coordinator +CheckpointCoordinator checkpointCoordinator = +new CheckpointCoordinatorBuilder() +.setJobId(jobId) +.setTimer(manuallyTriggeredScheduledExecutor) +.build(); + +TestResetHook hook = new TestResetHook("id"); + +// Add a master hook +checkpointCoordinator.addMasterHook(hook); +assertFalse(hook.resetCalled); + checkpointCoordinator.restoreLatestCheckpointedStateToSubtasks(Collections.emptySet()); +assertTrue(hook.resetCalled); +} + +@Test public void testNotifyCheckpointAbortionInOperatorCoordinator() throws Exception { JobVertexID jobVertexID = new JobVertexID(); ExecutionGraph graph = @@ -3592,4 +3612,42 @@ public class CheckpointCoordinatorTest extends TestLogger { return invokeCounter; } } + +private static class TestResetHook implements MasterTriggerRestoreHook { + +private final String id; +boolean resetCalled; + +TestResetHook(String id) { +this.id = id; +this.resetCalled = false; +} + +@Override +public String getIdentifier() { +return id; +} + +@Override +public void reset() throws Exception { +resetCalled = true; +} + +@Override +public CompletableFuture triggerCheckpoint( +long checkpointId, long timestamp, Executor executor) { +throw new UnsupportedOperationException(); +} + +@Override +public void restoreCheckpoint(long checkpointId, @Nullable String checkpointData) +throws Exception { +throw new UnsupportedOperationException(); +} + +@Override +public SimpleVersionedSerializer createCheckpointDataSerializer() { 
+throw new UnsupportedOperationException(); +} +} }
[flink] branch release-1.12 updated: [FLINK-20848][connector/kafka] Fix Kafka consumer client ID with subtask ID suffix (#14556)
This is an automated email from the ASF dual-hosted git repository. jqin pushed a commit to branch release-1.12 in repository https://gitbox.apache.org/repos/asf/flink.git The following commit(s) were added to refs/heads/release-1.12 by this push: new e203e24 [FLINK-20848][connector/kafka] Fix Kafka consumer client ID with subtask ID suffix (#14556) e203e24 is described below commit e203e24cf775e4d5cdd0469b9db3cf2894b2f9c5 Author: Qingsheng Ren AuthorDate: Wed Jan 13 10:29:05 2021 +0800 [FLINK-20848][connector/kafka] Fix Kafka consumer client ID with subtask ID suffix (#14556) --- .../flink/connector/kafka/source/reader/KafkaPartitionSplitReader.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReader.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReader.java index 7d9e7ba..c1a827c 100644 --- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReader.java +++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReader.java @@ -77,6 +77,7 @@ public class KafkaPartitionSplitReader public KafkaPartitionSplitReader( Properties props, KafkaRecordDeserializer deserializationSchema, int subtaskId) { +this.subtaskId = subtaskId; Properties consumerProps = new Properties(); consumerProps.putAll(props); consumerProps.setProperty(ConsumerConfig.CLIENT_ID_CONFIG, createConsumerClientId(props)); @@ -85,7 +86,6 @@ public class KafkaPartitionSplitReader this.deserializationSchema = deserializationSchema; this.collector = new SimpleCollector<>(); this.groupId = consumerProps.getProperty(ConsumerConfig.GROUP_ID_CONFIG); -this.subtaskId = subtaskId; } @Override