svn commit: r72258 - /release/flink/KEYS

2024-10-10 Thread jqin
Author: jqin
Date: Fri Oct 11 06:57:01 2024
New Revision: 72258

Log:
[flink] Add Becket's public key

Modified:
release/flink/KEYS

Modified: release/flink/KEYS
==============================================================================
--- release/flink/KEYS (original)
+++ release/flink/KEYS Fri Oct 11 06:57:01 2024
@@ -3931,3 +3931,62 @@ CQWjmoAACgkQU4tJ6bzwty+BmQEA7tp61tPgq9Wr
 8sLoOEoBAOCcCJiOv7KiplIac1xv8WHA5eacb5Bm2HGsX7zOTHMN
 =iyaV
 -----END PGP PUBLIC KEY BLOCK-----
+pub   rsa4096 2024-10-11 [SC]
+  A9AD7F73911D747F7ED3128E15E0F69F4424F39A
+uid           [ultimate] Jiangjie (Becket) Qin <jqin@apache.org>
+sig 3        15E0F69F4424F39A 2024-10-11  [self-signature]
+sub   rsa4096 2024-10-11 [E]
+sig  15E0F69F4424F39A 2024-10-11  [self-signature]
+
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+
+mQINBGcIv9oBEADdjqFEOv7aWGo12DZfeOh2C5pumMUJAG5rTiJpO5cl/a8sSwKw
+EhtkrEST+qQaMPutX4p2zE7SZinCFuyjfGFAz3pNlFtURWSHEz2wiPI3EArdrooe
+usAbASRil/bcj4LQIkDaLgiq6LnWXq5AQfttayOszMK/TmU2gnHMyQdjnMddNY1E
+0+GkmeTgRyQKy8GY2S7hS5+v0y8YJEvh1D34hBpZiF0rSSXPiA/LNanLTRSGYd0x
+zmXZkNKZDl51r4IdpoAnOCLvSWEZy2fq9khDIAEbuRfHZKYVaZ0rAXvmxCmgiMob
+j+QHeWHzAb3kbGLAJNaongIazZugtMlL5j0vEqCTTMPWuBk7jCd0ZSuxU8U+ksdY
+Hkac0flsIoEW8RacGFWY6w8r7Xj68f0dzYrsBk38S8GO4KIbTpT0dGQNxX1jN0pu
+d7nrAmQAJXvfBpufs0AtZtR7I9nNkS8ELvSCPim3sVpuk5YIJE9ccjwgrn+AetMx
+AMeMZg6NNNhlB9D9pZOAh7UqksQBqijMY1OB2d4UQ6LNMAHpFbIUhVDlwHREBlWl
+PVKW9J1OzT0cd1xObf9OdbnMQp4GlC5alP3g3VNMk/3fCgLlxWDt7UsJS/r6LkbV
+q9QSS8TuvdY96A1dDr9upvxGoqE7GaMgGgEphLwfysI+nCvp4EjBzs+37QARAQAB
+tCdKaWFuZ2ppZSAoQmVja2V0KSBRaW4gPGpxaW5AYXBhY2hlLm9yZz6JAlEEEwEI
+ADsWIQSprX9zkR10f37TEo4V4PafRCTzmgUCZwi/2gIbAwULCQgHAgIiAgYVCgkI
+CwIEFgIDAQIeBwIXgAAKCRAV4PafRCTzmqtREADbSX1mBhPMTjNB//+bRfkfUaUI
+2FYr22hsYxeIiO+oJTRMsMAB1rP4MoYbUGQXshp21i+8wJDu49XBUs96ZnL/qPZk
+KUst3wXao8B/b9gpIWgz1piuBQP+ERLpRLFBGx1vtbSbT950l5AqlaC2KfKduYZg
+ndEYafpxElv8bsmms+GNVkfAXfQZhYoxZK/PyHFu6pY3gBL+nzvHNWmbXeGGrxLL
+J2mb6ustAFZEupN8zLkBfcD9F2c/Ozgn42dQUsIJkkajKF36j5nkz1F4bjKGG3DA
+YHwwHFRtuMdtgDg9Qc8ycOlPxtc2DJIcMMneKFVi2DeTPixviMtg10sFDa11t1cl
+sDtZk9tpSLWCcv0MDtux7qxZdNOutDUNStheEf0ywG1mtA59l01s3hmkKZe2trtY
+D2bCXvzr2n0+rTRgeoO31cozuprLHlxH/Hj9CnE9bDv23LCD1nl56QV+R6GVxsol
+BtJy+yxzPQRBeI5d3ss1/vOV3QiNfuBA+4c8Mm0kYcBE360TUgsCM6sMerWZQ6HW
+mZYVjU/XZq87x+Vh6G/O7ZohA7DLJkANxlzMvCDD2cH1d8d7/HuH+l/C80Kzs16s
+z1aj4fgfI3TuQiGvapDAZDfL3SDJUydOWvQNUCrrLFMy6xJitq3X09gJwdD8bg55
+dhoUW7pWAYBP8XAUqLkCDQRnCL/aARAArPWssV0jG95sLeMd+ydzVSLymw0eY0ZG
+njmqO86ymswRbnEZR/Y8NRgV1r9ZVhyB23796wtZ7dv8UjGxPZRiP5E0YAvY7cQS
+KMHKIfALzGzMEOKHTXlymgO4FK2iUT4z1juG0y9FB9OEyPrgoD68pjU8KRJDKrQp
+rBn80ZXOWd/lnFQCOipP26lTiq7EV1b/v0GBmPbKp4XNQBtnUyMndBEXXTeRmfJ5
+yf6h/9tGfa60Dy2PhTk4+X0CPFWe0X+K22eEwKYPn8K4UhtntbqQCNaj9xUVDkmN
+SEMaHwWxHluSwf1WK+qbFIYuVsu5q6UkbkXEuPRix/pjtl0kbuAg2ncpGYhQ3j1n
+/V2EeaXSj9fsmFVKBiQaPU53kQb56NII0ZS2mMkm0s1PoNZ7LBVpk4gVTq5YG7Fc
+osMLp3X7WaVFOXwbopgOcE8jW8JdnmGbMfM4S7Q4h9yKr3mSos9V3sI6QBmSzWcC
+3D4mgH7a+W3Cvz4E7MgUmA7Tg4uRgLWuiCM3VTfc9l7OvhOmxOhY3V/poyxS7VOL
+LXYGw0SWMAW7oEv88sV6cLiAXK+P4g7a94cYEt2jA27C1BCXNsvvkGv1RhrmH2gT
+ccyK9viSFBi90G2jXHHpd9b+MUfFIZt8ObvWx7Ola5Du+JblS1eD30x84dvxxUfs
+gYxzNkFY20cAEQEAAYkCNgQYAQgAIBYhBKmtf3ORHXR/ftMSjhXg9p9EJPOaBQJn
+CL/aAhsMAAoJEBXg9p9EJPOa4IkP/1n3blONpP+Efba/6b0+SlzGp6FkxtpH0yJm
+4OlwiPJ7IY5J06AP3ZAl5MaDd7p23FogN/wwaZ+szWdpbdUF5EX/oisppXYyCEdj
+3ELcTELGEFlcd/1LDtIHRKql4Y5vm9DbQessybthWWQoxtWWOAI6IRDrmFRobA51
+36IFMWQdfA2RdGAPrC3B+OmhI8emInDGEX+jzamLbqxQUL+YPytCpad3KtTp69B3
+mzJn89idzH6FT+NxucyxMsrixcZZeSPsNLR6BEW4WceHERwz2QZ6SqwV6Ss9H+89
++88Dg9THvO1kLGT3C42EZHPgDZzxCPCOH6OAk0/73vIObtgOwnEmkUj5v3hviHY9
+MqSabHgSow/xZoXPhJVMJd3qzgqgODO0GgupyG1sGTHnf3YoOHCsVr/EJtAe0+vH
+HytjUBghh71wJA0mvajs2Jqbz5pw6WQ60HeH7dfabyE6FnXBTxPAvErqyDkLnGaj
+qefAHR6TgCN8kLYikDOVcFG8Ti8ZpFiuOdf6Ivyq4SlHVQWVnBU+TiWe6bSevNCt
+302DaARiwKZCzSi8ROCNjhYlF/xA71oXA4u733cPXLVeISxeT9uHpvMmhJ3onJu4
+gn0az213T6Xzycl4sEWSiF2qGQ5RifH8nPlpr4SQ2FzvE7fseLVonxt6KIJHA270
++aNJBX6t
+=diOL
+-----END PGP PUBLIC KEY BLOCK-----




(flink) branch master updated: [FLINK-36292][Connectors/Common][test]: remove timeout to avoid timeout exception (#25371)

2024-09-26 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/master by this push:
 new cc0a8b3f69b [FLINK-36292][Connectors/Common][test]: remove timeout to avoid timeout exception (#25371)
cc0a8b3f69b is described below

commit cc0a8b3f69b8036534a5f0132bbe8f470d945ca0
Author: Luke Chen 
AuthorDate: Fri Sep 27 03:25:07 2024 +0900

[FLINK-36292][Connectors/Common][test]: remove timeout to avoid timeout exception (#25371)
---
 .../connector/base/source/reader/fetcher/SplitFetcherManagerTest.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/flink-connectors/flink-connector-base/src/test/java/org/apache/flink/connector/base/source/reader/fetcher/SplitFetcherManagerTest.java b/flink-connectors/flink-connector-base/src/test/java/org/apache/flink/connector/base/source/reader/fetcher/SplitFetcherManagerTest.java
index 21572b1c84d..c225ea8e718 100644
--- a/flink-connectors/flink-connector-base/src/test/java/org/apache/flink/connector/base/source/reader/fetcher/SplitFetcherManagerTest.java
+++ b/flink-connectors/flink-connector-base/src/test/java/org/apache/flink/connector/base/source/reader/fetcher/SplitFetcherManagerTest.java
@@ -71,7 +71,7 @@ public class SplitFetcherManagerTest {
 .hasRootCauseMessage("Artificial exception on closing the split reader.");
 }
 
-@Test(timeout = 3)
+@Test
 public void testCloseCleansUpPreviouslyClosedFetcher() throws Exception {
 final String splitId = "testSplit";
 // Set the queue capacity to 1 to make sure in this case the
@@ -94,7 +94,7 @@ public class SplitFetcherManagerTest {
 },
 "The idle fetcher should have been removed.");
 // Now close the fetcher manager. The fetcher manager closing should not block.
-fetcherManager.close(60_000);
+fetcherManager.close(Long.MAX_VALUE);
 }
 
 @Test
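
The fix is sound because the close path already blocks until cleanup finishes, so the test needs no per-test deadline of its own. Below is a minimal sketch of the underlying idea under invented names (WaitUtil and waitUntil are illustrative, not Flink test utilities): poll a condition with no hard upper bound and let the CI harness own the overall timeout, so a slow machine cannot turn into a spurious timeout failure.

    import java.time.Duration;
    import java.util.function.BooleanSupplier;

    public final class WaitUtil {
        private WaitUtil() {}

        // Poll until the condition holds; deliberately no upper bound, the
        // surrounding harness decides when to give up.
        public static void waitUntil(BooleanSupplier condition, Duration pollInterval)
                throws InterruptedException {
            while (!condition.getAsBoolean()) {
                Thread.sleep(pollInterval.toMillis());
            }
        }
    }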



(flink) branch master updated: [FLINK-33681][Runtime/Metrics] Reuse input/output metrics of SourceOperator/SinkWriterOperator for task (#23998)

2024-08-21 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/master by this push:
 new 0309f13e8af [FLINK-33681][Runtime/Metrics] Reuse input/output metrics of SourceOperator/SinkWriterOperator for task (#23998)
0309f13e8af is described below

commit 0309f13e8af62f9b523e227a3a66ff59e838a1b4
Author: Zhanghao Chen 
AuthorDate: Thu Aug 22 06:31:44 2024 +0800

[FLINK-33681][Runtime/Metrics] Reuse input/output metrics of SourceOperator/SinkWriterOperator for task (#23998)
---
 .../base/source/reader/SourceMetricsITCase.java| 21 +
 .../groups/InternalOperatorIOMetricGroup.java  | 12 
 .../runtime/metrics/groups/TaskIOMetricGroup.java  | 17 +
 .../flink/runtime/testutils/InMemoryReporter.java  | 22 +-
 .../streaming/api/operators/SourceOperator.java|  5 +
 .../runtime/operators/sink/SinkWriterOperator.java | 15 +++
 .../test/streaming/runtime/SinkMetricsITCase.java  | 21 +
 .../streaming/runtime/SinkV2MetricsITCase.java | 20 
 8 files changed, 128 insertions(+), 5 deletions(-)

diff --git a/flink-connectors/flink-connector-base/src/test/java/org/apache/flink/connector/base/source/reader/SourceMetricsITCase.java b/flink-connectors/flink-connector-base/src/test/java/org/apache/flink/connector/base/source/reader/SourceMetricsITCase.java
index b7631ef0311..3c315f694b9 100644
--- a/flink-connectors/flink-connector-base/src/test/java/org/apache/flink/connector/base/source/reader/SourceMetricsITCase.java
+++ b/flink-connectors/flink-connector-base/src/test/java/org/apache/flink/connector/base/source/reader/SourceMetricsITCase.java
@@ -34,6 +34,7 @@ import org.apache.flink.metrics.Metric;
 import org.apache.flink.metrics.groups.OperatorMetricGroup;
 import org.apache.flink.runtime.metrics.MetricNames;
 import org.apache.flink.runtime.metrics.groups.InternalSourceReaderMetricGroup;
+import org.apache.flink.runtime.metrics.groups.TaskMetricGroup;
 import org.apache.flink.runtime.testutils.InMemoryReporter;
 import org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration;
 import org.apache.flink.streaming.api.datastream.DataStream;
@@ -223,6 +224,26 @@ public class SourceMetricsITCase extends TestLogger {
 
assertThatGauge(metrics.get(MetricNames.SOURCE_IDLE_TIME)).isEqualTo(0L);
 }
 assertThat(subtaskWithMetrics).isEqualTo(numSplits);
+
+// Test operator I/O metrics are reused by task metrics
+List<TaskMetricGroup> taskMetricGroups =
+reporter.findTaskMetricGroups(jobId, "MetricTestingSource");
+assertThat(taskMetricGroups).hasSize(parallelism);
+
+int subtaskWithTaskMetrics = 0;
+for (TaskMetricGroup taskMetricGroup : taskMetricGroups) {
+// there are only 2 splits assigned; so two groups will not update metrics
+if (taskMetricGroup.getIOMetricGroup().getNumRecordsInCounter().getCount() == 0) {
+continue;
+}
+
+subtaskWithTaskMetrics++;
+assertThatCounter(taskMetricGroup.getIOMetricGroup().getNumRecordsInCounter())
+.isEqualTo(processedRecordsPerSubtask);
+assertThatCounter(taskMetricGroup.getIOMetricGroup().getNumBytesInCounter())
+.isEqualTo(processedRecordsPerSubtask * MockRecordEmitter.RECORD_SIZE_IN_BYTES);
+}
+assertThat(subtaskWithTaskMetrics).isEqualTo(numSplits);
 }
 
 private static class LaggingTimestampAssigner
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/metrics/groups/InternalOperatorIOMetricGroup.java b/flink-runtime/src/main/java/org/apache/flink/runtime/metrics/groups/InternalOperatorIOMetricGroup.java
index 31cf560ce78..0405b2d6e07 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/metrics/groups/InternalOperatorIOMetricGroup.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/metrics/groups/InternalOperatorIOMetricGroup.java
@@ -97,4 +97,16 @@ public class InternalOperatorIOMetricGroup extends ProxyMetricGroup {
 
 private final Clock clock;
 
-private final Counter numBytesIn;
-private final Counter numBytesOut;
+private final SumCounter numBytesIn;
+private final SumCounter numBytesOut;
 private final SumCounter numRecordsIn;
 private final SumCounter numRecordsOut;
 private final Counter numBuffersOut;
@@ -95,8 +95,8 @@ public class TaskIOMetricGroup extends ProxyMetricGroup {
 public TaskIOMetricGroup(TaskMetricGroup parent, Clock clock) {
 super(parent);
 this.clock = clock;
-this.numBytesIn = counter(MetricNames.IO_NUM_BYTES_IN);
-this.numBytesOut = counter(MetricNames.IO_NUM_BYTES_OUT);
+this.numBytesIn
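
A hedged sketch of the reuse mechanism this commit relies on: the task-level counter reports its own count plus the counts of operator-level counters registered with it, so operator I/O metrics feed the task metrics instead of being tracked twice. All names below are illustrative stand-ins, not Flink's actual metric classes.

    import java.util.List;
    import java.util.concurrent.CopyOnWriteArrayList;
    import java.util.concurrent.atomic.AtomicLong;

    class SummingCounter {
        private final AtomicLong ownCount = new AtomicLong();
        private final List<AtomicLong> reusedCounters = new CopyOnWriteArrayList<>();

        void inc(long n) {
            ownCount.addAndGet(n);
        }

        // Operator-level counters are registered once and summed on read,
        // so the operators keep owning their own counts.
        void reuse(AtomicLong operatorCounter) {
            reusedCounters.add(operatorCounter);
        }

        long getCount() {
            long sum = ownCount.get();
            for (AtomicLong c : reusedCounters) {
                sum += c.get();
            }
            return sum;
        }
    }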

(flink) branch master updated: [FLINK-35924][Connectors / Common] delay the SplitReader closure until all the emitted records are processed. (#25130)

2024-08-06 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/master by this push:
 new a15bf58da54 [FLINK-35924][Connectors / Common] delay the SplitReader closure until all the emitted records are processed. (#25130)
a15bf58da54 is described below

commit a15bf58da5442deeb07ac2a1795a961a0ec75561
Author: Jiangjie (Becket) Qin 
AuthorDate: Tue Aug 6 08:31:59 2024 -0700

[FLINK-35924][Connectors / Common] delay the SplitReader closure until all the emitted records are processed. (#25130)

This patch delays the SplitReader closure until all the previously emitted records have been processed. This is needed for SplitReader implementations that store the returned records in an internal buffer to save a data copy. In that case, closing the SplitReader would corrupt the emitted but not yet processed records.
---
 .../base/source/reader/fetcher/SplitFetcher.java   | 45 
 .../source/reader/fetcher/SplitFetcherManager.java | 29 +++-
 .../reader/fetcher/SplitFetcherManagerTest.java| 79 +-
 .../source/reader/fetcher/SplitFetcherTest.java| 36 ++
 .../java/org/apache/flink/test/util/TestUtils.java | 38 +++
 5 files changed, 223 insertions(+), 4 deletions(-)

diff --git a/flink-connectors/flink-connector-base/src/main/java/org/apache/flink/connector/base/source/reader/fetcher/SplitFetcher.java b/flink-connectors/flink-connector-base/src/main/java/org/apache/flink/connector/base/source/reader/fetcher/SplitFetcher.java
index 339686415ee..f05d7d16a45 100644
--- a/flink-connectors/flink-connector-base/src/main/java/org/apache/flink/connector/base/source/reader/fetcher/SplitFetcher.java
+++ b/flink-connectors/flink-connector-base/src/main/java/org/apache/flink/connector/base/source/reader/fetcher/SplitFetcher.java
@@ -20,6 +20,7 @@ package org.apache.flink.connector.base.source.reader.fetcher;
 
 import org.apache.flink.annotation.PublicEvolving;
 import org.apache.flink.api.connector.source.SourceSplit;
+import org.apache.flink.connector.base.source.reader.RecordsBySplits;
 import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds;
 import org.apache.flink.connector.base.source.reader.splitreader.SplitReader;
 import org.apache.flink.connector.base.source.reader.synchronization.FutureCompletingBlockingQueue;
@@ -32,10 +33,12 @@ import javax.annotation.concurrent.GuardedBy;
 
 import java.util.ArrayDeque;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.Deque;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.locks.Condition;
 import java.util.concurrent.locks.ReentrantLock;
 import java.util.function.Consumer;
@@ -82,6 +85,14 @@ public class SplitFetcher implements Runnable {
 
 private final Consumer<Collection<String>> splitFinishedHook;
 
+/**
+ * A shutdown latch to help make sure the SplitReader is only closed after all the emitted
+ * records have been processed by the main reader thread. This is needed because in some cases,
+ * the records in the RecordsWithSplitIds may have not been processed when the split
+ * fetcher shuts down.
+ */
+private final CountDownLatch recordsProcessedLatch;
+
 SplitFetcher(
 int id,
 FutureCompletingBlockingQueue<RecordsWithSplitIds<E>> elementsQueue,
@@ -97,6 +108,7 @@ public class SplitFetcher implements Runnable {
 this.shutdownHook = checkNotNull(shutdownHook);
 this.allowUnalignedSourceSplits = allowUnalignedSourceSplits;
 this.splitFinishedHook = splitFinishedHook;
+this.recordsProcessedLatch = new CountDownLatch(1);
 
 this.fetchTask =
 new FetchTask<>(
@@ -117,10 +129,25 @@ public class SplitFetcher implements Runnable {
 while (runOnce()) {
 // nothing to do, everything is inside #runOnce.
 }
+if (recordsProcessedLatch.getCount() > 0) {
+// Put an empty synchronization batch to the element queue.
+// When this batch is recycled, all the records emitted earlier
+// must have already been processed.
+elementsQueue.put(
+fetcherId(),
+new RecordsBySplits(Collections.emptyMap(), Collections.emptySet()) {
+@Override
+public void recycle() {
+super.recycle();
+recordsProcessedLatch.countDown();
+}
+});
+}
 } catch (Throwable t) {
 errorHandler.accept(t);
 } finally {
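
Read in isolation, the synchronization trick above works like the following hedged sketch, where Batch and the queue are illustrative stand-ins for RecordsWithSplitIds and the element queue: because the queue is FIFO, the marker's recycle() can only run after every earlier batch has been taken by the reader, at which point the fetcher may close the SplitReader safely.

    import java.util.concurrent.CountDownLatch;
    import java.util.concurrent.LinkedBlockingQueue;

    public class SyncBatchSketch {
        // Stand-in for RecordsWithSplitIds: the consumer calls recycle()
        // once a batch has been fully processed.
        interface Batch {
            void recycle();
        }

        public static void main(String[] args) throws InterruptedException {
            LinkedBlockingQueue<Batch> queue = new LinkedBlockingQueue<>();
            CountDownLatch recordsProcessed = new CountDownLatch(1);

            // Fetcher side: enqueue an empty marker batch whose recycle()
            // opens the latch.
            queue.put(recordsProcessed::countDown);

            // Reader side: drain and recycle as usual.
            queue.take().recycle();

            // Fetcher side: once the latch opens, all earlier records have
            // been handed to the reader; closing the SplitReader is safe.
            recordsProcessed.await();
            System.out.println("safe to close the split reader");
        }
    }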
 

(flink) branch master updated: [FLIP-321] Update the docs to add migration periods for deprecated APIs. (#23865)

2023-12-15 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/master by this push:
 new d4a3687aacd [FLIP-321] Update the docs to add migration periods for deprecated APIs. (#23865)
d4a3687aacd is described below

commit d4a3687aacdea61920098dd7814776655fde19db
Author: Jiangjie (Becket) Qin 
AuthorDate: Fri Dec 15 16:11:42 2023 +0800

[FLIP-321] Update the docs to add migration periods for deprecated APIs. (#23865)
---
 docs/content/docs/ops/upgrading.md | 38 +++---
 1 file changed, 35 insertions(+), 3 deletions(-)

diff --git a/docs/content/docs/ops/upgrading.md b/docs/content/docs/ops/upgrading.md
index ce58a7848e5..cc7d5e28cd8 100644
--- a/docs/content/docs/ops/upgrading.md
+++ b/docs/content/docs/ops/upgrading.md
@@ -55,11 +55,43 @@ This table lists the `source` / `binary` compatibility guarantees for each annotation:
 |  `Experimental`  |{{< xmark >}}/{{< xmark >}} |{{< xmark >}}/{{< xmark >}} |{{< xmark >}}/{{< xmark >}} |
 
 {{< hint info >}}
-{{< label Example >}}  
-Code written against a `PublicEvolving` API in 1.15.2 will continue to run in 1.15.3, without having to recompile the code.  
-That same code would have to be recompiled when upgrading to 1.16.0 though.
+{{< label Example >}}
+Consider code written against a `Public` API in 1.15.2:
+* The code can continue to run when upgrading to Flink 1.15.3 without recompiling, because patch version upgrades for `Public` APIs guarantee `binary` compatibility.
+* The same code may have to be recompiled when upgrading from 1.15.x to 1.16.0, because minor version upgrades for `Public` APIs only provide `source` compatibility, not `binary` compatibility.
+* A code change may be required when upgrading from 1.x to 2.x, because major version upgrades for `Public` APIs provide neither `source` nor `binary` compatibility.
+
+Consider code written against a `PublicEvolving` API in 1.15.2:
+* The code can continue to run when upgrading to Flink 1.15.3 without recompiling, because patch version upgrades for `PublicEvolving` APIs guarantee `binary` compatibility.
+* A code change may be required when upgrading from 1.15.x to Flink 1.16.0, because minor version upgrades for `PublicEvolving` APIs provide neither `source` nor `binary` compatibility.
 {{< /hint >}}
 
+### Deprecated API Migration Period
+When an API is deprecated, it is marked with the `@Deprecated` annotation and a deprecation message is added to the Javadoc.
+According to [FLIP-321](https://cwiki.apache.org/confluence/display/FLINK/FLIP-321%3A+Introduce+an+API+deprecation+process),
+starting from release 1.18, each deprecated API will have a guaranteed migration period depending on the API stability level:
+
+|    Annotation    |          Guaranteed Migration Period           | Could be removed after the migration period |
+|:----------------:|:----------------------------------------------:|:-------------------------------------------:|
+|     `Public`     |                2 minor releases                |              Next major version             |
+| `PublicEvolving` |                1 minor release                 |              Next minor version             |
+|  `Experimental`  | 1 patch release for the affected minor release |              Next patch version             |
+
+The source code of a deprecated API will be kept for at least the guaranteed migration period,
+and may be removed at any point after the migration period has passed.
+
+{{< hint info >}}
+{{< label Example >}}
+Assuming a release sequence of 1.18, 1.19, 1.20, 2.0, 2.1, ..., 3.0:
+- if a `Public` API is deprecated in 1.18, it will not be removed until 2.0.
+- if a `Public` API is deprecated in 1.20, the source code will be kept in 2.0 because the migration period is 2 minor releases. Also, because a `Public` API must maintain source compatibility throughout a major version, the source code will be kept for all the 2.x versions and removed in 3.0 at the earliest.
+- if a `PublicEvolving` API is deprecated in 1.18, it will be removed in 1.20 at the earliest.
+- if a `PublicEvolving` API is deprecated in 1.20, the source code will be kept in 2.0 because the migration period is 1 minor release. The source code may be removed in 2.1 at the earliest.
+- if an `Experimental` API is deprecated in 1.18.0, the source code will be kept for 1.18.1 and removed in 1.18.2 at the earliest. The source code may also be removed in 1.19.0.
+{{< /hint >}}
+
+Please check the [FLIP-321](https://cwiki.apache.org/confluence/display/FLINK/FLIP-321%3A+Introduce+an+API+deprecation+process) wiki for more details.
+
 ## Restarting Streaming Applications
 
 The line of action for upgrading a streaming application or m
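
To make the deprecation process above concrete, here is a hedged sketch of what a FLIP-321-style deprecation could look like in code. SplitLister and its methods are invented for illustration; only the @PublicEvolving annotation (org.apache.flink.annotation.PublicEvolving) is a real Flink type.

    import java.util.List;

    import org.apache.flink.annotation.PublicEvolving;

    @PublicEvolving
    public interface SplitLister {

        /**
         * @deprecated Deprecated in 1.18. Per FLIP-321, a {@code PublicEvolving} API has a
         *     guaranteed migration period of one minor release, so this method may be removed
         *     in 1.20 at the earliest. Use {@link #listSplitIds()} instead.
         */
        @Deprecated
        List<String> getSplits();

        List<String> listSplitIds();
    }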

[flink] branch master updated (b1a111c85c1 -> 5be4688e8b8)

2023-09-26 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git


from b1a111c85c1 [hotfix][tests] Decrease the network memory size to make HybridShuffleITCase more stable
 add 5be4688e8b8 [FLINK-20767][table planner] Support filter push down on nested fields (#23313)

No new revisions were added by this update.

Summary of changes:
 .../utils/ApiExpressionDefaultVisitor.java |  6 ++
 .../utils/ResolvedExpressionDefaultVisitor.java|  6 ++
 .../expressions/ExpressionDefaultVisitor.java  |  5 ++
 .../flink/table/expressions/ExpressionVisitor.java |  4 +
 ...on.java => NestedFieldReferenceExpression.java} | 73 -
 .../expressions/converter/ExpressionConverter.java | 12 +++
 .../plan/abilities/source/FilterPushDownSpec.java  |  7 +-
 .../table/planner/plan/utils/FlinkRexUtil.scala|  3 +-
 .../planner/plan/utils/RexNodeExtractor.scala  | 48 ++-
 .../table/planner/factories/TestValuesCatalog.java |  2 +-
 .../planner/factories/TestValuesTableFactory.java  | 15 +++-
 .../PushFilterIntoTableSourceScanRuleTest.java | 63 +++
 .../flink/table/planner/utils/FilterUtils.java | 45 +--
 .../PushFilterIntoTableSourceScanRuleTest.xml  | 92 ++
 .../runtime/batch/sql/TableSourceITCase.scala  | 38 +
 .../runtime/stream/sql/TableSourceITCase.scala | 54 +
 16 files changed, 421 insertions(+), 52 deletions(-)
 copy flink-table/flink-table-common/src/main/java/org/apache/flink/table/expressions/{FieldReferenceExpression.java => NestedFieldReferenceExpression.java} (53%)



[flink] branch master updated: [FLINK-30424][DataStream API] Add source operator addSplits log when restoring from state

2022-12-16 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/master by this push:
 new 528186b62da [FLINK-30424][DataStream API] Add source operator addSplits log when restoring from state
528186b62da is described below

commit 528186b62da92ecde0fa308f1df0cc6f95495f4d
Author: Ran Tao 
AuthorDate: Thu Dec 15 13:18:32 2022 +0800

[FLINK-30424][DataStream API] Add source operator addSplits log when restoring from state
---
 .../java/org/apache/flink/streaming/api/operators/SourceOperator.java| 1 +
 1 file changed, 1 insertion(+)

diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/SourceOperator.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/SourceOperator.java
index 817d19dd795..3e962596e66 100644
--- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/SourceOperator.java
+++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/SourceOperator.java
@@ -333,6 +333,7 @@ public class SourceOperator extends AbstractStr
 // restore the state if necessary.
 final List<SplitT> splits = CollectionUtil.iterableToList(readerState.get());
 if (!splits.isEmpty()) {
+LOG.info("Restoring state for {} split(s) to reader.", splits.size());
 sourceReader.addSplits(splits);
 }
 



[flink] branch release-1.15 updated: Revert "[FLINK-26394][checkpoint] Cancel the checkpoint completable future when checkpoint is aborting."

2022-04-14 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch release-1.15
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/release-1.15 by this push:
 new 0c718666476 Revert "[FLINK-26394][checkpoint] Cancel the checkpoint completable future when checkpoint is aborting."
0c718666476 is described below

commit 0c718666476ae469fb825e8fdf362470d7af2488
Author: Jiangjie (Becket) Qin 
AuthorDate: Thu Apr 14 17:56:59 2022 +0800

Revert "[FLINK-26394][checkpoint] Cancel the checkpoint completable future when checkpoint is aborting."

This reverts commit 9fc89a05f128ab645b73687f240fb14b57790fc6 due to FLINK-27148.
---
 .../runtime/checkpoint/CheckpointCoordinator.java  | 17 ++---
 .../runtime/checkpoint/PendingCheckpoint.java  |  7 +---
 .../checkpoint/CheckpointCoordinatorTest.java  | 44 --
 .../runtime/checkpoint/PendingCheckpointTest.java  |  3 +-
 4 files changed, 6 insertions(+), 65 deletions(-)

diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java
index 2d47d79f063..72a6b7032de 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java
@@ -536,8 +536,6 @@ public class CheckpointCoordinator {
 boolean initializeBaseLocations = 
!baseLocationsForCheckpointInitialized;
 baseLocationsForCheckpointInitialized = true;
 
-CompletableFuture masterTriggerCompletionPromise = new 
CompletableFuture<>();
-
 final CompletableFuture 
pendingCheckpointCompletableFuture =
 checkpointPlanFuture
 .thenApplyAsync(
@@ -562,8 +560,7 @@ public class CheckpointCoordinator {
 checkpointInfo.f0,
 request.isPeriodic,
 checkpointInfo.f1,
-
request.getOnCompletionFuture(),
-
masterTriggerCompletionPromise),
+
request.getOnCompletionFuture()),
 timer);
 
 final CompletableFuture coordinatorCheckpointsComplete =
@@ -618,12 +615,8 @@ public class CheckpointCoordinator {
 },
 timer);
 
-FutureUtils.forward(
-CompletableFuture.allOf(masterStatesComplete, 
coordinatorCheckpointsComplete),
-masterTriggerCompletionPromise);
-
 FutureUtils.assertNoException(
-masterTriggerCompletionPromise
+CompletableFuture.allOf(masterStatesComplete, 
coordinatorCheckpointsComplete)
 .handleAsync(
 (ignored, throwable) -> {
 final PendingCheckpoint checkpoint =
@@ -785,8 +778,7 @@ public class CheckpointCoordinator {
 CheckpointPlan checkpointPlan,
 boolean isPeriodic,
 long checkpointID,
-CompletableFuture onCompletionPromise,
-CompletableFuture masterTriggerCompletionPromise) {
+CompletableFuture onCompletionPromise) {
 
 synchronized (lock) {
 try {
@@ -811,8 +803,7 @@ public class CheckpointCoordinator {
 masterHooks.keySet(),
 props,
 onCompletionPromise,
-pendingCheckpointStats,
-masterTriggerCompletionPromise);
+pendingCheckpointStats);
 
 synchronized (lock) {
 pendingCheckpoints.put(checkpointID, checkpoint);
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/PendingCheckpoint.java b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/PendingCheckpoint.java
index 8ca6e22020e..b4bd8eacc5e 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/PendingCheckpoint.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/PendingCheckpoint.java
@@ -112,8 +112,6 @@ public class PendingCheckpoint implements Checkpoint {
 
 @Nullable private final PendingCheckpointStats pendingCheckpointStats;
 
-private final CompletableFuture masterTriggerCompletionPromise;
-
 /** Target storage location to persist the checkpoint metadata to. */
 @Nullable private CheckpointStorageLocation targetLocation;
 
@@ -138,8 +136,7 @@ public class PendingChe

[flink] branch release-1.15 updated: [FLINK-26394][checkpoint] Cancel the checkpoint completable future when checkpoint is aborting.

2022-04-14 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch release-1.15
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/release-1.15 by this push:
 new 9fc89a05f12 [FLINK-26394][checkpoint] Cancel the checkpoint completable future when checkpoint is aborting.
9fc89a05f12 is described below

commit 9fc89a05f128ab645b73687f240fb14b57790fc6
Author: Gen Luo 
AuthorDate: Wed Apr 6 16:38:39 2022 +0800

[FLINK-26394][checkpoint] Cancel the checkpoint completable future when checkpoint is aborting.
---
 .../runtime/checkpoint/CheckpointCoordinator.java  | 17 +++--
 .../runtime/checkpoint/PendingCheckpoint.java  |  7 +++-
 .../checkpoint/CheckpointCoordinatorTest.java  | 44 ++
 .../runtime/checkpoint/PendingCheckpointTest.java  |  3 +-
 4 files changed, 65 insertions(+), 6 deletions(-)

diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java
index 72a6b7032de..2d47d79f063 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java
@@ -536,6 +536,8 @@ public class CheckpointCoordinator {
 boolean initializeBaseLocations = 
!baseLocationsForCheckpointInitialized;
 baseLocationsForCheckpointInitialized = true;
 
+CompletableFuture masterTriggerCompletionPromise = new 
CompletableFuture<>();
+
 final CompletableFuture 
pendingCheckpointCompletableFuture =
 checkpointPlanFuture
 .thenApplyAsync(
@@ -560,7 +562,8 @@ public class CheckpointCoordinator {
 checkpointInfo.f0,
 request.isPeriodic,
 checkpointInfo.f1,
-
request.getOnCompletionFuture()),
+
request.getOnCompletionFuture(),
+
masterTriggerCompletionPromise),
 timer);
 
 final CompletableFuture coordinatorCheckpointsComplete =
@@ -615,8 +618,12 @@ public class CheckpointCoordinator {
 },
 timer);
 
+FutureUtils.forward(
+CompletableFuture.allOf(masterStatesComplete, 
coordinatorCheckpointsComplete),
+masterTriggerCompletionPromise);
+
 FutureUtils.assertNoException(
-CompletableFuture.allOf(masterStatesComplete, 
coordinatorCheckpointsComplete)
+masterTriggerCompletionPromise
 .handleAsync(
 (ignored, throwable) -> {
 final PendingCheckpoint checkpoint =
@@ -778,7 +785,8 @@ public class CheckpointCoordinator {
 CheckpointPlan checkpointPlan,
 boolean isPeriodic,
 long checkpointID,
-CompletableFuture onCompletionPromise) {
+CompletableFuture onCompletionPromise,
+CompletableFuture masterTriggerCompletionPromise) {
 
 synchronized (lock) {
 try {
@@ -803,7 +811,8 @@ public class CheckpointCoordinator {
 masterHooks.keySet(),
 props,
 onCompletionPromise,
-pendingCheckpointStats);
+pendingCheckpointStats,
+masterTriggerCompletionPromise);
 
 synchronized (lock) {
 pendingCheckpoints.put(checkpointID, checkpoint);
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/PendingCheckpoint.java b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/PendingCheckpoint.java
index b4bd8eacc5e..8ca6e22020e 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/PendingCheckpoint.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/PendingCheckpoint.java
@@ -112,6 +112,8 @@ public class PendingCheckpoint implements Checkpoint {
 
 @Nullable private final PendingCheckpointStats pendingCheckpointStats;
 
+private final CompletableFuture masterTriggerCompletionPromise;
+
 /** Target storage location to persist the checkpoint metadata to. */
 @Nullable private CheckpointStorageLocation targetLocation;
 
@@ -136,7 +138,8 @@ public class PendingCheckpoint implements Checkpoint {
 Collection masterStateIdentifiers,
 CheckpointProperties props,
 Completa

[flink] branch master updated: [FLINK-26394][checkpoint] Cancel the checkpoint completable future when checkpoint is aborting.

2022-04-07 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/master by this push:
 new 31222b9adf1 [FLINK-26394][checkpoint] Cancel the checkpoint completable future when checkpoint is aborting.
31222b9adf1 is described below

commit 31222b9adf1c354b22fd50c587efc16734b18d40
Author: Gen Luo 
AuthorDate: Wed Apr 6 16:38:39 2022 +0800

[FLINK-26394][checkpoint] Cancel the checkpoint completable future when checkpoint is aborting.
---
 .../runtime/checkpoint/CheckpointCoordinator.java  | 17 +++--
 .../runtime/checkpoint/PendingCheckpoint.java  |  7 +++-
 .../checkpoint/CheckpointCoordinatorTest.java  | 44 ++
 .../runtime/checkpoint/PendingCheckpointTest.java  |  3 +-
 4 files changed, 65 insertions(+), 6 deletions(-)

diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java
index 2efc034ca90..6fd6ad19fea 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java
@@ -536,6 +536,8 @@ public class CheckpointCoordinator {
 boolean initializeBaseLocations = 
!baseLocationsForCheckpointInitialized;
 baseLocationsForCheckpointInitialized = true;
 
+CompletableFuture masterTriggerCompletionPromise = new 
CompletableFuture<>();
+
 final CompletableFuture 
pendingCheckpointCompletableFuture =
 checkpointPlanFuture
 .thenApplyAsync(
@@ -560,7 +562,8 @@ public class CheckpointCoordinator {
 checkpointInfo.f0,
 request.isPeriodic,
 checkpointInfo.f1,
-
request.getOnCompletionFuture()),
+
request.getOnCompletionFuture(),
+
masterTriggerCompletionPromise),
 timer);
 
 final CompletableFuture coordinatorCheckpointsComplete =
@@ -615,8 +618,12 @@ public class CheckpointCoordinator {
 },
 timer);
 
+FutureUtils.forward(
+CompletableFuture.allOf(masterStatesComplete, 
coordinatorCheckpointsComplete),
+masterTriggerCompletionPromise);
+
 FutureUtils.assertNoException(
-CompletableFuture.allOf(masterStatesComplete, 
coordinatorCheckpointsComplete)
+masterTriggerCompletionPromise
 .handleAsync(
 (ignored, throwable) -> {
 final PendingCheckpoint checkpoint =
@@ -778,7 +785,8 @@ public class CheckpointCoordinator {
 CheckpointPlan checkpointPlan,
 boolean isPeriodic,
 long checkpointID,
-CompletableFuture onCompletionPromise) {
+CompletableFuture onCompletionPromise,
+CompletableFuture masterTriggerCompletionPromise) {
 
 synchronized (lock) {
 try {
@@ -803,7 +811,8 @@ public class CheckpointCoordinator {
 masterHooks.keySet(),
 props,
 onCompletionPromise,
-pendingCheckpointStats);
+pendingCheckpointStats,
+masterTriggerCompletionPromise);
 
 synchronized (lock) {
 pendingCheckpoints.put(checkpointID, checkpoint);
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/PendingCheckpoint.java b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/PendingCheckpoint.java
index b4bd8eacc5e..8ca6e22020e 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/PendingCheckpoint.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/PendingCheckpoint.java
@@ -112,6 +112,8 @@ public class PendingCheckpoint implements Checkpoint {
 
 @Nullable private final PendingCheckpointStats pendingCheckpointStats;
 
+private final CompletableFuture masterTriggerCompletionPromise;
+
 /** Target storage location to persist the checkpoint metadata to. */
 @Nullable private CheckpointStorageLocation targetLocation;
 
@@ -136,7 +138,8 @@ public class PendingCheckpoint implements Checkpoint {
 Collection masterStateIdentifiers,
 CheckpointProperties props,
 CompletableFuture onCompleti
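
The commit's core pattern, restated as a hedged standalone sketch: downstream stages are chained off a promise the coordinator controls, and the real trigger pipeline is forwarded into that promise, so an aborting checkpoint can cancel the promise without waiting on master states or coordinator checkpoints. The forward method mirrors what FutureUtils.forward does; the rest is illustrative.

    import java.util.concurrent.CompletableFuture;

    public class ForwardingSketch {

        // Complete the target exactly as the source completes, like FutureUtils.forward.
        static <T> void forward(CompletableFuture<T> source, CompletableFuture<T> target) {
            source.whenComplete(
                    (value, error) -> {
                        if (error != null) {
                            target.completeExceptionally(error);
                        } else {
                            target.complete(value);
                        }
                    });
        }

        public static void main(String[] args) {
            CompletableFuture<Void> masterStatesComplete = new CompletableFuture<>();
            CompletableFuture<Void> triggerPromise = new CompletableFuture<>();
            forward(CompletableFuture.allOf(masterStatesComplete), triggerPromise);

            // Downstream hangs off the promise, not the raw pipeline.
            triggerPromise.handle(
                    (ignored, t) -> {
                        System.out.println("released, failure = " + t);
                        return null;
                    });

            // Abort path: cancel the promise directly; masterStatesComplete never finished.
            triggerPromise.cancel(false);
        }
    }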

[flink] branch release-1.14 updated: [FLINK-26723][runtime] fix the error message thrown by SourceCoordinatorContext

2022-03-19 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch release-1.14
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/release-1.14 by this push:
 new 36136bc  [FLINK-26723][runtime] fix the error message thrown by SourceCoordinatorContext
36136bc is described below

commit 36136bc2bc33f0dc0add1303af949f681a7e42cd
Author: zoucao 
AuthorDate: Sat Mar 19 00:45:19 2022 +0800

[FLINK-26723][runtime] fix the error message thrown by SourceCoordinatorContext
---
 .../source/coordinator/SourceCoordinatorContext.java  | 19 +++
 .../coordinator/SourceCoordinatorContextTest.java |  2 +-
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java
index c07f62d..3c05fe4 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java
@@ -178,14 +178,17 @@ public class SourceCoordinatorContext
 callInCoordinatorThread(
 () -> {
 // Ensure all the subtasks in the assignment have registered.
-for (Integer subtaskId : assignment.assignment().keySet()) {
-if (!registeredReaders.containsKey(subtaskId)) {
-throw new IllegalArgumentException(
-String.format(
-"Cannot assign splits %s to subtask %d because the subtask is not registered.",
-registeredReaders.get(subtaskId), subtaskId));
-}
-}
+assignment
+.assignment()
+.forEach(
+(id, splits) -> {
+if (!registeredReaders.containsKey(id)) {
+throw new IllegalArgumentException(
+String.format(
+"Cannot assign splits %s to subtask %d because the subtask is not registered.",
+splits, id));
+}
+});
 
 assignmentTracker.recordSplitAssignment(assignment);
 assignment
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContextTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContextTest.java
index d15f28e..380e65c 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContextTest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContextTest.java
@@ -153,7 +153,7 @@ public class SourceCoordinatorContextTest extends SourceCoordinatorTestBase {
 }
 },
 "assignSplits() should fail to assign the splits to a reader that is not registered.",
-"Cannot assign splits");
+"Cannot assign splits " + splitsAssignment.assignment().get(0));
 }
 
 @Test
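
The bug being fixed, restated as a hedged standalone sketch: the old message interpolated registeredReaders.get(subtaskId), which is necessarily null for an unregistered reader, so the exception reported the splits as null. Iterating over the assignment itself puts the real splits into the message. The types below are simplified stand-ins.

    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class AssignmentErrorSketch {
        public static void main(String[] args) {
            Map<Integer, List<String>> assignment = Map.of(0, List.of("split-0"));
            Map<Integer, String> registeredReaders = new HashMap<>(); // subtask 0 never registered

            try {
                assignment.forEach(
                        (id, splits) -> {
                            if (!registeredReaders.containsKey(id)) {
                                // With the fix the message names the actual splits,
                                // e.g. "Cannot assign splits [split-0] to subtask 0 ..."
                                throw new IllegalArgumentException(
                                        String.format(
                                                "Cannot assign splits %s to subtask %d because the subtask is not registered.",
                                                splits, id));
                            }
                        });
            } catch (IllegalArgumentException e) {
                System.out.println(e.getMessage());
            }
        }
    }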


[flink] branch release-1.15 updated: [FLINK-26723][runtime] fix the error message thrown by SourceCoordinatorContext

2022-03-19 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch release-1.15
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/release-1.15 by this push:
 new e3992ab  [FLINK-26723][runtime] fix the error message thrown by SourceCoordinatorContext
e3992ab is described below

commit e3992ab17a4515076c287b8d12003e1449718df1
Author: zoucao 
AuthorDate: Sat Mar 19 00:45:19 2022 +0800

[FLINK-26723][runtime] fix the error message thrown by SourceCoordinatorContext
---
 .../source/coordinator/SourceCoordinatorContext.java  | 19 +++
 .../coordinator/SourceCoordinatorContextTest.java |  2 +-
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java
index 5e560fa..4434a18 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java
@@ -195,14 +195,17 @@ public class SourceCoordinatorContext
 callInCoordinatorThread(
 () -> {
 // Ensure all the subtasks in the assignment have registered.
-for (Integer subtaskId : assignment.assignment().keySet()) {
-if (!registeredReaders.containsKey(subtaskId)) {
-throw new IllegalArgumentException(
-String.format(
-"Cannot assign splits %s to subtask %d because the subtask is not registered.",
-registeredReaders.get(subtaskId), subtaskId));
-}
-}
+assignment
+.assignment()
+.forEach(
+(id, splits) -> {
+if (!registeredReaders.containsKey(id)) {
+throw new IllegalArgumentException(
+String.format(
+"Cannot assign splits %s to subtask %d because the subtask is not registered.",
+splits, id));
+}
+});
 
 assignmentTracker.recordSplitAssignment(assignment);
 assignment
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContextTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContextTest.java
index d15f28e..380e65c 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContextTest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContextTest.java
@@ -153,7 +153,7 @@ public class SourceCoordinatorContextTest extends SourceCoordinatorTestBase {
 }
 },
 "assignSplits() should fail to assign the splits to a reader that is not registered.",
-"Cannot assign splits");
+"Cannot assign splits " + splitsAssignment.assignment().get(0));
 }
 
 @Test


[flink] branch master updated: [FLINK-26723][runtime] fix the error message thrown by SourceCoordinatorContext

2022-03-19 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/master by this push:
 new da79677  [FLINK-26723][runtime] fix the error message thrown by SourceCoordinatorContext
da79677 is described below

commit da79677c9afcd6703ee5cd513740981530916f2f
Author: zoucao 
AuthorDate: Sat Mar 19 00:45:19 2022 +0800

[FLINK-26723][runtime] fix the error message thrown by SourceCoordinatorContext
---
 .../source/coordinator/SourceCoordinatorContext.java  | 19 +++
 .../coordinator/SourceCoordinatorContextTest.java |  2 +-
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java
index 5e560fa..4434a18 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java
@@ -195,14 +195,17 @@ public class SourceCoordinatorContext
 callInCoordinatorThread(
 () -> {
 // Ensure all the subtasks in the assignment have registered.
-for (Integer subtaskId : assignment.assignment().keySet()) {
-if (!registeredReaders.containsKey(subtaskId)) {
-throw new IllegalArgumentException(
-String.format(
-"Cannot assign splits %s to subtask %d because the subtask is not registered.",
-registeredReaders.get(subtaskId), subtaskId));
-}
-}
+assignment
+.assignment()
+.forEach(
+(id, splits) -> {
+if (!registeredReaders.containsKey(id)) {
+throw new IllegalArgumentException(
+String.format(
+"Cannot assign splits %s to subtask %d because the subtask is not registered.",
+splits, id));
+}
+});
 
 assignmentTracker.recordSplitAssignment(assignment);
 assignment
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContextTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContextTest.java
index d15f28e..380e65c 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContextTest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContextTest.java
@@ -153,7 +153,7 @@ public class SourceCoordinatorContextTest extends SourceCoordinatorTestBase {
 }
 },
 "assignSplits() should fail to assign the splits to a reader that is not registered.",
-"Cannot assign splits");
+"Cannot assign splits " + splitsAssignment.assignment().get(0));
 }
 
 @Test


[flink] branch release-1.13 updated (965774c -> 6fb7807)

2022-02-23 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch release-1.13
in repository https://gitbox.apache.org/repos/asf/flink.git.


from 965774c  [FLINK-25851][cassandra][tests] Inject dynamic table name into Pojos
 add e2a8990  [FLINK-24607] Let Deadline handle duration overflow.
 add 0774fed  [FLINK-24607] Add util methods to shutdown executor services.
 add 6fb7807  [FLINK-24607] Make OperatorCoordinator closure more robust.

No new revisions were added by this update.

Summary of changes:
 .../org/apache/flink/api/common/time/Deadline.java |  27 +++-
 .../coordination/ComponentClosingUtils.java|  95 +++-
 .../RecreateOnResetOperatorCoordinator.java|  12 +-
 .../source/coordinator/ExecutorNotifier.java   |  21 +--
 .../source/coordinator/SourceCoordinator.java  |  23 +--
 .../coordinator/SourceCoordinatorContext.java  |  20 +--
 .../coordinator/SourceCoordinatorProvider.java |  13 +-
 .../coordination/ComponentClosingUtilsTest.java| 172 +
 .../source/coordinator/ExecutorNotifierTest.java   |  14 +-
 .../source/coordinator/SourceCoordinatorTest.java  |  62 +++-
 .../coordinator/SourceCoordinatorTestBase.java |   9 +-
 .../ManuallyTriggeredScheduledExecutorService.java |   2 +-
 12 files changed, 382 insertions(+), 88 deletions(-)
 create mode 100644 flink-runtime/src/test/java/org/apache/flink/runtime/operators/coordination/ComponentClosingUtilsTest.java


[flink] 03/03: [FLINK-24607] Make OperatorCoordinator closure more robust.

2022-02-22 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch release-1.14
in repository https://gitbox.apache.org/repos/asf/flink.git

commit 0a76d632f33d9a69df87457a63043bd7f609ed40
Author: Jiangjie (Becket) Qin 
AuthorDate: Mon Feb 21 17:43:19 2022 +0800

[FLINK-24607] Make OperatorCoordinator closure more robust.
---
 .../RecreateOnResetOperatorCoordinator.java| 12 -
 .../source/coordinator/ExecutorNotifier.java   | 21 +---
 .../source/coordinator/SourceCoordinator.java  | 23 ++--
 .../coordinator/SourceCoordinatorContext.java  | 20 +++
 .../coordinator/SourceCoordinatorProvider.java | 13 +
 .../coordination/ComponentClosingUtilsTest.java|  3 +-
 .../source/coordinator/ExecutorNotifierTest.java   | 14 ++---
 .../source/coordinator/SourceCoordinatorTest.java  | 62 --
 .../coordinator/SourceCoordinatorTestBase.java |  9 ++--
 9 files changed, 96 insertions(+), 81 deletions(-)

diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/RecreateOnResetOperatorCoordinator.java b/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/RecreateOnResetOperatorCoordinator.java
index 0d3d5f8..7d3d3ff 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/RecreateOnResetOperatorCoordinator.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/RecreateOnResetOperatorCoordinator.java
@@ -128,8 +128,16 @@ public class RecreateOnResetOperatorCoordinator implements OperatorCoordinator {
 // capture the status whether the coordinator was started when this method was called
 final boolean wasStarted = this.started;
 
-closingFuture.thenRun(
-() -> {
+closingFuture.whenComplete(
+(ignored, e) -> {
+if (e != null) {
+LOG.warn(
+String.format(
+"Received exception when closing "
++ "operator coordinator for %s.",
+oldCoordinator.operatorId),
+e);
+}
 if (!closed) {
 // The previous coordinator has closed. Create a new one.
 newCoordinator.createNewInternalCoordinator(context, provider);
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/ExecutorNotifier.java b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/ExecutorNotifier.java
index e52f6cd..fe4cf8a 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/ExecutorNotifier.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/ExecutorNotifier.java
@@ -25,23 +25,20 @@ import java.util.concurrent.Callable;
 import java.util.concurrent.Executor;
 import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.function.BiConsumer;
 
 /**
  * This class is used to coordinate between two components, where one component has an executor
  * following the mailbox model and the other component notifies it when needed.
  */
-public class ExecutorNotifier implements AutoCloseable {
+public class ExecutorNotifier {
 private static final Logger LOG = LoggerFactory.getLogger(ExecutorNotifier.class);
 private final ScheduledExecutorService workerExecutor;
 private final Executor executorToNotify;
-private final AtomicBoolean closed;
 
 public ExecutorNotifier(ScheduledExecutorService workerExecutor, Executor executorToNotify) {
 this.executorToNotify = executorToNotify;
 this.workerExecutor = workerExecutor;
-this.closed = new AtomicBoolean(false);
 }
 
 /**
@@ -140,20 +137,4 @@ public class ExecutorNotifier implements AutoCloseable {
 periodMs,
 TimeUnit.MILLISECONDS);
 }
-
-/**
- * Close the executor notifier. This is a blocking call which waits for all the async calls to
- * finish before it returns.
- *
- * @throws InterruptedException when interrupted during closure.
- */
-public void close() throws InterruptedException {
-if (!closed.compareAndSet(false, true)) {
-LOG.debug("The executor notifier has been closed.");
-return;
-}
-// Shutdown the worker executor, so no more worker tasks can run.
-workerExecutor.shutdownNow();
-workerExecutor.awaitTermination(Long.MAX_VALUE, TimeUnit.SECONDS);
-}
 }
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinator.java b/flink-runtime/src/main/
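
The behavioral difference the RecreateOnResetOperatorCoordinator change relies on, as a hedged standalone sketch: thenRun is skipped when the upstream future completes exceptionally, so a failed close would silently prevent recreating the coordinator, while whenComplete always runs and can log the failure first.

    import java.util.concurrent.CompletableFuture;

    public class ClosureCallbackSketch {
        public static void main(String[] args) {
            CompletableFuture<Void> closingFuture = new CompletableFuture<>();

            // Never runs if closingFuture fails: the recreation step would be lost.
            closingFuture.thenRun(() -> System.out.println("thenRun: recreate coordinator"));

            // Always runs; the failure is observed and recreation still happens.
            closingFuture.whenComplete(
                    (ignored, e) -> {
                        if (e != null) {
                            System.out.println("whenComplete: close failed: " + e.getMessage());
                        }
                        System.out.println("whenComplete: recreate coordinator");
                    });

            closingFuture.completeExceptionally(new RuntimeException("close failed"));
        }
    }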

[flink] 02/03: [FLINK-24607] Add util methods to shutdown executor services.

2022-02-22 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch release-1.14
in repository https://gitbox.apache.org/repos/asf/flink.git

commit 6936ce6848e0544cb3275aa841ce40cac308540d
Author: Jiangjie (Becket) Qin 
AuthorDate: Fri Feb 11 16:13:50 2022 +0800

[FLINK-24607] Add util methods to shutdown executor services.
---
 .../coordination/ComponentClosingUtils.java|  95 ++-
 .../coordination/ComponentClosingUtilsTest.java| 173 +
 .../ManuallyTriggeredScheduledExecutorService.java |   2 +-
 3 files changed, 266 insertions(+), 4 deletions(-)

diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/ComponentClosingUtils.java b/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/ComponentClosingUtils.java
index deed49e..4bfe302 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/ComponentClosingUtils.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/ComponentClosingUtils.java
@@ -18,16 +18,22 @@ limitations under the License.
 
 package org.apache.flink.runtime.operators.coordination;
 
+import org.apache.flink.annotation.VisibleForTesting;
+import org.apache.flink.api.common.time.Deadline;
+import org.apache.flink.util.clock.Clock;
+import org.apache.flink.util.clock.SystemClock;
 import org.apache.flink.util.concurrent.FutureUtils;
 import org.apache.flink.util.function.ThrowingRunnable;
 
 import java.time.Duration;
 import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutorService;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
 
 /** A util class to help with a clean component shutdown. */
 public class ComponentClosingUtils {
+private static Clock clock = SystemClock.getInstance();
 
 /** Utility class, not meant to be instantiated. */
 private ComponentClosingUtils() {}
@@ -95,8 +101,91 @@ public class ComponentClosingUtils {
 return future;
 }
 
-static void abortThread(Thread t) {
-// the abortion strategy is pretty simple here...
-t.interrupt();
+/**
+ * A util method that tries to shut down an {@link ExecutorService} elegantly within the given
+ * timeout. If the executor has not been shut down before it hits timeout or the thread is
+ * interrupted when waiting for the termination, a forceful shutdown will be attempted on the
+ * executor.
+ *
+ * @param executor the {@link ExecutorService} to shut down.
+ * @param timeout the timeout duration.
+ * @return true if the given executor has been successfully closed, false otherwise.
+ */
+@SuppressWarnings("ResultOfMethodCallIgnored")
+public static boolean tryShutdownExecutorElegantly(ExecutorService executor, Duration timeout) {
+try {
+executor.shutdown();
+executor.awaitTermination(timeout.toMillis(), TimeUnit.MILLISECONDS);
+} catch (InterruptedException ie) {
+// Let it go.
+}
+if (!executor.isTerminated()) {
+shutdownExecutorForcefully(executor, Duration.ZERO, false);
+}
+return executor.isTerminated();
+}
+
+/**
+ * Shutdown the given executor forcefully within the given timeout. The method returns if it is
+ * interrupted.
+ *
+ * @param executor the executor to shut down.
+ * @param timeout the timeout duration.
+ * @return true if the given executor is terminated, false otherwise.
+ */
+public static boolean shutdownExecutorForcefully(ExecutorService executor, Duration timeout) {
+return shutdownExecutorForcefully(executor, timeout, true);
+}
+
+/**
+ * Shutdown the given executor forcefully within the given timeout.
+ *
+ * @param executor the executor to shut down.
+ * @param timeout the timeout duration.
+ * @param interruptable when set to true, the method can be interrupted. Each interruption to
+ *     the thread results in another {@code ExecutorService.shutdownNow()} call to the shutting
+ *     down executor.
+ * @return true if the given executor is terminated, false otherwise.
+ */
+public static boolean shutdownExecutorForcefully(
+ExecutorService executor, Duration timeout, boolean interruptable) {
+Deadline deadline = Deadline.fromNowWithClock(timeout, clock);
+boolean isInterrupted = false;
+do {
+executor.shutdownNow();
+try {
+executor.awaitTermination(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
+} catch (InterruptedException e) {
+isInterrupted = interruptable;
+}
+} while (!isInterrupted && deadline.hasTimeLeft() && !executor.isTerminated());
+return executor.isTerminated();
+}
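
A hedged usage-level restatement of the two-phase pattern the utilities above implement: orderly shutdown first, forced shutdown if the timeout lapses. It condenses both phases into one illustrative method and omits the Deadline and interrupt bookkeeping of the real code.

    import java.time.Duration;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.TimeUnit;

    public class ShutdownSketch {

        static boolean shutdownGracefullyThenForcefully(ExecutorService executor, Duration timeout)
                throws InterruptedException {
            executor.shutdown(); // stop accepting tasks, let running ones finish
            if (executor.awaitTermination(timeout.toMillis(), TimeUnit.MILLISECONDS)) {
                return true;
            }
            executor.shutdownNow(); // interrupt workers, drop queued tasks
            return executor.awaitTermination(timeout.toMillis(), TimeUnit.MILLISECONDS);
        }

        public static void main(String[] args) throws InterruptedException {
            ExecutorService pool = Executors.newSingleThreadExecutor();
            pool.submit(
                    () -> {
                        try {
                            Thread.sleep(10_000); // interruptible long-running task
                        } catch (InterruptedException ignored) {
                        }
                    });
            System.out.println(shutdownGracefullyThenForcefully(pool, Duration.ofMillis(100)));
        }
    }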

[flink] branch release-1.14 updated (1ff23ac -> 0a76d63)

2022-02-22 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch release-1.14
in repository https://gitbox.apache.org/repos/asf/flink.git.


from 1ff23ac  [FLINK-25851][cassandra][tests] Inject dynamic table name into Pojos
 new 89046bc  [FLINK-24607] Let Deadline handle duration overflow.
 new 6936ce6  [FLINK-24607] Add util methods to shutdown executor services.
 new 0a76d63  [FLINK-24607] Make OperatorCoordinator closure more robust.

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../org/apache/flink/api/common/time/Deadline.java |  27 +++-
 .../coordination/ComponentClosingUtils.java|  95 +++-
 .../RecreateOnResetOperatorCoordinator.java|  12 +-
 .../source/coordinator/ExecutorNotifier.java   |  21 +--
 .../source/coordinator/SourceCoordinator.java  |  23 +--
 .../coordinator/SourceCoordinatorContext.java  |  20 +--
 .../coordinator/SourceCoordinatorProvider.java |  13 +-
 .../coordination/ComponentClosingUtilsTest.java| 172 +
 .../source/coordinator/ExecutorNotifierTest.java   |  14 +-
 .../source/coordinator/SourceCoordinatorTest.java  |  62 +++-
 .../coordinator/SourceCoordinatorTestBase.java |   9 +-
 .../ManuallyTriggeredScheduledExecutorService.java |   2 +-
 12 files changed, 382 insertions(+), 88 deletions(-)
 create mode 100644 flink-runtime/src/test/java/org/apache/flink/runtime/operators/coordination/ComponentClosingUtilsTest.java


[flink] 01/03: [FLINK-24607] Let Deadline handle duration overflow.

2022-02-22 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch release-1.14
in repository https://gitbox.apache.org/repos/asf/flink.git

commit 89046bc690d502e42212e01cfe28c737c0b2d3c9
Author: Jiangjie (Becket) Qin 
AuthorDate: Thu Feb 17 20:48:58 2022 +0800

[FLINK-24607] Let Deadline handle duration overflow.
---
 .../org/apache/flink/api/common/time/Deadline.java | 27 ++
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git 
a/flink-core/src/main/java/org/apache/flink/api/common/time/Deadline.java 
b/flink-core/src/main/java/org/apache/flink/api/common/time/Deadline.java
index 641a46b..bf7dba2 100644
--- a/flink-core/src/main/java/org/apache/flink/api/common/time/Deadline.java
+++ b/flink-core/src/main/java/org/apache/flink/api/common/time/Deadline.java
@@ -42,7 +42,7 @@ public class Deadline {
 }
 
 public Deadline plus(Duration other) {
-return new Deadline(Math.addExact(timeNanos, other.toNanos()), 
this.clock);
+return new Deadline(addHandlingOverflow(timeNanos, other.toNanos()), 
this.clock);
 }
 
 /**
@@ -72,9 +72,12 @@ public class Deadline {
 return !isOverdue();
 }
 
-/** Determines whether the deadline is in the past, i.e. whether the time 
left is negative. */
+/**
+ * Determines whether the deadline is in the past, i.e. whether the time 
left is zero or
+ * negative.
+ */
 public boolean isOverdue() {
-return timeNanos < clock.relativeTimeNanos();
+return timeNanos <= clock.relativeTimeNanos();
 }
 
 // 
@@ -92,7 +95,8 @@ public class Deadline {
 /** Constructs a Deadline that is a given duration after now. */
 public static Deadline fromNow(Duration duration) {
 return new Deadline(
-Math.addExact(System.nanoTime(), duration.toNanos()), 
SystemClock.getInstance());
+addHandlingOverflow(System.nanoTime(), duration.toNanos()),
+SystemClock.getInstance());
 }
 
 /**
@@ -103,11 +107,24 @@ public class Deadline {
  * @param clock Time provider for this deadline.
  */
 public static Deadline fromNowWithClock(Duration duration, Clock clock) {
-return new Deadline(Math.addExact(clock.relativeTimeNanos(), 
duration.toNanos()), clock);
+return new Deadline(
+addHandlingOverflow(clock.relativeTimeNanos(), 
duration.toNanos()), clock);
 }
 
 @Override
 public String toString() {
 return LocalDateTime.now().plus(timeLeft()).toString();
 }
+
+//  private helper methods 
+
+private static long addHandlingOverflow(long x, long y) {
+// The logic is copied over from Math.addExact(), but saturates at
+// Long.MAX_VALUE on overflow instead of throwing.
+long r = x + y;
+if (((x ^ r) & (y ^ r)) < 0) {
+return Long.MAX_VALUE;
+} else {
+return r;
+}
+}
 }
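
The effect of the change is easiest to see in isolation. A self-contained
sketch of the saturating addition introduced above; Math.addExact() would throw
ArithmeticException on the second input pair, which is exactly what this patch
avoids:

    public class SaturatingAddExample {
        static long addHandlingOverflow(long x, long y) {
            long r = x + y;
            // Overflow happened iff x and y share a sign and r's sign differs;
            // this is the same bit trick Math.addExact() uses to detect it.
            return (((x ^ r) & (y ^ r)) < 0) ? Long.MAX_VALUE : r;
        }

        public static void main(String[] args) {
            System.out.println(addHandlingOverflow(1L, 2L));                 // 3
            System.out.println(addHandlingOverflow(Long.MAX_VALUE - 1, 10)); // Long.MAX_VALUE
        }
    }

Saturating at Long.MAX_VALUE turns an effectively infinite timeout into a
deadline that simply never expires, rather than an exception at Deadline
construction time.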


[flink] 02/03: [FLINK-24607] Add util methods to shutdown executor services.

2022-02-20 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git

commit a17655ebe7e3b2870b7616f1c2b640fcb3154187
Author: Jiangjie (Becket) Qin 
AuthorDate: Fri Feb 11 16:13:50 2022 +0800

[FLINK-24607] Add util methods to shutdown executor services.
---
 .../coordination/ComponentClosingUtils.java|  95 ++-
 .../coordination/ComponentClosingUtilsTest.java| 173 +
 .../ManuallyTriggeredScheduledExecutorService.java |   2 +-
 3 files changed, 266 insertions(+), 4 deletions(-)

diff --git 
a/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/ComponentClosingUtils.java
 
b/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/ComponentClosingUtils.java
index deed49e..4bfe302 100644
--- 
a/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/ComponentClosingUtils.java
+++ 
b/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/ComponentClosingUtils.java
@@ -18,16 +18,22 @@ limitations under the License.
 
 package org.apache.flink.runtime.operators.coordination;
 
+import org.apache.flink.annotation.VisibleForTesting;
+import org.apache.flink.api.common.time.Deadline;
+import org.apache.flink.util.clock.Clock;
+import org.apache.flink.util.clock.SystemClock;
 import org.apache.flink.util.concurrent.FutureUtils;
 import org.apache.flink.util.function.ThrowingRunnable;
 
 import java.time.Duration;
 import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutorService;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
 
 /** A util class to help with a clean component shutdown. */
 public class ComponentClosingUtils {
+private static Clock clock = SystemClock.getInstance();
 
 /** Utility class, not meant to be instantiated. */
 private ComponentClosingUtils() {}
@@ -95,8 +101,91 @@ public class ComponentClosingUtils {
 return future;
 }
 
-static void abortThread(Thread t) {
-// the abortion strategy is pretty simple here...
-t.interrupt();
+/**
+ * A util method that tries to shut down an {@link ExecutorService} elegantly
+ * within the given timeout. If the executor has not been shut down before the
+ * timeout expires, or the thread is interrupted while waiting for termination,
+ * a forceful shutdown will be attempted on the executor.
+ *
+ * @param executor the {@link ExecutorService} to shut down.
+ * @param timeout the timeout duration.
+ * @return true if the given executor has been successfully closed, false otherwise.
+ */
+@SuppressWarnings("ResultOfMethodCallIgnored")
+public static boolean tryShutdownExecutorElegantly(ExecutorService executor, Duration timeout) {
+try {
+executor.shutdown();
+executor.awaitTermination(timeout.toMillis(), TimeUnit.MILLISECONDS);
+} catch (InterruptedException ie) {
+// Let it go.
+}
+if (!executor.isTerminated()) {
+shutdownExecutorForcefully(executor, Duration.ZERO, false);
+}
+return executor.isTerminated();
+}
+
+/**
+ * Shutdown the given executor forcefully within the given timeout. The method
+ * also returns early if the invoking thread is interrupted.
+ *
+ * @param executor the executor to shut down.
+ * @param timeout the timeout duration.
+ * @return true if the given executor is terminated, false otherwise.
+ */
+public static boolean shutdownExecutorForcefully(ExecutorService executor, Duration timeout) {
+return shutdownExecutorForcefully(executor, timeout, true);
+}
+
+/**
+ * Shutdown the given executor forcefully within the given timeout.
+ *
+ * @param executor the executor to shut down.
+ * @param timeout the timeout duration.
+ * @param interruptable when set to true, the method can be interrupted. Each
+ * interruption of the waiting thread results in another
+ * {@code ExecutorService.shutdownNow()} call on the executor being shut down.
+ * @return true if the given executor is terminated, false otherwise.
+ */
+public static boolean shutdownExecutorForcefully(
+ExecutorService executor, Duration timeout, boolean interruptable) {
+Deadline deadline = Deadline.fromNowWithClock(timeout, clock);
+boolean isInterrupted = false;
+do {
+executor.shutdownNow();
+try {
+executor.awaitTermination(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
+} catch (InterruptedException e) {
+isInterrupted = interruptable;
+}
+} while (!isInterrupted && deadline.hasTimeLeft() && !executor.isTerminated());
+return executor.isTerminated();
+}
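
Taken together, the two utilities encode a graceful-then-forceful shutdown
pattern. A hedged usage sketch (the executor and the timeout value are
illustrative, not from the commit):

    import java.time.Duration;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;

    import org.apache.flink.runtime.operators.coordination.ComponentClosingUtils;

    public class ElegantShutdownExample {
        public static void main(String[] args) {
            ExecutorService coordinatorExecutor = Executors.newSingleThreadExecutor();
            // Phase 1: shutdown() and wait, letting queued tasks drain.
            // Phase 2 (inside the utility): a single shutdownNow() sweep via
            // shutdownExecutorForcefully(executor, Duration.ZERO, false).
            boolean closed = ComponentClosingUtils.tryShutdownExecutorElegantly(
                    coordinatorExecutor, Duration.ofSeconds(10));
            if (!closed) {
                // A task is ignoring interrupts; report it rather than block forever.
                System.err.println("Executor did not terminate within the timeout.");
            }
        }
    }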

[flink] 01/03: [FLINK-24607] Let Deadline handle duration overflow.

2022-02-20 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git

commit 10c666be7000e4a370a449f14d07e9b2f7a6c71b
Author: Jiangjie (Becket) Qin 
AuthorDate: Thu Feb 17 20:48:58 2022 +0800

[FLINK-24607] Let Deadline handle duration overflow.
---
 .../org/apache/flink/api/common/time/Deadline.java | 27 ++
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git 
a/flink-core/src/main/java/org/apache/flink/api/common/time/Deadline.java 
b/flink-core/src/main/java/org/apache/flink/api/common/time/Deadline.java
index 641a46b..bf7dba2 100644
--- a/flink-core/src/main/java/org/apache/flink/api/common/time/Deadline.java
+++ b/flink-core/src/main/java/org/apache/flink/api/common/time/Deadline.java
@@ -42,7 +42,7 @@ public class Deadline {
 }
 
 public Deadline plus(Duration other) {
-return new Deadline(Math.addExact(timeNanos, other.toNanos()), 
this.clock);
+return new Deadline(addHandlingOverflow(timeNanos, other.toNanos()), 
this.clock);
 }
 
 /**
@@ -72,9 +72,12 @@ public class Deadline {
 return !isOverdue();
 }
 
-/** Determines whether the deadline is in the past, i.e. whether the time 
left is negative. */
+/**
+ * Determines whether the deadline is in the past, i.e. whether the time 
left is zero or
+ * negative.
+ */
 public boolean isOverdue() {
-return timeNanos < clock.relativeTimeNanos();
+return timeNanos <= clock.relativeTimeNanos();
 }
 
 // 
@@ -92,7 +95,8 @@ public class Deadline {
 /** Constructs a Deadline that is a given duration after now. */
 public static Deadline fromNow(Duration duration) {
 return new Deadline(
-Math.addExact(System.nanoTime(), duration.toNanos()), 
SystemClock.getInstance());
+addHandlingOverflow(System.nanoTime(), duration.toNanos()),
+SystemClock.getInstance());
 }
 
 /**
@@ -103,11 +107,24 @@ public class Deadline {
  * @param clock Time provider for this deadline.
  */
 public static Deadline fromNowWithClock(Duration duration, Clock clock) {
-return new Deadline(Math.addExact(clock.relativeTimeNanos(), 
duration.toNanos()), clock);
+return new Deadline(
+addHandlingOverflow(clock.relativeTimeNanos(), 
duration.toNanos()), clock);
 }
 
 @Override
 public String toString() {
 return LocalDateTime.now().plus(timeLeft()).toString();
 }
+
+//  private helper methods 
+
+private static long addHandlingOverflow(long x, long y) {
+// The logic is copied over from Math.addExact(), but saturates at
+// Long.MAX_VALUE on overflow instead of throwing.
+long r = x + y;
+if (((x ^ r) & (y ^ r)) < 0) {
+return Long.MAX_VALUE;
+} else {
+return r;
+}
+}
 }
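
One subtle piece of this hunk is the switch from < to <= in isOverdue(): a
deadline with exactly zero time left now counts as overdue. A small sketch of
the observable difference, assuming the patched Deadline above:

    import java.time.Duration;
    import org.apache.flink.api.common.time.Deadline;

    public class ZeroDeadlineExample {
        public static void main(String[] args) {
            Deadline d = Deadline.fromNow(Duration.ZERO);
            // Before the patch this was only true once nanoTime() had advanced;
            // now a zero-duration deadline is overdue immediately, so loops
            // guarded by hasTimeLeft() cannot spin on a zero timeout.
            System.out.println(d.isOverdue());    // true
            System.out.println(d.hasTimeLeft());  // false
        }
    }

This is what lets tryShutdownExecutorElegantly() call
shutdownExecutorForcefully(executor, Duration.ZERO, false) and have the
do/while body run exactly once.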


[flink] 03/03: [FLINK-24607] Make OperatorCoordinator closure more robust.

2022-02-20 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git

commit 0f19c2472c54aac97e4067f5398731ab90036d1a
Author: Jiangjie (Becket) Qin 
AuthorDate: Thu Feb 10 15:13:55 2022 +0800

[FLINK-24607] Make OperatorCoordinator closure more robust.
---
 .../RecreateOnResetOperatorCoordinator.java| 12 -
 .../source/coordinator/ExecutorNotifier.java   | 21 +---
 .../source/coordinator/SourceCoordinator.java  | 36 -
 .../coordinator/SourceCoordinatorContext.java  | 24 +
 .../coordinator/SourceCoordinatorProvider.java | 11 +---
 .../source/coordinator/ExecutorNotifierTest.java   | 14 ++---
 .../source/coordinator/SourceCoordinatorTest.java  | 63 --
 .../coordinator/SourceCoordinatorTestBase.java |  1 -
 8 files changed, 102 insertions(+), 80 deletions(-)

diff --git 
a/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/RecreateOnResetOperatorCoordinator.java
 
b/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/RecreateOnResetOperatorCoordinator.java
index 5c660d0..ffab3ff 100644
--- 
a/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/RecreateOnResetOperatorCoordinator.java
+++ 
b/flink-runtime/src/main/java/org/apache/flink/runtime/operators/coordination/RecreateOnResetOperatorCoordinator.java
@@ -128,8 +128,16 @@ public class RecreateOnResetOperatorCoordinator implements 
OperatorCoordinator {
 // capture the status whether the coordinator was started when this 
method was called
 final boolean wasStarted = this.started;
 
-closingFuture.thenRun(
-() -> {
+closingFuture.whenComplete(
+(ignored, e) -> {
+if (e != null) {
+LOG.warn(
+String.format(
+"Received exception when closing "
++ "operator coordinator for 
%s.",
+oldCoordinator.operatorId),
+e);
+}
 if (!closed) {
 // The previous coordinator has closed. Create a new 
one.
 newCoordinator.createNewInternalCoordinator(context, 
provider);
diff --git 
a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/ExecutorNotifier.java
 
b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/ExecutorNotifier.java
index e52f6cd..fe4cf8a 100644
--- 
a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/ExecutorNotifier.java
+++ 
b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/ExecutorNotifier.java
@@ -25,23 +25,20 @@ import java.util.concurrent.Callable;
 import java.util.concurrent.Executor;
 import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.function.BiConsumer;
 
 /**
  * This class is used to coordinate between two components, where one 
component has an executor
  * following the mailbox model and the other component notifies it when needed.
  */
-public class ExecutorNotifier implements AutoCloseable {
+public class ExecutorNotifier {
 private static final Logger LOG = 
LoggerFactory.getLogger(ExecutorNotifier.class);
 private final ScheduledExecutorService workerExecutor;
 private final Executor executorToNotify;
-private final AtomicBoolean closed;
 
 public ExecutorNotifier(ScheduledExecutorService workerExecutor, Executor 
executorToNotify) {
 this.executorToNotify = executorToNotify;
 this.workerExecutor = workerExecutor;
-this.closed = new AtomicBoolean(false);
 }
 
 /**
@@ -140,20 +137,4 @@ public class ExecutorNotifier implements AutoCloseable {
 periodMs,
 TimeUnit.MILLISECONDS);
 }
-
-/**
- * Close the executor notifier. This is a blocking call which waits for 
all the async calls to
- * finish before it returns.
- *
- * @throws InterruptedException when interrupted during closure.
- */
-public void close() throws InterruptedException {
-if (!closed.compareAndSet(false, true)) {
-LOG.debug("The executor notifier has been closed.");
-return;
-}
-// Shutdown the worker executor, so no more worker tasks can run.
-workerExecutor.shutdownNow();
-workerExecutor.awaitTermination(Long.MAX_VALUE, TimeUnit.SECONDS);
-}
 }
diff --git 
a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinator.java
 
b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoo
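
The RecreateOnResetOperatorCoordinator hunk above replaces thenRun with
whenComplete because a thenRun callback is skipped entirely when the closing
future completes exceptionally, which would have left the new coordinator
uncreated after a failed close. A standalone sketch of the difference (plain
JDK, nothing Flink-specific):

    import java.util.concurrent.CompletableFuture;

    public class WhenCompleteVsThenRun {
        public static void main(String[] args) {
            CompletableFuture<Void> closingFuture = new CompletableFuture<>();
            closingFuture.thenRun(
                    () -> System.out.println("thenRun")); // never runs on failure
            closingFuture.whenComplete(
                    (ignored, e) -> {
                        if (e != null) {
                            System.out.println("close failed: " + e.getMessage());
                        }
                        System.out.println("recreate the coordinator here"); // always runs
                    });
            closingFuture.completeExceptionally(new RuntimeException("boom"));
        }
    }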

[flink] branch master updated (faf9a8a -> 0f19c24)

2022-02-20 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git.


from faf9a8a  [FLINK-26016][hive] Fix FileSystemLookupFunction does not 
produce correct results when hive table uses columnar storage
 new 10c666b  [FLINK-24607] Let Deadline handle duration overflow.
 new a17655e  [FLINK-24607] Add util methods to shutdown executor services.
 new 0f19c24  [FLINK-24607] Make OperatorCoordinator closure more robust.

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../org/apache/flink/api/common/time/Deadline.java |  27 +++-
 .../coordination/ComponentClosingUtils.java|  95 ++-
 .../RecreateOnResetOperatorCoordinator.java|  12 +-
 .../source/coordinator/ExecutorNotifier.java   |  21 +--
 .../source/coordinator/SourceCoordinator.java  |  36 ++---
 .../coordinator/SourceCoordinatorContext.java  |  24 +--
 .../coordinator/SourceCoordinatorProvider.java |  11 +-
 .../coordination/ComponentClosingUtilsTest.java| 173 +
 .../source/coordinator/ExecutorNotifierTest.java   |  14 +-
 .../source/coordinator/SourceCoordinatorTest.java  |  63 +++-
 .../coordinator/SourceCoordinatorTestBase.java |   1 -
 .../ManuallyTriggeredScheduledExecutorService.java |   2 +-
 12 files changed, 390 insertions(+), 89 deletions(-)
 create mode 100644 
flink-runtime/src/test/java/org/apache/flink/runtime/operators/coordination/ComponentClosingUtilsTest.java


[flink-web] branch asf-site updated: Make minor change to the 'Apache Flink ML 2.0.0 Release Announcement' (#498)

2022-01-12 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/flink-web.git


The following commit(s) were added to refs/heads/asf-site by this push:
 new f9d8660  Make minor change to the 'Apache Flink ML 2.0.0 Release 
Announcement' (#498)
f9d8660 is described below

commit f9d8660204aaebea2b34874360d2aa803fe7f88b
Author: Dong Lin 
AuthorDate: Thu Jan 13 10:57:38 2022 +0800

Make minor change to the 'Apache Flink ML 2.0.0 Release Announcement' (#498)
---
 _posts/2022-01-07-release-ml-2.0.0.md | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/_posts/2022-01-07-release-ml-2.0.0.md 
b/_posts/2022-01-07-release-ml-2.0.0.md
index 4cbea98..1468d19 100644
--- a/_posts/2022-01-07-release-ml-2.0.0.md
+++ b/_posts/2022-01-07-release-ml-2.0.0.md
@@ -150,12 +150,14 @@ TensorFlow program).
 ## Algorithm Library
 
 Now that the Flink ML API re-design is done, we started the initiative to add
-off-the-shelf algorithms in Flink ML. As part of this initiative, we borrowed
-ideas from the [Alink](https://github.com/alibaba/alink) project, and worked
-closely with developers of the Alink project to design the new Flink ML APIs,
-refactor, optimize and migrate algorithms from Alink to Flink. Our long-term
-goal is to provide a library of performant algorithms that are easy to use,
-debug and customize for your needs.
+off-the-shelf algorithms in Flink ML. The release of Flink-ML 2.0.0 is closely
+related to project Alink - an Apache Flink ecosystem project open sourced by
+Alibaba. The connection between the Flink community and developers of the Alink
+project dates back to 2017. The Alink developers have made significant
+contributions to designing the new Flink ML APIs and to refactoring, optimizing
+and migrating algorithms from Alink to Flink. Our long-term goal is to provide a
+library of performant algorithms that are easy to use, debug and customize for
+your needs.
 
 We have implemented five algorithms in this release, i.e. logistic regression,
 k-means, k-nearest neighbors, naive bayes and one-hot encoder. For now these


[flink-table-store] 01/01: Initial Commit. Add .asf.yaml

2022-01-11 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink-table-store.git

commit a249ebac66d00f664d257551029da9b1beddaf1f
Author: Jiangjie (Becket) Qin 
AuthorDate: Wed Jan 12 11:33:21 2022 +0800

Initial Commit. Add .asf.yaml
---
 .asf.yaml | 5 +
 1 file changed, 5 insertions(+)

diff --git a/.asf.yaml b/.asf.yaml
new file mode 100644
index 000..82eef0b
--- /dev/null
+++ b/.asf.yaml
@@ -0,0 +1,5 @@
+notifications:
+  commits:  commits@flink.apache.org
+  issues:   iss...@flink.apache.org
+  pullrequests: iss...@flink.apache.org
+  jira_options: link label


[flink-table-store] branch master created (now a249eba)

2022-01-11 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/flink-table-store.git.


  at a249eba  Initial Commit. Add .asf.yaml

This branch includes the following new commits:

 new a249eba  Initial Commit. Add .asf.yaml

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[flink] branch release-1.14 updated: [FLINK-25132][connector/kafka] Move record deserializing from SplitFetcher to RecordEmitter to support object-reusing deserializer

2021-12-26 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch release-1.14
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/release-1.14 by this push:
 new ebbf772  [FLINK-25132][connector/kafka] Move record deserializing from 
SplitFetcher to RecordEmitter to support object-reusing deserializer
ebbf772 is described below

commit ebbf772ea287ee987f5eb628ad2e395895b312aa
Author: Qingsheng Ren 
AuthorDate: Thu Dec 2 08:51:26 2021 +0800

[FLINK-25132][connector/kafka] Move record deserializing from SplitFetcher 
to RecordEmitter to support object-reusing deserializer
---
 .../flink/connector/kafka/source/KafkaSource.java  |  20 +--
 .../source/reader/KafkaPartitionSplitReader.java   | 186 +++--
 .../kafka/source/reader/KafkaRecordEmitter.java|  50 +-
 .../kafka/source/reader/KafkaSourceReader.java |  14 +-
 .../reader/fetcher/KafkaSourceFetcherManager.java  |  21 ++-
 .../connector/kafka/source/KafkaSourceITCase.java  |  65 ---
 .../reader/KafkaPartitionSplitReaderTest.java  |  53 +++---
 7 files changed, 201 insertions(+), 208 deletions(-)

diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java
index 6df7d2f..400e803 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java
@@ -27,7 +27,6 @@ import org.apache.flink.api.connector.source.SourceReader;
 import org.apache.flink.api.connector.source.SourceReaderContext;
 import org.apache.flink.api.connector.source.SplitEnumerator;
 import org.apache.flink.api.connector.source.SplitEnumeratorContext;
-import org.apache.flink.api.java.tuple.Tuple3;
 import org.apache.flink.api.java.typeutils.ResultTypeQueryable;
 import org.apache.flink.configuration.Configuration;
 import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds;
@@ -49,6 +48,8 @@ import org.apache.flink.core.io.SimpleVersionedSerializer;
 import org.apache.flink.metrics.MetricGroup;
 import org.apache.flink.util.UserCodeClassLoader;
 
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+
 import javax.annotation.Nullable;
 
 import java.io.IOException;
@@ -131,8 +132,8 @@ public class KafkaSource
 SourceReader createReader(
 SourceReaderContext readerContext, Consumer> 
splitFinishedHook)
 throws Exception {
-FutureCompletingBlockingQueue>> elementsQueue =
-new FutureCompletingBlockingQueue<>();
+
FutureCompletingBlockingQueue>>
+elementsQueue = new FutureCompletingBlockingQueue<>();
 deserializationSchema.open(
 new DeserializationSchema.InitializationContext() {
 @Override
@@ -148,18 +149,13 @@ public class KafkaSource
 final KafkaSourceReaderMetrics kafkaSourceReaderMetrics =
 new KafkaSourceReaderMetrics(readerContext.metricGroup());
 
-Supplier> splitReaderSupplier =
-() ->
-new KafkaPartitionSplitReader<>(
-props,
-deserializationSchema,
-readerContext,
-kafkaSourceReaderMetrics);
-KafkaRecordEmitter recordEmitter = new KafkaRecordEmitter<>();
+Supplier splitReaderSupplier =
+() -> new KafkaPartitionSplitReader(props, readerContext, 
kafkaSourceReaderMetrics);
+KafkaRecordEmitter recordEmitter = new 
KafkaRecordEmitter<>(deserializationSchema);
 
 return new KafkaSourceReader<>(
 elementsQueue,
-new KafkaSourceFetcherManager<>(
+new KafkaSourceFetcherManager(
 elementsQueue, splitReaderSupplier::get, 
splitFinishedHook),
 recordEmitter,
 toConfiguration(props),
diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReader.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReader.java
index d048230..ebadef3 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReader.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReader.java
@@ -19,16 +19,13 @@
 package org.apache.flink.connector.kafka.source.reader;
 
 import 
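
The email is truncated here, but the idea of the change is simple: raw
ConsumerRecords now cross the fetcher-to-task queue untouched, and
deserialization happens in the RecordEmitter on the task thread, right before
each record is consumed. That ordering is what makes a deserializer that
reuses one mutable object safe. A deliberately simplified sketch of the
argument (none of these types are the actual Flink classes):

    import java.nio.charset.StandardCharsets;
    import java.util.function.Consumer;

    final class ReusingEmitter {
        // One mutable holder reused for every record, instead of a fresh
        // allocation per record in the fetcher thread.
        private final StringBuilder reusable = new StringBuilder();

        // Invoked on the task thread for each raw record popped off the queue.
        void emitRecord(byte[] raw, Consumer<CharSequence> output) {
            reusable.setLength(0);
            reusable.append(new String(raw, StandardCharsets.UTF_8));
            // Safe because the downstream consumes the value before the next
            // emitRecord() call mutates the holder again.
            output.accept(reusable);
        }
    }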

[flink] branch master updated (f191bec -> 2b1a9de)

2021-12-21 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git.


from f191bec  [hotfix][checkpoint] Fix the wrong parameters due to base 
code change in StreamTaskFinalCheckpointsTest
 add 2b1a9de  [FLINK-25132][connector/kafka] Move record deserializing from 
SplitFetcher to RecordEmitter to support object-reusing deserializer

No new revisions were added by this update.

Summary of changes:
 .../flink/connector/kafka/source/KafkaSource.java  |  20 +--
 .../source/reader/KafkaPartitionSplitReader.java   | 186 +++--
 .../kafka/source/reader/KafkaRecordEmitter.java|  50 +-
 .../kafka/source/reader/KafkaSourceReader.java |  14 +-
 .../reader/fetcher/KafkaSourceFetcherManager.java  |  21 ++-
 .../connector/kafka/source/KafkaSourceITCase.java  |  65 ---
 .../reader/KafkaPartitionSplitReaderTest.java  |  53 +++---
 7 files changed, 201 insertions(+), 208 deletions(-)


[flink-ml] 02/02: [FLINK-24354][FLIP-174] Improve the WithParams interface

2021-11-09 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink-ml.git

commit 9c44eef25970338fe32dcf77ce45efac74c4324f
Author: Dong Lin 
AuthorDate: Sun Sep 26 21:40:59 2021 +0800

[FLINK-24354][FLIP-174] Improve the WithParams interface
---
 flink-ml-api/pom.xml   |  15 +
 .../org/apache/flink/ml/api/core/Pipeline.java |  23 +-
 .../apache/flink/ml/api/core/PipelineModel.java|  23 +-
 .../java/org/apache/flink/ml/api/core/Stage.java   |   2 +-
 .../org/apache/flink/ml/param/BooleanParam.java|  35 ++
 .../apache/flink/ml/param/DoubleArrayParam.java|  35 ++
 .../org/apache/flink/ml/param/DoubleParam.java |  35 ++
 .../org/apache/flink/ml/param/FloatArrayParam.java |  35 ++
 .../java/org/apache/flink/ml/param/FloatParam.java |  32 ++
 .../org/apache/flink/ml/param/IntArrayParam.java   |  35 ++
 .../java/org/apache/flink/ml/param/IntParam.java   |  35 ++
 .../org/apache/flink/ml/param/LongArrayParam.java  |  35 ++
 .../java/org/apache/flink/ml/param/LongParam.java  |  32 ++
 .../main/java/org/apache/flink/ml/param/Param.java |  98 ++
 .../org/apache/flink/ml/param/ParamValidator.java  |  40 +++
 .../org/apache/flink/ml/param/ParamValidators.java |  98 ++
 .../apache/flink/ml/param/StringArrayParam.java|  35 ++
 .../org/apache/flink/ml/param/StringParam.java |  35 ++
 .../java/org/apache/flink/ml/param/WithParams.java | 135 
 .../java/org/apache/flink/ml/util/ParamUtils.java  |  89 +
 .../org/apache/flink/ml/util/ReadWriteUtils.java   | 279 +++
 .../apache/flink/ml/api/core/ExampleStages.java| 244 ++
 .../org/apache/flink/ml/api/core/PipelineTest.java | 202 +--
 .../org/apache/flink/ml/api/core/StageTest.java| 375 +
 pom.xml|   2 -
 25 files changed, 1863 insertions(+), 141 deletions(-)

diff --git a/flink-ml-api/pom.xml b/flink-ml-api/pom.xml
index 81fdcc7..ddfc659 100644
--- a/flink-ml-api/pom.xml
+++ b/flink-ml-api/pom.xml
@@ -38,6 +38,21 @@ under the License.
   ${flink.version}
   provided
 
+
+
+  org.apache.flink
+  flink-table-planner_${scala.binary.version}
+  ${flink.version}
+  test
+
+
+
+  org.apache.flink
+  flink-test-utils_${scala.binary.version}
+  ${flink.version}
+  test
+
+
 
   org.apache.flink
   flink-shaded-jackson
diff --git 
a/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/Pipeline.java 
b/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/Pipeline.java
index a5fed01..f1e5d0c 100644
--- a/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/Pipeline.java
+++ b/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/Pipeline.java
@@ -20,13 +20,17 @@ package org.apache.flink.ml.api.core;
 
 import org.apache.flink.annotation.PublicEvolving;
 import org.apache.flink.annotation.VisibleForTesting;
-import org.apache.flink.ml.api.misc.param.Params;
+import org.apache.flink.ml.param.Param;
+import org.apache.flink.ml.util.ParamUtils;
+import org.apache.flink.ml.util.ReadWriteUtils;
 import org.apache.flink.table.api.Table;
 
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 
 /**
  * A Pipeline acts as an Estimator. It consists of an ordered list of stages, 
each of which could be
@@ -36,10 +40,11 @@ import java.util.List;
 public final class Pipeline implements Estimator {
 private static final long serialVersionUID = 6384850154817512318L;
 private final List> stages;
-private final Params params = new Params();
+private final Map, Object> paramMap = new HashMap<>();
 
 public Pipeline(List> stages) {
 this.stages = stages;
+ParamUtils.initializeMapWithDefaultValues(paramMap, this);
 }
 
 /**
@@ -97,17 +102,17 @@ public final class Pipeline implements Estimator {
 }
 
 @Override
-public void save(String path) throws IOException {
-throw new UnsupportedOperationException();
+public Map, Object> getParamMap() {
+return paramMap;
 }
 
-public static Pipeline load(String path) throws IOException {
-throw new UnsupportedOperationException();
+@Override
+public void save(String path) throws IOException {
+ReadWriteUtils.savePipeline(this, stages, path);
 }
 
-@Override
-public Params getParams() {
-return params;
+public static Pipeline load(String path) throws IOException {
+return new Pipeline(ReadWriteUtils.loadPipeline(path, 
Pipeline.class.getName()));
 }
 
 /**
diff --git 
a/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/PipelineModel.java 
b/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/PipelineModel.java
index 704fa8e..45bb757 

[flink-ml] branch master updated (81cd74a -> 9c44eef)

2021-11-09 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/flink-ml.git.


from 81cd74a  [hotfix] Remove those library infra classes that need to be 
revisited
 new 1f0fe56  [FLINK-24354][FLIP-174] Remove old param-related classes
 new 9c44eef  [FLINK-24354][FLIP-174] Improve the WithParams interface

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 flink-ml-api/pom.xml   |  15 +
 .../org/apache/flink/ml/api/core/Pipeline.java |  23 +-
 .../apache/flink/ml/api/core/PipelineModel.java|  23 +-
 .../java/org/apache/flink/ml/api/core/Stage.java   |   2 +-
 .../apache/flink/ml/api/misc/param/ParamInfo.java  | 151 -
 .../flink/ml/api/misc/param/ParamInfoFactory.java  | 134 
 .../org/apache/flink/ml/api/misc/param/Params.java | 277 ---
 .../apache/flink/ml/api/misc/param/WithParams.java |  60 
 .../org/apache/flink/ml/param/BooleanParam.java|  24 +-
 .../apache/flink/ml/param/DoubleArrayParam.java|  24 +-
 .../org/apache/flink/ml/param/DoubleParam.java |  24 +-
 .../org/apache/flink/ml/param/FloatArrayParam.java |  24 +-
 .../java/org/apache/flink/ml/param/FloatParam.java |  21 +-
 .../org/apache/flink/ml/param/IntArrayParam.java   |  24 +-
 .../java/org/apache/flink/ml/param/IntParam.java   |  24 +-
 .../org/apache/flink/ml/param/LongArrayParam.java  |  24 +-
 .../java/org/apache/flink/ml/param/LongParam.java  |  21 +-
 .../main/java/org/apache/flink/ml/param/Param.java |  98 ++
 .../ml/{api/misc => }/param/ParamValidator.java|  17 +-
 .../org/apache/flink/ml/param/ParamValidators.java |  98 ++
 .../apache/flink/ml/param/StringArrayParam.java|  24 +-
 .../org/apache/flink/ml/param/StringParam.java |  24 +-
 .../java/org/apache/flink/ml/param/WithParams.java | 135 
 .../java/org/apache/flink/ml/util/ParamUtils.java  |  89 +
 .../org/apache/flink/ml/util/ReadWriteUtils.java   | 279 +++
 .../flink/ml/util/param/ExtractParamInfosUtil.java |  71 
 .../apache/flink/ml/api/core/ExampleStages.java| 244 ++
 .../org/apache/flink/ml/api/core/PipelineTest.java | 202 +--
 .../org/apache/flink/ml/api/core/StageTest.java| 375 +
 .../org/apache/flink/ml/api/misc/ParamsTest.java   | 179 --
 .../ml/util/param/ExtractParamInfosUtilTest.java   | 109 --
 pom.xml|   2 -
 32 files changed, 1568 insertions(+), 1273 deletions(-)
 delete mode 100644 
flink-ml-api/src/main/java/org/apache/flink/ml/api/misc/param/ParamInfo.java
 delete mode 100644 
flink-ml-api/src/main/java/org/apache/flink/ml/api/misc/param/ParamInfoFactory.java
 delete mode 100644 
flink-ml-api/src/main/java/org/apache/flink/ml/api/misc/param/Params.java
 delete mode 100644 
flink-ml-api/src/main/java/org/apache/flink/ml/api/misc/param/WithParams.java
 copy 
flink-ml-iteration/src/main/java/org/apache/flink/iteration/proxy/state/StateNamePrefix.java
 => flink-ml-api/src/main/java/org/apache/flink/ml/param/BooleanParam.java (61%)
 copy 
flink-ml-iteration/src/main/java/org/apache/flink/iteration/proxy/state/StateNamePrefix.java
 => flink-ml-api/src/main/java/org/apache/flink/ml/param/DoubleArrayParam.java 
(60%)
 copy 
flink-ml-iteration/src/main/java/org/apache/flink/iteration/proxy/state/StateNamePrefix.java
 => flink-ml-api/src/main/java/org/apache/flink/ml/param/DoubleParam.java (61%)
 copy 
flink-ml-iteration/src/main/java/org/apache/flink/iteration/proxy/state/StateNamePrefix.java
 => flink-ml-api/src/main/java/org/apache/flink/ml/param/FloatArrayParam.java 
(60%)
 copy 
flink-ml-iteration/src/main/java/org/apache/flink/iteration/proxy/state/StateNamePrefix.java
 => flink-ml-api/src/main/java/org/apache/flink/ml/param/FloatParam.java (63%)
 copy 
flink-ml-iteration/src/main/java/org/apache/flink/iteration/proxy/state/StateNamePrefix.java
 => flink-ml-api/src/main/java/org/apache/flink/ml/param/IntArrayParam.java 
(60%)
 copy 
flink-ml-iteration/src/main/java/org/apache/flink/iteration/proxy/state/StateNamePrefix.java
 => flink-ml-api/src/main/java/org/apache/flink/ml/param/IntParam.java (61%)
 copy 
flink-ml-iteration/src/main/java/org/apache/flink/iteration/proxy/state/StateNamePrefix.java
 => flink-ml-api/src/main/java/org/apache/flink/ml/param/LongArrayParam.java 
(60%)
 copy 
flink-ml-iteration/src/main/java/org/apache/flink/iteration/proxy/state/StateNamePrefix.java
 => flink-ml-api/src/main/java/org/apache/flink/ml/param/LongParam.java (64%)
 create mode 100644 
flink-ml-api/src/main/java/org/apache/flink/ml/param/Param.java
 rename flink-ml-api/src/main/java/org/apache/

[flink-ml] 01/02: [FLINK-24354][FLIP-174] Remove old param-related classes

2021-11-09 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink-ml.git

commit 1f0fe565f8f79ac084d7763f5f0da3fdb36bfa00
Author: Dong Lin 
AuthorDate: Sun Sep 26 21:37:32 2021 +0800

[FLINK-24354][FLIP-174] Remove old param-related classes
---
 .../apache/flink/ml/api/misc/param/ParamInfo.java  | 151 ---
 .../flink/ml/api/misc/param/ParamInfoFactory.java  | 134 --
 .../flink/ml/api/misc/param/ParamValidator.java|  39 ---
 .../org/apache/flink/ml/api/misc/param/Params.java | 277 -
 .../apache/flink/ml/api/misc/param/WithParams.java |  60 -
 .../flink/ml/util/param/ExtractParamInfosUtil.java |  71 --
 .../org/apache/flink/ml/api/misc/ParamsTest.java   | 179 -
 .../ml/util/param/ExtractParamInfosUtilTest.java   | 109 
 8 files changed, 1020 deletions(-)

diff --git 
a/flink-ml-api/src/main/java/org/apache/flink/ml/api/misc/param/ParamInfo.java 
b/flink-ml-api/src/main/java/org/apache/flink/ml/api/misc/param/ParamInfo.java
deleted file mode 100644
index b0f7ce9..000
--- 
a/flink-ml-api/src/main/java/org/apache/flink/ml/api/misc/param/ParamInfo.java
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.ml.api.misc.param;
-
-import org.apache.flink.annotation.PublicEvolving;
-import org.apache.flink.util.Preconditions;
-
-/**
- * Definition of a parameter, including name, type, default value, validator 
and so on.
- *
- * A parameter can either be optional or non-optional.
- *
- * 
- *   A non-optional parameter should not have a default value. Instead, 
its value must be
- *   provided by the users.
- *   An optional parameter may or may not have a default value.
- * 
- *
- * Please see {@link Params#get(ParamInfo)} and {@link 
Params#contains(ParamInfo)} for more
- * details about the behavior.
- *
- * A parameter may have aliases in addition to the parameter name for 
convenience and
- * compatibility purposes. One should not set values for both parameter name 
and an alias. One and
- * only one value should be set either under the parameter name or one of the 
alias.
- *
- * @param  the type of the param value
- */
-@PublicEvolving
-public class ParamInfo {
-private final String name;
-private final String[] alias;
-private final String description;
-private final boolean isOptional;
-private final boolean hasDefaultValue;
-private final V defaultValue;
-private final ParamValidator validator;
-private final Class valueClass;
-
-ParamInfo(
-String name,
-String[] alias,
-String description,
-boolean isOptional,
-boolean hasDefaultValue,
-V defaultValue,
-ParamValidator validator,
-Class valueClass) {
-this.name = name;
-this.alias = alias;
-this.description = description;
-this.isOptional = isOptional;
-this.hasDefaultValue = hasDefaultValue;
-this.defaultValue = defaultValue;
-this.validator = validator;
-this.valueClass = valueClass;
-}
-
-/**
- * Returns the name of the parameter. The name must be unique in the stage 
the ParamInfo belongs
- * to.
- *
- * @return the name of the parameter
- */
-public String getName() {
-return name;
-}
-
-/**
- * Returns the aliases of the parameter. The alias will be an empty string 
array by default.
- *
- * @return the aliases of the parameter
- */
-public String[] getAlias() {
-Preconditions.checkNotNull(alias);
-return alias;
-}
-
-/**
- * Returns the description of the parameter.
- *
- * @return the description of the parameter
- */
-public String getDescription() {
-return description;
-}
-
-/**
- * Returns whether the parameter is optional.
- *
- * @return {@code true} if the param is optional, {@code false} otherwise
- */
-public boolean isOptional() {
-return isOptional;
-}
-
-/**
- * Returns w

[flink] branch master updated: [FLINK-24308][docs] Translate Kafka DataStream connector documentation to Chinese

2021-10-19 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/master by this push:
 new aa0eb91  [FLINK-24308][docs] Translate Kafka DataStream connector 
documentation to Chinese
aa0eb91 is described below

commit aa0eb91714dbf11d0e01b63baaf910110c0ae991
Author: Qingsheng Ren 
AuthorDate: Wed Sep 22 10:59:53 2021 +0800

[FLINK-24308][docs] Translate Kafka DataStream connector documentation to 
Chinese
---
 .../content.zh/docs/connectors/datastream/kafka.md | 709 +
 docs/content/docs/connectors/datastream/kafka.md   |   2 +-
 2 files changed, 319 insertions(+), 392 deletions(-)

diff --git a/docs/content.zh/docs/connectors/datastream/kafka.md 
b/docs/content.zh/docs/connectors/datastream/kafka.md
index a9ae396..1329970b 100644
--- a/docs/content.zh/docs/connectors/datastream/kafka.md
+++ b/docs/content.zh/docs/connectors/datastream/kafka.md
@@ -27,463 +27,390 @@ under the License.
 
 # Apache Kafka 连接器
 
-Flink 提供了 [Apache Kafka](https://kafka.apache.org) 连接器,用于从 Kafka topic 
中读取或者向其中写入数据,可提供精确一次的处理语义。
-
-
+Flink 提供了 [Apache Kafka](https://kafka.apache.org) 连接器使用精确一次(Exactly-once)的语义在 
Kafka topic 中读取和写入数据。
 
 ## 依赖
 
-Apache Flink 集成了通用的 Kafka 连接器,它会尽力与 Kafka client 的最新版本保持同步。该连接器使用的 Kafka 
client 版本可能会在 Flink 版本之间发生变化。
+Apache Flink 集成了通用的 Kafka 连接器,它会尽力与 Kafka client 的最新版本保持同步。
+该连接器使用的 Kafka client 版本可能会在 Flink 版本之间发生变化。
 当前 Kafka client 向后兼容 0.10.0 或更高版本的 Kafka broker。
-有关 Kafka 兼容性的更多细节,请参考  [Kafka 
官方文档](https://kafka.apache.org/protocol.html#protocol_compatibility)。
+有关 Kafka 兼容性的更多细节,请参考 [Kafka 
官方文档](https://kafka.apache.org/protocol.html#protocol_compatibility)。
 
 {{< artifact flink-connector-kafka withScalaVersion >}}
 
-Flink 目前的流连接器还不是二进制发行版的一部分。
-[在此处]({{< ref "docs/dev/datastream/project-configuration" 
>}})可以了解到如何链接它们,从而在集群中运行。
-
-
-
-## Kafka Consumer
+如果使用 Kafka source,```flink-connector-base``` 也需要包含在依赖中:
 
-Flink 的 Kafka consumer 称为 `FlinkKafkaConsumer`。它提供对一个或多个 Kafka topics 的访问。
+{{< artifact flink-connector-base >}}
 
-构造函数接受以下参数:
+Flink 目前的流连接器还不是二进制发行版的一部分。
+[在此处]({{< ref "docs/dev/datastream/project-configuration" 
>}})可以了解到如何链接它们,从而在集群中运行。
 
-1. Topic 名称或者名称列表
-2. 用于反序列化 Kafka 数据的 DeserializationSchema 或者 KafkaDeserializationSchema
-3. Kafka 消费者的属性。需要以下属性:
-  - "bootstrap.servers"(以逗号分隔的 Kafka broker 列表)
-  - "group.id" 消费组 ID
+## Kafka Source
+{{< hint info >}}
+该文档描述的是基于[新数据源 API]({{< ref "docs/dev/datastream/sources.md" >}}) 的 Kafka 
Source。
+{{< /hint >}}
 
-{{< tabs "fdf41307-604d-426f-9863-666250ce0cdc" >}}
-{{< tab "Java" >}}
+### 使用方法
+Kafka Source 提供了构建类来创建 ```KafkaSource``` 的实例。以下代码片段展示了如何构建 ```KafkaSource```
+来消费 “input-topic” 最早位点的数据, 使用消费组 “my-group”,并且将 Kafka 消息体反序列化为字符串:
 ```java
-Properties properties = new Properties();
-properties.setProperty("bootstrap.servers", "localhost:9092");
-properties.setProperty("group.id", "test");
-DataStream stream = env
-.addSource(new FlinkKafkaConsumer<>("topic", new SimpleStringSchema(), 
properties));
-```
-{{< /tab >}}
-{{< tab "Scala" >}}
-```scala
-val properties = new Properties()
-properties.setProperty("bootstrap.servers", "localhost:9092")
-properties.setProperty("group.id", "test")
-val stream = env
-.addSource(new FlinkKafkaConsumer[String]("topic", new 
SimpleStringSchema(), properties))
-```
-{{< /tab >}}
-{{< /tabs >}}
-
-
-
-### `DeserializationSchema`
-
-Flink Kafka Consumer 需要知道如何将 Kafka 中的二进制数据转换为 Java 或者 Scala 
对象。`KafkaDeserializationSchema` 允许用户指定这样的 schema,每条 Kafka 中的消息会调用 `T 
deserialize(ConsumerRecord record)` 反序列化。
-
-为了方便使用,Flink 提供了以下几种 schemas:
-
-1. `TypeInformationSerializationSchema`(和 
`TypeInformationKeyValueSerializationSchema`) 基于 Flink 的 `TypeInformation` 创建 
`schema`。
-如果该数据的读和写都发生在 Flink 中,那么这将是非常有用的。此 schema 是其他通用序列化方法的高性能 Flink 替代方案。
-
-2. `JsonDeserializationSchema`(和 `JSONKeyValueDeserializationSchema`)将序列化的 
JSON 转化为 ObjectNode 对象,可以使用 `objectNode.get("field").as(Int/String/...)()` 
来访问某个字段。
-KeyValue objectNode 包含一个含所有字段的 key 和 values 
字段,以及一个可选的"metadata"字段,可以访问到消息的 offset、partition、topic 等信息。
-
-3. `GlueSchemaRegistryJsonDeserializationSchema` 可以在[AWS Glue Schema 
Registry](https://docs.aws.amazon.com/glue/latest/dg/schema-registry.html)
-查找编写器的 schema(用于编写记录的 schema)。使用这些反序列化 schema 记录将读取从 AWS Glue Schema 
Registry 检索到的 schema 
转换为代表通用记录的`com.amazonaws.services.schemaregistry.serializers.json.JsonDataWithSchema`
-
或者由[mbknor-jackson-jsonSchema](https://github.com/mbknor/mbknor-jackson-jsonSchema)生成的
 Java POJO. 
-
-要使用此反序列化 schema 必须添加以下依赖:
-
-{{< tabs "8c6721c7-4a48-496e-
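
The email cuts off here; for reference, the snippet the translated paragraph
above describes ("input-topic", earliest offsets, consumer group "my-group"),
reconstructed from the KafkaSource builder API (exact option names can vary by
release):

    import org.apache.flink.api.common.eventtime.WatermarkStrategy;
    import org.apache.flink.api.common.serialization.SimpleStringSchema;
    import org.apache.flink.connector.kafka.source.KafkaSource;
    import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

    public class KafkaSourceExample {
        public static void main(String[] args) throws Exception {
            StreamExecutionEnvironment env =
                    StreamExecutionEnvironment.getExecutionEnvironment();
            KafkaSource<String> source = KafkaSource.<String>builder()
                    .setBootstrapServers("localhost:9092") // broker list
                    .setTopics("input-topic")
                    .setGroupId("my-group")
                    .setStartingOffsets(OffsetsInitializer.earliest())
                    .setValueOnlyDeserializer(new SimpleStringSchema())
                    .build();
            env.fromSource(source, WatermarkStrategy.noWatermarks(), "Kafka Source")
                    .print();
            env.execute();
        }
    }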

[flink] branch release-1.14 updated: [FLINK-24376][runtime] Use operator name for constructing OperatorCoordinatorProvider instead of chained name

2021-10-11 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch release-1.14
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/release-1.14 by this push:
 new b0a593e  [FLINK-24376][runtime] Use operator name for constructing 
OperatorCoordinatorProvider instead of chained name
b0a593e is described below

commit b0a593eaa1a039df751e36ad8fbffd61e6431ddf
Author: Qingsheng Ren 
AuthorDate: Sun Sep 26 15:55:36 2021 +0800

[FLINK-24376][runtime] Use operator name for constructing 
OperatorCoordinatorProvider instead of chained name
---
 .../apache/flink/streaming/api/graph/StreamingJobGraphGenerator.java  | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git 
a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/graph/StreamingJobGraphGenerator.java
 
b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/graph/StreamingJobGraphGenerator.java
index 536577b..75a2c27 100644
--- 
a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/graph/StreamingJobGraphGenerator.java
+++ 
b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/graph/StreamingJobGraphGenerator.java
@@ -442,7 +442,9 @@ public class StreamingJobGraphGenerator {
 createChainedPreferredResources(currentNodeId, 
chainableOutputs));
 
 OperatorID currentOperatorId =
-chainInfo.addNodeToChain(currentNodeId, 
chainedNames.get(currentNodeId));
+chainInfo.addNodeToChain(
+currentNodeId,
+
streamGraph.getStreamNode(currentNodeId).getOperatorName());
 
 if (currentNode.getInputFormat() != null) {
 getOrCreateFormatContainer(startNodeId)


[flink] branch master updated: [FLINK-24376][runtime] Use operator name for constructing OperatorCoordinatorProvider instead of chained name

2021-10-11 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/master by this push:
 new d05c385  [FLINK-24376][runtime] Use operator name for constructing 
OperatorCoordinatorProvider instead of chained name
d05c385 is described below

commit d05c38577df04e937ff80dfbc486c60f34e8e108
Author: Qingsheng Ren 
AuthorDate: Sun Sep 26 15:55:36 2021 +0800

[FLINK-24376][runtime] Use operator name for constructing 
OperatorCoordinatorProvider instead of chained name
---
 .../apache/flink/streaming/api/graph/StreamingJobGraphGenerator.java  | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git 
a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/graph/StreamingJobGraphGenerator.java
 
b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/graph/StreamingJobGraphGenerator.java
index 536577b..75a2c27 100644
--- 
a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/graph/StreamingJobGraphGenerator.java
+++ 
b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/graph/StreamingJobGraphGenerator.java
@@ -442,7 +442,9 @@ public class StreamingJobGraphGenerator {
 createChainedPreferredResources(currentNodeId, 
chainableOutputs));
 
 OperatorID currentOperatorId =
-chainInfo.addNodeToChain(currentNodeId, 
chainedNames.get(currentNodeId));
+chainInfo.addNodeToChain(
+currentNodeId,
+
streamGraph.getStreamNode(currentNodeId).getOperatorName());
 
 if (currentNode.getInputFormat() != null) {
 getOrCreateFormatContainer(startNodeId)


[flink-ml] branch master updated: [FLINK-22915][FLIP-173] Update Flink ML API to support AlgoOperator with multiple input tables and multiple output tables

2021-09-26 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink-ml.git


The following commit(s) were added to refs/heads/master by this push:
 new 5ff346e  [FLINK-22915][FLIP-173] Update Flink ML API to support 
AlgoOperator with multiple input tables and multiple output tables
5ff346e is described below

commit 5ff346ea1a508a00b89759492f09e7330e69baef
Author: Dong Lin 
AuthorDate: Wed Sep 22 13:47:39 2021 +0800

[FLINK-22915][FLIP-173] Update Flink ML API to support AlgoOperator with 
multiple input tables and multiple output tables
---
 .../core/{PipelineStage.java => AlgoOperator.java} |  35 ++-
 .../org/apache/flink/ml/api/core/Estimator.java|  24 +-
 .../java/org/apache/flink/ml/api/core/Model.java   |  34 ++-
 .../org/apache/flink/ml/api/core/Pipeline.java | 257 +
 .../apache/flink/ml/api/core/PipelineModel.java|  83 +++
 .../java/org/apache/flink/ml/api/core/Stage.java   |  44 
 .../org/apache/flink/ml/api/core/Transformer.java  |  22 +-
 .../org/apache/flink/ml/api/core/PipelineTest.java |  69 +++---
 8 files changed, 269 insertions(+), 299 deletions(-)

diff --git 
a/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/PipelineStage.java 
b/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/AlgoOperator.java
similarity index 50%
rename from 
flink-ml-api/src/main/java/org/apache/flink/ml/api/core/PipelineStage.java
rename to 
flink-ml-api/src/main/java/org/apache/flink/ml/api/core/AlgoOperator.java
index 0a3dd23..7f2d4b4 100644
--- a/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/PipelineStage.java
+++ b/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/AlgoOperator.java
@@ -18,29 +18,22 @@
 
 package org.apache.flink.ml.api.core;
 
-import org.apache.flink.ml.api.misc.param.WithParams;
-
-import java.io.Serializable;
+import org.apache.flink.annotation.PublicEvolving;
+import org.apache.flink.table.api.Table;
 
 /**
- * Base class for a stage in a pipeline. The interface is only a concept, and 
does not have any
- * actual functionality. Its subclasses must be either Estimator or 
Transformer. No other classes
- * should inherit this interface directly.
- *
- * Each pipeline stage is with parameters, and requires a public empty 
constructor for
- * restoration in Pipeline.
+ * An AlgoOperator takes a list of tables as inputs and produces a list of 
tables as results. It can
+ * be used to encode generic multi-input multi-output computation logic.
  *
- * @param  The class type of the PipelineStage implementation itself, used 
by {@link
- * org.apache.flink.ml.api.misc.param.WithParams}
- * @see WithParams
+ * @param  The class type of the AlgoOperator implementation itself.
  */
-interface PipelineStage> extends WithParams, 
Serializable {
-
-default String toJson() {
-return getParams().toJson();
-}
-
-default void loadJson(String json) {
-getParams().loadJson(json);
-}
+@PublicEvolving
+public interface AlgoOperator> extends Stage {
+/**
+ * Applies the AlgoOperator on the given input tables and returns the 
result tables.
+ *
+ * @param inputs a list of tables
+ * @return a list of tables
+ */
+Table[] transform(Table... inputs);
 }
diff --git 
a/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/Estimator.java 
b/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/Estimator.java
index 24c8349..bab9c7d 100644
--- a/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/Estimator.java
+++ b/flink-ml-api/src/main/java/org/apache/flink/ml/api/core/Estimator.java
@@ -20,28 +20,20 @@ package org.apache.flink.ml.api.core;
 
 import org.apache.flink.annotation.PublicEvolving;
 import org.apache.flink.table.api.Table;
-import org.apache.flink.table.api.TableEnvironment;
 
 /**
- * Estimators are {@link PipelineStage}s responsible for training and 
generating machine learning
- * models.
+ * Estimators are responsible for training and generating Models.
  *
- * The implementations are expected to take an input table as training 
samples and generate a
- * {@link Model} which fits these samples.
- *
- * @param  class type of the Estimator implementation itself, used by {@link
- * org.apache.flink.ml.api.misc.param.WithParams}.
- * @param  class type of the {@link Model} this Estimator produces.
+ * @param  class type of the Estimator implementation itself.
+ * @param  class type of the Model this Estimator produces.
  */
 @PublicEvolving
-public interface Estimator, M extends Model> 
extends PipelineStage {
-
+public interface Estimator, M extends Model> 
extends Stage {
 /**
- * Train and produce a {@link Model} which fits the records in the given 
{@link Table}.
+ * Trains on the given inputs and produces a Model.
  *
- * @param tEnv the table environment to which the input table is bound.
- * @param input the table 
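
The email truncates mid-diff, but the shape of the new contract is already
clear from AlgoOperator above: n tables in, m tables out. A hypothetical
concrete operator under that contract (not flink-ml code; the Stage plumbing
such as parameters and save/load is omitted):

    import org.apache.flink.table.api.Table;

    public class UnionAllOperator {
        /** Mirrors Table[] transform(Table... inputs): any number of tables in,
         *  a single-element table array out. */
        public Table[] transform(Table... inputs) {
            Table result = inputs[0];
            for (int i = 1; i < inputs.length; i++) {
                result = result.unionAll(inputs[i]); // inputs must share a schema
            }
            return new Table[] {result};
        }
    }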

[flink] branch release-1.12 updated (b842230 -> 7249e0d)

2021-09-18 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch release-1.12
in repository https://gitbox.apache.org/repos/asf/flink.git.


from b842230  [FLINK-23949][runtime][checkpoint] fix first incremental 
checkpoint after a savepoint will degenerate into a full checkpoint
 new bf3df16  [FLINK-24277][connector/kafka] Add configuration for 
committing offset on checkpoint and disable it if group ID is not specified
 new 7249e0d  [FLINK-24277][connector/kafka] Add 
OffsetsInitializerValidator interface for validating offset initializer in 
KafkaSourceBuilder

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../flink/connector/kafka/source/KafkaSource.java  |   6 +
 .../connector/kafka/source/KafkaSourceBuilder.java |  43 +-
 .../connector/kafka/source/KafkaSourceOptions.java |   6 +
 .../initializer/OffsetsInitializerValidator.java   |  19 ++-
 .../ReaderHandledOffsetsInitializer.java   |  17 ++-
 .../initializer/SpecifiedOffsetsInitializer.java   |  22 ++-
 .../source/reader/KafkaPartitionSplitReader.java   |  24 ++--
 .../kafka/source/reader/KafkaSourceReader.java |  17 +++
 .../kafka/source/KafkaSourceBuilderTest.java   | 156 -
 .../connector/kafka/source/KafkaSourceITCase.java  |  19 +++
 .../kafka/source/reader/KafkaSourceReaderTest.java |  36 -
 11 files changed, 332 insertions(+), 33 deletions(-)
 copy 
flink-queryable-state/flink-queryable-state-client-java/src/main/java/org/apache/flink/queryablestate/network/messages/MessageDeserializer.java
 => 
flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerValidator.java
 (61%)


[flink] 01/02: [FLINK-24277][connector/kafka] Add configuration for committing offset on checkpoint and disable it if group ID is not specified

2021-09-18 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch release-1.12
in repository https://gitbox.apache.org/repos/asf/flink.git

commit bf3df16e3cc9a3bede3f5dabc8d08c9369e02485
Author: Qingsheng Ren 
AuthorDate: Thu Sep 16 15:20:08 2021 +0800

[FLINK-24277][connector/kafka] Add configuration for committing offset on 
checkpoint and disable it if group ID is not specified
---
 .../flink/connector/kafka/source/KafkaSource.java  |   6 ++
 .../connector/kafka/source/KafkaSourceBuilder.java |  35 ++-
 .../connector/kafka/source/KafkaSourceOptions.java |   6 ++
 .../source/reader/KafkaPartitionSplitReader.java   |  24 ++---
 .../kafka/source/reader/KafkaSourceReader.java |  17 
 .../kafka/source/KafkaSourceBuilderTest.java   | 103 -
 .../connector/kafka/source/KafkaSourceITCase.java  |  19 
 .../kafka/source/reader/KafkaSourceReaderTest.java |  36 ++-
 8 files changed, 225 insertions(+), 21 deletions(-)

diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java
index ea7ad6c..477a4d6 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java
@@ -18,6 +18,7 @@
 
 package org.apache.flink.connector.kafka.source;
 
+import org.apache.flink.annotation.VisibleForTesting;
 import org.apache.flink.api.common.typeinfo.TypeInformation;
 import org.apache.flink.api.connector.source.Boundedness;
 import org.apache.flink.api.connector.source.Source;
@@ -178,4 +179,9 @@ public class KafkaSource
 props.stringPropertyNames().forEach(key -> config.setString(key, 
props.getProperty(key)));
 return config;
 }
+
+@VisibleForTesting
+Configuration getConfiguration() {
+return toConfiguration(props);
+}
 }
diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
index 0099df3..ddcf2c7 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
@@ -40,6 +40,7 @@ import java.util.Set;
 import java.util.regex.Pattern;
 
 import static org.apache.flink.util.Preconditions.checkNotNull;
+import static org.apache.flink.util.Preconditions.checkState;
 
 /**
  * The @builder class for {@link KafkaSource} to make it easier for the users 
to construct a {@link
@@ -412,8 +413,12 @@ public class KafkaSourceBuilder {
 ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
 ByteArrayDeserializer.class.getName(),
 true);
-maybeOverride(
-ConsumerConfig.GROUP_ID_CONFIG, "KafkaSource-" + new 
Random().nextLong(), false);
+if (!props.containsKey(ConsumerConfig.GROUP_ID_CONFIG)) {
+LOG.warn(
+"Offset commit on checkpoint is disabled because {} is not 
specified",
+ConsumerConfig.GROUP_ID_CONFIG);
+
maybeOverride(KafkaSourceOptions.COMMIT_OFFSETS_ON_CHECKPOINT.key(), "false", 
false);
+}
 maybeOverride(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false", 
false);
 maybeOverride(
 ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,
@@ -426,10 +431,13 @@ public class KafkaSourceBuilder {
 "-1",
 boundedness == Boundedness.BOUNDED);
 
-// If the client id prefix is not set, reuse the consumer group id as 
the client id prefix.
+// If the client id prefix is not set, reuse the consumer group id as 
the client id prefix,
+// or generate a random string if consumer group id is not specified.
 maybeOverride(
 KafkaSourceOptions.CLIENT_ID_PREFIX.key(),
-props.getProperty(ConsumerConfig.GROUP_ID_CONFIG),
+props.containsKey(ConsumerConfig.GROUP_ID_CONFIG)
+? props.getProperty(ConsumerConfig.GROUP_ID_CONFIG)
+: "KafkaSource-" + new Random().nextLong(),
 false);
 }
 
@@ -464,5 +472,24 @@ public class KafkaSourceBuilder {
 "No subscribe mode is specified, "
 + "should be one of topics, topic pattern and 
partition set.");
 checkNotNull(deserializationSchema, "Deserialization schema is 
required but not provided.")
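
For reference, a minimal sketch of how a user can pin this behavior down
explicitly through the builder (bootstrap servers, topic and group id are
placeholders, and the value-only deserializer call follows the 1.13+ builder
API):

    // A group id is set, so offsets would be committed on checkpoint by
    // default; the property below opts out explicitly via the new option.
    KafkaSource<String> source =
            KafkaSource.<String>builder()
                    .setBootstrapServers("broker:9092")
                    .setTopics("input-topic")
                    .setGroupId("my-group")
                    .setValueOnlyDeserializer(new SimpleStringSchema())
                    .setProperty(
                            KafkaSourceOptions.COMMIT_OFFSETS_ON_CHECKPOINT.key(), "false")
                    .build();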

[flink] 02/02: [FLINK-24277][connector/kafka] Add OffsetsInitializerValidator interface for validating offset initializer in KafkaSourceBuilder

2021-09-18 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch release-1.12
in repository https://gitbox.apache.org/repos/asf/flink.git

commit 7249e0d439206595de24c25c654ff204f4a2fde8
Author: Qingsheng Ren 
AuthorDate: Tue Sep 14 18:17:56 2021 +0800

[FLINK-24277][connector/kafka] Add OffsetsInitializerValidator interface 
for validating offset initializer in KafkaSourceBuilder

(cherry picked from commit 2da73edba95685537040305f30ee9d6dfd8d6c02)
---
 .../connector/kafka/source/KafkaSourceBuilder.java |  8 
 .../initializer/OffsetsInitializerValidator.java   | 39 
 .../ReaderHandledOffsetsInitializer.java   | 17 ++-
 .../initializer/SpecifiedOffsetsInitializer.java   | 22 -
 .../kafka/source/KafkaSourceBuilderTest.java   | 53 ++
 5 files changed, 137 insertions(+), 2 deletions(-)

diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
index ddcf2c7..f3ab1cd 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
@@ -21,6 +21,7 @@ package org.apache.flink.connector.kafka.source;
 import org.apache.flink.api.connector.source.Boundedness;
 import 
org.apache.flink.connector.kafka.source.enumerator.initializer.NoStoppingOffsetsInitializer;
 import 
org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
+import 
org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializerValidator;
 import 
org.apache.flink.connector.kafka.source.enumerator.subscriber.KafkaSubscriber;
 import 
org.apache.flink.connector.kafka.source.reader.deserializer.KafkaRecordDeserializer;
 
@@ -478,6 +479,13 @@ public class KafkaSourceBuilder {
 String.format(
 "Property %s is required when offset commit is 
enabled",
 ConsumerConfig.GROUP_ID_CONFIG));
+// Check offsets initializers
+if (startingOffsetsInitializer instanceof OffsetsInitializerValidator) 
{
+((OffsetsInitializerValidator) 
startingOffsetsInitializer).validate(props);
+}
+if (stoppingOffsetsInitializer instanceof OffsetsInitializerValidator) 
{
+((OffsetsInitializerValidator) 
stoppingOffsetsInitializer).validate(props);
+}
 }
 
 private boolean offsetCommitEnabledManually() {
diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerValidator.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerValidator.java
new file mode 100644
index 000..c198107
--- /dev/null
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerValidator.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.connector.kafka.source.enumerator.initializer;
+
+import org.apache.flink.annotation.Internal;
+
+import java.util.Properties;
+
+/**
+ * Interface for validating {@link OffsetsInitializer} with properties from 
{@link
+ * org.apache.flink.connector.kafka.source.KafkaSource}.
+ */
+@Internal
+public interface OffsetsInitializerValidator {
+
+/**
+ * Validate offsets initializer with properties of Kafka source.
+ *
+ * @param kafkaSourceProperties Properties of Kafka source
+ * @throws IllegalStateException if validation fails
+ */
+void validate(Properties kafkaSourceProperties) throws 
IllegalStateException;
+}
diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/ReaderHandledOffsetsInitializer.java
 
b/flink-connec
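
To make the contract concrete, here is a sketch of a custom initializer that
opts into this validation (the class name is hypothetical, checkState is
org.apache.flink.util.Preconditions.checkState, and a real implementation
would also implement OffsetsInitializer itself):

    // Hypothetical initializer that cannot work without a consumer group id.
    class GroupBasedOffsetsInitializer implements OffsetsInitializerValidator {
        @Override
        public void validate(Properties kafkaSourceProperties) throws IllegalStateException {
            // Fails fast in KafkaSourceBuilder.build() instead of at runtime.
            checkState(
                    kafkaSourceProperties.containsKey(ConsumerConfig.GROUP_ID_CONFIG),
                    "Property %s is required for this offsets initializer",
                    ConsumerConfig.GROUP_ID_CONFIG);
        }
    }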

[flink] 02/02: [FLINK-24277][connector/kafka] Add OffsetsInitializerValidator interface for validating offset initializer in KafkaSourceBuilder

2021-09-18 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch release-1.13
in repository https://gitbox.apache.org/repos/asf/flink.git

commit c7c34ed414dcd069bde0d8de36ff049d39f2a618
Author: Qingsheng Ren 
AuthorDate: Tue Sep 14 18:17:56 2021 +0800

[FLINK-24277][connector/kafka] Add OffsetsInitializerValidator interface 
for validating offset initializer in KafkaSourceBuilder

(cherry picked from commit 2da73edba95685537040305f30ee9d6dfd8d6c02)
---
 .../connector/kafka/source/KafkaSourceBuilder.java |  8 
 .../initializer/OffsetsInitializerValidator.java   | 39 
 .../ReaderHandledOffsetsInitializer.java   | 17 ++-
 .../initializer/SpecifiedOffsetsInitializer.java   | 22 -
 .../kafka/source/KafkaSourceBuilderTest.java   | 53 ++
 5 files changed, 137 insertions(+), 2 deletions(-)

diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
index eb93683..d105cd8 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
@@ -22,6 +22,7 @@ import 
org.apache.flink.api.common.serialization.DeserializationSchema;
 import org.apache.flink.api.connector.source.Boundedness;
 import 
org.apache.flink.connector.kafka.source.enumerator.initializer.NoStoppingOffsetsInitializer;
 import 
org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
+import 
org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializerValidator;
 import 
org.apache.flink.connector.kafka.source.enumerator.subscriber.KafkaSubscriber;
 import 
org.apache.flink.connector.kafka.source.reader.deserializer.KafkaRecordDeserializationSchema;
 
@@ -495,6 +496,13 @@ public class KafkaSourceBuilder {
 String.format(
 "Property %s is required when offset commit is 
enabled",
 ConsumerConfig.GROUP_ID_CONFIG));
+// Check offsets initializers
+if (startingOffsetsInitializer instanceof OffsetsInitializerValidator) 
{
+((OffsetsInitializerValidator) 
startingOffsetsInitializer).validate(props);
+}
+if (stoppingOffsetsInitializer instanceof OffsetsInitializerValidator) 
{
+((OffsetsInitializerValidator) 
stoppingOffsetsInitializer).validate(props);
+}
 }
 
 private boolean offsetCommitEnabledManually() {
diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerValidator.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerValidator.java
new file mode 100644
index 000..c198107
--- /dev/null
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerValidator.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.connector.kafka.source.enumerator.initializer;
+
+import org.apache.flink.annotation.Internal;
+
+import java.util.Properties;
+
+/**
+ * Interface for validating {@link OffsetsInitializer} with properties from 
{@link
+ * org.apache.flink.connector.kafka.source.KafkaSource}.
+ */
+@Internal
+public interface OffsetsInitializerValidator {
+
+/**
+ * Validate offsets initializer with properties of Kafka source.
+ *
+ * @param kafkaSourceProperties Properties of Kafka source
+ * @throws IllegalStateException if validation fails
+ */
+void validate(Properties kafkaSourceProperties) throws 
IllegalStateException;
+}
diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/ReaderHandledOff

[flink] branch release-1.13 updated (c9995a9 -> c7c34ed)

2021-09-18 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch release-1.13
in repository https://gitbox.apache.org/repos/asf/flink.git.


from c9995a9  [FLINK-24317][python][tests] Optimize the implementation of 
Top2 in test_flat_aggregate
 new f0bd873  [FLINK-24277][connector/kafka] Add configuration for 
committing offset on checkpoint and disable it if group ID is not specified
 new c7c34ed  [FLINK-24277][connector/kafka] Add 
OffsetsInitializerValidator interface for validating offset initializer in 
KafkaSourceBuilder

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 docs/content/docs/connectors/datastream/kafka.md   |   1 +
 .../flink/connector/kafka/source/KafkaSource.java  |   5 +
 .../connector/kafka/source/KafkaSourceBuilder.java |  43 +-
 .../connector/kafka/source/KafkaSourceOptions.java |   6 +
 .../initializer/OffsetsInitializerValidator.java   |  19 ++-
 .../ReaderHandledOffsetsInitializer.java   |  17 ++-
 .../initializer/SpecifiedOffsetsInitializer.java   |  22 ++-
 .../source/reader/KafkaPartitionSplitReader.java   |  24 ++--
 .../kafka/source/reader/KafkaSourceReader.java |  17 +++
 .../kafka/source/KafkaSourceBuilderTest.java   | 156 -
 .../connector/kafka/source/KafkaSourceITCase.java  |  19 +++
 .../kafka/source/reader/KafkaSourceReaderTest.java |  44 +-
 12 files changed, 340 insertions(+), 33 deletions(-)
 copy 
flink-queryable-state/flink-queryable-state-client-java/src/main/java/org/apache/flink/queryablestate/network/messages/MessageDeserializer.java
 => 
flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerValidator.java
 (61%)


[flink] 01/02: [FLINK-24277][connector/kafka] Add configuration for committing offset on checkpoint and disable it if group ID is not specified

2021-09-18 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch release-1.13
in repository https://gitbox.apache.org/repos/asf/flink.git

commit f0bd873309c3b0a13edb5354912ffdb1169de5b4
Author: Qingsheng Ren 
AuthorDate: Tue Sep 14 15:22:00 2021 +0800

[FLINK-24277][connector/kafka] Add configuration for committing offset on 
checkpoint and disable it if group ID is not specified

(cherry picked from commit ca8bff231aed2412f579d0a4e446e9a6bee42581)
---
 docs/content/docs/connectors/datastream/kafka.md   |   1 +
 .../flink/connector/kafka/source/KafkaSource.java  |   5 +
 .../connector/kafka/source/KafkaSourceBuilder.java |  35 ++-
 .../connector/kafka/source/KafkaSourceOptions.java |   6 ++
 .../source/reader/KafkaPartitionSplitReader.java   |  24 ++---
 .../kafka/source/reader/KafkaSourceReader.java |  17 
 .../kafka/source/KafkaSourceBuilderTest.java   | 103 -
 .../connector/kafka/source/KafkaSourceITCase.java  |  19 
 .../kafka/source/reader/KafkaSourceReaderTest.java |  44 -
 9 files changed, 233 insertions(+), 21 deletions(-)

diff --git a/docs/content/docs/connectors/datastream/kafka.md 
b/docs/content/docs/connectors/datastream/kafka.md
index 8fcd023..cab25dc 100644
--- a/docs/content/docs/connectors/datastream/kafka.md
+++ b/docs/content/docs/connectors/datastream/kafka.md
@@ -154,6 +154,7 @@ KafkaSource has following options for configuration:
   below for more details.
 - ```register.consumer.metrics``` specifies whether to register metrics of 
KafkaConsumer in Flink
 metric group
+- ```commit.offsets.on.checkpoint``` specifies whether to commit consuming 
offsets to Kafka brokers on checkpoint
 
 For configurations of KafkaConsumer, you can refer to
 <a href="http://kafka.apache.org/documentation/#consumerconfigs">Apache Kafka documentation</a>
diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java
index af0013b..d1219c0 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java
@@ -214,4 +214,9 @@ public class KafkaSource
 props.stringPropertyNames().forEach(key -> config.setString(key, 
props.getProperty(key)));
 return config;
 }
+
+@VisibleForTesting
+Configuration getConfiguration() {
+return toConfiguration(props);
+}
 }
diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
index cd286ed..eb93683 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
@@ -41,6 +41,7 @@ import java.util.Set;
 import java.util.regex.Pattern;
 
 import static org.apache.flink.util.Preconditions.checkNotNull;
+import static org.apache.flink.util.Preconditions.checkState;
 
 /**
  * The @builder class for {@link KafkaSource} to make it easier for the users 
to construct a {@link
@@ -429,8 +430,12 @@ public class KafkaSourceBuilder {
 ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
 ByteArrayDeserializer.class.getName(),
 true);
-maybeOverride(
-ConsumerConfig.GROUP_ID_CONFIG, "KafkaSource-" + new 
Random().nextLong(), false);
+if (!props.containsKey(ConsumerConfig.GROUP_ID_CONFIG)) {
+LOG.warn(
+"Offset commit on checkpoint is disabled because {} is not 
specified",
+ConsumerConfig.GROUP_ID_CONFIG);
+
maybeOverride(KafkaSourceOptions.COMMIT_OFFSETS_ON_CHECKPOINT.key(), "false", 
false);
+}
 maybeOverride(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false", 
false);
 maybeOverride(
 ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,
@@ -443,10 +448,13 @@ public class KafkaSourceBuilder {
 "-1",
 boundedness == Boundedness.BOUNDED);
 
-// If the client id prefix is not set, reuse the consumer group id as 
the client id prefix.
+// If the client id prefix is not set, reuse the consumer group id as 
the client id prefix,
+// or generate a random string if consumer group id is not specified.
 maybeOverride(
 KafkaSourceOptions.CLIENT_ID_PREFIX.key(),
-props.getProperty(ConsumerConfig.GROUP_ID_CONFIG),
+   

[flink] 02/03: [FLINK-24277][connector/kafka] Remove auto-generated group id in Kafka table source

2021-09-18 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch release-1.14
in repository https://gitbox.apache.org/repos/asf/flink.git

commit 984cfe9797562d578d54329e8660758d777d80ec
Author: Qingsheng Ren 
AuthorDate: Tue Sep 14 15:24:10 2021 +0800

[FLINK-24277][connector/kafka] Remove auto-generated group id in Kafka 
table source
---
 .../connectors/kafka/table/KafkaDynamicSource.java | 16 --
 .../kafka/source/KafkaSourceTestUtils.java |  6 
 .../kafka/table/KafkaDynamicTableFactoryTest.java  | 34 +-
 3 files changed, 39 insertions(+), 17 deletions(-)

diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicSource.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicSource.java
index 8e12124..ab0fa13 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicSource.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicSource.java
@@ -49,12 +49,9 @@ import org.apache.flink.table.types.DataType;
 import org.apache.flink.table.types.utils.DataTypeUtils;
 import org.apache.flink.util.Preconditions;
 
-import org.apache.kafka.clients.consumer.ConsumerConfig;
 import org.apache.kafka.clients.consumer.ConsumerRecord;
 import org.apache.kafka.common.TopicPartition;
 import org.apache.kafka.common.header.Header;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import javax.annotation.Nullable;
 
@@ -77,8 +74,6 @@ import java.util.stream.Stream;
 public class KafkaDynamicSource
 implements ScanTableSource, SupportsReadingMetadata, 
SupportsWatermarkPushDown {
 
-private static final Logger LOG = 
LoggerFactory.getLogger(KafkaDynamicSource.class);
-
 // 

 // Mutable attributes
 // 

@@ -389,17 +384,6 @@ public class KafkaDynamicSource
 kafkaSourceBuilder.setTopicPattern(topicPattern);
 }
 
-// For compatibility with legacy source that is not validating group id
-if (!properties.containsKey(ConsumerConfig.GROUP_ID_CONFIG)) {
-String generatedGroupId = "KafkaSource-" + tableIdentifier;
-LOG.warn(
-"Property \"{}\" is required for offset commit but not set 
in table options. "
-+ "Assigning \"{}\" as consumer group id",
-ConsumerConfig.GROUP_ID_CONFIG,
-generatedGroupId);
-kafkaSourceBuilder.setGroupId(generatedGroupId);
-}
-
 switch (startupMode) {
 case EARLIEST:
 
kafkaSourceBuilder.setStartingOffsets(OffsetsInitializer.earliest());
diff --git 
a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/KafkaSourceTestUtils.java
 
b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/KafkaSourceTestUtils.java
index fce9591..572b77d 100644
--- 
a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/KafkaSourceTestUtils.java
+++ 
b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/KafkaSourceTestUtils.java
@@ -19,6 +19,7 @@
 package org.apache.flink.connector.kafka.source;
 
 import org.apache.flink.api.connector.source.SourceReaderContext;
+import org.apache.flink.configuration.Configuration;
 import org.apache.flink.connector.kafka.source.reader.KafkaSourceReader;
 
 import java.util.Collection;
@@ -44,4 +45,9 @@ public class KafkaSourceTestUtils {
 return ((KafkaSourceReader)
 kafkaSource.createReader(sourceReaderContext, 
splitFinishedHook));
 }
+
+/** Get configuration of KafkaSource. */
+public static Configuration getKafkaSourceConfiguration(KafkaSource 
kafkaSource) {
+return kafkaSource.getConfiguration();
+}
 }
diff --git 
a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicTableFactoryTest.java
 
b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicTableFactoryTest.java
index 01af4b0..a0cc3cf 100644
--- 
a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicTableFactoryTest.java
+++ 
b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/streaming/connectors/kafka/table/KafkaDynamicTableFactoryTest.java
@@ -22,9 +22,13 @@ import 
org.apache.flink.api.c
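
A sketch of what the new test utility makes observable: built without a group
id, the source now ends up with offset commit disabled rather than with an
auto-generated group id (servers and topic are placeholders; assertFalse is
org.junit.Assert.assertFalse):

    KafkaSource<String> source =
            KafkaSource.<String>builder()
                    .setBootstrapServers("broker:9092")
                    .setTopics("input-topic")
                    .setValueOnlyDeserializer(new SimpleStringSchema())
                    .build();
    Configuration config = KafkaSourceTestUtils.getKafkaSourceConfiguration(source);
    // No group.id was given, so commit.offsets.on.checkpoint is forced to false.
    assertFalse(config.get(KafkaSourceOptions.COMMIT_OFFSETS_ON_CHECKPOINT));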

[flink] branch release-1.14 updated (791c1b9 -> cc19997)

2021-09-18 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch release-1.14
in repository https://gitbox.apache.org/repos/asf/flink.git.


from 791c1b9  [FLINK-24300] SourceOperator#getAvailableFuture reuses future
 new 677caa8  [FLINK-24277][connector/kafka] Add configuration for 
committing offset on checkpoint and disable it if group ID is not specified
 new 984cfe9  [FLINK-24277][connector/kafka] Remove auto-generated group id 
in Kafka table source
 new cc19997  [FLINK-24277][connector/kafka] Add 
OffsetsInitializerValidator interface for validating offset initializer in 
KafkaSourceBuilder

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 docs/content/docs/connectors/datastream/kafka.md   |   1 +
 .../flink/connector/kafka/source/KafkaSource.java  |   5 +
 .../connector/kafka/source/KafkaSourceBuilder.java |  43 ++-
 .../connector/kafka/source/KafkaSourceOptions.java |   6 +
 .../initializer/OffsetsInitializerValidator.java   |  19 ++-
 .../ReaderHandledOffsetsInitializer.java   |  17 ++-
 .../initializer/SpecifiedOffsetsInitializer.java   |  22 +++-
 .../source/reader/KafkaPartitionSplitReader.java   |  24 ++--
 .../kafka/source/reader/KafkaSourceReader.java |  17 +++
 .../connectors/kafka/table/KafkaDynamicSource.java |  16 ---
 .../kafka/source/KafkaSourceBuilderTest.java   | 140 -
 .../connector/kafka/source/KafkaSourceITCase.java  |  21 
 .../kafka/source/KafkaSourceTestUtils.java |   6 +
 .../kafka/source/reader/KafkaSourceReaderTest.java |  44 ++-
 .../kafka/table/KafkaDynamicTableFactoryTest.java  |  34 -
 15 files changed, 365 insertions(+), 50 deletions(-)
 copy 
flink-queryable-state/flink-queryable-state-client-java/src/main/java/org/apache/flink/queryablestate/network/messages/MessageDeserializer.java
 => 
flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerValidator.java
 (61%)


[flink] 01/03: [FLINK-24277][connector/kafka] Add configuration for committing offset on checkpoint and disable it if group ID is not specified

2021-09-18 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch release-1.14
in repository https://gitbox.apache.org/repos/asf/flink.git

commit 677caa8d97161403b9b090fad8fe91d576db7069
Author: Qingsheng Ren 
AuthorDate: Tue Sep 14 15:22:00 2021 +0800

[FLINK-24277][connector/kafka] Add configuration for committing offset on 
checkpoint and disable it if group ID is not specified
---
 docs/content/docs/connectors/datastream/kafka.md   |  1 +
 .../flink/connector/kafka/source/KafkaSource.java  |  5 ++
 .../connector/kafka/source/KafkaSourceBuilder.java | 35 -
 .../connector/kafka/source/KafkaSourceOptions.java |  6 ++
 .../source/reader/KafkaPartitionSplitReader.java   | 24 +++---
 .../kafka/source/reader/KafkaSourceReader.java | 17 +
 .../kafka/source/KafkaSourceBuilderTest.java   | 87 +-
 .../connector/kafka/source/KafkaSourceITCase.java  | 21 ++
 .../kafka/source/reader/KafkaSourceReaderTest.java | 44 ++-
 9 files changed, 219 insertions(+), 21 deletions(-)

diff --git a/docs/content/docs/connectors/datastream/kafka.md 
b/docs/content/docs/connectors/datastream/kafka.md
index a94d7bd..b614c10 100644
--- a/docs/content/docs/connectors/datastream/kafka.md
+++ b/docs/content/docs/connectors/datastream/kafka.md
@@ -154,6 +154,7 @@ KafkaSource has following options for configuration:
   below for more details.
 - ```register.consumer.metrics``` specifies whether to register metrics of 
KafkaConsumer in Flink
 metric group
+- ```commit.offsets.on.checkpoint``` specifies whether to commit consuming 
offsets to Kafka brokers on checkpoint
 
 For configurations of KafkaConsumer, you can refer to
 <a href="http://kafka.apache.org/documentation/#consumerconfigs">Apache Kafka documentation</a>
diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java
index a5d89b9..9a05089 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java
@@ -214,4 +214,9 @@ public class KafkaSource
 props.stringPropertyNames().forEach(key -> config.setString(key, 
props.getProperty(key)));
 return config;
 }
+
+@VisibleForTesting
+Configuration getConfiguration() {
+return toConfiguration(props);
+}
 }
diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
index cd286ed..eb93683 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
@@ -41,6 +41,7 @@ import java.util.Set;
 import java.util.regex.Pattern;
 
 import static org.apache.flink.util.Preconditions.checkNotNull;
+import static org.apache.flink.util.Preconditions.checkState;
 
 /**
  * The @builder class for {@link KafkaSource} to make it easier for the users 
to construct a {@link
@@ -429,8 +430,12 @@ public class KafkaSourceBuilder {
 ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
 ByteArrayDeserializer.class.getName(),
 true);
-maybeOverride(
-ConsumerConfig.GROUP_ID_CONFIG, "KafkaSource-" + new 
Random().nextLong(), false);
+if (!props.containsKey(ConsumerConfig.GROUP_ID_CONFIG)) {
+LOG.warn(
+"Offset commit on checkpoint is disabled because {} is not 
specified",
+ConsumerConfig.GROUP_ID_CONFIG);
+
maybeOverride(KafkaSourceOptions.COMMIT_OFFSETS_ON_CHECKPOINT.key(), "false", 
false);
+}
 maybeOverride(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false", 
false);
 maybeOverride(
 ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,
@@ -443,10 +448,13 @@ public class KafkaSourceBuilder {
 "-1",
 boundedness == Boundedness.BOUNDED);
 
-// If the client id prefix is not set, reuse the consumer group id as 
the client id prefix.
+// If the client id prefix is not set, reuse the consumer group id as 
the client id prefix,
+// or generate a random string if consumer group id is not specified.
 maybeOverride(
 KafkaSourceOptions.CLIENT_ID_PREFIX.key(),
-props.getProperty(ConsumerConfig.GROUP_ID_CONFIG),
+props.containsKey(ConsumerConfig.GROUP_ID_CONFIG)
+ 

[flink] 03/03: [FLINK-24277][connector/kafka] Add OffsetsInitializerValidator interface for validating offset initializer in KafkaSourceBuilder

2021-09-18 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch release-1.14
in repository https://gitbox.apache.org/repos/asf/flink.git

commit cc19997d6124e0b4f8c905601a3c98b328014f1d
Author: Qingsheng Ren 
AuthorDate: Tue Sep 14 18:17:56 2021 +0800

[FLINK-24277][connector/kafka] Add OffsetsInitializerValidator interface 
for validating offset initializer in KafkaSourceBuilder
---
 .../connector/kafka/source/KafkaSourceBuilder.java |  8 
 .../initializer/OffsetsInitializerValidator.java   | 39 +++
 .../ReaderHandledOffsetsInitializer.java   | 17 ++-
 .../initializer/SpecifiedOffsetsInitializer.java   | 22 -
 .../kafka/source/KafkaSourceBuilderTest.java   | 55 +-
 5 files changed, 138 insertions(+), 3 deletions(-)

diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
index eb93683..d105cd8 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
@@ -22,6 +22,7 @@ import 
org.apache.flink.api.common.serialization.DeserializationSchema;
 import org.apache.flink.api.connector.source.Boundedness;
 import 
org.apache.flink.connector.kafka.source.enumerator.initializer.NoStoppingOffsetsInitializer;
 import 
org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
+import 
org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializerValidator;
 import 
org.apache.flink.connector.kafka.source.enumerator.subscriber.KafkaSubscriber;
 import 
org.apache.flink.connector.kafka.source.reader.deserializer.KafkaRecordDeserializationSchema;
 
@@ -495,6 +496,13 @@ public class KafkaSourceBuilder {
 String.format(
 "Property %s is required when offset commit is 
enabled",
 ConsumerConfig.GROUP_ID_CONFIG));
+// Check offsets initializers
+if (startingOffsetsInitializer instanceof OffsetsInitializerValidator) 
{
+((OffsetsInitializerValidator) 
startingOffsetsInitializer).validate(props);
+}
+if (stoppingOffsetsInitializer instanceof OffsetsInitializerValidator) 
{
+((OffsetsInitializerValidator) 
stoppingOffsetsInitializer).validate(props);
+}
 }
 
 private boolean offsetCommitEnabledManually() {
diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerValidator.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerValidator.java
new file mode 100644
index 000..c198107
--- /dev/null
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerValidator.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.connector.kafka.source.enumerator.initializer;
+
+import org.apache.flink.annotation.Internal;
+
+import java.util.Properties;
+
+/**
+ * Interface for validating {@link OffsetsInitializer} with properties from 
{@link
+ * org.apache.flink.connector.kafka.source.KafkaSource}.
+ */
+@Internal
+public interface OffsetsInitializerValidator {
+
+/**
+ * Validate offsets initializer with properties of Kafka source.
+ *
+ * @param kafkaSourceProperties Properties of Kafka source
+ * @throws IllegalStateException if validation fails
+ */
+void validate(Properties kafkaSourceProperties) throws 
IllegalStateException;
+}
diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/ReaderHandledOffsetsInitializer.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/or

[flink] branch master updated (6f07196 -> 2da73ed)

2021-09-15 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git.


from 6f07196  [FLINK-24217][docs-zh] Translate "LOAD Statements" page of 
"SQL" into Chinese (#17221)
 add ca8bff2  [FLINK-24277][connector/kafka] Add configuration for 
committing offset on checkpoint and disable it if group ID is not specified
 add f3ef860  [FLINK-24277][connector/kafka] Remove auto-generated group id 
in Kafka table source
 add 2da73ed  [FLINK-24277][connector/kafka] Add 
OffsetsInitializerValidator interface for validating offset initializer in 
KafkaSourceBuilder

No new revisions were added by this update.

Summary of changes:
 docs/content/docs/connectors/datastream/kafka.md   |   1 +
 .../flink/connector/kafka/source/KafkaSource.java  |   5 +
 .../connector/kafka/source/KafkaSourceBuilder.java |  43 ++-
 .../connector/kafka/source/KafkaSourceOptions.java |   6 +
 .../initializer/OffsetsInitializerValidator.java   |  19 ++-
 .../ReaderHandledOffsetsInitializer.java   |  17 ++-
 .../initializer/SpecifiedOffsetsInitializer.java   |  22 +++-
 .../source/reader/KafkaPartitionSplitReader.java   |  24 ++--
 .../kafka/source/reader/KafkaSourceReader.java |  17 +++
 .../connectors/kafka/table/KafkaDynamicSource.java |  16 ---
 .../kafka/source/KafkaSourceBuilderTest.java   | 140 -
 .../connector/kafka/source/KafkaSourceITCase.java  |  21 
 .../kafka/source/KafkaSourceTestUtils.java |   6 +
 .../kafka/source/reader/KafkaSourceReaderTest.java |  44 ++-
 .../kafka/table/KafkaDynamicTableFactoryTest.java  |  34 -
 15 files changed, 365 insertions(+), 50 deletions(-)
 copy 
flink-queryable-state/flink-queryable-state-client-java/src/main/java/org/apache/flink/queryablestate/network/messages/MessageDeserializer.java
 => 
flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerValidator.java
 (61%)


[flink] branch release-1.14 updated: [FLINK-24059][Connectors/Common][test] Allow SourceReaderTestBase.NUM_SPLITS to be overridden (#17064)

2021-09-15 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch release-1.14
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/release-1.14 by this push:
 new 8d148a8  [FLINK-24059][Connectors/Common][test] Allow 
SourceReaderTestBase.NUM_SPLITS to be overridden (#17064)
8d148a8 is described below

commit 8d148a8b7832fcefefa4818de8e700562f0ffd26
Author: Brian Zhou 
AuthorDate: Thu Sep 9 11:33:41 2021 +0800

[FLINK-24059][Connectors/Common][test] Allow 
SourceReaderTestBase.NUM_SPLITS to be overridden (#17064)
---
 .../kafka/source/reader/KafkaSourceReaderTest.java | 15 +---
 .../source/reader/SourceReaderTestBase.java| 43 +-
 2 files changed, 36 insertions(+), 22 deletions(-)

diff --git 
a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java
 
b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java
index 58b3fdd..f4e3fbd 100644
--- 
a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java
+++ 
b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java
@@ -76,6 +76,7 @@ import static 
org.apache.flink.connector.kafka.source.metrics.KafkaSourceReaderM
 import static 
org.apache.flink.connector.kafka.source.metrics.KafkaSourceReaderMetrics.KAFKA_SOURCE_READER_METRIC_GROUP;
 import static 
org.apache.flink.connector.kafka.source.metrics.KafkaSourceReaderMetrics.PARTITION_GROUP;
 import static 
org.apache.flink.connector.kafka.source.metrics.KafkaSourceReaderMetrics.TOPIC_GROUP;
+import static 
org.apache.flink.connector.kafka.source.testutils.KafkaSourceTestEnv.NUM_PARTITIONS;
 import static org.apache.flink.core.testutils.CommonTestUtils.waitUtil;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
@@ -89,7 +90,7 @@ public class KafkaSourceReaderTest extends 
SourceReaderTestBase)
 createReader(Boundedness.CONTINUOUS_UNBOUNDED, 
groupId)) {
 reader.addSplits(
-getSplits(NUM_SPLITS, NUM_RECORDS_PER_SPLIT, 
Boundedness.CONTINUOUS_UNBOUNDED));
+getSplits(numSplits, NUM_RECORDS_PER_SPLIT, 
Boundedness.CONTINUOUS_UNBOUNDED));
 ValidatingSourceOutput output = new ValidatingSourceOutput();
 long checkpointId = 0;
 do {
@@ -204,7 +209,7 @@ public class KafkaSourceReaderTest extends 
SourceReaderTestBase
 assertEquals(NUM_RECORDS_PER_SPLIT, 
offsetAndMetadata.offset()));
@@ -480,7 +485,7 @@ public class KafkaSourceReaderTest extends 
SourceReaderTestBase> getRecords() {
 List> records = new ArrayList<>();
-for (int part = 0; part < NUM_SPLITS; part++) {
+for (int part = 0; part < NUM_PARTITIONS; part++) {
 for (int i = 0; i < NUM_RECORDS_PER_SPLIT; i++) {
 records.add(
 new ProducerRecord<>(
diff --git 
a/flink-test-utils-parent/flink-connector-test-utils/src/main/java/org/apache/flink/connector/testutils/source/reader/SourceReaderTestBase.java
 
b/flink-test-utils-parent/flink-connector-test-utils/src/main/java/org/apache/flink/connector/testutils/source/reader/SourceReaderTestBase.java
index c109aae..462c5b2 100644
--- 
a/flink-test-utils-parent/flink-connector-test-utils/src/main/java/org/apache/flink/connector/testutils/source/reader/SourceReaderTestBase.java
+++ 
b/flink-test-utils-parent/flink-connector-test-utils/src/main/java/org/apache/flink/connector/testutils/source/reader/SourceReaderTestBase.java
@@ -49,9 +49,18 @@ import static org.junit.Assert.assertFalse;
  */
 public abstract class SourceReaderTestBase extends 
TestLogger {
 
-protected static final int NUM_SPLITS = 10;
+protected final int numSplits;
+protected final int totalNumRecords;
 protected static final int NUM_RECORDS_PER_SPLIT = 10;
-protected static final int TOTAL_NUM_RECORDS = NUM_RECORDS_PER_SPLIT * 
NUM_SPLITS;
+
+public SourceReaderTestBase() {
+this.numSplits = getNumSplits();
+this.totalNumRecords = this.numSplits * NUM_RECORDS_PER_SPLIT;
+}
+
+protected int getNumSplits() {
+return 10;
+}
 
 @Rule public ExpectedException expectedException = 
ExpectedException.none();
 
@@ -68,9 +77,9 @@ public abstract class SourceReaderTestBase extends T
 @Test
 public void testRead() throws Exception {
 try (SourceReader reader = createReader()) {
-reader.addSplits(getSplits(NUM_SPLITS, NUM_RECORDS_PER_SPLIT, 
Boundedness.BOUNDED));
+reader.addSplits(getSplits(numSplits, NUM_RECORDS_PER_SPLIT, 
Boundedness.BOUNDED));
 Validati
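
A sketch of a connector test picking a smaller split count through the new
hook (the subclass name and split type are hypothetical, with MySplit assumed
to implement SourceSplit; the base class keeps 10 as its default, and the
remaining abstract factory methods are omitted):

    public class SmallSplitCountReaderTest extends SourceReaderTestBase<MySplit> {
        @Override
        protected int getNumSplits() {
            // totalNumRecords becomes 4 * NUM_RECORDS_PER_SPLIT for this suite.
            return 4;
        }
        // createReader(), getSplits(...) etc. omitted for brevity
    }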

[flink] branch release-1.14 updated (da82cb1 -> b26f7e7)

2021-09-13 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch release-1.14
in repository https://gitbox.apache.org/repos/asf/flink.git.


from da82cb1  [FLINK-24155][documentation] Sync Chinese version of 
documentation to configure CheckpointFailureManager (#17173) (#17252)
 add 52a26c8  [hotfix][connector/common] Expose splitFinishedHook in 
SplitFetcher for fine-grained testing
 add b26f7e7  [FLINK-23773][connector/kafka] Mark empty splits as finished 
to cleanup states in SplitFetcher

No new revisions were added by this update.

Summary of changes:
 .../SingleThreadMultiplexSourceReaderBase.java |  3 +-
 .../reader/fetcher/SingleThreadFetcherManager.java | 21 +++
 .../base/source/reader/fetcher/SplitFetcher.java   |  6 +-
 .../source/reader/fetcher/SplitFetcherManager.java | 26 -
 .../source/reader/fetcher/SplitFetcherTest.java|  6 +-
 .../flink/connector/kafka/source/KafkaSource.java  | 14 -
 .../source/reader/KafkaPartitionSplitReader.java   | 31 +--
 .../kafka/source/reader/KafkaSourceReader.java | 10 +---
 .../reader/fetcher/KafkaSourceFetcherManager.java  |  8 ++-
 .../kafka/source/KafkaSourceTestUtils.java | 47 
 .../reader/KafkaPartitionSplitReaderTest.java  | 33 +++
 .../kafka/source/reader/KafkaSourceReaderTest.java | 64 +++---
 12 files changed, 237 insertions(+), 32 deletions(-)
 create mode 100644 
flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/KafkaSourceTestUtils.java


[flink] 02/02: [FLINK-23773][connector/kafka] Mark empty splits as finished to cleanup states in SplitFetcher

2021-09-12 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch release-1.13
in repository https://gitbox.apache.org/repos/asf/flink.git

commit 763ac52092ba70dfef989d18b711400b437e6e09
Author: Qingsheng Ren 
AuthorDate: Mon Aug 23 11:26:44 2021 +0800

[FLINK-23773][connector/kafka] Mark empty splits as finished to cleanup 
states in SplitFetcher

(cherry picked from commit fe17ca6042c570ce603bf4308775f61db1d515c9)
---
 .../source/reader/KafkaPartitionSplitReader.java   | 31 ++---
 .../reader/KafkaPartitionSplitReaderTest.java  | 34 +++
 .../kafka/source/reader/KafkaSourceReaderTest.java | 39 ++
 3 files changed, 99 insertions(+), 5 deletions(-)

diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReader.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReader.java
index e5e1425..e13e10e 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReader.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReader.java
@@ -56,6 +56,7 @@ import java.util.Map;
 import java.util.Properties;
 import java.util.Set;
 import java.util.StringJoiner;
+import java.util.stream.Collectors;
 
 /**
  * A {@link SplitReader} implementation that reads records from Kafka 
partitions.
@@ -77,6 +78,9 @@ public class KafkaPartitionSplitReader
 private final int subtaskId;
 private final KafkaSourceReaderMetrics kafkaSourceReaderMetrics;
 
+// Tracking empty splits that has not been added to finished splits in 
fetch()
+private final Set emptySplits = new HashSet<>();
+
 public KafkaPartitionSplitReader(
 Properties props,
 KafkaRecordDeserializationSchema deserializationSchema,
@@ -174,6 +178,14 @@ public class KafkaPartitionSplitReader
 tp, 
recordsFromPartition.get(recordsFromPartition.size() - 1).offset());
 }
 }
+
+// Some splits are discovered as empty when handling split additions. 
These splits should be
+// added to finished splits to clean up states in split fetcher and 
source reader.
+if (!emptySplits.isEmpty()) {
+recordsBySplits.finishedSplits.addAll(emptySplits);
+emptySplits.clear();
+}
+
 // Unassign the partitions that has finished.
 if (!finishedPartitions.isEmpty()) {
 unassignPartitions(finishedPartitions);
@@ -342,15 +354,24 @@ public class KafkaPartitionSplitReader
 }
 
 private void removeEmptySplits() {
-List emptySplits = new ArrayList<>();
+List emptyPartitions = new ArrayList<>();
 // If none of the partitions have any records,
 for (TopicPartition tp : consumer.assignment()) {
 if (consumer.position(tp) >= getStoppingOffset(tp)) {
-emptySplits.add(tp);
+emptyPartitions.add(tp);
 }
 }
-if (!emptySplits.isEmpty()) {
-unassignPartitions(emptySplits);
+if (!emptyPartitions.isEmpty()) {
+LOG.debug(
+"These assigning splits are empty and will be marked as 
finished in later fetch: {}",
+emptyPartitions);
+// Add empty partitions to empty split set for later cleanup in 
fetch()
+emptySplits.addAll(
+emptyPartitions.stream()
+.map(KafkaPartitionSplit::toSplitId)
+.collect(Collectors.toSet()));
+// Un-assign partitions from Kafka consumer
+unassignPartitions(emptyPartitions);
 }
 }
 
@@ -366,7 +387,7 @@ public class KafkaPartitionSplitReader
 "[%s, start:%d, stop: %d]",
 split.getTopicPartition(), startingOffset, 
stoppingOffset));
 }
-LOG.debug("SplitsChange handling result: {}", 
splitsInfo.toString());
+LOG.debug("SplitsChange handling result: {}", splitsInfo);
 }
 }
 
diff --git 
a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReaderTest.java
 
b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReaderTest.java
index 2a477dd..d64225f 100644
--- 
a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReaderTest.java
+++ 
b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reade
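
For illustration, an "empty" split in the sense of this fix is one whose
starting offset already reaches its stopping offset, so it can never produce
a record (topic name is a placeholder):

    // Both the starting and the stopping offset are 100; after this change
    // the reader reports such a split through finishedSplits in fetch()
    // instead of silently un-assigning it and leaking reader state.
    KafkaPartitionSplit emptySplit =
            new KafkaPartitionSplit(new TopicPartition("input-topic", 0), 100L, 100L);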

[flink] branch release-1.13 updated (1af7731 -> 763ac52)

2021-09-12 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch release-1.13
in repository https://gitbox.apache.org/repos/asf/flink.git.


from 1af7731  [FLINK-22971][tests] Bump testcontainers to 1.16.0
 new fdc2fb2  [hotfix][connector/common] Expose splitFinishedHook in 
SplitFetcher for fine-grained testing
 new 763ac52  [FLINK-23773][connector/kafka] Mark empty splits as finished 
to cleanup states in SplitFetcher

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../SingleThreadMultiplexSourceReaderBase.java |  3 +-
 .../reader/fetcher/SingleThreadFetcherManager.java | 21 +++
 .../base/source/reader/fetcher/SplitFetcher.java   |  6 +-
 .../source/reader/fetcher/SplitFetcherManager.java | 26 -
 .../source/reader/fetcher/SplitFetcherTest.java|  6 +-
 .../flink/connector/kafka/source/KafkaSource.java  | 14 -
 .../source/reader/KafkaPartitionSplitReader.java   | 31 --
 .../kafka/source/reader/KafkaSourceReader.java | 10 +---
 .../reader/fetcher/KafkaSourceFetcherManager.java  |  8 ++-
 .../kafka/source/KafkaSourceTestUtils.java | 47 +++
 .../reader/KafkaPartitionSplitReaderTest.java  | 34 +++
 .../kafka/source/reader/KafkaSourceReaderTest.java | 66 +++---
 12 files changed, 240 insertions(+), 32 deletions(-)
 create mode 100644 
flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/KafkaSourceTestUtils.java


[flink] 01/02: [hotfix][connector/common] Expose splitFinishedHook in SplitFetcher for fine-grained testing

2021-09-12 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch release-1.13
in repository https://gitbox.apache.org/repos/asf/flink.git

commit fdc2fb26622254b57f37fcfd405db2cbe0c71213
Author: Qingsheng Ren 
AuthorDate: Mon Aug 23 12:13:32 2021 +0800

[hotfix][connector/common] Expose splitFinishedHook in SplitFetcher for 
fine-grained testing

(cherry picked from commit 754b744c2a3d4a15c2197a2df4f92c19bd3e33b7)
---
 .../SingleThreadMultiplexSourceReaderBase.java |  3 +-
 .../reader/fetcher/SingleThreadFetcherManager.java | 21 ++
 .../base/source/reader/fetcher/SplitFetcher.java   |  6 ++-
 .../source/reader/fetcher/SplitFetcherManager.java | 26 +++-
 .../source/reader/fetcher/SplitFetcherTest.java|  6 ++-
 .../flink/connector/kafka/source/KafkaSource.java  | 14 ++-
 .../kafka/source/reader/KafkaSourceReader.java | 10 +
 .../reader/fetcher/KafkaSourceFetcherManager.java  |  8 +++-
 .../kafka/source/KafkaSourceTestUtils.java | 47 ++
 .../kafka/source/reader/KafkaSourceReaderTest.java | 27 -
 10 files changed, 141 insertions(+), 27 deletions(-)

diff --git 
a/flink-connectors/flink-connector-base/src/main/java/org/apache/flink/connector/base/source/reader/SingleThreadMultiplexSourceReaderBase.java
 
b/flink-connectors/flink-connector-base/src/main/java/org/apache/flink/connector/base/source/reader/SingleThreadMultiplexSourceReaderBase.java
index 377a72d..e3b8d43 100644
--- 
a/flink-connectors/flink-connector-base/src/main/java/org/apache/flink/connector/base/source/reader/SingleThreadMultiplexSourceReaderBase.java
+++ 
b/flink-connectors/flink-connector-base/src/main/java/org/apache/flink/connector/base/source/reader/SingleThreadMultiplexSourceReaderBase.java
@@ -26,7 +26,6 @@ import 
org.apache.flink.connector.base.source.reader.fetcher.SingleThreadFetcher
 import org.apache.flink.connector.base.source.reader.splitreader.SplitReader;
 import 
org.apache.flink.connector.base.source.reader.synchronization.FutureCompletingBlockingQueue;
 
-import java.util.Collection;
 import java.util.function.Supplier;
 
 /**
@@ -47,7 +46,7 @@ import java.util.function.Supplier;
  *   The class must override the methods to convert back and forth between 
the immutable splits
  *   ({@code SplitT}) and the mutable split state representation ({@code 
SplitStateT}).
  *   Finally, the reader must decide what to do when it starts ({@link 
#start()}) or when a
- *   split is finished ({@link #onSplitFinished(Collection)}).
+ *   split is finished ({@link #onSplitFinished(java.util.Map)}).
  * 
  *
  * @param  The type of the records (the raw type that typically contains 
checkpointing
diff --git 
a/flink-connectors/flink-connector-base/src/main/java/org/apache/flink/connector/base/source/reader/fetcher/SingleThreadFetcherManager.java
 
b/flink-connectors/flink-connector-base/src/main/java/org/apache/flink/connector/base/source/reader/fetcher/SingleThreadFetcherManager.java
index 91d0d4d..2abed2e 100644
--- 
a/flink-connectors/flink-connector-base/src/main/java/org/apache/flink/connector/base/source/reader/fetcher/SingleThreadFetcherManager.java
+++ 
b/flink-connectors/flink-connector-base/src/main/java/org/apache/flink/connector/base/source/reader/fetcher/SingleThreadFetcherManager.java
@@ -18,13 +18,16 @@
 
 package org.apache.flink.connector.base.source.reader.fetcher;
 
+import org.apache.flink.annotation.VisibleForTesting;
 import org.apache.flink.api.connector.source.SourceSplit;
 import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds;
 import org.apache.flink.connector.base.source.reader.SourceReaderBase;
 import org.apache.flink.connector.base.source.reader.splitreader.SplitReader;
 import 
org.apache.flink.connector.base.source.reader.synchronization.FutureCompletingBlockingQueue;
 
+import java.util.Collection;
 import java.util.List;
+import java.util.function.Consumer;
 import java.util.function.Supplier;
 
 /**
@@ -54,6 +57,24 @@ public class SingleThreadFetcherManager
 super(elementsQueue, splitReaderSupplier);
 }
 
+/**
+ * Creates a new SplitFetcherManager with a single I/O threads.
+ *
+ * @param elementsQueue The queue that is used to hand over data from the 
I/O thread (the
+ * fetchers) to the reader (which emits the records and book-keeps the 
state. This must be
+ * the same queue instance that is also passed to the {@link 
SourceReaderBase}.
+ * @param splitReaderSupplier The factory for the split reader that 
connects to the source
+ * system.
+ * @param splitFinishedHook Hook for handling finished splits in split 
fetchers
+ */
+@VisibleForTesting
+public SingleThreadFetcherManager(
+FutureCompletingBlockingQueue> 
elementsQueue,
+Supplier> splitReaderSupplier,
+Consumer> splitFinishedHook) {
+super(elementsQueue, splitReade

[flink] branch master updated (125cb70 -> d4c483f)

2021-09-08 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git.


from 125cb70  [FLINK-24196][docs-zh] Translate "EXPLAIN Statements" page of 
"SQL" into Chinese (#17195)
 add d4c483f  [FLINK-24059][Connectors/Common][test] Allow 
SourceReaderTestBase.NUM_SPLITS to be overridden (#17064)

No new revisions were added by this update.

Summary of changes:
 .../kafka/source/reader/KafkaSourceReaderTest.java | 15 +---
 .../source/reader/SourceReaderTestBase.java| 43 +-
 2 files changed, 36 insertions(+), 22 deletions(-)


[flink] branch master updated (5b47a81 -> fe17ca6)

2021-09-06 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git.


from 5b47a81  [FLINK-24090][docs] Added Troubleshooting section with 
ignoring in-flight data explanation into unaligned checkpoints page
 add 754b744  [hotfix][connector/common] Expose splitFinishedHook in 
SplitFetcher for fine-grained testing
 add fe17ca6  [FLINK-23773][connector/kafka] Mark empty splits as finished 
to cleanup states in SplitFetcher

No new revisions were added by this update.

Summary of changes:
 .../SingleThreadMultiplexSourceReaderBase.java |  3 +-
 .../reader/fetcher/SingleThreadFetcherManager.java | 21 +++
 .../base/source/reader/fetcher/SplitFetcher.java   |  6 +-
 .../source/reader/fetcher/SplitFetcherManager.java | 26 -
 .../source/reader/fetcher/SplitFetcherTest.java|  6 +-
 .../flink/connector/kafka/source/KafkaSource.java  | 14 -
 .../source/reader/KafkaPartitionSplitReader.java   | 31 +--
 .../kafka/source/reader/KafkaSourceReader.java | 10 +---
 .../reader/fetcher/KafkaSourceFetcherManager.java  |  8 ++-
 .../kafka/source/KafkaSourceTestUtils.java | 47 
 .../reader/KafkaPartitionSplitReaderTest.java  | 33 +++
 .../kafka/source/reader/KafkaSourceReaderTest.java | 64 +++---
 12 files changed, 237 insertions(+), 32 deletions(-)
 create mode 100644 
flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/KafkaSourceTestUtils.java


[flink] branch release-1.14 updated: [FLINK-23971][tests] fix connector testing framework error when comparing records in different splits

2021-09-01 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch release-1.14
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/release-1.14 by this push:
 new 267b863  [FLINK-23971][tests] fix connector testing framework error 
when compare records in different splits
267b863 is described below

commit 267b863683b23b8b3df29bee55ac58a25ca1fcd0
Author: Hang Ruan 
AuthorDate: Tue Aug 31 15:53:28 2021 +0800

[FLINK-23971][tests] fix connector testing framework error when compare 
records in different splits

Add split index parameter to generate test data, making sure T.equals(object)
returns false when records come from different splits.
---
 .../testutils/KafkaSingleTopicExternalContext.java  |  8 
 .../pulsar/testutils/PulsarTestContext.java |  4 ++--
 .../cases/MultipleTopicConsumingContext.java|  4 ++--
 .../cases/SingleTopicConsumingContext.java  |  4 ++--
 .../test/common/external/ExternalContext.java   |  6 +-
 .../test/common/testsuites/SourceTestSuiteBase.java | 21 -
 6 files changed, 27 insertions(+), 20 deletions(-)

diff --git 
a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/testutils/KafkaSingleTopicExternalContext.java
 
b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/testutils/KafkaSingleTopicExternalContext.java
index 81240cf..ad5e31d 100644
--- 
a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/testutils/KafkaSingleTopicExternalContext.java
+++ 
b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/testutils/KafkaSingleTopicExternalContext.java
@@ -155,7 +155,7 @@ public class KafkaSingleTopicExternalContext implements 
ExternalContext
 }
 
 @Override
-public Collection<String> generateTestData(long seed) {
+public Collection<String> generateTestData(int splitIndex, long seed) {
 Random random = new Random(seed);
 List<String> randomStringRecords = new ArrayList<>();
 int recordNum =
@@ -163,15 +163,15 @@ public class KafkaSingleTopicExternalContext implements 
ExternalContext
 + NUM_RECORDS_LOWER_BOUND;
 for (int i = 0; i < recordNum; i++) {
 int stringLength = random.nextInt(50) + 1;
-randomStringRecords.add(generateRandomString(stringLength, 
random));
+randomStringRecords.add(generateRandomString(splitIndex, 
stringLength, random));
 }
 return randomStringRecords;
 }
 
-private String generateRandomString(int length, Random random) {
+private String generateRandomString(int splitIndex, int length, Random 
random) {
 String alphaNumericString =
 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + 
"0123456789";
-StringBuilder sb = new StringBuilder();
+StringBuilder sb = new StringBuilder().append(splitIndex).append("-");
 for (int i = 0; i < length; ++i) {
 sb.append(alphaNumericString.charAt(random.nextInt(alphaNumericString.length())));
 }
diff --git 
a/flink-connectors/flink-connector-pulsar/src/test/java/org/apache/flink/connector/pulsar/testutils/PulsarTestContext.java
 
b/flink-connectors/flink-connector-pulsar/src/test/java/org/apache/flink/connector/pulsar/testutils/PulsarTestContext.java
index 6733439..a80d721 100644
--- 
a/flink-connectors/flink-connector-pulsar/src/test/java/org/apache/flink/connector/pulsar/testutils/PulsarTestContext.java
+++ 
b/flink-connectors/flink-connector-pulsar/src/test/java/org/apache/flink/connector/pulsar/testutils/PulsarTestContext.java
@@ -43,7 +43,7 @@ public abstract class PulsarTestContext implements 
ExternalContext {
 
 // Helper methods for generating data.
 
-protected List<String> generateStringTestData(long seed) {
+protected List<String> generateStringTestData(int splitIndex, long seed) {
 Random random = new Random(seed);
 int recordNum =
 random.nextInt(NUM_RECORDS_UPPER_BOUND - 
NUM_RECORDS_LOWER_BOUND)
@@ -52,7 +52,7 @@ public abstract class PulsarTestContext implements 
ExternalContext {
 
 for (int i = 0; i < recordNum; i++) {
 int stringLength = random.nextInt(50) + 1;
-records.add(randomAlphanumeric(stringLength));
+records.add(splitIndex + "-" + randomAlphanumeric(stringLength));
 }
 
 return records;
diff --git 
a/flink-connectors/flink-connector-pulsar/src/test/java/org/apache/flink/connector/pulsar/testutils/cases/MultipleTopicConsumingContext.java
 
b/flink-connectors/flink-connector-pulsar/src/test/java/org/apache/flink/connector/pulsar/testutils/cases/MultipleTopicConsumingContext.java
index 60a0bfba..7ce676c 100644
--- 
a/flink-connector
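Editor's note: the invariant the fix relies on, in two lines. The split-index
prefix makes records from different splits unequal even when the per-split
random generators happen to produce the same suffix.

```java
// Same random suffix, different split prefix => never equal across splits.
String fromSplit0 = 0 + "-" + "aB3x";  // what generateRandomString(0, ...) could yield
String fromSplit1 = 1 + "-" + "aB3x";  // same suffix from an identically seeded split
assert !fromSplit0.equals(fromSplit1);
```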

[flink] branch master updated: [FLINK-23971][tests] fix connector testing framework error when compare records in different splits

2021-09-01 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/master by this push:
 new ad052cc  [FLINK-23971][tests] fix connector testing framework error 
when compare records in different splits
ad052cc is described below

commit ad052cc056c7d6e63d8356dbd22d6a98b54743c3
Author: Hang Ruan 
AuthorDate: Tue Aug 31 15:53:28 2021 +0800

[FLINK-23971][tests] fix connector testing framework error when compare 
records in different splits

Add split index parameter to generate test data, making sure T.equals(object)
returns false when records come from different splits.
---
 .../testutils/KafkaSingleTopicExternalContext.java  |  8 
 .../pulsar/testutils/PulsarTestContext.java |  4 ++--
 .../cases/MultipleTopicConsumingContext.java|  4 ++--
 .../cases/SingleTopicConsumingContext.java  |  4 ++--
 .../test/common/external/ExternalContext.java   |  6 +-
 .../test/common/testsuites/SourceTestSuiteBase.java | 21 -
 6 files changed, 27 insertions(+), 20 deletions(-)

diff --git 
a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/testutils/KafkaSingleTopicExternalContext.java
 
b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/testutils/KafkaSingleTopicExternalContext.java
index 81240cf..ad5e31d 100644
--- 
a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/testutils/KafkaSingleTopicExternalContext.java
+++ 
b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/testutils/KafkaSingleTopicExternalContext.java
@@ -155,7 +155,7 @@ public class KafkaSingleTopicExternalContext implements 
ExternalContext
 }
 
 @Override
-public Collection<String> generateTestData(long seed) {
+public Collection<String> generateTestData(int splitIndex, long seed) {
 Random random = new Random(seed);
 List<String> randomStringRecords = new ArrayList<>();
 int recordNum =
@@ -163,15 +163,15 @@ public class KafkaSingleTopicExternalContext implements 
ExternalContext
 + NUM_RECORDS_LOWER_BOUND;
 for (int i = 0; i < recordNum; i++) {
 int stringLength = random.nextInt(50) + 1;
-randomStringRecords.add(generateRandomString(stringLength, 
random));
+randomStringRecords.add(generateRandomString(splitIndex, 
stringLength, random));
 }
 return randomStringRecords;
 }
 
-private String generateRandomString(int length, Random random) {
+private String generateRandomString(int splitIndex, int length, Random 
random) {
 String alphaNumericString =
 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + 
"0123456789";
-StringBuilder sb = new StringBuilder();
+StringBuilder sb = new StringBuilder().append(splitIndex).append("-");
 for (int i = 0; i < length; ++i) {
 sb.append(alphaNumericString.charAt(random.nextInt(alphaNumericString.length())));
 }
diff --git 
a/flink-connectors/flink-connector-pulsar/src/test/java/org/apache/flink/connector/pulsar/testutils/PulsarTestContext.java
 
b/flink-connectors/flink-connector-pulsar/src/test/java/org/apache/flink/connector/pulsar/testutils/PulsarTestContext.java
index 6733439..a80d721 100644
--- 
a/flink-connectors/flink-connector-pulsar/src/test/java/org/apache/flink/connector/pulsar/testutils/PulsarTestContext.java
+++ 
b/flink-connectors/flink-connector-pulsar/src/test/java/org/apache/flink/connector/pulsar/testutils/PulsarTestContext.java
@@ -43,7 +43,7 @@ public abstract class PulsarTestContext implements 
ExternalContext {
 
 // Helper methods for generating data.
 
-protected List<String> generateStringTestData(long seed) {
+protected List<String> generateStringTestData(int splitIndex, long seed) {
 Random random = new Random(seed);
 int recordNum =
 random.nextInt(NUM_RECORDS_UPPER_BOUND - 
NUM_RECORDS_LOWER_BOUND)
@@ -52,7 +52,7 @@ public abstract class PulsarTestContext implements 
ExternalContext {
 
 for (int i = 0; i < recordNum; i++) {
 int stringLength = random.nextInt(50) + 1;
-records.add(randomAlphanumeric(stringLength));
+records.add(splitIndex + "-" + randomAlphanumeric(stringLength));
 }
 
 return records;
diff --git 
a/flink-connectors/flink-connector-pulsar/src/test/java/org/apache/flink/connector/pulsar/testutils/cases/MultipleTopicConsumingContext.java
 
b/flink-connectors/flink-connector-pulsar/src/test/java/org/apache/flink/connector/pulsar/testutils/cases/MultipleTopicConsumingContext.java
index 60a0bfba..7ce676c 100644
--- 
a/flink-connectors/flink

[flink] branch release-1.13 updated: [FLINK-23686][connector/kafka] Increase counter "commitsSucceeded" per commit instead of per partition

2021-08-29 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch release-1.13
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/release-1.13 by this push:
 new 5ce61a3  [FLINK-23686][connector/kafka] Increase counter 
"commitsSucceeded" per commit instead of per partition
5ce61a3 is described below

commit 5ce61a31ff8a184ce3f8457471ffc6f5f4439b5d
Author: Qingsheng Ren 
AuthorDate: Sat Aug 14 19:03:24 2021 +0800

[FLINK-23686][connector/kafka] Increase counter "commitsSucceeded" per 
commit instead of per partition
---
 .../source/metrics/KafkaSourceReaderMetrics.java   |  6 -
 .../kafka/source/reader/KafkaSourceReader.java |  1 +
 .../metrics/KafkaSourceReaderMetricsTest.java  | 12 +-
 .../kafka/source/reader/KafkaSourceReaderTest.java | 27 ++
 4 files changed, 34 insertions(+), 12 deletions(-)

diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetrics.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetrics.java
index df1abe5..356409f 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetrics.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetrics.java
@@ -137,10 +137,14 @@ public class KafkaSourceReaderMetrics {
  */
 public void recordCommittedOffset(TopicPartition tp, long offset) {
 checkTopicPartitionTracked(tp);
-commitsSucceeded.inc();
 offsets.get(tp).committedOffset = offset;
 }
 
+/** Mark a successful commit. */
+public void recordSucceededCommit() {
+commitsSucceeded.inc();
+}
+
 /** Mark a failure commit. */
 public void recordFailedCommit() {
 commitsFailed.inc();
diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java
index 3a00be5..287dadf 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java
@@ -130,6 +130,7 @@ public class KafkaSourceReader
 LOG.debug(
 "Successfully committed offsets for 
checkpoint {}",
 checkpointId);
+
kafkaSourceReaderMetrics.recordSucceededCommit();
 // If the finished topic partition has been 
committed, we remove it
 // from the offsets of the finished splits map.
 Map<TopicPartition, OffsetAndMetadata> committedPartitions =
diff --git 
a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetricsTest.java
 
b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetricsTest.java
index c5dfdf3..1e66747 100644
--- 
a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetricsTest.java
+++ 
b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetricsTest.java
@@ -82,7 +82,17 @@ public class KafkaSourceReaderMetricsTest {
 assertCommittedOffset(BAR_1, 15513L, metricListener);
 
 assertEquals(
-4L,
+0L,
+metricListener
+.getCounter(
+
KafkaSourceReaderMetrics.KAFKA_SOURCE_READER_METRIC_GROUP,
+
KafkaSourceReaderMetrics.COMMITS_SUCCEEDED_METRIC_COUNTER)
+.getCount());
+
+kafkaSourceReaderMetrics.recordSucceededCommit();
+
+assertEquals(
+1L,
 metricListener
 .getCounter(
 
KafkaSourceReaderMetrics.KAFKA_SOURCE_READER_METRIC_GROUP,
diff --git 
a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java
 
b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java
index 18a024b..53f61e4 100644
--- 
a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java

[flink] branch master updated (5289b0e -> 83b9ee8)

2021-08-26 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git.


from 5289b0e  [FLINK-23818][python][docs] Add documentation about tgz files 
for python archives
 add 83b9ee8  [FLINK-22702][tests] Add test data supplier which provide 
null timestamp field to kafka connector tests

No new revisions were added by this update.

Summary of changes:
 .../kafka/source/split/KafkaPartitionSplit.java|  4 +--
 .../connector/kafka/source/KafkaSourceITCase.java  | 18 +++
 .../source/enumerator/KafkaEnumeratorTest.java |  4 +--
 .../initializer/OffsetsInitializerTest.java|  4 +--
 .../reader/KafkaPartitionSplitReaderTest.java  |  4 +--
 .../kafka/source/testutils/KafkaSourceTestEnv.java | 37 --
 6 files changed, 55 insertions(+), 16 deletions(-)


[flink] branch master updated: [FLINK-23686][connector/kafka] Increase counter "commitsSucceeded" per commit instead of per partition

2021-08-18 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/master by this push:
 new 62931a1  [FLINK-23686][connector/kafka] Increase counter 
"commitsSucceeded" per commit instead of per partition
62931a1 is described below

commit 62931a1665e6a6976d088ed49375f9fdf00229d9
Author: Qingsheng Ren 
AuthorDate: Sat Aug 14 19:03:24 2021 +0800

[FLINK-23686][connector/kafka] Increase counter "commitsSucceeded" per 
commit instead of per partition
---
 .../source/metrics/KafkaSourceReaderMetrics.java   |  6 -
 .../kafka/source/reader/KafkaSourceReader.java |  1 +
 .../metrics/KafkaSourceReaderMetricsTest.java  | 12 +-
 .../kafka/source/reader/KafkaSourceReaderTest.java | 27 ++
 4 files changed, 34 insertions(+), 12 deletions(-)

diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetrics.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetrics.java
index df1abe5..356409f 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetrics.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetrics.java
@@ -137,10 +137,14 @@ public class KafkaSourceReaderMetrics {
  */
 public void recordCommittedOffset(TopicPartition tp, long offset) {
 checkTopicPartitionTracked(tp);
-commitsSucceeded.inc();
 offsets.get(tp).committedOffset = offset;
 }
 
+/** Mark a successful commit. */
+public void recordSucceededCommit() {
+commitsSucceeded.inc();
+}
+
 /** Mark a failure commit. */
 public void recordFailedCommit() {
 commitsFailed.inc();
diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java
index 3a00be5..287dadf 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java
@@ -130,6 +130,7 @@ public class KafkaSourceReader
 LOG.debug(
 "Successfully committed offsets for 
checkpoint {}",
 checkpointId);
+
kafkaSourceReaderMetrics.recordSucceededCommit();
 // If the finished topic partition has been 
committed, we remove it
 // from the offsets of the finished splits map.
 Map<TopicPartition, OffsetAndMetadata> committedPartitions =
diff --git 
a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetricsTest.java
 
b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetricsTest.java
index b16a835..c7df9b6 100644
--- 
a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetricsTest.java
+++ 
b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetricsTest.java
@@ -83,7 +83,17 @@ public class KafkaSourceReaderMetricsTest {
 assertCommittedOffset(BAR_1, 15513L, metricListener);
 
 assertEquals(
-4L,
+0L,
+metricListener
+.getCounter(
+
KafkaSourceReaderMetrics.KAFKA_SOURCE_READER_METRIC_GROUP,
+
KafkaSourceReaderMetrics.COMMITS_SUCCEEDED_METRIC_COUNTER)
+.getCount());
+
+kafkaSourceReaderMetrics.recordSucceededCommit();
+
+assertEquals(
+1L,
 metricListener
 .getCounter(
 
KafkaSourceReaderMetrics.KAFKA_SOURCE_READER_METRIC_GROUP,
diff --git 
a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java
 
b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java
index 96f0bed..16e3d6a 100644
--- 
a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java
+++ 
b/flink-co

[flink] 03/03: [FLINK-22766][connector/kafka] Report offsets and Kafka consumer metrics in Flink metric group

2021-07-12 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch release-1.13
in repository https://gitbox.apache.org/repos/asf/flink.git

commit 2c455f324b9ec7ef053253cf4904413b1e5f7a98
Author: Qingsheng Ren 
AuthorDate: Tue Jun 8 11:57:52 2021 +0800

[FLINK-22766][connector/kafka] Report offsets and Kafka consumer metrics in 
Flink metric group

(cherry picked from commit b094a932845db5539fc07b032d49d0bcefd15df2)
---
 docs/content/docs/connectors/datastream/kafka.md   |  36 
 .../flink/connector/kafka/source/KafkaSource.java  |  11 +-
 .../connector/kafka/source/KafkaSourceOptions.java |   9 +-
 .../source/metrics/KafkaSourceReaderMetrics.java   | 183 +
 .../source/reader/KafkaPartitionSplitReader.java   |  33 +++-
 .../kafka/source/reader/KafkaSourceReader.java |  11 +-
 .../metrics/KafkaSourceReaderMetricsTest.java  | 166 +++
 .../reader/KafkaPartitionSplitReaderTest.java  |   8 +-
 .../kafka/source/reader/KafkaSourceReaderTest.java | 122 +-
 .../source/reader/TestingReaderContext.java|   7 +-
 10 files changed, 575 insertions(+), 11 deletions(-)

diff --git a/docs/content/docs/connectors/datastream/kafka.md 
b/docs/content/docs/connectors/datastream/kafka.md
index 1f21e7d..d540004 100644
--- a/docs/content/docs/connectors/datastream/kafka.md
+++ b/docs/content/docs/connectors/datastream/kafka.md
@@ -153,6 +153,8 @@ KafkaSource has following options for configuration:
- ```partition.discovery.interval.ms``` defines the interval in milliseconds for Kafka source
   to discover new partitions. See Dynamic Partition Discovery
   below for more details.
+- ```register.consumer.metrics``` specifies whether to register metrics of 
KafkaConsumer in Flink
+metric group
 
 For configurations of KafkaConsumer, you can refer to
 <a href="http://kafka.apache.org/documentation/#consumerconfigs">Apache Kafka documentation</a>
@@ -210,6 +212,40 @@ the properties of Kafka consumer.
 Note that Kafka source does **NOT** rely on committed offsets for fault 
tolerance. Committing offset
 is only for exposing the progress of consumer and consuming group for 
monitoring.
 
+### Monitoring
+Kafka source exposes metrics in Flink's metric group for monitoring and 
diagnosing.
+#### Scope of Metric
+All metrics of Kafka source reader are registered under group ```KafkaSourceReader```, which is a
+child group of operator metric group. Metrics related to a specific topic partition will be registered
+in the group ```KafkaSourceReader.topic.<topic_name>.partition.<partition_id>```.
+
+For example, current consuming offset of topic "my-topic" and partition 1 will be reported in metric:
+```<some_parent_groups>.operator.KafkaSourceReader.topic.my-topic.partition.1.currentOffset```,
+
+and number of successful commits will be reported in metric:
+```<some_parent_groups>.operator.KafkaSourceReader.commitsSucceeded```.
+
+#### List of Metrics
+
+| Metric Name      | Description                                      | Scope             |
+|:----------------:|:------------------------------------------------:|:-----------------:|
+| currentOffset    | Current consuming offset of the topic partition  | TopicPartition    |
+| committedOffset  | Committed offset of the topic partition          | TopicPartition    |
+| commitsSucceeded | Number of successful commits                     | KafkaSourceReader |
+| commitsFailed    | Number of failed commits                         | KafkaSourceReader |
+
+#### Kafka Consumer Metrics
+All metrics of Kafka consumer are also registered under group ```KafkaSourceReader.KafkaConsumer```.
+For example, Kafka consumer metric "records-consumed-total" will be reported in metric:
+```<some_parent_groups>.operator.KafkaSourceReader.KafkaConsumer.records-consumed-total```.
+
+You can configure whether to register the Kafka consumer's metrics with the option
+```register.consumer.metrics```. This option is set to true by default.
+
+For metrics of Kafka consumer, you can refer to
+<a href="http://kafka.apache.org/documentation/#consumer_monitoring">Apache Kafka Documentation</a>
+for more details.
+
 ### Behind the Scene
 {{< hint info >}}
 If you are interested in how Kafka source works under the design of new data 
source API, you may
diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java
index dd1c3bf..35fd954 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java
@@ -36,6 +36,7 @@ import 
org.apache.flink.connector.kafka.source.enumerator.KafkaSourceEnumStateSe
 import 
org.apache.flink.connector.ka
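Editor's note: the ```register.consumer.metrics``` option described above is a
regular Kafka source property, so it can be set through the builder's property
pass-through. A minimal sketch, assuming the ```KafkaSourceOptions.REGISTER_CONSUMER_METRICS```
option key that this patch adds to ```KafkaSourceOptions.java```:

```java
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.kafka.source.KafkaSource;
import org.apache.flink.connector.kafka.source.KafkaSourceOptions;
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;

public class DisableConsumerMetricsSketch {
    public static KafkaSource<String> build() {
        // Keep the KafkaSourceReader metrics, but skip mirroring the
        // KafkaConsumer's own metrics into the Flink metric group.
        return KafkaSource.<String>builder()
                .setBootstrapServers("localhost:9092")
                .setTopics("my-topic")
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .setProperty(KafkaSourceOptions.REGISTER_CONSUMER_METRICS.key(), "false")
                .build();
    }
}
```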

[flink] 02/03: [hotfix][testutil] Add test utilization for listening metric registration

2021-07-12 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch release-1.13
in repository https://gitbox.apache.org/repos/asf/flink.git

commit 1e8619e0d5f9d82d987af41ba897370920cb9310
Author: Qingsheng Ren 
AuthorDate: Tue Jun 29 11:53:50 2021 +0800

[hotfix][testutil] Add test utilization for listening metric registration

(cherry picked from commit 8bb629460ae45b841034be660aaace3851f141fe)
---
 .../flink/metrics/testutils/MetricListener.java| 130 
 .../flink/metric/testutils/MetricListenerTest.java | 131 +
 2 files changed, 261 insertions(+)

diff --git 
a/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/metrics/testutils/MetricListener.java
 
b/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/metrics/testutils/MetricListener.java
new file mode 100644
index 000..cbada82
--- /dev/null
+++ 
b/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/metrics/testutils/MetricListener.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.metrics.testutils;
+
+import org.apache.flink.metrics.Counter;
+import org.apache.flink.metrics.Gauge;
+import org.apache.flink.metrics.Histogram;
+import org.apache.flink.metrics.Meter;
+import org.apache.flink.metrics.Metric;
+import org.apache.flink.metrics.MetricGroup;
+import org.apache.flink.runtime.metrics.groups.GenericMetricGroup;
+import org.apache.flink.runtime.metrics.util.TestingMetricRegistry;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * A MetricListener listens to metric and group registration under the provided
+ * root metric group, and stores them in an internal HashMap for fetching.
+ */
+public class MetricListener {
+
+// Constants
+public static final String DELIMITER = ".";
+public static final String ROOT_METRIC_GROUP_NAME = "rootMetricGroup";
+
+// Root metric group
+private final MetricGroup rootMetricGroup;
+
+// Map for storing registered metrics
+private final Map<String, Metric> metrics = new HashMap<>();
+
+public MetricListener() {
+TestingMetricRegistry registry =
+TestingMetricRegistry.builder()
+.setDelimiter(DELIMITER.charAt(0))
+.setRegisterConsumer(
+(metric, name, group) ->
+
this.metrics.put(group.getMetricIdentifier(name), metric))
+.build();
+
+this.rootMetricGroup = new GenericMetricGroup(registry, null, 
ROOT_METRIC_GROUP_NAME);
+}
+
+/**
+ * Get the root metric group of this listener. Note that only metrics and groups registered
+ * under this group will be listened to.
+ *
+ * @return Root metric group
+ */
+public MetricGroup getMetricGroup() {
+return this.rootMetricGroup;
+}
+
+/**
+ * Get registered {@link Metric} with identifier relative to the root 
metric group.
+ *
+ * For example, identifier of metric "myMetric" registered in group 
"myGroup" under root
+ * metric group can be reached by identifier ("myGroup", "myMetric")
+ *
+ * @param identifier identifier relative to the root metric group
+ * @return Registered metric
+ */
+public <T extends Metric> T getMetric(Class<T> metricType, String... identifier) {
+String actualIdentifier =
+ROOT_METRIC_GROUP_NAME + DELIMITER + String.join(DELIMITER, 
identifier);
+if (!metrics.containsKey(actualIdentifier)) {
+throw new IllegalArgumentException(
+String.format("Metric '%s' is not registered", 
actualIdentifier));
+}
+return metricType.cast(metrics.get(actualIdentifier));
+}
+
+/**
+ * Get registered {@link Meter} with identifier relative to the root 
metric group.
+ *
+ * @param identifier identifier relative to the root metric group
+ * @return Registered meter
+ */
+public Meter getMeter(String... identifier) {
+return

[flink] 01/03: [FLINK-22722][docs/kafka] Add documentation for Kafka new source (#15974)

2021-07-12 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch release-1.13
in repository https://gitbox.apache.org/repos/asf/flink.git

commit ef21d506bebee0ac6466d4abe2f844ef7c15cde5
Author: Qingsheng Ren 
AuthorDate: Tue Jun 1 08:37:57 2021 +0800

[FLINK-22722][docs/kafka] Add documentation for Kafka new source (#15974)

(cherry picked from commit b582991b8b2b8dadb89e71d5002c4a9cc2055e34)
---
 docs/content/docs/connectors/datastream/kafka.md | 215 ++-
 1 file changed, 212 insertions(+), 3 deletions(-)

diff --git a/docs/content/docs/connectors/datastream/kafka.md 
b/docs/content/docs/connectors/datastream/kafka.md
index 192d185..1f21e7d 100644
--- a/docs/content/docs/connectors/datastream/kafka.md
+++ b/docs/content/docs/connectors/datastream/kafka.md
@@ -38,10 +38,219 @@ For details on Kafka compatibility, please refer to the 
official [Kafka document
 
 {{< artifact flink-connector-kafka withScalaVersion >}}
 
+If you are using Kafka source, ```flink-connector-base``` is also required as a dependency:
+
+{{< artifact flink-connector-base >}}
+
 Flink's streaming connectors are not currently part of the binary distribution.
 See how to link with them for cluster execution [here]({{< ref 
"docs/dev/datastream/project-configuration" >}}).
 
-## Kafka Consumer
+## Kafka Source
+{{< hint info >}}
+This part describes the Kafka source based on the new 
+[data source]({{< ref "docs/dev/datastream/sources.md" >}}) API.
+{{< /hint >}}
+
+### Usage
+Kafka source provides a builder class for constructing instances of KafkaSource. The code snippet
+below shows how to build a KafkaSource to consume messages from the earliest 
offset of topic
+"input-topic", with consumer group "my-group" and deserialize only the value 
of message as string.
+```java
+KafkaSource<String> source = KafkaSource.<String>builder()
+.setBootstrapServers(brokers)
+.setTopics("input-topic")
+.setGroupId("my-group")
+.setStartingOffsets(OffsetsInitializer.earliest())
+.setValueOnlyDeserializer(new SimpleStringSchema())
+.build();
+
+env.fromSource(source, WatermarkStrategy.noWatermarks(), "Kafka Source");
+```
+The following properties are **required** for building a KafkaSource:
+- Bootstrap servers, configured by ```setBootstrapServers(String)```
+- Consumer group ID, configured by ```setGroupId(String)```
+- Topics / partitions to subscribe, see the following
+  Topic-partition subscription for 
more details.
+- Deserializer to parse Kafka messages, see the following
+  Deserializer for more details.
+
+### Topic-partition Subscription
+Kafka source provides 3 ways of topic-partition subscription:
+- Topic list, subscribing messages from all partitions in a list of topics. 
For example:
+  ```java
+  KafkaSource.builder().setTopics("topic-a", "topic-b")
+  ```
+- Topic pattern, subscribing messages from all topics whose name matches the 
provided regular
+  expression. For example:
+  ```java
+  KafkaSource.builder().setTopicPattern("topic.*")
+  ```
+- Partition set, subscribing partitions in the provided partition set. For 
example:
+  ```java
+  final HashSet<TopicPartition> partitionSet = new HashSet<>(Arrays.asList(
+  new TopicPartition("topic-a", 0),// Partition 0 of topic 
"topic-a"
+  new TopicPartition("topic-b", 5)));  // Partition 5 of topic 
"topic-b"
+  KafkaSource.builder().setPartitions(partitionSet)
+  ```
+### Deserializer
+A deserializer is required for parsing Kafka messages. Deserializer 
(Deserialization schema) can be
+configured by ```setDeserializer(KafkaRecordDeserializationSchema)```, where
+```KafkaRecordDeserializationSchema``` defines how to deserialize a Kafka 
```ConsumerRecord```.
+
+If only the value of Kafka ```ConsumerRecord``` is needed, you can use
+```setValueOnlyDeserializer(DeserializationSchema)``` in the builder, where
+```DeserializationSchema``` defines how to deserialize binaries of Kafka 
message value.
+
+You can also use a <a href="https://kafka.apache.org/24/javadoc/org/apache/kafka/common/serialization/Deserializer.html">```Kafka Deserializer```</a>
+for deserializing Kafka message value. For example using 
```StringDeserializer``` for deserializing
+Kafka message value as string:
+```java
+import org.apache.kafka.common.serialization.StringDeserializer;
+
+KafkaSource.builder()
+.setDeserializer(KafkaRecordDeserializationSchema.valueOnly(StringDeserializer.class));
+```
+
+### Starting Offset
+Kafka source is able to consume messages starting from different offsets by 
specifying
+```OffsetsInitializer```. Built-in initializers include:
+
+```java
+KafkaSource.builder()
+// Start from committed offset of the consuming group, without reset strategy
+.setStartingOffsets(OffsetsInitializer.committedOffsets())
+// Start from committed offset, also use EARLIEST as reset strategy if committed offset doesn't exist
+.setStartingOffsets(OffsetsInitializer.committedOffsets(OffsetResetStrategy.EARLIEST))
+// Start from the first record whose timestamp is greater than or equal to a given timestamp
+.setStartingOffsets(OffsetsInitializer.timestamp(1592323200L))
+// Start from earliest offset
+.setStartingOffsets(OffsetsInitializer.earliest())
+// Start from latest offset
+.setStartingOffsets(OffsetsInitializer.latest())
+```

[flink] branch release-1.13 updated (4d86534 -> 2c455f3)

2021-07-12 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch release-1.13
in repository https://gitbox.apache.org/repos/asf/flink.git.


from 4d86534  [FLINK-23359][test] Fix the number of available slots in 
testResourceCanBeAllocatedForDifferentJobAfterFree
 new ef21d50  [FLINK-22722][docs/kafka] Add documentation for Kafka new 
source (#15974)
 new 1e8619e  [hotfix][testutil] Add test utilization for listening metric 
registration
 new 2c455f3  [FLINK-22766][connector/kafka] Report offsets and Kafka 
consumer metrics in Flink metric group

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 docs/content/docs/connectors/datastream/kafka.md   | 251 -
 .../flink/connector/kafka/source/KafkaSource.java  |  11 +-
 .../connector/kafka/source/KafkaSourceOptions.java |   9 +-
 .../source/metrics/KafkaSourceReaderMetrics.java   | 183 +++
 .../source/reader/KafkaPartitionSplitReader.java   |  33 ++-
 .../kafka/source/reader/KafkaSourceReader.java |  11 +-
 .../metrics/KafkaSourceReaderMetricsTest.java  | 166 ++
 .../reader/KafkaPartitionSplitReaderTest.java  |   8 +-
 .../kafka/source/reader/KafkaSourceReaderTest.java | 122 +-
 .../source/reader/TestingReaderContext.java|   7 +-
 .../flink/metrics/testutils/MetricListener.java| 130 +++
 .../flink/metric/testutils/MetricListenerTest.java | 131 +++
 12 files changed, 1048 insertions(+), 14 deletions(-)
 create mode 100644 
flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetrics.java
 create mode 100644 
flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetricsTest.java
 create mode 100644 
flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/metrics/testutils/MetricListener.java
 create mode 100644 
flink-test-utils-parent/flink-test-utils/src/test/java/org/apache/flink/metric/testutils/MetricListenerTest.java


[flink] 02/02: [FLINK-22766][connector/kafka] Report offsets and Kafka consumer metrics in Flink metric group

2021-07-05 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git

commit b094a932845db5539fc07b032d49d0bcefd15df2
Author: Qingsheng Ren 
AuthorDate: Tue Jun 8 11:57:52 2021 +0800

[FLINK-22766][connector/kafka] Report offsets and Kafka consumer metrics in 
Flink metric group
---
 docs/content/docs/connectors/datastream/kafka.md   |  36 
 .../flink/connector/kafka/source/KafkaSource.java  |  11 +-
 .../connector/kafka/source/KafkaSourceOptions.java |   9 +-
 .../source/metrics/KafkaSourceReaderMetrics.java   | 183 +
 .../source/reader/KafkaPartitionSplitReader.java   |  33 +++-
 .../kafka/source/reader/KafkaSourceReader.java |  11 +-
 .../metrics/KafkaSourceReaderMetricsTest.java  | 156 ++
 .../reader/KafkaPartitionSplitReaderTest.java  |   8 +-
 .../kafka/source/reader/KafkaSourceReaderTest.java | 122 +-
 .../source/reader/TestingReaderContext.java|   7 +-
 10 files changed, 565 insertions(+), 11 deletions(-)

diff --git a/docs/content/docs/connectors/datastream/kafka.md 
b/docs/content/docs/connectors/datastream/kafka.md
index c52a6f0..afed481 100644
--- a/docs/content/docs/connectors/datastream/kafka.md
+++ b/docs/content/docs/connectors/datastream/kafka.md
@@ -153,6 +153,8 @@ KafkaSource has following options for configuration:
- ```partition.discovery.interval.ms``` defines the interval in milliseconds for Kafka source
   to discover new partitions. See Dynamic Partition Discovery
   below for more details.
+- ```register.consumer.metrics``` specifies whether to register metrics of 
KafkaConsumer in Flink
+metric group
 
 For configurations of KafkaConsumer, you can refer to
 <a href="http://kafka.apache.org/documentation/#consumerconfigs">Apache Kafka documentation</a>
@@ -210,6 +212,40 @@ the properties of Kafka consumer.
 Note that Kafka source does **NOT** rely on committed offsets for fault 
tolerance. Committing offset
 is only for exposing the progress of consumer and consuming group for 
monitoring.
 
+### Monitoring
+Kafka source exposes metrics in Flink's metric group for monitoring and 
diagnosing.
+#### Scope of Metric
+All metrics of Kafka source reader are registered under group ```KafkaSourceReader```, which is a
+child group of operator metric group. Metrics related to a specific topic partition will be registered
+in the group ```KafkaSourceReader.topic.<topic_name>.partition.<partition_id>```.
+
+For example, current consuming offset of topic "my-topic" and partition 1 will be reported in metric:
+```<some_parent_groups>.operator.KafkaSourceReader.topic.my-topic.partition.1.currentOffset```,
+
+and number of successful commits will be reported in metric:
+```<some_parent_groups>.operator.KafkaSourceReader.commitsSucceeded```.
+
+#### List of Metrics
+
+| Metric Name      | Description                                      | Scope             |
+|:----------------:|:------------------------------------------------:|:-----------------:|
+| currentOffset    | Current consuming offset of the topic partition  | TopicPartition    |
+| committedOffset  | Committed offset of the topic partition          | TopicPartition    |
+| commitsSucceeded | Number of successful commits                     | KafkaSourceReader |
+| commitsFailed    | Number of failed commits                         | KafkaSourceReader |
+
+#### Kafka Consumer Metrics
+All metrics of Kafka consumer are also registered under group ```KafkaSourceReader.KafkaConsumer```.
+For example, Kafka consumer metric "records-consumed-total" will be reported in metric:
+```<some_parent_groups>.operator.KafkaSourceReader.KafkaConsumer.records-consumed-total```.
+
+You can configure whether to register the Kafka consumer's metrics with the option
+```register.consumer.metrics```. This option is set to true by default.
+
+For metrics of Kafka consumer, you can refer to
+<a href="http://kafka.apache.org/documentation/#consumer_monitoring">Apache Kafka Documentation</a>
+for more details.
+
 ### Behind the Scene
 {{< hint info >}}
 If you are interested in how Kafka source works under the design of new data 
source API, you may
diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java
index dd1c3bf..35fd954 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSource.java
@@ -36,6 +36,7 @@ import 
org.apache.flink.connector.kafka.source.enumerator.KafkaSourceEnumStateSe
 import 
org.apache.flink.connector.kafka.source.enumerator.KafkaSourceEnumerator;
 import 
org.apache.flink.connector.kafka.sourc

[flink] branch master updated (284f484 -> b094a93)

2021-07-05 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git.


from 284f484  [FLINK-23232][python] Use pickle.loads defined in pyflink to 
avoid race condition of the default pickle
 new 8bb6294  [hotfix][testutil] Add test utilization for listening metric 
registration
 new b094a93  [FLINK-22766][connector/kafka] Report offsets and Kafka 
consumer metrics in Flink metric group

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 docs/content/docs/connectors/datastream/kafka.md   |  36 
 .../flink/connector/kafka/source/KafkaSource.java  |  11 +-
 .../connector/kafka/source/KafkaSourceOptions.java |   9 +-
 .../source/metrics/KafkaSourceReaderMetrics.java   | 183 +
 .../source/reader/KafkaPartitionSplitReader.java   |  33 +++-
 .../kafka/source/reader/KafkaSourceReader.java |  11 +-
 .../metrics/KafkaSourceReaderMetricsTest.java  | 156 ++
 .../reader/KafkaPartitionSplitReaderTest.java  |   8 +-
 .../kafka/source/reader/KafkaSourceReaderTest.java | 122 +-
 .../source/reader/TestingReaderContext.java|   7 +-
 .../flink/metrics/testutils/MetricListener.java| 130 +++
 .../flink/metric/testutils/MetricListenerTest.java | 131 +++
 12 files changed, 826 insertions(+), 11 deletions(-)
 create mode 100644 
flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetrics.java
 create mode 100644 
flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/metrics/KafkaSourceReaderMetricsTest.java
 create mode 100644 
flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/metrics/testutils/MetricListener.java
 create mode 100644 
flink-test-utils-parent/flink-test-utils/src/test/java/org/apache/flink/metric/testutils/MetricListenerTest.java


[flink] 01/02: [hotfix][testutil] Add test utilization for listening metric registration

2021-07-05 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git

commit 8bb629460ae45b841034be660aaace3851f141fe
Author: Qingsheng Ren 
AuthorDate: Tue Jun 29 11:53:50 2021 +0800

[hotfix][testutil] Add test utilization for listening metric registration
---
 .../flink/metrics/testutils/MetricListener.java| 130 
 .../flink/metric/testutils/MetricListenerTest.java | 131 +
 2 files changed, 261 insertions(+)

diff --git 
a/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/metrics/testutils/MetricListener.java
 
b/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/metrics/testutils/MetricListener.java
new file mode 100644
index 000..cbada82
--- /dev/null
+++ 
b/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/metrics/testutils/MetricListener.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.metrics.testutils;
+
+import org.apache.flink.metrics.Counter;
+import org.apache.flink.metrics.Gauge;
+import org.apache.flink.metrics.Histogram;
+import org.apache.flink.metrics.Meter;
+import org.apache.flink.metrics.Metric;
+import org.apache.flink.metrics.MetricGroup;
+import org.apache.flink.runtime.metrics.groups.GenericMetricGroup;
+import org.apache.flink.runtime.metrics.util.TestingMetricRegistry;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * A MetricListener listens to metric and group registration under the provided
+ * root metric group, and stores them in an internal HashMap for fetching.
+ */
+public class MetricListener {
+
+// Constants
+public static final String DELIMITER = ".";
+public static final String ROOT_METRIC_GROUP_NAME = "rootMetricGroup";
+
+// Root metric group
+private final MetricGroup rootMetricGroup;
+
+// Map for storing registered metrics
+private final Map<String, Metric> metrics = new HashMap<>();
+
+public MetricListener() {
+TestingMetricRegistry registry =
+TestingMetricRegistry.builder()
+.setDelimiter(DELIMITER.charAt(0))
+.setRegisterConsumer(
+(metric, name, group) ->
+
this.metrics.put(group.getMetricIdentifier(name), metric))
+.build();
+
+this.rootMetricGroup = new GenericMetricGroup(registry, null, 
ROOT_METRIC_GROUP_NAME);
+}
+
+/**
+ * Get the root metric group of this listener. Note that only metrics and groups registered
+ * under this group will be listened to.
+ *
+ * @return Root metric group
+ */
+public MetricGroup getMetricGroup() {
+return this.rootMetricGroup;
+}
+
+/**
+ * Get registered {@link Metric} with identifier relative to the root 
metric group.
+ *
+ * For example, identifier of metric "myMetric" registered in group 
"myGroup" under root
+ * metric group can be reached by identifier ("myGroup", "myMetric")
+ *
+ * @param identifier identifier relative to the root metric group
+ * @return Registered metric
+ */
+public <T extends Metric> T getMetric(Class<T> metricType, String... identifier) {
+String actualIdentifier =
+ROOT_METRIC_GROUP_NAME + DELIMITER + String.join(DELIMITER, 
identifier);
+if (!metrics.containsKey(actualIdentifier)) {
+throw new IllegalArgumentException(
+String.format("Metric '%s' is not registered", 
actualIdentifier));
+}
+return metricType.cast(metrics.get(actualIdentifier));
+}
+
+/**
+ * Get registered {@link Meter} with identifier relative to the root 
metric group.
+ *
+ * @param identifier identifier relative to the root metric group
+ * @return Registered meter
+ */
+public Meter getMeter(String... identifier) {
+return getMetric(Meter.class, identifier);
+}
+
+/**
+ * Get registered {@link Count

[flink] branch master updated (53034ea -> 1418a1d)

2021-06-24 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git.


from 53034ea  [FLINK-23010][hive] HivePartitionFetcherContextBase shouldn't 
list folders to discover new partitions
 add 07f2705  [FLINK-22147][connector/kafka] Refactor partition discovery 
logic in Kafka source enumerator
 add 1418a1d  [hotfix][connector/test] Make MockSplitEnumeratorContext 
implement AutoClosable and shutdown executors at closing

No new revisions were added by this update.

Summary of changes:
 .../source/enumerator/KafkaSourceEnumerator.java   | 167 +
 .../enumerator/subscriber/KafkaSubscriber.java |  32 +---
 .../subscriber/KafkaSubscriberUtils.java   |  43 ++---
 .../subscriber/PartitionSetSubscriber.java |  48 +++--
 .../enumerator/subscriber/TopicListSubscriber.java |  34 ++--
 .../subscriber/TopicPatternSubscriber.java |  39 ++--
 .../source/enumerator/KafkaEnumeratorTest.java | 200 +
 .../enumerator/subscriber/KafkaSubscriberTest.java |  93 ++
 .../source/mocks/MockSplitEnumeratorContext.java   |  10 +-
 .../source/coordinator/SourceCoordinatorTest.java  |  94 +-
 10 files changed, 456 insertions(+), 304 deletions(-)


[flink] branch master updated (884ff61 -> b582991)

2021-05-31 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git.


from 884ff61  [FLINK-22782][docs] Remove legacy planner from Chinese docs
 add b582991  [FLINK-22722][docs/kafka] Add documentation for Kafka new 
source (#15974)

No new revisions were added by this update.

Summary of changes:
 docs/content/docs/connectors/datastream/kafka.md | 215 ++-
 1 file changed, 212 insertions(+), 3 deletions(-)


[flink] branch release-1.12 updated (19b3683 -> 0913824)

2021-04-19 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch release-1.12
in repository https://gitbox.apache.org/repos/asf/flink.git.


from 19b3683  [FLINK-22208][build] Bump snappy-java to 1.1.8.3
 add 0913824  [FLINK-21133][connector/checkpoint] Fix the 
stop-with-savepoint case in FLIP-27 source by stopping the mailbox loop in 
SourceOperatorStreamTask#finishTask().

No new revisions were added by this update.

Summary of changes:
 .../runtime/tasks/SourceOperatorStreamTask.java|  5 ++
 .../flink/test/checkpointing/SavepointITCase.java  | 54 ++
 2 files changed, 59 insertions(+)


[flink-ml] 02/02: [FLINK-21976] Add StreamingExamplesITCase

2021-04-19 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink-ml.git

commit 195e489cd8114a9cba9ac6857e2d08f4403bfc5b
Author: Dong Lin 
AuthorDate: Wed Mar 31 17:53:59 2021 +0800

[FLINK-21976] Add StreamingExamplesITCase
---
 .../streaming/test/StreamingExamplesITCase.java| 36 +
 .../scala/examples/StreamingExamplesITCase.scala   | 37 ++
 2 files changed, 73 insertions(+)

diff --git 
a/flink-ml-examples/examples-streaming/src/test/java/org/apache/flink/streaming/test/StreamingExamplesITCase.java
 
b/flink-ml-examples/examples-streaming/src/test/java/org/apache/flink/streaming/test/StreamingExamplesITCase.java
new file mode 100644
index 000..6f7bf5d
--- /dev/null
+++ 
b/flink-ml-examples/examples-streaming/src/test/java/org/apache/flink/streaming/test/StreamingExamplesITCase.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.streaming.test;
+
+import 
org.apache.flink.streaming.examples.ml.util.IncrementalLearningSkeletonData;
+import org.apache.flink.test.util.AbstractTestBase;
+
+import org.junit.Test;
+
+/** Integration test for streaming programs in Java examples. */
+public class StreamingExamplesITCase extends AbstractTestBase {
+
+@Test
+public void testIncrementalLearningSkeleton() throws Exception {
+final String resultPath = getTempDirPath("result");
+
org.apache.flink.streaming.examples.ml.IncrementalLearningSkeleton.main(
+new String[] {"--output", resultPath});
+compareResultsByLinesInMemory(IncrementalLearningSkeletonData.RESULTS, 
resultPath);
+}
+}
diff --git 
a/flink-ml-examples/examples-streaming/src/test/scala/org/apache/flink/streaming/scala/examples/StreamingExamplesITCase.scala
 
b/flink-ml-examples/examples-streaming/src/test/scala/org/apache/flink/streaming/scala/examples/StreamingExamplesITCase.scala
new file mode 100644
index 000..7fcbe9d
--- /dev/null
+++ 
b/flink-ml-examples/examples-streaming/src/test/scala/org/apache/flink/streaming/scala/examples/StreamingExamplesITCase.scala
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.streaming.scala.examples
+
+import 
org.apache.flink.streaming.examples.ml.util.IncrementalLearningSkeletonData
+import org.apache.flink.streaming.scala.examples.ml.IncrementalLearningSkeleton
+import org.apache.flink.test.util.{AbstractTestBase, TestBaseUtils}
+import org.junit.Test
+
+/**
+ * Integration test for streaming programs in Scala examples.
+ */
+class StreamingExamplesITCase extends AbstractTestBase {
+
+  @Test
+  def testIncrementalLearningSkeleton(): Unit = {
+val resultPath = getTempDirPath("result")
+IncrementalLearningSkeleton.main(Array("--output", resultPath))
+
TestBaseUtils.compareResultsByLinesInMemory(IncrementalLearningSkeletonData.RESULTS,
 resultPath)
+  }
+}


[flink-ml] branch master updated (492f5a1 -> 195e489)

2021-04-19 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/flink-ml.git.


from 492f5a1  [FLINK-22013] Add Github Actions to flink-ml for every push 
and pull request
 new ebe84a3  [FLINK-21976] Move ML examples from flink/flink-examples to 
the flink-ml repo.
 new 195e489  [FLINK-21976] Add StreamingExamplesITCase

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 flink-ml-examples/examples-batch/pom.xml   | 126 ++
 .../flink/examples/java/ml/LinearRegression.java   | 257 +
 .../java/ml/util/LinearRegressionData.java |  69 ++
 .../ml/util/LinearRegressionDataGenerator.java | 115 +
 .../flink/examples/scala/ml/LinearRegression.scala | 159 +
 flink-ml-examples/examples-streaming/pom.xml   | 190 +++
 .../examples/ml/IncrementalLearningSkeleton.java   | 212 +
 .../ml/util/IncrementalLearningSkeletonData.java   |  33 +++
 .../examples/ml/IncrementalLearningSkeleton.scala  | 184 +++
 .../streaming/test/StreamingExamplesITCase.java|  31 ++-
 .../scala/examples/StreamingExamplesITCase.scala   |  30 ++-
 flink-ml-examples/pom.xml  | 111 +
 pom.xml| 134 +++
 tools/maven/scalastyle-config.xml  | 146 
 14 files changed, 1764 insertions(+), 33 deletions(-)
 create mode 100644 flink-ml-examples/examples-batch/pom.xml
 create mode 100644 
flink-ml-examples/examples-batch/src/main/java/org/apache/flink/examples/java/ml/LinearRegression.java
 create mode 100644 
flink-ml-examples/examples-batch/src/main/java/org/apache/flink/examples/java/ml/util/LinearRegressionData.java
 create mode 100644 
flink-ml-examples/examples-batch/src/main/java/org/apache/flink/examples/java/ml/util/LinearRegressionDataGenerator.java
 create mode 100644 
flink-ml-examples/examples-batch/src/main/scala/org/apache/flink/examples/scala/ml/LinearRegression.scala
 create mode 100644 flink-ml-examples/examples-streaming/pom.xml
 create mode 100644 
flink-ml-examples/examples-streaming/src/main/java/org/apache/flink/streaming/examples/ml/IncrementalLearningSkeleton.java
 create mode 100644 
flink-ml-examples/examples-streaming/src/main/java/org/apache/flink/streaming/examples/ml/util/IncrementalLearningSkeletonData.java
 create mode 100644 
flink-ml-examples/examples-streaming/src/main/scala/org/apache/flink/streaming/scala/examples/ml/IncrementalLearningSkeleton.scala
 copy 
flink-ml-api/src/main/java/org/apache/flink/ml/api/misc/param/ParamValidator.java
 => 
flink-ml-examples/examples-streaming/src/test/java/org/apache/flink/streaming/test/StreamingExamplesITCase.java
 (53%)
 copy 
flink-ml-api/src/main/java/org/apache/flink/ml/api/misc/param/ParamValidator.java
 => 
flink-ml-examples/examples-streaming/src/test/scala/org/apache/flink/streaming/scala/examples/StreamingExamplesITCase.scala
 (53%)
 create mode 100644 flink-ml-examples/pom.xml
 create mode 100644 tools/maven/scalastyle-config.xml


[flink-ml] 01/02: [FLINK-21976] Move ML examples from flink/flink-examples to the flink-ml repo.

2021-04-19 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink-ml.git

commit ebe84a336f691d611f34c30a2c43370457fdbc3f
Author: Dong Lin 
AuthorDate: Wed Mar 31 15:04:54 2021 +0800

[FLINK-21976] Move ML examples from flink/flink-examples to the flink-ml 
repo.
---
 flink-ml-examples/examples-batch/pom.xml   | 126 ++
 .../flink/examples/java/ml/LinearRegression.java   | 257 +
 .../java/ml/util/LinearRegressionData.java |  69 ++
 .../ml/util/LinearRegressionDataGenerator.java | 115 +
 .../flink/examples/scala/ml/LinearRegression.scala | 159 +
 flink-ml-examples/examples-streaming/pom.xml   | 190 +++
 .../examples/ml/IncrementalLearningSkeleton.java   | 212 +
 .../ml/util/IncrementalLearningSkeletonData.java   |  33 +++
 .../examples/ml/IncrementalLearningSkeleton.scala  | 184 +++
 flink-ml-examples/pom.xml  | 111 +
 pom.xml| 134 +++
 tools/maven/scalastyle-config.xml  | 146 
 12 files changed, 1736 insertions(+)

diff --git a/flink-ml-examples/examples-batch/pom.xml 
b/flink-ml-examples/examples-batch/pom.xml
new file mode 100644
index 000..599f839
--- /dev/null
+++ b/flink-ml-examples/examples-batch/pom.xml
@@ -0,0 +1,126 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
+    http://maven.apache.org/maven-v4_0_0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.flink</groupId>
+    <artifactId>flink-ml-examples</artifactId>
+    <version>0.1-SNAPSHOT</version>
+  </parent>
+
+  <artifactId>flink-ml-examples-batch_${scala.binary.version}</artifactId>
+  <name>Flink ML : Examples : Batch</name>
+
+  <packaging>jar</packaging>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.flink</groupId>
+      <artifactId>flink-java</artifactId>
+      <version>${flink.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.flink</groupId>
+      <artifactId>flink-scala_${scala.binary.version}</artifactId>
+      <version>${flink.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.flink</groupId>
+      <artifactId>flink-clients_${scala.binary.version}</artifactId>
+      <version>${flink.version}</version>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>net.alchim31.maven</groupId>
+        <artifactId>scala-maven-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>scala-compile-first</id>
+            <phase>process-resources</phase>
+            <goals>
+              <goal>compile</goal>
+            </goals>
+          </execution>
+        </executions>
+        <configuration>
+          <jvmArgs>
+            <jvmArg>-Xms128m</jvmArg>
+            <jvmArg>-Xmx512m</jvmArg>
+          </jvmArgs>
+        </configuration>
+      </plugin>
+
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>build-helper-maven-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>add-source</id>
+            <phase>generate-sources</phase>
+            <goals>
+              <goal>add-source</goal>
+            </goals>
+            <configuration>
+              <sources>
+                <source>src/main/scala</source>
+              </sources>
+            </configuration>
+          </execution>
+          <execution>
+            <id>add-test-source</id>
+            <phase>generate-test-sources</phase>
+            <goals>
+              <goal>add-test-source</goal>
+            </goals>
+            <configuration>
+              <sources>
+                <source>src/test/scala</source>
+              </sources>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+
+      <plugin>
+        <groupId>org.scalastyle</groupId>
+        <artifactId>scalastyle-maven-plugin</artifactId>
+        <configuration>
+          <configLocation>${project.basedir}/../../tools/maven/scalastyle-config.xml</configLocation>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+</project>
diff --git 
a/flink-ml-examples/examples-batch/src/main/java/org/apache/flink/examples/java/ml/LinearRegression.java
 
b/flink-ml-examples/examples-batch/src/main/java/org/apache/flink/examples/java/ml/LinearRegression.java
new file mode 100644
index 000..4f2f528
--- /dev/null
+++ 
b/flink-ml-examples/examples-batch/src/main/java/org/apache/flink/examples/java/ml/LinearRegression.java
@@ -0,0 +1,257 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.examples.java.ml;
+
+import org.apache.flink.api.common.functions.MapFunction;
+import org.apache.flink.api.common.functions.ReduceFunction;
+import org.apache.flink.api.common.functions.RichMapFunction;
+import org.apache.flink.api.java.DataSet;
+import org.apache.flink.api.java.ExecutionEnvironment;
+import org.apache.flink.api.java.operators.IterativeDataSet;
+import

[flink] branch release-1.13 updated: [FLINK-21133][connector/checkpoint] Fix the stop-with-savepoint case in FLIP-27 source by stopping the mailbox loop in SourceOperatorStreamTask#finishTask().

2021-04-05 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch release-1.13
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/release-1.13 by this push:
 new e129817  [FLINK-21133][connector/checkpoint] Fix the 
stop-with-savepoint case in FLIP-27 source by stopping the mailbox loop in 
SourceOperatorStreamTask#finishTask().
e129817 is described below

commit e129817f290442c6fd7e891a6d28d848ec090050
Author: Jiangjie (Becket) Qin 
AuthorDate: Mon Mar 29 16:06:31 2021 +0800

[FLINK-21133][connector/checkpoint] Fix the stop-with-savepoint case in 
FLIP-27 source by stopping the mailbox loop in 
SourceOperatorStreamTask#finishTask().
---
 .../runtime/tasks/SourceOperatorStreamTask.java|  5 ++
 .../flink/test/checkpointing/SavepointITCase.java  | 54 ++
 2 files changed, 59 insertions(+)

diff --git 
a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java
 
b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java
index 1c8589f..7b3b06b 100644
--- 
a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java
+++ 
b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java
@@ -108,6 +108,11 @@ public class SourceOperatorStreamTask extends 
StreamTask triggerCheckpointAsync(
 CheckpointMetaData checkpointMetaData, CheckpointOptions 
checkpointOptions) {
 if (!isExternallyInducedSource) {
diff --git 
a/flink-tests/src/test/java/org/apache/flink/test/checkpointing/SavepointITCase.java
 
b/flink-tests/src/test/java/org/apache/flink/test/checkpointing/SavepointITCase.java
index 44b206f..fa3709d 100644
--- 
a/flink-tests/src/test/java/org/apache/flink/test/checkpointing/SavepointITCase.java
+++ 
b/flink-tests/src/test/java/org/apache/flink/test/checkpointing/SavepointITCase.java
@@ -154,6 +154,60 @@ public class SavepointITCase extends TestLogger {
 }
 }
 
+@Test
+public void testStopWithSavepointForFlip27SourceWithDrain() throws 
Exception {
+testStopWithSavepointForFlip27Source(true);
+}
+
+@Test
+public void testStopWithSavepointForFlip27SourceWithoutDrain() throws 
Exception {
+testStopWithSavepointForFlip27Source(false);
+}
+
+private void testStopWithSavepointForFlip27Source(boolean drain) throws 
Exception {
+final int numTaskManagers = 2;
+final int numSlotsPerTaskManager = 2;
+
+final MiniClusterResourceFactory clusterFactory =
+new MiniClusterResourceFactory(
+numTaskManagers, numSlotsPerTaskManager, 
getFileBasedCheckpointsConfig());
+
+StreamExecutionEnvironment env = 
StreamExecutionEnvironment.getExecutionEnvironment();
+env.setParallelism(1);
+
+BoundedPassThroughOperator operator =
+new BoundedPassThroughOperator<>(ChainingStrategy.ALWAYS);
+DataStream stream =
+env.fromSequence(0, Long.MAX_VALUE)
+.transform("pass-through", 
BasicTypeInfo.LONG_TYPE_INFO, operator);
+stream.addSink(new DiscardingSink<>());
+
+final JobGraph jobGraph = env.getStreamGraph().getJobGraph();
+final JobID jobId = jobGraph.getJobID();
+
+MiniClusterWithClientResource cluster = clusterFactory.get();
+cluster.before();
+ClusterClient client = cluster.getClusterClient();
+
+try {
+BoundedPassThroughOperator.resetForTest(1, true);
+
+client.submitJob(jobGraph).get();
+
+BoundedPassThroughOperator.getProgressLatch().await();
+
+client.stopWithSavepoint(jobId, drain, null).get();
+
+if (drain) {
+Assert.assertTrue(BoundedPassThroughOperator.inputEnded);
+} else {
+Assert.assertFalse(BoundedPassThroughOperator.inputEnded);
+}
+} finally {
+cluster.after();
+}
+}
+
 /**
  * Triggers a savepoint for a job that uses the FsStateBackend. We expect 
that all checkpoint
  * files are written to a new savepoint directory.
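
The SourceOperatorStreamTask hunk above kept only its context lines in the
archive; the five added lines are missing. Going by the commit message, the
fix overrides finishTask() so the mailbox loop stops once the savepoint has
been taken. A minimal sketch of that shape, assuming StreamTask's
mailboxProcessor field and MailboxProcessor#allActionsCompleted() (an
illustration, not the verbatim patch):

    // Inside SourceOperatorStreamTask: let stop-with-savepoint terminate the
    // task by draining the mailbox instead of waiting for the (possibly
    // infinite) source to reach its end.
    @Override
    protected void finishTask() throws Exception {
        // Signals the mailbox loop that no further actions will arrive, so
        // runMailboxLoop() returns and the task can finish.
        mailboxProcessor.allActionsCompleted();
    }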


[flink] branch master updated: [FLINK-21133][connector/checkpoint] Fix the stop-with-savepoint case in FLIP-27 source by stopping the mailbox loop in SourceOperatorStreamTask#finishTask().

2021-04-05 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/master by this push:
 new a9cf18b  [FLINK-21133][connector/checkpoint] Fix the 
stop-with-savepoint case in FLIP-27 source by stopping the mailbox loop in 
SourceOperatorStreamTask#finishTask().
a9cf18b is described below

commit a9cf18b4d25f130e0bd24d51b128bbcf71892b45
Author: Jiangjie (Becket) Qin 
AuthorDate: Mon Mar 29 16:06:31 2021 +0800

[FLINK-21133][connector/checkpoint] Fix the stop-with-savepoint case in 
FLIP-27 source by stopping the mailbox loop in 
SourceOperatorStreamTask#finishTask().
---
 .../runtime/tasks/SourceOperatorStreamTask.java|  5 ++
 .../flink/test/checkpointing/SavepointITCase.java  | 54 ++
 2 files changed, 59 insertions(+)

diff --git 
a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java
 
b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java
index 1c8589f..7b3b06b 100644
--- 
a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java
+++ 
b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java
@@ -108,6 +108,11 @@ public class SourceOperatorStreamTask extends 
StreamTask triggerCheckpointAsync(
 CheckpointMetaData checkpointMetaData, CheckpointOptions 
checkpointOptions) {
 if (!isExternallyInducedSource) {
diff --git 
a/flink-tests/src/test/java/org/apache/flink/test/checkpointing/SavepointITCase.java
 
b/flink-tests/src/test/java/org/apache/flink/test/checkpointing/SavepointITCase.java
index 44b206f..fa3709d 100644
--- 
a/flink-tests/src/test/java/org/apache/flink/test/checkpointing/SavepointITCase.java
+++ 
b/flink-tests/src/test/java/org/apache/flink/test/checkpointing/SavepointITCase.java
@@ -154,6 +154,60 @@ public class SavepointITCase extends TestLogger {
 }
 }
 
+@Test
+public void testStopWithSavepointForFlip27SourceWithDrain() throws 
Exception {
+testStopWithSavepointForFlip27Source(true);
+}
+
+@Test
+public void testStopWithSavepointForFlip27SourceWithoutDrain() throws 
Exception {
+testStopWithSavepointForFlip27Source(false);
+}
+
+private void testStopWithSavepointForFlip27Source(boolean drain) throws 
Exception {
+final int numTaskManagers = 2;
+final int numSlotsPerTaskManager = 2;
+
+final MiniClusterResourceFactory clusterFactory =
+new MiniClusterResourceFactory(
+numTaskManagers, numSlotsPerTaskManager, 
getFileBasedCheckpointsConfig());
+
+StreamExecutionEnvironment env = 
StreamExecutionEnvironment.getExecutionEnvironment();
+env.setParallelism(1);
+
+BoundedPassThroughOperator operator =
+new BoundedPassThroughOperator<>(ChainingStrategy.ALWAYS);
+DataStream stream =
+env.fromSequence(0, Long.MAX_VALUE)
+.transform("pass-through", 
BasicTypeInfo.LONG_TYPE_INFO, operator);
+stream.addSink(new DiscardingSink<>());
+
+final JobGraph jobGraph = env.getStreamGraph().getJobGraph();
+final JobID jobId = jobGraph.getJobID();
+
+MiniClusterWithClientResource cluster = clusterFactory.get();
+cluster.before();
+ClusterClient client = cluster.getClusterClient();
+
+try {
+BoundedPassThroughOperator.resetForTest(1, true);
+
+client.submitJob(jobGraph).get();
+
+BoundedPassThroughOperator.getProgressLatch().await();
+
+client.stopWithSavepoint(jobId, drain, null).get();
+
+if (drain) {
+Assert.assertTrue(BoundedPassThroughOperator.inputEnded);
+} else {
+Assert.assertFalse(BoundedPassThroughOperator.inputEnded);
+}
+} finally {
+cluster.after();
+}
+}
+
 /**
  * Triggers a savepoint for a job that uses the FsStateBackend. We expect 
that all checkpoint
  * files are written to a new savepoint directory.


[flink-ml] branch master updated: [FLINK-22013] Add Github Actions to flink-ml for every push and pull request

2021-03-30 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink-ml.git


The following commit(s) were added to refs/heads/master by this push:
 new 492f5a1  [FLINK-22013] Add Github Actions to flink-ml for every push 
and pull request
492f5a1 is described below

commit 492f5a12d2cbfa9346d4772306a469bd1a5163f5
Author: Dong Lin 
AuthorDate: Wed Mar 31 10:00:07 2021 +0800

[FLINK-22013] Add Github Actions to flink-ml for every push and pull request
---
 .github/workflows/java8-build.yml | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/.github/workflows/java8-build.yml 
b/.github/workflows/java8-build.yml
new file mode 100644
index 000..d68d465
--- /dev/null
+++ b/.github/workflows/java8-build.yml
@@ -0,0 +1,18 @@
+name: Java 8 Build
+
+on: [push, pull_request]
+
+jobs:
+  build:
+runs-on: ubuntu-latest
+
+steps:
+  - name: Checkout code
+uses: actions/checkout@v2
+  - name: Set up JDK 1.8
+uses: actions/setup-java@v1
+with:
+  java-version: 1.8
+  - name: Build
+run: mvn clean install
+


[flink-ml] branch master updated: [hotfix] Change the notification list of GitHub activities to iss...@flink.apache.org

2021-03-30 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink-ml.git


The following commit(s) were added to refs/heads/master by this push:
 new fb67074  [hotfix] Change the notification list of GitHub activities to 
iss...@flink.apache.org
fb67074 is described below

commit fb6707434bda77b1bb529f223a77f5316eeb04f6
Author: Jiangjie (Becket) Qin 
AuthorDate: Wed Mar 31 12:04:36 2021 +0800

[hotfix] Change the notification list of GitHub activities to 
iss...@flink.apache.org
---
 .asf.yaml | 5 +
 1 file changed, 5 insertions(+)

diff --git a/.asf.yaml b/.asf.yaml
new file mode 100644
index 000..82eef0b
--- /dev/null
+++ b/.asf.yaml
@@ -0,0 +1,5 @@
+notifications:
+  commits:  commits@flink.apache.org
+  issues:   iss...@flink.apache.org
+  pullrequests: iss...@flink.apache.org
+  jira_options: link label


[flink] 03/12: [hotfix][connector/kafka] Reduce the offset commit logging verbosity from INFO to DEBUG.

2021-03-29 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git

commit b5b9682827f0698d9b4d24215b4dd2daaf25ec30
Author: Dong Lin 
AuthorDate: Sat Mar 6 12:51:52 2021 +0800

[hotfix][connector/kafka] Reduce the offset commit logging verbosity from 
INFO to DEBUG.
---
 .../flink/connector/kafka/source/reader/KafkaSourceReader.java  | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java
index 8ca0337..bf3d42e 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java
@@ -109,11 +109,13 @@ public class KafkaSourceReader
 
 @Override
 public void notifyCheckpointComplete(long checkpointId) throws Exception {
-LOG.info("Committing offsets for checkpoint {}", checkpointId);
+LOG.debug("Committing offsets for checkpoint {}", checkpointId);
 ((KafkaSourceFetcherManager) splitFetcherManager)
 .commitOffsets(
 offsetsToCommit.get(checkpointId),
 (ignored, e) -> {
+// The offset commit here is needed by the 
external monitoring. It won't
+// break Flink job's correctness if we fail to 
commit the offset here.
 if (e != null) {
 LOG.warn(
 "Failed to commit consumer offsets for 
checkpoint {}",
@@ -124,7 +126,7 @@ public class KafkaSourceReader
 "Successfully committed offsets for 
checkpoint {}",
 checkpointId);
 // If the finished topic partition has been 
committed, we remove it
-// from the offsets of finsihed splits map.
+// from the offsets of the finished splits map.
 Map 
committedPartitions =
 offsetsToCommit.get(checkpointId);
 offsetsOfFinishedSplits


[flink] 08/12: [FLINK-20114][connector/kafka] SourceOperatorStreamTask should check the committed offset first before using OffsetResetStrategy.

2021-03-29 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git

commit 8de17841d144c780a46389f5ef69b8c484c0747b
Author: Dong Lin 
AuthorDate: Sun Mar 7 19:33:58 2021 +0800

[FLINK-20114][connector/kafka] SourceOperatorStreamTask should check the 
committed offset first before using OffsetResetStrategy.

This is necessary to keep the same behavior as the legacy 
FlinkKafkaConsumer.
---
 .../initializer/SpecifiedOffsetsInitializer.java|  6 ++
 .../initializer/OffsetsInitializerTest.java | 21 -
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/SpecifiedOffsetsInitializer.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/SpecifiedOffsetsInitializer.java
index 186e2d5..d3335de 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/SpecifiedOffsetsInitializer.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/initializer/SpecifiedOffsetsInitializer.java
@@ -60,6 +60,12 @@ class SpecifiedOffsetsInitializer implements 
OffsetsInitializer {
 }
 }
 if (!toLookup.isEmpty()) {
+// First check the committed offsets.
+Map committedOffsets =
+partitionOffsetsRetriever.committedOffsets(toLookup);
+offsets.putAll(committedOffsets);
+toLookup.removeAll(committedOffsets.keySet());
+
 switch (offsetResetStrategy) {
 case EARLIEST:
 
offsets.putAll(partitionOffsetsRetriever.beginningOffsets(toLookup));
diff --git 
a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerTest.java
 
b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerTest.java
index 94d0c30..0e84882 100644
--- 
a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerTest.java
+++ 
b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/enumerator/initializer/OffsetsInitializerTest.java
@@ -41,12 +41,14 @@ import static org.junit.Assert.assertTrue;
 /** Unit tests for {@link OffsetsInitializer}. */
 public class OffsetsInitializerTest {
 private static final String TOPIC = "topic";
+private static final String TOPIC2 = "topic2";
 private static KafkaSourceEnumerator.PartitionOffsetsRetrieverImpl 
retriever;
 
 @BeforeClass
 public static void setup() throws Throwable {
 KafkaSourceTestEnv.setup();
 KafkaSourceTestEnv.setupTopic(TOPIC, true, true);
+KafkaSourceTestEnv.setupTopic(TOPIC2, false, false);
 retriever =
 new KafkaSourceEnumerator.PartitionOffsetsRetrieverImpl(
 KafkaSourceTestEnv.getConsumer(),
@@ -116,19 +118,28 @@ public class OffsetsInitializerTest {
 List partitions = 
KafkaSourceTestEnv.getPartitionsForTopic(TOPIC);
 Map committedOffsets =
 KafkaSourceTestEnv.getCommittedOffsets(partitions);
-committedOffsets.forEach((tp, oam) -> specifiedOffsets.put(tp, 
oam.offset()));
+partitions.forEach(tp -> specifiedOffsets.put(tp, (long) 
tp.partition()));
 // Remove the specified offsets for partition 0.
-TopicPartition missingPartition = new TopicPartition(TOPIC, 0);
-specifiedOffsets.remove(missingPartition);
+TopicPartition partitionSetToCommitted = new TopicPartition(TOPIC, 0);
+specifiedOffsets.remove(partitionSetToCommitted);
 OffsetsInitializer initializer = 
OffsetsInitializer.offsets(specifiedOffsets);
 
 assertEquals(OffsetResetStrategy.EARLIEST, 
initializer.getAutoOffsetResetStrategy());
+// The partition without committed offset should fallback to offset 
reset strategy.
+TopicPartition partitionSetToEarliest = new TopicPartition(TOPIC2, 0);
+partitions.add(partitionSetToEarliest);
 
 Map offsets = 
initializer.getPartitionOffsets(partitions, retriever);
 for (TopicPartition tp : partitions) {
 Long offset = offsets.get(tp);
-long expectedOffset =
-tp.equals(missingPartition) ? 0L : 
committedOffsets.get(tp).offset();
+long expectedOffset;
+if (tp.equals(partitionSetToCommitted)) {
+expectedOffset = committedOffsets.get(tp).offset();
+} else if (tp.equ
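
The test above exercises the new lookup order: explicitly specified offsets
first, then the consumer group's committed offsets, and only then the
OffsetResetStrategy. From the user's side this is simply
OffsetsInitializer.offsets(...); a hedged usage sketch (broker, group, topics
and the offset value are illustrative):

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.flink.api.common.serialization.SimpleStringSchema;
    import org.apache.flink.connector.kafka.source.KafkaSource;
    import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
    import org.apache.flink.connector.kafka.source.reader.deserializer.KafkaRecordDeserializationSchema;
    import org.apache.kafka.common.TopicPartition;

    Map<TopicPartition, Long> specified = new HashMap<>();
    specified.put(new TopicPartition("topic", 1), 42L); // explicit start offset

    KafkaSource<String> source =
            KafkaSource.<String>builder()
                    .setBootstrapServers("broker:9092")
                    .setGroupId("my-group")
                    .setTopics("topic", "topic2")
                    .setDeserializer(
                            KafkaRecordDeserializationSchema.valueOnly(new SimpleStringSchema()))
                    // Partitions absent from the map now start from the group's
                    // committed offset when one exists; only partitions without
                    // a committed offset fall back to the reset strategy
                    // (EARLIEST here, per the assertion in the test above).
                    .setStartingOffsets(OffsetsInitializer.offsets(specified))
                    .build();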

[flink] 06/12: [hotfix][connector/kafka] Remove the unused close.timeout.ms config.

2021-03-29 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git

commit b3471f2a55df9d10248c41c6d4efedec188b93d6
Author: Dong Lin 
AuthorDate: Sun Mar 7 17:44:59 2021 +0800

[hotfix][connector/kafka] Remove the unused close.timeout.ms config.
---
 .../org/apache/flink/connector/kafka/source/KafkaSourceOptions.java | 6 --
 1 file changed, 6 deletions(-)

diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceOptions.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceOptions.java
index 03d417f..db09fa5 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceOptions.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceOptions.java
@@ -41,12 +41,6 @@ public class KafkaSourceOptions {
 "The interval in milliseconds for the Kafka source 
to discover "
 + "the new partitions. A non-positive 
value disables the partition discovery.");
 
-    public static final ConfigOption<Long> CLOSE_TIMEOUT_MS =
-            ConfigOptions.key("close.timeout.ms")
-                    .longType()
-                    .defaultValue(1L)
-                    .withDescription("The max time to wait when closing components.");
-
     @SuppressWarnings("unchecked")
     public static <T> T getOption(
             Properties props, ConfigOption<T> configOption, Function<String, T> parser) {


[flink] 12/12: [FLINK-20114][connector/kafka] Remove duplicated warning and remove redundant default value for partition.discovery.interval.ms

2021-03-29 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git

commit 8d2fa1bb81eff790bdc21a69d15a17aa074010ae
Author: Dong Lin 
AuthorDate: Wed Mar 24 15:26:23 2021 +0800

[FLINK-20114][connector/kafka] Remove duplicated warning and remove 
redundant default value for partition.discovery.interval.ms
---
 .../apache/flink/connector/kafka/source/KafkaSourceBuilder.java   | 8 ++--
 .../apache/flink/connector/kafka/source/KafkaSourceOptions.java   | 2 +-
 .../streaming/runtime/io/StreamMultipleInputProcessorFactory.java | 6 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
index 8ede378..77f844f 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
@@ -442,14 +442,10 @@ public class KafkaSourceBuilder {
 true);
 
 // If the source is bounded, do not run periodic partition discovery.
-if (maybeOverride(
+maybeOverride(
 KafkaSourceOptions.PARTITION_DISCOVERY_INTERVAL_MS.key(),
 "-1",
-boundedness == Boundedness.BOUNDED)) {
-LOG.warn(
-"{} property is overridden to -1 because the source is 
bounded.",
-KafkaSourceOptions.PARTITION_DISCOVERY_INTERVAL_MS);
-}
+boundedness == Boundedness.BOUNDED);
 
 // If the client id prefix is not set, reuse the consumer group id as 
the client id prefix.
 maybeOverride(
diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceOptions.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceOptions.java
index db09fa5..98c735f 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceOptions.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceOptions.java
@@ -36,7 +36,7 @@ public class KafkaSourceOptions {
     public static final ConfigOption<Long> PARTITION_DISCOVERY_INTERVAL_MS =
             ConfigOptions.key("partition.discovery.interval.ms")
                     .longType()
-                    .defaultValue(3L)
+                    .noDefaultValue()
 .withDescription(
 "The interval in milliseconds for the Kafka source 
to discover "
 + "the new partitions. A non-positive 
value disables the partition discovery.");
diff --git 
a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/StreamMultipleInputProcessorFactory.java
 
b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/StreamMultipleInputProcessorFactory.java
index a70b112..db351bc 100644
--- 
a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/StreamMultipleInputProcessorFactory.java
+++ 
b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/StreamMultipleInputProcessorFactory.java
@@ -351,7 +351,11 @@ public class StreamMultipleInputProcessorFactory {
 WatermarkGauge inputWatermarkGauge,
 MultiStreamStreamStatusTracker streamStatusTracker,
 int inputIndex) {
-super(chainedSourceOutput, streamStatusMaintainer, new 
SimpleCounter(), inputWatermarkGauge);
+super(
+chainedSourceOutput,
+streamStatusMaintainer,
+new SimpleCounter(),
+inputWatermarkGauge);
 this.streamStatusTracker = streamStatusTracker;
 this.inputIndex = inputIndex;
 }


[flink] 11/12: [hotfix][examples] Update StateMachineExample to use KafkaSource

2021-03-29 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git

commit e079aef0efcd544322a028cc15a24bdaa456af4f
Author: Dong Lin 
AuthorDate: Sun Mar 21 17:33:39 2021 +0800

[hotfix][examples] Update StateMachineExample to use KafkaSource
---
 .../examples/statemachine/StateMachineExample.java | 69 +++---
 1 file changed, 36 insertions(+), 33 deletions(-)

diff --git 
a/flink-examples/flink-examples-streaming/src/main/java/org/apache/flink/streaming/examples/statemachine/StateMachineExample.java
 
b/flink-examples/flink-examples-streaming/src/main/java/org/apache/flink/streaming/examples/statemachine/StateMachineExample.java
index 3dfd131..1bb76ad 100644
--- 
a/flink-examples/flink-examples-streaming/src/main/java/org/apache/flink/streaming/examples/statemachine/StateMachineExample.java
+++ 
b/flink-examples/flink-examples-streaming/src/main/java/org/apache/flink/streaming/examples/statemachine/StateMachineExample.java
@@ -18,18 +18,20 @@
 
 package org.apache.flink.streaming.examples.statemachine;
 
+import org.apache.flink.api.common.eventtime.WatermarkStrategy;
 import org.apache.flink.api.common.functions.RichFlatMapFunction;
 import org.apache.flink.api.common.state.ValueState;
 import org.apache.flink.api.common.state.ValueStateDescriptor;
 import org.apache.flink.api.java.utils.ParameterTool;
 import org.apache.flink.configuration.Configuration;
+import org.apache.flink.connector.kafka.source.KafkaSource;
+import 
org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
+import 
org.apache.flink.connector.kafka.source.reader.deserializer.KafkaRecordDeserializationSchema;
 import org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend;
 import org.apache.flink.core.fs.FileSystem;
 import org.apache.flink.runtime.state.hashmap.HashMapStateBackend;
 import org.apache.flink.streaming.api.datastream.DataStream;
 import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
-import org.apache.flink.streaming.api.functions.source.SourceFunction;
-import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
 import org.apache.flink.streaming.examples.statemachine.dfa.State;
 import org.apache.flink.streaming.examples.statemachine.event.Alert;
 import org.apache.flink.streaming.examples.statemachine.event.Event;
@@ -37,8 +39,6 @@ import 
org.apache.flink.streaming.examples.statemachine.generator.EventsGenerato
 import 
org.apache.flink.streaming.examples.statemachine.kafka.EventDeSerializer;
 import org.apache.flink.util.Collector;
 
-import java.util.Properties;
-
 /**
  * Main class of the state machine example. This class implements the 
streaming application that
  * receives the stream of events and evaluates a state machine (per 
originating address) to validate
@@ -69,9 +69,26 @@ public class StateMachineExample {
 
 //  determine whether to use the built-in source, or read from 
Kafka 
 
-        final SourceFunction<Event> source;
+        final DataStream<Event> events;
 final ParameterTool params = ParameterTool.fromArgs(args);
 
+// create the environment to create streams and configure execution
+final StreamExecutionEnvironment env = 
StreamExecutionEnvironment.getExecutionEnvironment();
+env.enableCheckpointing(2000L);
+
+final String stateBackend = params.get("backend", "memory");
+if ("hashmap".equals(stateBackend)) {
+final String checkpointDir = params.get("checkpoint-dir");
+boolean asyncCheckpoints = params.getBoolean("async-checkpoints", 
false);
+env.setStateBackend(new HashMapStateBackend(asyncCheckpoints));
+env.getCheckpointConfig().setCheckpointStorage(checkpointDir);
+} else if ("rocks".equals(stateBackend)) {
+final String checkpointDir = params.get("checkpoint-dir");
+boolean incrementalCheckpoints = 
params.getBoolean("incremental-checkpoints", false);
+env.setStateBackend(new 
EmbeddedRocksDBStateBackend(incrementalCheckpoints));
+env.getCheckpointConfig().setCheckpointStorage(checkpointDir);
+}
+
 if (params.has("kafka-topic")) {
 // set up the Kafka reader
 String kafkaTopic = params.get("kafka-topic");
@@ -80,14 +97,19 @@ public class StateMachineExample {
 System.out.printf("Reading from kafka topic %s @ %s\n", 
kafkaTopic, brokers);
 System.out.println();
 
-Properties kafkaProps = new Properties();
-kafkaProps.setProperty("bootstrap.servers", brokers);
-
-FlinkKafkaConsumer kafka =
-new FlinkKafkaConsumer<>(kafkaTopic, new 
EventDeSerializer(), kafkaProps);
-kafka.setStartFromL
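
The remainder of this hunk is truncated in the archive (the last visible line
is the legacy kafka.setStartFromLatest() call being removed). Judging from
the imports added above, the replacement side builds a KafkaSource instead; a
sketch under that assumption (the source name is illustrative):

    KafkaSource<Event> source =
            KafkaSource.<Event>builder()
                    .setBootstrapServers(brokers)
                    .setTopics(kafkaTopic)
                    // Replaces kafka.setStartFromLatest() on the legacy consumer.
                    .setStartingOffsets(OffsetsInitializer.latest())
                    .setDeserializer(
                            KafkaRecordDeserializationSchema.valueOnly(new EventDeSerializer()))
                    .build();
    events = env.fromSource(source, WatermarkStrategy.noWatermarks(), "StateMachineExampleSource");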

[flink] 10/12: [FLINK-20114][connector/kafka] Add IT cases for KafkaSource by migrating IT cases from FlinkKafkaConsumer.

2021-03-29 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git

commit 3abf5550a11ac4733799187bf49122417a177b6a
Author: Dong Lin 
AuthorDate: Mon Mar 8 09:35:30 2021 +0800

[FLINK-20114][connector/kafka] Add IT cases for KafkaSource by migrating IT 
cases from FlinkKafkaConsumer.
---
 .../connector/kafka/source/KafkaSourceITCase.java  |  63 +
 .../kafka/source/KafkaSourceLegacyITCase.java  | 162 +
 .../connectors/kafka/KafkaConsumerTestBase.java| 260 +++--
 .../connectors/kafka/KafkaTestEnvironment.java |  20 ++
 .../connectors/kafka/KafkaTestEnvironmentImpl.java |  19 ++
 5 files changed, 453 insertions(+), 71 deletions(-)

diff --git 
a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/KafkaSourceITCase.java
 
b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/KafkaSourceITCase.java
index 25acf75..6f7c66d 100644
--- 
a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/KafkaSourceITCase.java
+++ 
b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/KafkaSourceITCase.java
@@ -18,19 +18,25 @@
 
 package org.apache.flink.connector.kafka.source;
 
+import org.apache.flink.api.common.JobExecutionResult;
 import org.apache.flink.api.common.accumulators.ListAccumulator;
 import org.apache.flink.api.common.eventtime.WatermarkStrategy;
+import org.apache.flink.api.common.functions.MapFunction;
 import org.apache.flink.api.common.typeinfo.TypeInformation;
 import org.apache.flink.configuration.Configuration;
 import 
org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
 import 
org.apache.flink.connector.kafka.source.reader.deserializer.KafkaRecordDeserializationSchema;
 import org.apache.flink.streaming.api.datastream.DataStream;
 import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.streaming.api.functions.sink.DiscardingSink;
 import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
+import org.apache.flink.streaming.api.operators.StreamMap;
+import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
 import org.apache.flink.util.CloseableIterator;
 import org.apache.flink.util.Collector;
 
 import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.producer.ProducerRecord;
 import org.apache.kafka.common.TopicPartition;
 import org.apache.kafka.common.serialization.Deserializer;
 import org.apache.kafka.common.serialization.IntegerDeserializer;
@@ -67,6 +73,42 @@ public class KafkaSourceITCase {
 }
 
 @Test
+public void testTimestamp() throws Throwable {
+final String topic = "testTimestamp";
+KafkaSourceTestEnv.createTestTopic(topic, 1, 1);
+KafkaSourceTestEnv.produceToKafka(
+Arrays.asList(
+new ProducerRecord<>(topic, 0, 1L, "key0", 0),
+new ProducerRecord<>(topic, 0, 2L, "key1", 1),
+new ProducerRecord<>(topic, 0, 3L, "key2", 2)));
+
+KafkaSource source =
+KafkaSource.builder()
+
.setBootstrapServers(KafkaSourceTestEnv.brokerConnectionStrings)
+.setGroupId("testTimestampAndWatermark")
+.setTopics(topic)
+.setDeserializer(new 
TestingKafkaRecordDeserializationSchema())
+.setStartingOffsets(OffsetsInitializer.earliest())
+.setBounded(OffsetsInitializer.latest())
+.build();
+
+StreamExecutionEnvironment env = 
StreamExecutionEnvironment.getExecutionEnvironment();
+env.setParallelism(1);
+DataStream stream =
+env.fromSource(source, WatermarkStrategy.noWatermarks(), 
"testTimestamp");
+
+// Verify that the timestamp and watermark are working fine.
+stream.transform(
+"timestampVerifier",
+TypeInformation.of(PartitionAndValue.class),
+new WatermarkVerifyingOperator(v -> v));
+stream.addSink(new DiscardingSink<>());
+JobExecutionResult result = env.execute();
+
+assertEquals(Arrays.asList(1L, 2L, 3L), 
result.getAccumulatorResult("timestamp"));
+}
+
+@Test
 public void testBasicRead() throws Exception {
 KafkaSource source =
 KafkaSource.builder()
@@ -167,6 +209,27 @@ public class KafkaSourceITCase {
 }
 }
 
+private static class WatermarkVerifyingOperator
+extends StreamMap {
+
+public WatermarkVerifyingOperator(
+MapFunc

[flink] 07/12: [FLINK-20114][connector/kafka] PartitionOffsetsRetrieverImpl.committedOffsets() should handle the case without committed offsets.

2021-03-29 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git

commit b831b853b908d26a2b7890478fc9f8446b2eb724
Author: Dong Lin 
AuthorDate: Sun Mar 7 17:53:27 2021 +0800

[FLINK-20114][connector/kafka] 
PartitionOffsetsRetrieverImpl.committedOffsets() should handle the case without 
committed offsets.
---
 .../connector/kafka/source/enumerator/KafkaSourceEnumerator.java   | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/KafkaSourceEnumerator.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/KafkaSourceEnumerator.java
index 92ac770..1f3f3c6 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/KafkaSourceEnumerator.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/KafkaSourceEnumerator.java
@@ -407,7 +407,12 @@ public class KafkaSourceEnumerator
 .thenApply(
 result -> {
 Map offsets = new 
HashMap<>();
-result.forEach((tp, oam) -> 
offsets.put(tp, oam.offset()));
+result.forEach(
+(tp, oam) -> {
+if (oam != null) {
+offsets.put(tp, 
oam.offset());
+}
+});
 return offsets;
 })
 .get();
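
For context: Kafka's AdminClient reports partitions that have no committed
offset with a null OffsetAndMetadata value, which is exactly what the added
null check filters out. A hedged sketch of the underlying calling pattern
(the group id is illustrative):

    // AdminClient#listConsumerGroupOffsets maps offset-less partitions to null.
    Map<TopicPartition, OffsetAndMetadata> raw =
            adminClient
                    .listConsumerGroupOffsets("my-group")
                    .partitionsToOffsetAndMetadata()
                    .get();
    Map<TopicPartition, Long> offsets = new HashMap<>();
    raw.forEach(
            (tp, oam) -> {
                if (oam != null) { // skip partitions without a committed offset
                    offsets.put(tp, oam.offset());
                }
            });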


[flink] 01/12: [FLINK-20114][connector/kafka] KafkaSourceReader should not commit offsets for partitions whose offsets have not been initialized.

2021-03-29 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git

commit 7a36623dc081a555d4e6e9d2b1f7bfb908640fee
Author: Dong Lin 
AuthorDate: Wed Mar 3 22:37:25 2021 +0800

[FLINK-20114][connector/kafka] KafkaSourceReader should not commit offsets 
for partitions whose offsets have not been initialized.
---
 .../kafka/source/reader/KafkaSourceReader.java | 18 --
 .../kafka/source/reader/KafkaSourceReaderTest.java | 14 ++
 2 files changed, 26 insertions(+), 6 deletions(-)

diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java
index 9fd3a70..8ca0337 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReader.java
@@ -75,9 +75,11 @@ public class KafkaSourceReader
 protected void onSplitFinished(Map 
finishedSplitIds) {
 finishedSplitIds.forEach(
 (ignored, splitState) -> {
-offsetsOfFinishedSplits.put(
-splitState.getTopicPartition(),
-new 
OffsetAndMetadata(splitState.getCurrentOffset()));
+if (splitState.getCurrentOffset() >= 0) {
+offsetsOfFinishedSplits.put(
+splitState.getTopicPartition(),
+new 
OffsetAndMetadata(splitState.getCurrentOffset()));
+}
 });
 }
 
@@ -91,9 +93,13 @@ public class KafkaSourceReader
 offsetsToCommit.computeIfAbsent(checkpointId, id -> new 
HashMap<>());
 // Put the offsets of the active splits.
 for (KafkaPartitionSplit split : splits) {
-offsetsMap.put(
-split.getTopicPartition(),
-new OffsetAndMetadata(split.getStartingOffset(), 
null));
+// If the checkpoint is triggered before the partition 
starting offsets
+// is retrieved, do not commit the offsets for those 
partitions.
+if (split.getStartingOffset() >= 0) {
+offsetsMap.put(
+split.getTopicPartition(),
+new OffsetAndMetadata(split.getStartingOffset()));
+}
 }
 // Put offsets of all the finished splits.
 offsetsMap.putAll(offsetsOfFinishedSplits);
diff --git 
a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java
 
b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java
index 6366d40..579bab3 100644
--- 
a/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java
+++ 
b/flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/KafkaSourceReaderTest.java
@@ -207,6 +207,20 @@ public class KafkaSourceReaderTest extends 
SourceReaderTestBase reader = (KafkaSourceReader) 
createReader()) {
+KafkaPartitionSplit split =
+new KafkaPartitionSplit(
+new TopicPartition(TOPIC, 0), 
KafkaPartitionSplit.EARLIEST_OFFSET);
+reader.addSplits(Collections.singletonList(split));
+reader.snapshotState(checkpointId);
+assertEquals(1, reader.getOffsetsToCommit().size());
+
assertTrue(reader.getOffsetsToCommit().get(checkpointId).isEmpty());
+}
+}
+
 // --
 
 @Override


[flink] 04/12: [FLINK-20114][connector/common] SourceOperatorStreamTask should update the numRecordsOutCount metric

2021-03-29 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git

commit b23f65f17b4de16cf0fd91b225c3c3c61c849450
Author: Dong Lin 
AuthorDate: Sat Mar 6 13:07:31 2021 +0800

[FLINK-20114][connector/common] SourceOperatorStreamTask should update the 
numRecordsOutCount metric
---
 .../io/StreamMultipleInputProcessorFactory.java|  2 +-
 .../runtime/tasks/SourceOperatorStreamTask.java| 18 --
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git 
a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/StreamMultipleInputProcessorFactory.java
 
b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/StreamMultipleInputProcessorFactory.java
index fdea270..a70b112 100644
--- 
a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/StreamMultipleInputProcessorFactory.java
+++ 
b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/io/StreamMultipleInputProcessorFactory.java
@@ -351,7 +351,7 @@ public class StreamMultipleInputProcessorFactory {
 WatermarkGauge inputWatermarkGauge,
 MultiStreamStreamStatusTracker streamStatusTracker,
 int inputIndex) {
-super(chainedSourceOutput, streamStatusMaintainer, 
inputWatermarkGauge);
+super(chainedSourceOutput, streamStatusMaintainer, new 
SimpleCounter(), inputWatermarkGauge);
 this.streamStatusTracker = streamStatusTracker;
 this.inputIndex = inputIndex;
 }
diff --git 
a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java
 
b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java
index b922dba..1c8589f 100644
--- 
a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java
+++ 
b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/SourceOperatorStreamTask.java
@@ -21,10 +21,12 @@ package org.apache.flink.streaming.runtime.tasks;
 import org.apache.flink.annotation.Internal;
 import org.apache.flink.api.connector.source.ExternallyInducedSourceReader;
 import org.apache.flink.api.connector.source.SourceReader;
+import org.apache.flink.metrics.Counter;
 import org.apache.flink.runtime.checkpoint.CheckpointMetaData;
 import org.apache.flink.runtime.checkpoint.CheckpointOptions;
 import org.apache.flink.runtime.checkpoint.CheckpointType;
 import org.apache.flink.runtime.execution.Environment;
+import org.apache.flink.runtime.metrics.groups.OperatorMetricGroup;
 import org.apache.flink.runtime.state.CheckpointStorageLocationReference;
 import org.apache.flink.streaming.api.operators.Output;
 import org.apache.flink.streaming.api.operators.SourceOperator;
@@ -72,7 +74,7 @@ public class SourceOperatorStreamTask extends 
StreamTask sourceReader = mainOperator.getSourceReader();
+final SourceReader sourceReader = 
sourceOperator.getSourceReader();
 final StreamTaskInput input;
 
 if (sourceReader instanceof ExternallyInducedSourceReader) {
@@ -88,11 +90,19 @@ public class SourceOperatorStreamTask extends 
StreamTask(sourceOperator, 0, 0);
 }
 
+Counter numRecordsOut =
+((OperatorMetricGroup) sourceOperator.getMetricGroup())
+.getIOMetricGroup()
+.getNumRecordsOutCounter();
+
 // The SourceOperatorStreamTask doesn't have any inputs, so there is 
no need for
 // a WatermarkGauge on the input.
 output =
 new AsyncDataOutputToOutput<>(
-operatorChain.getMainOperatorOutput(), 
getStreamStatusMaintainer(), null);
+operatorChain.getMainOperatorOutput(),
+getStreamStatusMaintainer(),
+numRecordsOut,
+null);
 
 inputProcessor = new StreamOneInputProcessor<>(input, output, 
operatorChain);
 }
@@ -144,20 +154,24 @@ public class SourceOperatorStreamTask extends 
StreamTask extends 
AbstractDataOutput {
 
 private final Output> output;
+private final Counter numRecordsOut;
 @Nullable private final WatermarkGauge inputWatermarkGauge;
 
 public AsyncDataOutputToOutput(
 Output> output,
 StreamStatusMaintainer streamStatusMaintainer,
+Counter numRecordsOut,
 @Nullable WatermarkGauge inputWatermarkGauge) {
 super(streamStatusMaintainer);
 
 this.output = checkNotNull(output);
+this.numRecordsOut = numRecordsOut;
 this.inputWatermarkGauge = inputWatermarkGauge;
 }
 
 @Override
 public void emitRecord(StreamRecord streamRecord) {
+  
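
The emitRecord body is cut off above; given the counter threaded through this
commit, it presumably bumps numRecordsOut before forwarding the record. A
sketch of the likely shape (not the verbatim patch):

    @Override
    public void emitRecord(StreamRecord<T> streamRecord) {
        numRecordsOut.inc();          // count records leaving the source operator
        output.collect(streamRecord); // forward downstream as before
    }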

[flink] branch master updated (5f0c76f -> 8d2fa1b)

2021-03-29 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git.


from 5f0c76f  [FLINK-21330] Optimize the performance of 
PipelinedRegionSchedulingStrategy
 new 7a36623  [FLINK-20114][connector/kafka] KafkaSourceReader should not 
commit offsets for partitions whose offsets have not been initialized.
 new b0077c4  [FLINK-20114][connector/common] SourceCoordinatorContext 
should not log and fail job again if it receives InterruptedException after it 
is closed.
 new b5b9682  [hotfix][connector/kafka] Reduce the offset commit logging 
verbosity from INFO to DEBUG.
 new b23f65f  [FLINK-20114][connector/common] SourceOperatorStreamTask 
should update the numRecordsOutCount metric
 new 4c6c423  [FLINK-20114][connector/kafka] KafkaSourceEnumerator should 
close the admin client early if periodic partition discovery is disabled.
 new b3471f2  [hotfix][connector/kafka] Remove the unused close.timeout.ms 
config.
 new b831b85  [FLINK-20114][connector/kafka] 
PartitionOffsetsRetrieverImpl.committedOffsets() should handle the case without 
committed offsets.
 new 8de1784  [FLINK-20114][connector/kafka] SourceOperatorStreamTask 
should check the committed offset first before using OffsetResetStrategy.
 new a4360c7  [FLINK-20114][connector/kafka] Auto offset commit should be 
disabled by default.
 new 3abf555  [FLINK-20114][connector/kafka] Add IT cases for KafkaSource 
by migrating IT cases from FlinkKafkaConsumer.
 new e079aef  [hotfix][examples] Update StateMachineExample to use 
KafkaSource
 new 8d2fa1b  [FLINK-20114][connector/kafka] Remove duplicated warning and 
remove redundant default value for partition.discovery.interval.ms

The 12 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../connector/kafka/source/KafkaSourceBuilder.java |   9 +-
 .../connector/kafka/source/KafkaSourceOptions.java |   8 +-
 .../source/enumerator/KafkaSourceEnumerator.java   |  17 +-
 .../initializer/SpecifiedOffsetsInitializer.java   |   6 +
 .../kafka/source/reader/KafkaSourceReader.java |  24 +-
 .../connector/kafka/source/KafkaSourceITCase.java  |  63 +
 .../kafka/source/KafkaSourceLegacyITCase.java  | 162 +
 .../initializer/OffsetsInitializerTest.java|  21 +-
 .../kafka/source/reader/KafkaSourceReaderTest.java |  14 ++
 .../connectors/kafka/KafkaConsumerTestBase.java| 260 +++--
 .../connectors/kafka/KafkaTestEnvironment.java |  20 ++
 .../connectors/kafka/KafkaTestEnvironmentImpl.java |  19 ++
 .../connector/source/SplitEnumeratorContext.java   |   2 +-
 .../examples/statemachine/StateMachineExample.java |  69 +++---
 .../coordinator/SourceCoordinatorContext.java  |  23 +-
 .../coordinator/SourceCoordinatorContextTest.java  |  49 +++-
 .../coordinator/SourceCoordinatorTestBase.java |   7 +-
 .../io/StreamMultipleInputProcessorFactory.java|   6 +-
 .../runtime/tasks/SourceOperatorStreamTask.java|  18 +-
 19 files changed, 651 insertions(+), 146 deletions(-)
 create mode 100644 
flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/KafkaSourceLegacyITCase.java


[flink] 09/12: [FLINK-20114][connector/kafka] Auto offset commit should be disabled by default.

2021-03-29 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git

commit a4360c7f4128eb7ec97c9707b184ab121e829312
Author: Dong Lin 
AuthorDate: Sun Mar 7 23:05:02 2021 +0800

[FLINK-20114][connector/kafka] Auto offset commit should be disabled by 
default.
---
 .../java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java | 1 +
 1 file changed, 1 insertion(+)

diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
index 73ba675..8ede378 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/KafkaSourceBuilder.java
@@ -435,6 +435,7 @@ public class KafkaSourceBuilder {
 true);
 maybeOverride(
 ConsumerConfig.GROUP_ID_CONFIG, "KafkaSource-" + new 
Random().nextLong(), false);
+maybeOverride(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false", 
false);
 maybeOverride(
 ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,
 
startingOffsetsInitializer.getAutoOffsetResetStrategy().name().toLowerCase(),
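
Since maybeOverride(..., false) only fills in a default, a user-supplied
value still wins. Anyone who explicitly wants Kafka's client-side auto commit
back can opt in through the builder; a sketch (broker and topic are
illustrative):

    KafkaSource<String> source =
            KafkaSource.<String>builder()
                    .setBootstrapServers("broker:9092")
                    .setTopics("topic")
                    .setDeserializer(
                            KafkaRecordDeserializationSchema.valueOnly(new SimpleStringSchema()))
                    // Re-enables the consumer's auto commit, overriding the new
                    // "false" default; otherwise offsets are only committed on
                    // checkpoints.
                    .setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true")
                    .setProperty(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "5000")
                    .build();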


[flink] 05/12: [FLINK-20114][connector/kafka] KafkaSourceEnumerator should close the admin client early if periodic partition discovery is disabled.

2021-03-29 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git

commit 4c6c42392e7d116dee572fefb2e4e0e02abacefb
Author: Dong Lin 
AuthorDate: Sun Mar 7 13:10:08 2021 +0800

[FLINK-20114][connector/kafka] KafkaSourceEnumerator should close the admin 
client early if periodic partition discovery is disabled.
---
 .../kafka/source/enumerator/KafkaSourceEnumerator.java | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/KafkaSourceEnumerator.java
 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/KafkaSourceEnumerator.java
index bc34630..92ac770 100644
--- 
a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/KafkaSourceEnumerator.java
+++ 
b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/enumerator/KafkaSourceEnumerator.java
@@ -149,7 +149,15 @@ public class KafkaSourceEnumerator
 + "without periodic partition discovery.",
 consumerGroupId);
 context.callAsync(
-this::discoverAndInitializePartitionSplit, 
this::handlePartitionSplitChanges);
+() -> {
+try {
+return discoverAndInitializePartitionSplit();
+} finally {
+// Close the admin client early because we won't 
use it anymore.
+adminClient.close();
+}
+},
+this::handlePartitionSplitChanges);
 }
 }
 


[flink] 02/12: [FLINK-20114][connector/common] SourceCoordinatorContext should not log and fail job again if it receives InterruptedException after it is closed.

2021-03-29 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git

commit b0077c4a4a0f867069e428b1fa13ee87fc5a5556
Author: Dong Lin 
AuthorDate: Sat Mar 6 12:45:23 2021 +0800

[FLINK-20114][connector/common] SourceCoordinatorContext should not log and 
fail job again if it receives InterruptedException after it is closed.
---
 .../connector/source/SplitEnumeratorContext.java   |  2 +-
 .../coordinator/SourceCoordinatorContext.java  | 23 ++
 .../coordinator/SourceCoordinatorContextTest.java  | 49 +-
 .../coordinator/SourceCoordinatorTestBase.java |  7 +++-
 4 files changed, 70 insertions(+), 11 deletions(-)

diff --git 
a/flink-core/src/main/java/org/apache/flink/api/connector/source/SplitEnumeratorContext.java
 
b/flink-core/src/main/java/org/apache/flink/api/connector/source/SplitEnumeratorContext.java
index bef1666..66b3ef4 100644
--- 
a/flink-core/src/main/java/org/apache/flink/api/connector/source/SplitEnumeratorContext.java
+++ 
b/flink-core/src/main/java/org/apache/flink/api/connector/source/SplitEnumeratorContext.java
@@ -110,7 +110,7 @@ public interface SplitEnumeratorContext {
 /**
  * Invoke the given callable periodically and handover the return value to 
the handler which
  * will be executed by the source coordinator. When this method is invoked 
multiple times, The
- * Coallbles may be executed in a thread pool concurrently.
+ * Callables may be executed in a thread pool concurrently.
  *
  * It is important to make sure that the callable does not modify any 
shared state,
  * especially the states that will be a part of the {@link 
SplitEnumerator#snapshotState()}.
diff --git 
a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java
 
b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java
index a262807..6c0dcd9 100644
--- 
a/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java
+++ 
b/flink-runtime/src/main/java/org/apache/flink/runtime/source/coordinator/SourceCoordinatorContext.java
@@ -19,6 +19,7 @@ limitations under the License.
 package org.apache.flink.runtime.source.coordinator;
 
 import org.apache.flink.annotation.Internal;
+import org.apache.flink.annotation.VisibleForTesting;
 import org.apache.flink.api.connector.source.ReaderInfo;
 import org.apache.flink.api.connector.source.SourceEvent;
 import org.apache.flink.api.connector.source.SourceSplit;
@@ -54,6 +55,7 @@ import java.util.concurrent.ExecutionException;
 import java.util.concurrent.Executor;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.TimeUnit;
 import java.util.function.BiConsumer;
 
@@ -94,6 +96,7 @@ public class SourceCoordinatorContext
 private final SourceCoordinatorProvider.CoordinatorExecutorThreadFactory
 coordinatorThreadFactory;
 private final String coordinatorThreadName;
+private volatile boolean closed;
 
 public SourceCoordinatorContext(
 ExecutorService coordinatorExecutor,
@@ -103,18 +106,22 @@ public class SourceCoordinatorContext
             SimpleVersionedSerializer<SplitT> splitSerializer) {
         this(
                 coordinatorExecutor,
+                Executors.newScheduledThreadPool(
+                        numWorkerThreads,
+                        new ExecutorThreadFactory(
+                                coordinatorThreadFactory.getCoordinatorThreadName() + "-worker")),
                 coordinatorThreadFactory,
-                numWorkerThreads,
                 operatorCoordinatorContext,
                 splitSerializer,
                 new SplitAssignmentTracker<>());
     }
 
     // Package private method for unit test.
+    @VisibleForTesting
     SourceCoordinatorContext(
             ExecutorService coordinatorExecutor,
+            ScheduledExecutorService workerExecutor,
             SourceCoordinatorProvider.CoordinatorExecutorThreadFactory
                     coordinatorThreadFactory,
-            int numWorkerThreads,
             OperatorCoordinator.Context operatorCoordinatorContext,
             SimpleVersionedSerializer<SplitT> splitSerializer,
             SplitAssignmentTracker<SplitT> splitAssignmentTracker) {
@@ -132,12 +139,7 @@ public class SourceCoordinatorContext
                 new ThrowableCatchingRunnable(
                         this::handleUncaughtExceptionFromAsyncCall, runnable));
 
-        this.notifier =
-                new ExecutorNotifier(
-                        Executors.newScheduledThreadPool(
-                                numWorkerThreads,
-                                new ExecutorThreadFactory(coordinatorThrea
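
The net effect of the hunks above is that the worker pool becomes a constructor argument (workerExecutor) instead of being built inside the constructor, so a test can substitute a deterministic executor. A generic sketch of that injection pattern, not the actual Flink test harness:

    import java.util.concurrent.Executors;
    import java.util.concurrent.ScheduledExecutorService;
    import java.util.concurrent.TimeUnit;

    class WorkerOwner {
        private final ScheduledExecutorService workerExecutor;

        // Production passes a real thread pool; a test can pass a
        // single-threaded or manually driven ScheduledExecutorService
        // and observe effects without sleeping or racing.
        WorkerOwner(ScheduledExecutorService workerExecutor) {
            this.workerExecutor = workerExecutor;
        }

        void schedule(Runnable task, long periodMs) {
            workerExecutor.scheduleAtFixedRate(task, 0L, periodMs, TimeUnit.MILLISECONDS);
        }

        void close() throws InterruptedException {
            workerExecutor.shutdown();
            workerExecutor.awaitTermination(10, TimeUnit.SECONDS);
        }

        public static void main(String[] args) throws InterruptedException {
            WorkerOwner owner = new WorkerOwner(Executors.newScheduledThreadPool(1));
            owner.schedule(() -> System.out.println("tick"), 100L);
            Thread.sleep(350L);
            owner.close();
        }
    }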

[flink-ml] 05/05: [FLINK-21976] Update README.md to include overview, build, contributing and license sections

2021-03-29 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink-ml.git

commit 08d058046f34b711128e0646ffbdc7e384c22064
Author: Dong Lin 
AuthorDate: Mon Mar 29 17:00:54 2021 +0800

[FLINK-21976] Update README.md to include overview, build, contributing and 
license sections
---
 README.md | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index c2031eb..e5d4eaf 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,17 @@
-# flink-ml
+Flink ML is a library which provides machine learning (ML) APIs and libraries 
that simplify the building of machine learning pipelines. It provides a set of 
standard ML APIs for MLlib developers to implement ML algorithms, as well as 
libraries of ML algorithms that can be used to build ML pipelines for both 
training and inference jobs.
+
+Flink ML is developed under the umbrella of [Apache 
Flink](https://flink.apache.org/).
+
+## Building the Project
+
+Run the `mvn clean package` command.
+
+Then you will find a JAR file that contains your application, plus any 
libraries that you may have added as dependencies to the application: 
`target/<artifact-id>-<version>.jar`.
+
+## Contributing
+
+You can learn more about how to contribute in the [Apache Flink 
website](https://flink.apache.org/contributing/how-to-contribute.html). For 
code contributions, please read carefully the [Contributing 
Code](https://flink.apache.org/contributing/contribute-code.html) section for 
an overview of ongoing community work.
+
+## License
+
+The code in this repository is licensed under the [Apache Software License 
2](LICENSE).
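
As a rough illustration of the Estimator/Transformer pipeline shape the overview refers to (the real interfaces live under org.apache.flink.ml.api.core, per the change summary later in this digest; the types below are simplified stand-ins, not the actual Flink ML signatures):

    import java.util.ArrayList;
    import java.util.List;

    class PipelineSketch<D> {

        interface Transformer<T> { T transform(T data); }      // inference step
        interface Estimator<T> { Transformer<T> fit(T data); } // training step

        private final List<Estimator<D>> stages = new ArrayList<>();

        PipelineSketch<D> add(Estimator<D> stage) {
            stages.add(stage);
            return this;
        }

        // Fit each stage on the output of the previously fitted stages and
        // return the composed transformer, usable in an inference job.
        Transformer<D> fit(D trainingData) {
            List<Transformer<D>> fitted = new ArrayList<>();
            D current = trainingData;
            for (Estimator<D> stage : stages) {
                Transformer<D> t = stage.fit(current);
                fitted.add(t);
                current = t.transform(current);
            }
            return data -> {
                D d = data;
                for (Transformer<D> t : fitted) {
                    d = t.transform(d);
                }
                return d;
            };
        }
    }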


[flink-ml] 02/05: [FLINK-21976] Add CODE_OF_CONDUCT.md, LICENSE and .gitignore

2021-03-29 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink-ml.git

commit f8b84cc202ecbe7de2ca26651191b3991319
Author: Dong Lin 
AuthorDate: Sun Mar 28 19:28:53 2021 +0800

[FLINK-21976] Add CODE_OF_CONDUCT.md, LICENSE and .gitignore
---
 .gitignore |  18 +
 CODE_OF_CONDUCT.md |   3 +
 LICENSE| 201 +
 3 files changed, 222 insertions(+)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000..afd1e95
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,18 @@
+.cache
+.classpath
+.idea
+.metadata
+.settings
+.project
+target
+.version.properties
+*.class
+*.iml
+*.swp
+*.jar
+*.zip
+*.log
+*.pyc
+.DS_Store
+*.ipr
+*.iws
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 000..8821b1e
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,3 @@
+# Code of Conduct
+
+Apache Flink and all its associated repositories follow the [Code of Conduct 
of the Apache Software 
Foundation](https://www.apache.org/foundation/policies/conduct).
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 000..261eeb9
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+   Version 2.0, January 2004
+http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+  "License" shall mean the terms and conditions for use, reproduction,
+  and distribution as defined by Sections 1 through 9 of this document.
+
+  "Licensor" shall mean the copyright owner or entity authorized by
+  the copyright owner that is granting the License.
+
+  "Legal Entity" shall mean the union of the acting entity and all
+  other entities that control, are controlled by, or are under common
+  control with that entity. For the purposes of this definition,
+  "control" means (i) the power, direct or indirect, to cause the
+  direction or management of such entity, whether by contract or
+  otherwise, or (ii) ownership of fifty percent (50%) or more of the
+  outstanding shares, or (iii) beneficial ownership of such entity.
+
+  "You" (or "Your") shall mean an individual or Legal Entity
+  exercising permissions granted by this License.
+
+  "Source" form shall mean the preferred form for making modifications,
+  including but not limited to software source code, documentation
+  source, and configuration files.
+
+  "Object" form shall mean any form resulting from mechanical
+  transformation or translation of a Source form, including but
+  not limited to compiled object code, generated documentation,
+  and conversions to other media types.
+
+  "Work" shall mean the work of authorship, whether in Source or
+  Object form, made available under the License, as indicated by a
+  copyright notice that is included in or attached to the work
+  (an example is provided in the Appendix below).
+
+  "Derivative Works" shall mean any work, whether in Source or Object
+  form, that is based on (or derived from) the Work and for which the
+  editorial revisions, annotations, elaborations, or other modifications
+  represent, as a whole, an original work of authorship. For the purposes
+  of this License, Derivative Works shall not include works that remain
+  separable from, or merely link (or bind by name) to the interfaces of,
+  the Work and Derivative Works thereof.
+
+  "Contribution" shall mean any work of authorship, including
+  the original version of the Work and any modifications or additions
+  to that Work or Derivative Works thereof, that is intentionally
+  submitted to Licensor for inclusion in the Work by the copyright owner
+  or by an individual or Legal Entity authorized to submit on behalf of
+  the copyright owner. For the purposes of this definition, "submitted"
+  means any form of electronic, verbal, or written communication sent
+  to the Licensor or its representatives, including but not limited to
+  communication on electronic mailing lists, source code control systems,
+  and issue tracking systems that are managed by, or on behalf of, the
+  Licensor for the purpose of discussing and improving the Work, but
+  excluding communication that is conspicuously marked or otherwise
+  designated in writing by the copyright owner as "Not a Contribution."
+
+  "Contributor" shall mean Licensor and any individual or Legal Entity
+  on behalf of whom a Contribution has been received by Licensor and
+  subsequently incorporated within the Work.
+
+   

[flink-ml] 04/05: [FLINK-21976] Update pom.xml

2021-03-29 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink-ml.git

commit 8e2415bfb2ddb00074dc5a44ffd9c828d63af9c7
Author: Dong Lin 
AuthorDate: Sun Mar 28 16:51:46 2021 +0800

[FLINK-21976] Update pom.xml
---
 flink-ml-api/pom.xml   |   6 +-
 flink-ml-lib/pom.xml   |  14 +-
 flink-ml-uber/.pom.xml.swo | Bin 12288 -> 0 bytes
 flink-ml-uber/pom.xml  |   8 +-
 pom.xml| 578 -
 5 files changed, 586 insertions(+), 20 deletions(-)

diff --git a/flink-ml-api/pom.xml b/flink-ml-api/pom.xml
index dd7863d..81fdcc7 100644
--- a/flink-ml-api/pom.xml
+++ b/flink-ml-api/pom.xml
@@ -25,17 +25,17 @@ under the License.
   <parent>
     <groupId>org.apache.flink</groupId>
     <artifactId>flink-ml-parent</artifactId>
-    <version>1.13-SNAPSHOT</version>
+    <version>0.1-SNAPSHOT</version>
   </parent>
 
   <artifactId>flink-ml-api</artifactId>
-  <name>Flink : ML : API</name>
+  <name>Flink ML : API</name>
 
   <dependencies>
     <dependency>
       <groupId>org.apache.flink</groupId>
       <artifactId>flink-table-api-java</artifactId>
-      <version>${project.version}</version>
+      <version>${flink.version}</version>
       <scope>provided</scope>
     </dependency>
 
diff --git a/flink-ml-lib/pom.xml b/flink-ml-lib/pom.xml
index d6ca639..bd5f3ac 100644
--- a/flink-ml-lib/pom.xml
+++ b/flink-ml-lib/pom.xml
@@ -23,41 +23,41 @@ under the License.
   <parent>
     <groupId>org.apache.flink</groupId>
     <artifactId>flink-ml-parent</artifactId>
-    <version>1.13-SNAPSHOT</version>
+    <version>0.1-SNAPSHOT</version>
   </parent>
 
   <artifactId>flink-ml-lib_${scala.binary.version}</artifactId>
-  <name>Flink : ML : Lib</name>
+  <name>Flink ML : Lib</name>
 
   <dependencies>
     <dependency>
       <groupId>org.apache.flink</groupId>
       <artifactId>flink-ml-api</artifactId>
-      <version>${project.version}</version>
+      <version>${flink.version}</version>
       <scope>provided</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.flink</groupId>
       <artifactId>flink-table-api-java</artifactId>
-      <version>${project.version}</version>
+      <version>${flink.version}</version>
       <scope>provided</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.flink</groupId>
       <artifactId>flink-table-api-java-bridge_${scala.binary.version}</artifactId>
-      <version>${project.version}</version>
+      <version>${flink.version}</version>
       <scope>provided</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.flink</groupId>
       <artifactId>flink-table-planner_${scala.binary.version}</artifactId>
-      <version>${project.version}</version>
+      <version>${flink.version}</version>
       <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.flink</groupId>
       <artifactId>flink-clients_${scala.binary.version}</artifactId>
-      <version>${project.version}</version>
+      <version>${flink.version}</version>
       <scope>provided</scope>
     </dependency>
 
diff --git a/flink-ml-uber/.pom.xml.swo b/flink-ml-uber/.pom.xml.swo
deleted file mode 100644
index 6d87542..000
Binary files a/flink-ml-uber/.pom.xml.swo and /dev/null differ
diff --git a/flink-ml-uber/pom.xml b/flink-ml-uber/pom.xml
index f28a27f..ae9850b 100644
--- a/flink-ml-uber/pom.xml
+++ b/flink-ml-uber/pom.xml
@@ -23,11 +23,11 @@ under the License.
   <parent>
     <groupId>org.apache.flink</groupId>
     <artifactId>flink-ml-parent</artifactId>
-    <version>1.13-SNAPSHOT</version>
+    <version>0.1-SNAPSHOT</version>
   </parent>
 
   <artifactId>flink-ml-uber_${scala.binary.version}</artifactId>
-  <name>Flink : ML : Uber</name>
+  <name>Flink ML : Uber</name>
   <description>
     This module contains both the api and libraries for writing Flink ML programs.
   </description>
@@ -38,12 +38,12 @@ under the License.
     <dependency>
       <groupId>org.apache.flink</groupId>
       <artifactId>flink-ml-api</artifactId>
-      <version>${project.version}</version>
+      <version>${flink.version}</version>
     </dependency>
     <dependency>
       <groupId>org.apache.flink</groupId>
       <artifactId>flink-ml-lib_${scala.binary.version}</artifactId>
-      <version>${project.version}</version>
+      <version>${flink.version}</version>
     </dependency>
   
 
diff --git a/pom.xml b/pom.xml
index d5a5018..0e9479a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -23,20 +23,586 @@ under the License.
   <modelVersion>4.0.0</modelVersion>
 
   <parent>
-    <groupId>org.apache.flink</groupId>
-    <artifactId>flink-parent</artifactId>
-    <version>1.13-SNAPSHOT</version>
-    <relativePath>..</relativePath>
+    <groupId>org.apache</groupId>
+    <artifactId>apache</artifactId>
+    <version>23</version>
   </parent>
 
   <artifactId>flink-ml-parent</artifactId>
-  <name>Flink : ML : </name>
-
+  <groupId>org.apache.flink</groupId>
+  <name>Flink ML</name>
+  <version>0.1-SNAPSHOT</version>
   <packaging>pom</packaging>
 
+  <url>http://flink.apache.org</url>
+  <inceptionYear>2019</inceptionYear>
+
+  <licenses>
+    <license>
+      <name>The Apache Software License, Version 2.0</name>
+      <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
+      <distribution>repo</distribution>
+    </license>
+  </licenses>
+
+  <scm>
+    <url>https://github.com/apache/flink-ml</url>
+    <connection>g...@github.com:apache/flink-ml.git</connection>
+    <developerConnection>scm:git:https://gitbox.apache.org/repos/asf/flink-ml.git</developerConnection>
+  </scm>
+
   <modules>
     <module>flink-ml-api</module>
     <module>flink-ml-lib</module>
     <module>flink-ml-uber</module>
   </modules>
+
+  
+12.0
+2.11
+2.10.1
+1.8
+2.4.2
+4.12
+1C
+true
+1.12.1
+
+
+
+**/*Test.*
+  
+
+
+  
+
+
+
+  junit
+  junit
+  jar
+  test
+
+  
+
+  
+
+  
+org.apache.flink
+flink-shaded-jackson
+${jackson.version}-${flink.shaded.version}
+  
+
+  
+junit
+junit
+${junit.version}
+  
+
+  
+
+
+  
+
+  release
+  
+
+  release
+
+  
+  
+1.8
+  
+  
+
+  
+org.apache.maven.plugins
+maven-gpg-plugin
+1.4
+
+  
+sign-artifacts
+verify
+
+  sign
+
+  
+
+  
+  
+org.apache.maven.plugins
+maven-enforcer-plugin
+
+  
+enforce-maven
+
+  enforce
+
+
+  
+
+  1.

[flink-ml] 03/05: [FLINK-21976] Add files needed for checkstyle under tools/maven

2021-03-29 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink-ml.git

commit bdb1b93130254dcdb868cc48b3873db825312c72
Author: Dong Lin 
AuthorDate: Sun Mar 28 22:07:10 2021 +0800

[FLINK-21976] Add files needed for checkstyle under tools/maven
---
 tools/maven/checkstyle.xml   | 562 +++
 tools/maven/suppressions.xml |  85 +++
 2 files changed, 647 insertions(+)

diff --git a/tools/maven/checkstyle.xml b/tools/maven/checkstyle.xml
new file mode 100644
index 000..2048fd1
--- /dev/null
+++ b/tools/maven/checkstyle.xml
@@ -0,0 +1,562 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE module PUBLIC
+    "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
+    "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">

[flink-ml] branch master updated (ea541d7 -> 08d0580)

2021-03-29 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/flink-ml.git.


from ea541d7  Initialize repository.
 new 8f86ecf  [FLINK-21976] Move files under flink/flink-ml-parent to 
flink-ml repo
 new f8b84cc  [FLINK-21976] Add CODE_OF_CONDUCT.md, LICENSE and .gitignore
 new bdb1b93  [FLINK-21976] Add files needed for checkstyle under 
tools/maven
 new 8e2415b  [FLINK-21976] Update pom.xml
 new 08d0580  [FLINK-21976] Update README.md to include overview, build, 
contributing and license sections

The 5 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .gitignore |  18 +
 CODE_OF_CONDUCT.md |   3 +
 LICENSE| 201 +++
 README.md  |  18 +-
 flink-ml-api/pom.xml   |  47 ++
 .../org/apache/flink/ml/api/core/Estimator.java|  47 ++
 .../java/org/apache/flink/ml/api/core/Model.java   |  37 ++
 .../org/apache/flink/ml/api/core/Pipeline.java | 259 +
 .../apache/flink/ml/api/core/PipelineStage.java|  46 ++
 .../org/apache/flink/ml/api/core/Transformer.java  |  42 ++
 .../apache/flink/ml/api/misc/param/ParamInfo.java  | 151 +
 .../flink/ml/api/misc/param/ParamInfoFactory.java  | 134 +
 .../flink/ml/api/misc/param/ParamValidator.java|  39 ++
 .../org/apache/flink/ml/api/misc/param/Params.java | 277 ++
 .../apache/flink/ml/api/misc/param/WithParams.java |  60 ++
 .../flink/ml/util/param/ExtractParamInfosUtil.java |  71 +++
 .../org/apache/flink/ml/api/core/PipelineTest.java | 167 ++
 .../org/apache/flink/ml/api/misc/ParamsTest.java   | 179 ++
 .../ml/util/param/ExtractParamInfosUtilTest.java   | 109 
 flink-ml-lib/pom.xml   |  86 +++
 .../org/apache/flink/ml/common/MLEnvironment.java  | 151 +
 .../flink/ml/common/MLEnvironmentFactory.java  | 116 
 .../org/apache/flink/ml/common/linalg/BLAS.java| 234 
 .../apache/flink/ml/common/linalg/DenseMatrix.java | 577 +++
 .../apache/flink/ml/common/linalg/DenseVector.java | 379 +
 .../apache/flink/ml/common/linalg/MatVecOp.java| 307 +++
 .../flink/ml/common/linalg/SparseVector.java   | 574 +++
 .../org/apache/flink/ml/common/linalg/Vector.java  |  89 +++
 .../flink/ml/common/linalg/VectorIterator.java |  73 +++
 .../apache/flink/ml/common/linalg/VectorUtil.java  | 240 
 .../org/apache/flink/ml/common/mapper/Mapper.java  |  79 +++
 .../flink/ml/common/mapper/MapperAdapter.java  |  46 ++
 .../apache/flink/ml/common/mapper/ModelMapper.java |  66 +++
 .../flink/ml/common/mapper/ModelMapperAdapter.java |  62 +++
 .../common/model/BroadcastVariableModelSource.java |  47 ++
 .../apache/flink/ml/common/model/ModelSource.java  |  40 ++
 .../flink/ml/common/model/RowsModelSource.java |  46 ++
 .../basicstatistic/MultivariateGaussian.java   | 137 +
 .../ml/common/utils/DataSetConversionUtil.java | 167 ++
 .../ml/common/utils/DataStreamConversionUtil.java  | 167 ++
 .../flink/ml/common/utils/OutputColsHelper.java| 211 +++
 .../apache/flink/ml/common/utils/TableUtil.java| 439 +++
 .../apache/flink/ml/common/utils/VectorTypes.java  |  43 ++
 .../org/apache/flink/ml/operator/AlgoOperator.java | 186 +++
 .../flink/ml/operator/batch/BatchOperator.java | 113 
 .../operator/batch/source/TableSourceBatchOp.java  |  40 ++
 .../flink/ml/operator/stream/StreamOperator.java   | 114 
 .../stream/source/TableSourceStreamOp.java |  40 ++
 .../flink/ml/params/shared/HasMLEnvironmentId.java |  43 ++
 .../ml/params/shared/colname/HasOutputCol.java |  48 ++
 .../shared/colname/HasOutputColDefaultAsNull.java  |  49 ++
 .../ml/params/shared/colname/HasOutputCols.java|  48 ++
 .../shared/colname/HasOutputColsDefaultAsNull.java |  49 ++
 .../ml/params/shared/colname/HasPredictionCol.java |  42 ++
 .../shared/colname/HasPredictionDetailCol.java |  47 ++
 .../ml/params/shared/colname/HasReservedCols.java  |  45 ++
 .../ml/params/shared/colname/HasSelectedCol.java   |  48 ++
 .../colname/HasSelectedColDefaultAsNull.java   |  49 ++
 .../ml/params/shared/colname/HasSelectedCols.java  |  48 ++
 .../colname/HasSelectedColsDefaultAsNull.java  |  49 ++
 .../apache/flink/ml/pipeline/EstimatorBase.java| 103 
 .../org/apache/flink/ml/pipeline/ModelBase.java|  68 +++
 .../flink/ml/pipeline/PipelineStageBase.java   |  72 +++
 .../apache/flink/ml/pipeline/TransformerBase.java  |  99 
 .../apache/flink/ml/common/MLEnviron

[flink-ml] 01/01: Initialize repository.

2021-03-28 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink-ml.git

commit ea541d70e4a31006ba0a06d8465e77dda81528f7
Author: Jiangjie (Becket) Qin 
AuthorDate: Mon Mar 29 14:19:04 2021 +0800

Initialize repository.
---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
new file mode 100644
index 000..c2031eb
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+# flink-ml


[flink-ml] branch master created (now ea541d7)

2021-03-28 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/flink-ml.git.


  at ea541d7  Initialize repository.

This branch includes the following new commits:

 new ea541d7  Initialize repository.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[flink] branch master updated (3582bb2 -> 844601b)

2021-03-18 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git.


from 3582bb2  [FLINK-21811][blink-table-planner] Support StreamExecJoin 
json serialization/deserialization
 add 844601b  [FLINK-21160][connector/kafka] Use deserializer class 
instance instead of class name to avoid NPE when invoking getProducedType 
(#14784)

No new revisions were added by this update.

Summary of changes:
 .../KafkaValueOnlyDeserializerWrapper.java |  9 ++---
 .../connector/kafka/source/KafkaSourceITCase.java  | 47 ++
 2 files changed, 51 insertions(+), 5 deletions(-)
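
The commit above keeps the configured deserializer instance instead of re-instantiating it from its class name. The failure mode that avoids can be sketched generically; the types below are illustrative stand-ins, not the actual wrapper code:

    class InstanceVsClassName {

        // Stand-in for a deserializer whose produced type depends on
        // construction-time state (the analogue of getProducedType()).
        static class StringDeserializer {
            private final String charsetName;

            StringDeserializer(String charsetName) {
                this.charsetName = charsetName;
            }

            String producedType() {
                return "String[" + charsetName + "]";
            }
        }

        public static void main(String[] args) throws Exception {
            StringDeserializer configured = new StringDeserializer("UTF-8");

            // Keeping only the class name forces reflective re-creation, which
            // drops all construction-time state; here it fails outright because
            // there is no no-arg constructor, and with one it would yield a
            // null charsetName and a null-pointer-style failure later on.
            try {
                Class.forName(configured.getClass().getName())
                        .getDeclaredConstructor()
                        .newInstance();
            } catch (NoSuchMethodException e) {
                System.out.println("cannot rebuild from class name: " + e);
            }

            // Keeping the instance preserves its state.
            System.out.println(configured.producedType());
        }
    }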


[flink] branch master updated (cb987a1 -> 2339616)

2021-03-11 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git.


from cb987a1  [hotfix][connectors/jdbc] Use full checkpoint ID in XA global 
transaction ID
 add 133385e  [FLINK-20379][connector/kafka] Rename KafkaRecordDeserializer 
to KafkaRecordDeserializationSchema to follow the naming convention.
 add 0f89bc5  [FLINK-20379][connector/common] Add a method of 
getUserCodeClassLoader() method to the SourceReaderContext.
 add 1b7939b  [FLINK-20379][connector/kafka] Added methods valueOnly(...) 
and open(..) in the KafkaRecordDeserializationSchema interface to enable the 
reuse of the DeserializationSchema and KafkaDeserializationSchema.
 add 3102bcf  [FLINK-20379][connector/kafka] Add a convenient method 
setValueOnlyDeserializer(DeserializationSchema) to KafkaSourceBuilder.
 add 2339616  [FLINK-20379][connector/common][test] Add 
TestingDeserializationContext and KafkaRecordDeserializationSchemaTest

No new revisions were added by this update.

Summary of changes:
 .../file/src/FileSourceHeavyThroughputTest.java|   7 ++
 .../flink/connector/kafka/source/KafkaSource.java  |  27 -
 .../connector/kafka/source/KafkaSourceBuilder.java |  29 -
 .../source/reader/KafkaPartitionSplitReader.java   |   8 +-
 .../KafkaDeserializationSchemaWrapper.java |  65 ++
 .../KafkaRecordDeserializationSchema.java  | 131 +
 .../deserializer/KafkaRecordDeserializer.java  |  72 ---
 ...afkaValueOnlyDeserializationSchemaWrapper.java} |  35 +++---
 ...java => KafkaValueOnlyDeserializerWrapper.java} |  30 -
 .../connector/kafka/source/KafkaSourceITCase.java  |  11 +-
 .../reader/KafkaPartitionSplitReaderTest.java  |  15 ++-
 .../kafka/source/reader/KafkaSourceReaderTest.java |  11 +-
 .../KafkaRecordDeserializationSchemaTest.java  | 123 +++
 .../api/connector/source/SourceReaderContext.java  |   9 ++
 .../source/lib/NumberSequenceSourceTest.java   |   7 ++
 .../streaming/api/operators/SourceOperator.java|  19 +++
 .../TestingDeserializationContext.java |  31 ++---
 .../source/reader/SourceReaderTestBase.java|   2 +-
 .../source/reader/TestingReaderContext.java|   7 ++
 19 files changed, 491 insertions(+), 148 deletions(-)
 create mode 100644 
flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/deserializer/KafkaDeserializationSchemaWrapper.java
 create mode 100644 
flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/deserializer/KafkaRecordDeserializationSchema.java
 delete mode 100644 
flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/deserializer/KafkaRecordDeserializer.java
 copy 
flink-connectors/{flink-connector-gcp-pubsub/src/main/java/org/apache/flink/streaming/connectors/gcp/pubsub/DeserializationSchemaWrapper.java
 => 
flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/deserializer/KafkaValueOnlyDeserializationSchemaWrapper.java}
 (58%)
 rename 
flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/deserializer/{ValueDeserializerWrapper.java
 => KafkaValueOnlyDeserializerWrapper.java} (72%)
 create mode 100644 
flink-connectors/flink-connector-kafka/src/test/java/org/apache/flink/connector/kafka/source/reader/deserializer/KafkaRecordDeserializationSchemaTest.java
 copy 
flink-runtime/src/test/java/org/apache/flink/runtime/metrics/util/TestReporter.java
 => 
flink-test-utils-parent/flink-connector-test-utils/src/main/java/org/apache/flink/connector/testutils/source/deserialization/TestingDeserializationContext.java
 (54%)
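
Taken together, the commits above let a job wire a plain DeserializationSchema into the new Kafka source. A hedged sketch of the resulting builder usage; the broker address, topic, and group id are placeholders, and setValueOnlyDeserializer(...) is the convenience method named in the commit messages:

    import org.apache.flink.api.common.serialization.SimpleStringSchema;
    import org.apache.flink.connector.kafka.source.KafkaSource;

    class KafkaSourceExample {

        static KafkaSource<String> build() {
            return KafkaSource.<String>builder()
                    .setBootstrapServers("localhost:9092") // placeholder
                    .setTopics("input-topic")              // placeholder
                    .setGroupId("example-group")           // placeholder
                    // Wraps a plain DeserializationSchema so that only the
                    // Kafka record value is deserialized (FLINK-20379).
                    .setValueOnlyDeserializer(new SimpleStringSchema())
                    .build();
        }
    }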



[flink] branch release-1.12 updated (9f7c7be -> 4b1212d)

2021-03-11 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch release-1.12
in repository https://gitbox.apache.org/repos/asf/flink.git.


from 9f7c7be  [hotfix][connectors/kafka] Correctly check required configs 
in KafkaSourceBuilder
 add 4b1212d  [FLINK-21178][Runtime/Checkpointing] Task failure should 
trigger master hook's reset() (#14890)

No new revisions were added by this update.

Summary of changes:
 .../runtime/checkpoint/CheckpointCoordinator.java  |  6 +--
 .../checkpoint/CheckpointCoordinatorTest.java  | 58 ++
 2 files changed, 61 insertions(+), 3 deletions(-)



[flink] branch master updated (01794fe -> a08ba48)

2021-03-04 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git.


from 01794fe  [FLINK-21542][docs][table] Add documentation for supporting 
INSERT INTO specific columns
 add a08ba48  [FLINK-21178][runtime/checkpoint] Task failure will not 
trigger master hook's reset() (re-merge after rebase). (#15067)

No new revisions were added by this update.

Summary of changes:
 .../runtime/checkpoint/CheckpointCoordinator.java  |  6 +--
 .../checkpoint/CheckpointCoordinatorTest.java  | 53 ++
 2 files changed, 56 insertions(+), 3 deletions(-)



[flink] branch master updated: [FLINK-21178][Runtime/Checkpointing] Task failure should trigger master hook's reset() (#14890)

2021-03-02 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/master by this push:
 new 816ce96  [FLINK-21178][Runtime/Checkpointing] Task failure should 
trigger master hook's reset() (#14890)
816ce96 is described below

commit 816ce969df408dcaff52c6341be9299ffaa61805
Author: Brian Zhou 
AuthorDate: Wed Mar 3 09:03:16 2021 +0800

[FLINK-21178][Runtime/Checkpointing] Task failure should trigger master 
hook's reset() (#14890)
---
 .../runtime/checkpoint/CheckpointCoordinator.java  |  6 +--
 .../checkpoint/CheckpointCoordinatorTest.java  | 58 ++
 2 files changed, 61 insertions(+), 3 deletions(-)

diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java
index 38ddee5..c84ca91 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java
@@ -1536,13 +1536,13 @@ public class CheckpointCoordinator {
             throw new IllegalStateException("No completed checkpoint available");
         }
 
+        LOG.debug("Resetting the master hooks.");
+        MasterHooks.reset(masterHooks.values(), LOG);
+
         if (operatorCoordinatorRestoreBehavior
                 == OperatorCoordinatorRestoreBehavior.RESTORE_OR_RESET) {
             // we let the JobManager-side components know that there was a recovery,
             // even if there was no checkpoint to recover from, yet
-            LOG.debug("Resetting the master hooks.");
-            MasterHooks.reset(masterHooks.values(), LOG);
-
             LOG.info("Resetting the Operator Coordinators to an empty state.");
             restoreStateToCoordinators(
                     OperatorCoordinator.NO_CHECKPOINT, Collections.emptyMap());
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinatorTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinatorTest.java
index f8cbf4d..bb9bb15 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinatorTest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinatorTest.java
@@ -3351,6 +3351,26 @@ public class CheckpointCoordinatorTest extends TestLogger {
 }
 
     @Test
+    public void testResetCalledInRegionRecovery() throws Exception {
+        final JobID jobId = new JobID();
+
+        // set up the coordinator
+        CheckpointCoordinator checkpointCoordinator =
+                new CheckpointCoordinatorBuilder()
+                        .setJobId(jobId)
+                        .setTimer(manuallyTriggeredScheduledExecutor)
+                        .build();
+
+        TestResetHook hook = new TestResetHook("id");
+
+        // Add a master hook
+        checkpointCoordinator.addMasterHook(hook);
+        assertFalse(hook.resetCalled);
+        checkpointCoordinator.restoreLatestCheckpointedStateToSubtasks(Collections.emptySet());
+        assertTrue(hook.resetCalled);
+    }
+
+    @Test
     public void testNotifyCheckpointAbortionInOperatorCoordinator() throws Exception {
         JobVertexID jobVertexID = new JobVertexID();
         ExecutionGraph graph =
@@ -3592,4 +3612,42 @@ public class CheckpointCoordinatorTest extends TestLogger {
             return invokeCounter;
         }
     }
+
+    private static class TestResetHook implements MasterTriggerRestoreHook<String> {
+
+        private final String id;
+        boolean resetCalled;
+
+        TestResetHook(String id) {
+            this.id = id;
+            this.resetCalled = false;
+        }
+
+        @Override
+        public String getIdentifier() {
+            return id;
+        }
+
+        @Override
+        public void reset() throws Exception {
+            resetCalled = true;
+        }
+
+        @Override
+        public CompletableFuture<String> triggerCheckpoint(
+                long checkpointId, long timestamp, Executor executor) {
+            throw new UnsupportedOperationException();
+        }
+
+        @Override
+        public void restoreCheckpoint(long checkpointId, @Nullable String checkpointData)
+                throws Exception {
+            throw new UnsupportedOperationException();
+        }
+
+        @Override
+        public SimpleVersionedSerializer<String> createCheckpointDataSerializer() {
+            throw new UnsupportedOperationException();
+        }
+    }
 }



[flink] branch release-1.12 updated: [FLINK-20848][connector/kafka] Fix Kafka consumer client ID with subtask ID suffix (#14556)

2021-01-12 Thread jqin
This is an automated email from the ASF dual-hosted git repository.

jqin pushed a commit to branch release-1.12
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/release-1.12 by this push:
 new e203e24  [FLINK-20848][connector/kafka] Fix Kafka consumer client ID 
with subtask ID suffix (#14556)
e203e24 is described below

commit e203e24cf775e4d5cdd0469b9db3cf2894b2f9c5
Author: Qingsheng Ren 
AuthorDate: Wed Jan 13 10:29:05 2021 +0800

[FLINK-20848][connector/kafka] Fix Kafka consumer client ID with subtask ID 
suffix (#14556)
---
 .../flink/connector/kafka/source/reader/KafkaPartitionSplitReader.java  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReader.java b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReader.java
index 7d9e7ba..c1a827c 100644
--- a/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReader.java
+++ b/flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/connector/kafka/source/reader/KafkaPartitionSplitReader.java
@@ -77,6 +77,7 @@ public class KafkaPartitionSplitReader
 
     public KafkaPartitionSplitReader(
             Properties props, KafkaRecordDeserializer<T> deserializationSchema, int subtaskId) {
+        this.subtaskId = subtaskId;
         Properties consumerProps = new Properties();
         consumerProps.putAll(props);
         consumerProps.setProperty(ConsumerConfig.CLIENT_ID_CONFIG, createConsumerClientId(props));
@@ -85,7 +86,6 @@ public class KafkaPartitionSplitReader
         this.deserializationSchema = deserializationSchema;
         this.collector = new SimpleCollector<>();
         this.groupId = consumerProps.getProperty(ConsumerConfig.GROUP_ID_CONFIG);
-        this.subtaskId = subtaskId;
     }
 
     @Override
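
The one-line move above matters because, per the commit title, the consumer client ID is derived from the subtaskId field, which was still unassigned when createConsumerClientId(props) ran before the fix. Sketched generically (not the Flink class), the hazard is a constructor helper observing a field before it is assigned:

    class ClientIdBug {
        private int subtaskId; // holds the default 0 until assigned

        ClientIdBug(int subtaskId) {
            // Before the fix: the helper observed the unassigned field, so
            // every subtask derived the same "-0" suffix.
            String broken = createClientId();

            this.subtaskId = subtaskId;        // the fix: assign first...
            String correct = createClientId(); // ...then derive the client id

            System.out.println(broken + " vs " + correct);
        }

        private String createClientId() {
            return "consumer-" + subtaskId;
        }

        public static void main(String[] args) {
            new ClientIdBug(7); // prints "consumer-0 vs consumer-7"
        }
    }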


