This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new f70dfc64316 [fix](cloud) fix dead cloud cluster status empty (#32471)
f70dfc64316 is described below
commit f70dfc64316514ff92102bd6b0e0d4f9de19dbaa
Author: yujun <[email protected]>
AuthorDate: Wed Mar 20 23:38:29 2024 +0800
[fix](cloud) fix dead cloud cluster status empty (#32471)
---
.../java/org/apache/doris/qe/ConnectContext.java | 2 +
.../java/org/apache/doris/qe/StmtExecutor.java | 51 +++++++++++-----------
2 files changed, 28 insertions(+), 25 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java
index 4ef75173786..33e5ff72d91 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java
@@ -1071,6 +1071,8 @@ public class ConnectContext {
}
/**
+ * @param updateErr whether set this connect state to error when the returned cluster is null or empty.
+ *
* @return Returns an available cluster in the following order
* 1 Use an explicitly specified cluster
* 2 If no cluster is specified, the user's default cluster is used
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
index a4ea0cb25bc..808fea13be2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
@@ -791,38 +791,39 @@ public class StmtExecutor {
// cloud mode retry
LOG.debug("due to exception {} retry {} rpc {} user {}",
e.getMessage(), i, e instanceof RpcException, e instanceof UserException);
- // errCode = 2, detailMessage = There is no scanNode Backend
available.[10003: not alive]
- List<String> bes =
Env.getCurrentSystemInfo().getAllBackendIds().stream()
- .map(id ->
Long.toString(id)).collect(Collectors.toList());
String msg = e.getMessage();
boolean isNeedRetry = true;
- if (e instanceof UserException
- &&
msg.contains(SystemInfoService.NO_SCAN_NODE_BACKEND_AVAILABLE_MSG)) {
+ if (Config.isCloudMode()) {
isNeedRetry = false;
- Matcher matcher = beIpPattern.matcher(msg);
- // here retry planner not be recreated, so
- // in cloud mode drop node, be id invalid, so need not
retry
- // such as be ids [11000, 11001] -> after drop node 11001
- // don't need to retry 11001's request
- if (matcher.find()) {
- String notAliveBe = matcher.group(1);
- isNeedRetry = bes.contains(notAliveBe);
- if (isNeedRetry) {
- Backend abnormalBe =
Env.getCurrentSystemInfo().getBackend(Long.parseLong(notAliveBe));
- String deadCloudClusterStatus =
abnormalBe.getCloudClusterStatus();
- String deadCloudClusterClusterName =
abnormalBe.getCloudClusterName();
- LOG.info("need retry cluster {} status {}-{}",
deadCloudClusterClusterName,
- deadCloudClusterStatus,
ClusterStatus.valueOf(deadCloudClusterStatus));
- if (ClusterStatus.valueOf(deadCloudClusterStatus)
!= ClusterStatus.NORMAL) {
-
CloudSystemInfoService.waitForAutoStart(deadCloudClusterClusterName);
+ // errCode = 2, detailMessage = There is no scanNode
Backend available.[10003: not alive]
+ List<String> bes =
Env.getCurrentSystemInfo().getAllBackendIds().stream()
+ .map(id ->
Long.toString(id)).collect(Collectors.toList());
+ if (e instanceof UserException
+ &&
msg.contains(SystemInfoService.NO_SCAN_NODE_BACKEND_AVAILABLE_MSG)) {
+ Matcher matcher = beIpPattern.matcher(msg);
+ // here retry planner not be recreated, so
+ // in cloud mode drop node, be id invalid, so need not
retry
+ // such as be ids [11000, 11001] -> after drop node
11001
+ // don't need to retry 11001's request
+ if (matcher.find()) {
+ String notAliveBe = matcher.group(1);
+ isNeedRetry = bes.contains(notAliveBe);
+ if (isNeedRetry) {
+ Backend abnormalBe =
Env.getCurrentSystemInfo().getBackend(Long.parseLong(notAliveBe));
+ String deadCloudClusterStatus =
abnormalBe.getCloudClusterStatus();
+ String deadCloudClusterClusterName =
abnormalBe.getCloudClusterName();
+ LOG.info("need retry cluster {} status {}",
deadCloudClusterClusterName,
+ deadCloudClusterStatus);
+ if
(Strings.isNullOrEmpty(deadCloudClusterStatus)
+ ||
ClusterStatus.valueOf(deadCloudClusterStatus) != ClusterStatus.NORMAL) {
+
CloudSystemInfoService.waitForAutoStart(deadCloudClusterClusterName);
+ }
}
}
}
}
- if (i == retryTime - 1 || !isNeedRetry) {
- throw e;
- }
- if (context.getConnectType().equals(ConnectType.MYSQL) &&
!context.getMysqlChannel().isSend()) {
+ if (i != retryTime - 1 && isNeedRetry
+ && context.getConnectType().equals(ConnectType.MYSQL)
&& !context.getMysqlChannel().isSend()) {
LOG.warn("retry {} times. stmt: {}", (i + 1),
parsedStmt.getOrigStmt().originStmt);
} else {
throw e;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]