This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 156f7b7699 [improvement](transaction) make commit txn fail hint more understandable (#23227)
156f7b7699 is described below
commit 156f7b76999a09585b2cb906540fdb90cbc8b67f
Author: yujun <[email protected]>
AuthorDate: Wed Aug 23 21:50:24 2023 +0800
[improvement](transaction) make commit txn fail hint more understandable (#23227)
---
.../org/apache/doris/planner/OlapTableSink.java | 3 +-
.../doris/transaction/DatabaseTransactionMgr.java | 61 ++++++++++++++++++++--
.../transaction/TabletQuorumFailedException.java | 23 +-------
3 files changed, 62 insertions(+), 25 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java
index 8f15f4ea7a..2012475083 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java
@@ -410,7 +410,8 @@ public class OlapTableSink extends DataSink {
Multimap<Long, Long> bePathsMap =
tablet.getNormalReplicaBackendPathMap();
if (bePathsMap.keySet().size() < quorum) {
throw new
UserException(InternalErrorCode.REPLICA_FEW_ERR,
- "tablet " + tablet.getId() + " has few
replicas: " + bePathsMap.keySet().size()
+ "tablet " + tablet.getId() + " alive replica
num " + bePathsMap.keySet().size()
+ + " < quorum replica num " + quorum
+ ", alive backends: [" +
StringUtils.join(bePathsMap.keySet(), ",") + "]");
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java b/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java
index 293b49368a..bd125d1dac 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java
@@ -81,6 +81,7 @@ import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.ReentrantReadWriteLock;
+import java.util.function.Function;
import java.util.stream.Collectors;
/**
@@ -434,6 +435,7 @@ public class DatabaseTransactionMgr {
Set<Long> errorReplicaIds, Map<Long,
Set<Long>> tableToPartition,
Set<Long> totalInvolvedBackends) throws
UserException {
+ long transactionId = transactionState.getTransactionId();
Database db = env.getInternalCatalog().getDbOrMetaException(dbId);
// update transaction state extra if exists
@@ -490,6 +492,33 @@ public class DatabaseTransactionMgr {
}
tabletToBackends.get(tabletId).add(tabletCommitInfos.get(i).getBackendId());
}
+ List<String> tabletSuccReplicas = Lists.newArrayList();
+ List<String> tabletWriteFailedReplicas = Lists.newArrayList();
+ List<String> tabletVersionFailedReplicas = Lists.newArrayList();
+ Function<Replica, String> getReplicaInfo = replica -> {
+ StringBuilder strBuffer = new StringBuilder("[replicaId=");
+ strBuffer.append(replica.getId());
+ strBuffer.append(", backendId=");
+ strBuffer.append(replica.getBackendId());
+ strBuffer.append(", backendAlive=");
+
strBuffer.append(Env.getCurrentSystemInfo().checkBackendAlive(replica.getBackendId()));
+ strBuffer.append(", version=");
+ strBuffer.append(replica.getVersion());
+ if (replica.getLastFailedVersion() >= 0) {
+ strBuffer.append(", lastFailedVersion=");
+ strBuffer.append(replica.getLastFailedVersion());
+ strBuffer.append(", lastSuccessVersion=");
+ strBuffer.append(replica.getLastSuccessVersion());
+ strBuffer.append(", lastFailedTimestamp=");
+ strBuffer.append(replica.getLastFailedTimestamp());
+ }
+ strBuffer.append(", state=");
+ strBuffer.append(replica.getState().name());
+ strBuffer.append("]");
+
+ return strBuffer.toString();
+ };
+
for (long tableId : tableToPartition.keySet()) {
OlapTable table = (OlapTable) db.getTableOrMetaException(tableId);
for (Partition partition : table.getAllPartitions()) {
@@ -533,6 +562,9 @@ public class DatabaseTransactionMgr {
.getReplicaAllocation(partition.getId()).getTotalReplicaNum() / 2 + 1;
for (MaterializedIndex index : allIndices) {
for (Tablet tablet : index.getTablets()) {
+ tabletSuccReplicas.clear();
+ tabletWriteFailedReplicas.clear();
+ tabletVersionFailedReplicas.clear();
int successReplicaNum = 0;
long tabletId = tablet.getId();
Set<Long> tabletBackends = tablet.getBackendIds();
@@ -558,11 +590,14 @@ public class DatabaseTransactionMgr {
// for example, a replica is in clone state
if (replica.getLastFailedVersion() < 0) {
++successReplicaNum;
+
tabletSuccReplicas.add(getReplicaInfo.apply(replica));
} else {
errorReplicaInfo += " replica [" +
replica.getId() + "], lastFailedVersion ["
+
replica.getLastFailedVersion() + "]";
+
tabletVersionFailedReplicas.add(getReplicaInfo.apply(replica));
}
} else {
+
tabletWriteFailedReplicas.add(getReplicaInfo.apply(replica));
errorBackendIdsForTablet.add(tabletBackend);
errorReplicaIds.add(replica.getId());
// not remove rollup task here, because the
commit maybe failed
@@ -580,9 +615,29 @@ public class DatabaseTransactionMgr {
transactionState.getTransactionId(),
tablet.getId(), successReplicaNum,
quorumReplicaNum,
Joiner.on(",").join(errorBackendIdsForTablet),
errorReplicaInfo, commitBackends);
- throw new
TabletQuorumFailedException(transactionState.getTransactionId(), tablet.getId(),
- successReplicaNum, quorumReplicaNum,
- errorBackendIdsForTablet);
+
+ String replicasDetailMsg = "";
+ if (!tabletSuccReplicas.isEmpty()) {
+ replicasDetailMsg += String.format("%s
replicas final succ: { %s }; ",
+ tabletSuccReplicas.size(),
Joiner.on(", ").join(tabletSuccReplicas));
+ }
+ if (!tabletWriteFailedReplicas.isEmpty()) {
+ replicasDetailMsg += String.format("%s
replicas write data failed: { %s }; ",
+ tabletWriteFailedReplicas.size(),
+ Joiner.on(",
").join(tabletWriteFailedReplicas));
+ }
+ if (!tabletVersionFailedReplicas.isEmpty()) {
+ replicasDetailMsg += String.format("%s
replicas write data succ but miss previous "
+ + "version: { %s }.",
+ tabletVersionFailedReplicas.size(),
+ Joiner.on(",
").join(tabletVersionFailedReplicas));
+ }
+
+ throw new
TabletQuorumFailedException(transactionId, String.format(
+ "Failed to commit txn %s, cause tablet
%s succ replica num %s < quorum "
+ + " replica num %s. table %s,
partition %s, this tablet detail: %s",
+ transactionId, tablet.getId(),
successReplicaNum, quorumReplicaNum, tableId,
+ partition.getId(), replicasDetailMsg));
}
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/transaction/TabletQuorumFailedException.java b/fe/fe-core/src/main/java/org/apache/doris/transaction/TabletQuorumFailedException.java
index 741babff4b..aef45cdcfd 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/transaction/TabletQuorumFailedException.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/transaction/TabletQuorumFailedException.java
@@ -17,27 +17,8 @@
package org.apache.doris.transaction;
-import com.google.common.base.Joiner;
-import com.google.common.collect.Sets;
-
-import java.util.Set;
-
public class TabletQuorumFailedException extends TransactionException {
-
- private static final String TABLET_QUORUM_FAILED_MSG = "Failed to commit
txn %s. "
- + "Tablet [%s] success replica num %s is less than quorum "
- + "replica num %s while error backends %s";
-
- private long tabletId;
- private Set<Long> errorBackendIdsForTablet = Sets.newHashSet();
-
- public TabletQuorumFailedException(long transactionId, long tabletId,
- int successReplicaNum, int
quorumReplicaNum,
- Set<Long> errorBackendIdsForTablet) {
- super(String.format(TABLET_QUORUM_FAILED_MSG, transactionId, tabletId,
- successReplicaNum, quorumReplicaNum,
- Joiner.on(",").join(errorBackendIdsForTablet)), transactionId);
- this.tabletId = tabletId;
- this.errorBackendIdsForTablet = errorBackendIdsForTablet;
+ public TabletQuorumFailedException(long transactionId, String message) {
+ super(message, transactionId);
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]