This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new ce7f9be [Bug][bdbje] handle bdb rollbackexception (#6582)
ce7f9be is described below
commit ce7f9bef913ce6a654cb98d0ad37e193857d3fe5
Author: dh-cloud <[email protected]>
AuthorDate: Sun Sep 26 11:43:58 2021 +0800
[Bug][bdbje] handle bdb rollbackexception (#6582)
When running 3 FE followers and restarting the FEs, regardless of order, there
is a probability that an FE cannot start successfully
because BDB throws a RollbackException.
In this scenario, BDB suggests catching the exception, simply closing
all your ReplicatedEnvironment handles,
and then reopening them.
So we catch the RollbackException and reopen the ReplicatedEnvironment.
---
.../apache/doris/journal/bdbje/BDBEnvironment.java | 37 ++++++++++++++++++++
.../apache/doris/journal/bdbje/BDBJEJournal.java | 40 +++++++++++++++++-----
2 files changed, 69 insertions(+), 8 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java
b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java
index 3357e60..6e454b8 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java
@@ -33,6 +33,7 @@ import com.sleepycat.je.Durability.SyncPolicy;
import com.sleepycat.je.EnvironmentConfig;
import com.sleepycat.je.EnvironmentFailureException;
import com.sleepycat.je.rep.InsufficientLogException;
+import com.sleepycat.je.rep.RollbackException;
import com.sleepycat.je.rep.NetworkRestore;
import com.sleepycat.je.rep.NetworkRestoreConfig;
import com.sleepycat.je.rep.NoConsistencyRequiredPolicy;
@@ -320,6 +321,8 @@ public class BDBEnvironment {
break;
} catch (InsufficientLogException e) {
throw e;
+ } catch (RollbackException e) {
+ throw e;
} catch (EnvironmentFailureException e) {
tried++;
if (tried == RETRY_TIME) {
@@ -384,6 +387,40 @@ public class BDBEnvironment {
}
}
+ // Closes the current replicatedEnvironment handle if it is non-null (no-op otherwise); a DatabaseException during close is logged and the process exits with -1. The field is not reset to null here.
+ public void closeReplicatedEnvironment() {
+ if (replicatedEnvironment != null) {
+ try {
+ // Close the replicated environment handle itself; nothing else is closed in this method.
+ replicatedEnvironment.close();
+ } catch (DatabaseException exception) {
+ LOG.error("Error closing replicatedEnvironment", exception);
+ System.exit(-1);
+ }
+ }
+ }
+ /**
+ * Opens a new ReplicatedEnvironment on the given home directory, using the
+ * existing replicationConfig and environmentConfig, and stores the handle in
+ * replicatedEnvironment.
+ *
+ * Up to RETRY_TIME attempts are made. After a failed attempt that is not the
+ * last one, the thread sleeps 5 seconds before retrying. If the last attempt
+ * also fails with a DatabaseException, the error is logged and the process
+ * exits with status -1.
+ *
+ * @param envHome environment home directory passed to ReplicatedEnvironment
+ */
+ public void openReplicatedEnvironment(File envHome) {
+ for (int i = 0; i < RETRY_TIME; i++) {
+ try {
+ // open the environment; on success leave the retry loop immediately
+ replicatedEnvironment = new ReplicatedEnvironment(envHome,
replicationConfig, environmentConfig);
+ break;
+ } catch (DatabaseException e) {
+ if (i < RETRY_TIME - 1) {
+ try {
+ Thread.sleep(5 * 1000);
+ } catch (InterruptedException e1) {
+ // Report through the class logger instead of stderr, and restore the
+ // interrupt status so the interruption is not silently swallowed.
+ LOG.warn("interrupted while waiting to reopen replicated environment", e1);
+ Thread.currentThread().interrupt();
+ }
+ } else {
+ LOG.error("error to open replicated environment. will
exit.", e);
+ System.exit(-1);
+ }
+ }
+ }
+ }
+
private SyncPolicy getSyncPolicy(String policy) {
if (policy.equalsIgnoreCase("SYNC")) {
return Durability.SyncPolicy.SYNC;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java
b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java
index 967caad..dae99bc 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java
@@ -37,6 +37,7 @@ import com.sleepycat.je.OperationStatus;
import com.sleepycat.je.rep.InsufficientLogException;
import com.sleepycat.je.rep.NetworkRestore;
import com.sleepycat.je.rep.NetworkRestoreConfig;
+import com.sleepycat.je.rep.RollbackException;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@@ -191,7 +192,7 @@ public class BDBJEJournal implements Journal {
@Override
public JournalEntity read(long journalId) {
- List<Long> dbNames = bdbEnvironment.getDatabaseNames();
+ List<Long> dbNames = getDatabaseNames();
if (dbNames == null) {
return null;
}
@@ -252,7 +253,7 @@ public class BDBJEJournal implements Journal {
if (bdbEnvironment == null) {
return ret;
}
- List<Long> dbNames = bdbEnvironment.getDatabaseNames();
+ List<Long> dbNames = getDatabaseNames();
if (dbNames == null) {
return ret;
}
@@ -275,7 +276,7 @@ public class BDBJEJournal implements Journal {
if (bdbEnvironment == null) {
return ret;
}
- List<Long> dbNames = bdbEnvironment.getDatabaseNames();
+ List<Long> dbNames = getDatabaseNames();
if (dbNames == null) {
return ret;
}
@@ -323,7 +324,7 @@ public class BDBJEJournal implements Journal {
List<Long> dbNames = null;
for (int i = 0; i < RETRY_TIME; i++) {
try {
- dbNames = bdbEnvironment.getDatabaseNames();
+ dbNames = getDatabaseNames();
if (dbNames == null) {
LOG.error("fail to get dbNames while open bdbje journal.
will exit");
@@ -364,7 +365,7 @@ public class BDBJEJournal implements Journal {
@Override
public void deleteJournals(long deleteToJournalId) {
- List<Long> dbNames = bdbEnvironment.getDatabaseNames();
+ List<Long> dbNames = getDatabaseNames();
if (dbNames == null) {
LOG.info("delete database names is null.");
return;
@@ -393,7 +394,7 @@ public class BDBJEJournal implements Journal {
@Override
public long getFinalizedJournalId() {
- List<Long> dbNames = bdbEnvironment.getDatabaseNames();
+ List<Long> dbNames = getDatabaseNames();
if (dbNames == null) {
LOG.error("database name is null.");
return 0;
@@ -417,8 +418,31 @@ public class BDBJEJournal implements Journal {
if (bdbEnvironment == null) {
return null;
}
-
- return bdbEnvironment.getDatabaseNames();
+
+ // Open a new journal database or get last existing one as current
journal database
+ Pair<String, Integer> helperNode =
Catalog.getCurrentCatalog().getHelperNode();
+ List<Long> dbNames = null;
+ for (int i = 0; i < RETRY_TIME; i++) {
+ try {
+ dbNames = bdbEnvironment.getDatabaseNames();
+ } catch (InsufficientLogException insufficientLogEx) {
+ // Copy the missing log files from a member of the replication
group who owns the files
+ LOG.warn("catch insufficient log exception. will recover and
try again.", insufficientLogEx);
+ NetworkRestore restore = new NetworkRestore();
+ NetworkRestoreConfig config = new NetworkRestoreConfig();
+ config.setRetainLogFiles(false);
+ restore.execute(insufficientLogEx, config);
+ bdbEnvironment.close();
+ bdbEnvironment.setup(new File(environmentPath), selfNodeName,
selfNodeHostPort,
+ helperNode.first + ":" + helperNode.second,
Catalog.getCurrentCatalog().isElectable());
+ } catch (RollbackException rollbackEx) {
+ LOG.warn("catch rollback log exception. will reopen the
ReplicatedEnvironment.", rollbackEx);
+ bdbEnvironment.closeReplicatedEnvironment();
+ bdbEnvironment.openReplicatedEnvironment(new
File(environmentPath));
+ }
+ }
+
+ return dbNames;
}
public boolean isPortUsing(String host, int port) throws
UnknownHostException {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]