This is an automated email from the ASF dual-hosted git repository.
yong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/bookkeeper.git
The following commit(s) were added to refs/heads/master by this push:
new 2aa59c7 Add skip unrecoverable ledger option for bookkeeper shell
recover command (#2870)
2aa59c7 is described below
commit 2aa59c74cdd1de13c33a593f6dccc888823c4a83
Author: Hang Chen <[email protected]>
AuthorDate: Wed Nov 3 08:54:34 2021 +0800
Add skip unrecoverable ledger option for bookkeeper shell recover command
(#2870)
### Motivation
When we use `bin/bookkeeper shell recover bookieId` command to recover
specific bookie's ledgers, the recover process will exit when occurs recover
ledger failed.
In our production bookkeeper cluster, we found some ledgers in Open state
and has no entry. When we call `bin/bookkeeper shell recover bookieId`
command, it will traverse all the ledgers level by level. In the end, for each
ledger, it will call the following code to process recover.
```Java
Processor<Long> ledgerProcessor = new Processor<Long>() {
@Override
public void process(Long ledgerId, AsyncCallback.VoidCallback
iterCallback) {
recoverLedger(bookiesSrc, ledgerId, dryrun,
skipOpenLedgers, skipUnrecoverableLedgers, iterCallback);
}
};
```
In the `recoverLedger` method, it will call `asyncOpenLedgerNoRecovery` to
open ledger and get LAC if the ledger in `OPEN` state. For the `getLAC`
request, if the request ledger has no entry, it will return entry = -1 and
return ERROR for this `getLAC` request.
https://github.com/apache/bookkeeper/blob/98ddf8149592572eebcfaf6bdd4916f295ffd9d7/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperAdmin.java#L756-L769
And for the `asyncOpenLedgerNoRecovery` callback, it will return error for
this process. It will stop the recover process of the following ledgers.
In the end, the recover command runs failed, and the following ledger can't
be recovered.
### Changes
We should expose a flag for user to determine whether to move forward to
recover the following ledgers when some ledgers recover failed.
So, I provide the parameter `sku` to handle this case.
---
.../org/apache/bookkeeper/bookie/BookieShell.java | 3 +
.../apache/bookkeeper/client/BookKeeperAdmin.java | 76 ++++++++++++++++++----
.../tools/cli/commands/bookies/RecoverCommand.java | 11 +++-
.../apache/bookkeeper/bookie/BookieShellTest.java | 21 ++++--
4 files changed, 89 insertions(+), 22 deletions(-)
diff --git
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java
index 8d00220..ffaa18b 100644
---
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java
+++
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java
@@ -481,6 +481,7 @@ public class BookieShell implements Tool {
opts.addOption("l", "ledger", true, "Recover a specific ledger");
opts.addOption("sk", "skipOpenLedgers", false, "Skip recovering
open ledgers");
opts.addOption("d", "deleteCookie", false, "Delete cookie node for
the bookie.");
+ opts.addOption("sku", "skipUnrecoverableLedgers", false, "Skip
unrecoverable ledgers.");
}
@Override
@@ -513,6 +514,7 @@ public class BookieShell implements Tool {
boolean force = cmdLine.hasOption("f");
boolean skipOpenLedgers = cmdLine.hasOption("sk");
boolean removeCookies = !dryrun && cmdLine.hasOption("d");
+ boolean skipUnrecoverableLedgers = cmdLine.hasOption("sku");
Long ledgerId = getOptionLedgerIdValue(cmdLine, "ledger", -1);
@@ -525,6 +527,7 @@ public class BookieShell implements Tool {
flags.ledger(ledgerId);
flags.skipOpenLedgers(skipOpenLedgers);
flags.query(query);
+ flags.skipUnrecoverableLedgers(skipUnrecoverableLedgers);
boolean result = cmd.apply(bkConf, flags);
return (result) ? 0 : -1;
}
diff --git
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperAdmin.java
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperAdmin.java
index 91e2552..4eadb12 100644
---
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperAdmin.java
+++
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperAdmin.java
@@ -574,10 +574,15 @@ public class BookKeeperAdmin implements AutoCloseable {
}
public void recoverBookieData(final Set<BookieId> bookiesSrc, boolean
dryrun, boolean skipOpenLedgers)
- throws InterruptedException, BKException {
+ throws InterruptedException, BKException {
+ recoverBookieData(bookiesSrc, dryrun, skipOpenLedgers, false);
+ }
+
+ public void recoverBookieData(final Set<BookieId> bookiesSrc, boolean
dryrun, boolean skipOpenLedgers,
+ boolean skipUnrecoverableLedgers) throws
InterruptedException, BKException {
SyncObject sync = new SyncObject();
// Call the async method to recover bookie data.
- asyncRecoverBookieData(bookiesSrc, dryrun, skipOpenLedgers, new
RecoverCallback() {
+ asyncRecoverBookieData(bookiesSrc, dryrun, skipOpenLedgers,
skipUnrecoverableLedgers, new RecoverCallback() {
@Override
public void recoverComplete(int rc, Object ctx) {
LOG.info("Recover bookie operation completed with rc: {}",
BKException.codeLogger(rc));
@@ -657,12 +662,13 @@ public class BookKeeperAdmin implements AutoCloseable {
public void asyncRecoverBookieData(final Set<BookieId> bookieSrc,
final RecoverCallback cb, final Object
context) {
- asyncRecoverBookieData(bookieSrc, false, false, cb, context);
+ asyncRecoverBookieData(bookieSrc, false, false, false, cb, context);
}
public void asyncRecoverBookieData(final Set<BookieId> bookieSrc, boolean
dryrun,
- final boolean skipOpenLedgers, final
RecoverCallback cb, final Object context) {
- getActiveLedgers(bookieSrc, dryrun, skipOpenLedgers, cb, context);
+ final boolean skipOpenLedgers, final
boolean skipUnrecoverableLedgers,
+ final RecoverCallback cb, final Object
context) {
+ getActiveLedgers(bookieSrc, dryrun, skipOpenLedgers,
skipUnrecoverableLedgers, cb, context);
}
/**
@@ -709,7 +715,8 @@ public class BookKeeperAdmin implements AutoCloseable {
* Context for the RecoverCallback to call.
*/
private void getActiveLedgers(final Set<BookieId> bookiesSrc, final
boolean dryrun,
- final boolean skipOpenLedgers, final
RecoverCallback cb, final Object context) {
+ final boolean skipOpenLedgers, final boolean
skipUnrecoverableLedgers,
+ final RecoverCallback cb, final Object
context) {
// Wrapper class around the RecoverCallback so it can be used
// as the final VoidCallback to process ledgers
class RecoverCallbackWrapper implements AsyncCallback.VoidCallback {
@@ -728,7 +735,7 @@ public class BookKeeperAdmin implements AutoCloseable {
Processor<Long> ledgerProcessor = new Processor<Long>() {
@Override
public void process(Long ledgerId, AsyncCallback.VoidCallback
iterCallback) {
- recoverLedger(bookiesSrc, ledgerId, dryrun, skipOpenLedgers,
iterCallback);
+ recoverLedger(bookiesSrc, ledgerId, dryrun, skipOpenLedgers,
skipUnrecoverableLedgers, iterCallback);
}
};
bkc.getLedgerManager().asyncProcessLedgers(
@@ -755,6 +762,31 @@ public class BookKeeperAdmin implements AutoCloseable {
*/
private void recoverLedger(final Set<BookieId> bookiesSrc, final long lId,
final boolean dryrun,
final boolean skipOpenLedgers, final
AsyncCallback.VoidCallback finalLedgerIterCb) {
+ recoverLedger(bookiesSrc, lId, dryrun, skipOpenLedgers, false,
finalLedgerIterCb);
+ }
+
+ /**
+ * This method asynchronously recovers a given ledger if any of the ledger
+ * entries were stored on the failed bookie.
+ *
+ * @param bookiesSrc
+ * Source bookies that had a failure. We want to replicate the
+ * ledger fragments that were stored there.
+ * @param lId
+ * Ledger id we want to recover.
+ * @param dryrun
+ * printing the recovery plan without actually recovering
bookies
+ * @param skipOpenLedgers
+ * Skip recovering open ledgers.
+ * @param skipUnrecoverableLedgers
+ * Skip unrecoverable ledgers.
+ * @param finalLedgerIterCb
+ * IterationCallback to invoke once we've recovered the current
+ * ledger.
+ */
+ private void recoverLedger(final Set<BookieId> bookiesSrc, final long lId,
final boolean dryrun,
+ final boolean skipOpenLedgers, final boolean
skipUnrecoverableLedgers,
+ final AsyncCallback.VoidCallback
finalLedgerIterCb) {
if (LOG.isDebugEnabled()) {
LOG.debug("Recovering ledger : {}", lId);
}
@@ -763,8 +795,13 @@ public class BookKeeperAdmin implements AutoCloseable {
@Override
public void openComplete(int rc, final LedgerHandle lh, Object
ctx) {
if (rc != BKException.Code.OK) {
- LOG.error("BK error opening ledger: " + lId,
BKException.create(rc));
- finalLedgerIterCb.processResult(rc, null, null);
+ if (skipUnrecoverableLedgers) {
+ LOG.warn("BK error opening ledger: {}, skip recover
it.", lId, BKException.create(rc));
+ finalLedgerIterCb.processResult(BKException.Code.OK,
null, null);
+ } else {
+ LOG.error("BK error opening ledger: {}", lId,
BKException.create(rc));
+ finalLedgerIterCb.processResult(rc, null, null);
+ }
return;
}
@@ -798,13 +835,20 @@ public class BookKeeperAdmin implements AutoCloseable {
@Override
public void openComplete(int newrc, final LedgerHandle
newlh, Object newctx) {
if (newrc != BKException.Code.OK) {
- LOG.error("BK error close ledger: " + lId,
BKException.create(newrc));
- finalLedgerIterCb.processResult(newrc, null,
null);
+ if (skipUnrecoverableLedgers) {
+ LOG.warn("BK error opening ledger: {},
skip recover it.",
+ lId, BKException.create(newrc));
+
finalLedgerIterCb.processResult(BKException.Code.OK, null, null);
+ } else {
+ LOG.error("BK error close ledger: {}",
lId, BKException.create(newrc));
+ finalLedgerIterCb.processResult(newrc,
null, null);
+ }
return;
}
bkc.mainWorkerPool.submit(() -> {
// do recovery
- recoverLedger(bookiesSrc, lId, dryrun,
skipOpenLedgers, finalLedgerIterCb);
+ recoverLedger(bookiesSrc, lId, dryrun,
skipOpenLedgers,
+ skipUnrecoverableLedgers,
finalLedgerIterCb);
});
}
}, null);
@@ -815,7 +859,13 @@ public class BookKeeperAdmin implements AutoCloseable {
@Override
public void processResult(int rc, String path, Object ctx)
{
if (BKException.Code.OK != rc) {
- LOG.error("Failed to recover ledger {} : {}", lId,
BKException.codeLogger(rc));
+ if (skipUnrecoverableLedgers) {
+ LOG.warn("Failed to recover ledger: {} : {},
skip recover it.", lId,
+ BKException.codeLogger(rc));
+ rc = BKException.Code.OK;
+ } else {
+ LOG.error("Failed to recover ledger {} : {}",
lId, BKException.codeLogger(rc));
+ }
} else {
LOG.info("Recovered ledger {}.", lId);
}
diff --git
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/RecoverCommand.java
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/RecoverCommand.java
index cd7ede0..ff1339f 100644
---
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/RecoverCommand.java
+++
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/tools/cli/commands/bookies/RecoverCommand.java
@@ -100,6 +100,9 @@ public class RecoverCommand extends
BookieCommand<RecoverCommand.RecoverFlags> {
@Parameter(names = { "-bs", "--bokiesrc" }, description = "Bookie
address")
private String bookieAddress;
+
+ @Parameter(names = {"-sku", "--skipunrecoverableledgers"}, description
= "Skip unrecoverable ledgers")
+ private boolean skipUnrecoverableLedgers;
}
@Override
@@ -118,6 +121,7 @@ public class RecoverCommand extends
BookieCommand<RecoverCommand.RecoverFlags> {
boolean force = flags.force;
boolean skipOpenLedgers = flags.skipOpenLedgers;
boolean removeCookies = !dryrun && flags.deleteCookie;
+ boolean skipUnrecoverableLedgers = flags.skipUnrecoverableLedgers;
Long ledgerId = flags.ledger;
@@ -153,7 +157,7 @@ public class RecoverCommand extends
BookieCommand<RecoverCommand.RecoverFlags> {
if (DEFAULT_ID != ledgerId) {
return bkRecoveryLedger(admin, ledgerId, bookieAddrs, dryrun,
skipOpenLedgers, removeCookies);
}
- return bkRecovery(admin, bookieAddrs, dryrun, skipOpenLedgers,
removeCookies);
+ return bkRecovery(admin, bookieAddrs, dryrun, skipOpenLedgers,
removeCookies, skipUnrecoverableLedgers);
} finally {
admin.close();
}
@@ -259,9 +263,10 @@ public class RecoverCommand extends
BookieCommand<RecoverCommand.RecoverFlags> {
Set<BookieId> bookieAddrs,
boolean dryrun,
boolean skipOpenLedgers,
- boolean removeCookies)
+ boolean removeCookies,
+ boolean skipUnrecoverableLedgers)
throws InterruptedException, BKException {
- bkAdmin.recoverBookieData(bookieAddrs, dryrun, skipOpenLedgers);
+ bkAdmin.recoverBookieData(bookieAddrs, dryrun, skipOpenLedgers,
skipUnrecoverableLedgers);
if (removeCookies) {
deleteCookies(bkAdmin.getConf(), bookieAddrs);
}
diff --git
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieShellTest.java
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieShellTest.java
index 2c9eb81..2da712f 100644
---
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieShellTest.java
+++
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/BookieShellTest.java
@@ -280,7 +280,7 @@ public class BookieShellTest {
public void testRecoverCmdRecoverDefault() throws Exception {
// default behavior
testRecoverCmdRecover(
- false, false, false,
+ false, false, false, false,
"-force", "127.0.0.1:3181");
}
@@ -288,7 +288,7 @@ public class BookieShellTest {
public void testRecoverCmdRecoverDeleteCookie() throws Exception {
// dryrun
testRecoverCmdRecover(
- false, false, true,
+ false, false, true, false,
"-force", "-deleteCookie", "127.0.0.1:3181");
}
@@ -296,7 +296,7 @@ public class BookieShellTest {
public void testRecoverCmdRecoverSkipOpenLedgersDeleteCookie() throws
Exception {
// dryrun
testRecoverCmdRecover(
- false, true, true,
+ false, true, true, false,
"-force", "-deleteCookie", "-skipOpenLedgers", "127.0.0.1:3181");
}
@@ -304,7 +304,7 @@ public class BookieShellTest {
public void testRecoverCmdRecoverDryrun() throws Exception {
// dryrun
testRecoverCmdRecover(
- true, false, false,
+ true, false, false, false,
"-force", "-dryrun", "127.0.0.1:3181");
}
@@ -312,14 +312,23 @@ public class BookieShellTest {
public void testRecoverCmdRecoverDryrunDeleteCookie() throws Exception {
// dryrun & removeCookie : removeCookie should be false
testRecoverCmdRecover(
- true, false, false,
+ true, false, false, false,
"-force", "-dryrun", "-deleteCookie", "127.0.0.1:3181");
}
+ @Test
+ public void testRecoverCmdRecoverSkipUnrecoverableLedgers() throws
Exception {
+ // skipUnrecoverableLedgers
+ testRecoverCmdRecover(
+ false, false, false, true,
+ "-force", "-sku", "127.0.0.1:3181");
+ }
+
@SuppressWarnings("unchecked")
void testRecoverCmdRecover(boolean dryrun,
boolean skipOpenLedgers,
boolean removeCookies,
+ boolean skipUnrecoverableLedgers,
String... args) throws Exception {
RecoverCmd cmd = (RecoverCmd) shell.commands.get("recover");
CommandLine cmdLine = parseCommandLine(cmd, args);
@@ -328,7 +337,7 @@ public class BookieShellTest {
.verifyNew(BookKeeperAdmin.class, times(1))
.withArguments(any(ClientConfiguration.class));
verify(admin, times(1))
- .recoverBookieData(any(Set.class), eq(dryrun),
eq(skipOpenLedgers));
+ .recoverBookieData(any(Set.class), eq(dryrun),
eq(skipOpenLedgers), eq(skipUnrecoverableLedgers));
verify(admin, times(1)).close();
if (removeCookies) {
PowerMockito.verifyStatic(MetadataDrivers.class);