kevinrr888 commented on code in PR #5383:
URL: https://github.com/apache/accumulo/pull/5383#discussion_r2014373322
##########
server/base/src/main/java/org/apache/accumulo/server/AbstractServer.java:
##########
@@ -91,6 +92,20 @@ protected AbstractServer(ServerId.Type serverType,
ConfigOpts opts,
ClusterConfigParser.validateGroupNames(List.of(resourceGroup));
SecurityUtil.serverLogin(siteConfig);
context = serverContextFactory.apply(siteConfig);
+
+ final String upgradePrepNode = context.getZooKeeperRoot() +
Constants.ZPREPARE_FOR_UPGRADE;
+ try {
+ if (context.getZooSession().asReader().exists(upgradePrepNode)) {
+ throw new IllegalStateException(
+ "Instance has been prepared for upgrade, no servers can be
started."
+ + " To undo this state and abort upgrade preparations delete
the zookeeper node: "
+ + upgradePrepNode);
Review Comment:
Maybe there could be a command that aborts this by simply deleting the
node. It could be `ZooZap -abort-prepare-for-upgrade`. I like the idea of
having a complementary undo command.
##########
server/base/src/main/java/org/apache/accumulo/server/util/ZooZap.java:
##########
@@ -115,6 +128,71 @@ public void zap(SiteConfiguration siteConf, String...
args) {
try (var context = new ServerContext(siteConf)) {
final ZooReaderWriter zrw = context.getZooSession().asReaderWriter();
+
+ if (opts.upgrade) {
+ final String volDir =
VolumeConfiguration.getVolumeUris(siteConf).iterator().next();
+ final Path instanceDir = new Path(volDir, "instance_id");
+ final InstanceId iid =
+ VolumeManager.getInstanceIDFromHdfs(instanceDir, new
Configuration());
+ final String zkRoot = ZooUtil.getRoot(iid);
+ final String upgradePath = zkRoot + Constants.ZPREPARE_FOR_UPGRADE;
+
+ try {
+ if (zrw.exists(upgradePath)) {
+ if (!opts.forceUpgradePrep) {
+ throw new IllegalStateException(
+ "'ZooZap -prepare-for-upgrade' must have already been run."
+ + " To run again use the 'ZooZap -prepare-for-upgrade
-force'");
+ } else {
+ zrw.delete(upgradePath);
+ }
+ }
+ } catch (KeeperException | InterruptedException e) {
+ throw new IllegalStateException("Error creating or checking for "
+ upgradePath
+ + " node in zookeeper: " + e.getMessage(), e);
+ }
+
+ log.info("Upgrade specified, validating that Manager is stopped");
+ if (context.getServerPaths().getManager(true) != null) {
+ throw new IllegalStateException(
+ "Manager is running, shut it down and retry this operation");
+ }
+
+ log.info("Checking for existing fate transactions");
+ try {
+ // Adapted from UpgradeCoordinator.abortIfFateTransactions
+ if (!zrw.getChildren(context.getZooKeeperRoot() +
Constants.ZFATE).isEmpty()) {
+ throw new IllegalStateException("Cannot complete upgrade
preparation"
+ + " because FATE transactions exist. You can start a
tserver, but"
+ + " not the Manager, then use the shell to delete completed"
+ + " transactions and fail pending or in-progress
transactions."
+ + " Once all of the FATE transactions have been removed you
can"
+ + " retry this operation.");
+ }
+ } catch (KeeperException | InterruptedException e) {
+ throw new IllegalStateException("Error checking for existing FATE
transactions", e);
+ }
Review Comment:
When upgrading from 4.0 to a newer version in the future, we will also need to
check the FATE table. Maybe this could check whether the FATE table exists and
fail the upgrade if it does, to make it easy to track where upgrade changes are
needed in the future.
##########
server/base/src/main/java/org/apache/accumulo/server/util/ZooZap.java:
##########
@@ -61,7 +69,7 @@ public String keyword() {
@Override
public String description() {
- return "Utility for zapping Zookeeper locks";
+ return "Utility for removing Zookeeper locks and other data";
Review Comment:
Could specify what the other data is.
##########
server/base/src/main/java/org/apache/accumulo/server/util/ZooZap.java:
##########
@@ -115,6 +128,71 @@ public void zap(SiteConfiguration siteConf, String...
args) {
try (var context = new ServerContext(siteConf)) {
final ZooReaderWriter zrw = context.getZooSession().asReaderWriter();
+
+ if (opts.upgrade) {
+ final String volDir =
VolumeConfiguration.getVolumeUris(siteConf).iterator().next();
+ final Path instanceDir = new Path(volDir, "instance_id");
+ final InstanceId iid =
+ VolumeManager.getInstanceIDFromHdfs(instanceDir, new
Configuration());
+ final String zkRoot = ZooUtil.getRoot(iid);
+ final String upgradePath = zkRoot + Constants.ZPREPARE_FOR_UPGRADE;
+
+ try {
+ if (zrw.exists(upgradePath)) {
+ if (!opts.forceUpgradePrep) {
+ throw new IllegalStateException(
+ "'ZooZap -prepare-for-upgrade' must have already been run."
+ + " To run again use the 'ZooZap -prepare-for-upgrade
-force'");
+ } else {
+ zrw.delete(upgradePath);
+ }
+ }
+ } catch (KeeperException | InterruptedException e) {
+ throw new IllegalStateException("Error creating or checking for "
+ upgradePath
+ + " node in zookeeper: " + e.getMessage(), e);
+ }
+
+ log.info("Upgrade specified, validating that Manager is stopped");
+ if (context.getServerPaths().getManager(true) != null) {
+ throw new IllegalStateException(
+ "Manager is running, shut it down and retry this operation");
+ }
+
+ log.info("Checking for existing fate transactions");
+ try {
+ // Adapted from UpgradeCoordinator.abortIfFateTransactions
+ if (!zrw.getChildren(context.getZooKeeperRoot() +
Constants.ZFATE).isEmpty()) {
+ throw new IllegalStateException("Cannot complete upgrade
preparation"
+ + " because FATE transactions exist. You can start a
tserver, but"
+ + " not the Manager, then use the shell to delete completed"
+ + " transactions and fail pending or in-progress
transactions."
+ + " Once all of the FATE transactions have been removed you
can"
+ + " retry this operation.");
+ }
+ } catch (KeeperException | InterruptedException e) {
+ throw new IllegalStateException("Error checking for existing FATE
transactions", e);
+ }
+
+ log.info("Creating {} node in zookeeper, servers will be prevented
from"
+ + " starting while this node exists", upgradePath);
+ try {
+ zrw.putPersistentData(upgradePath, new byte[0],
NodeExistsPolicy.SKIP);
+ } catch (KeeperException | InterruptedException e) {
+ throw new IllegalStateException("Error creating " + upgradePath
+ + " node in zookeeper. Check for any issues and retry.", e);
+ }
+ log.info("Instance {} prepared for upgrade. Server processes will
not start while"
+ + " in this state. To undo this state and abort upgrade
preparations delete"
+ + " the zookeeper node: {}", iid.canonical(), upgradePath);
+
+ log.info("Forcing removal of all server locks");
+ // modify the options to remove all locks
+ opts.zapCompactors = true;
+ opts.zapManager = true;
+ opts.zapScanServers = true;
+ opts.zapTservers = true;
Review Comment:
What about GC and Monitor?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]