keith-turner commented on code in PR #5383: URL: https://github.com/apache/accumulo/pull/5383#discussion_r1987773989
########## server/base/src/main/java/org/apache/accumulo/server/util/upgrade/PreUpgradeCheck.java: ########## @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.accumulo.server.util.upgrade; + +import java.util.List; +import java.util.Set; + +import org.apache.accumulo.core.Constants; +import org.apache.accumulo.core.fate.ReadOnlyTStore; +import org.apache.accumulo.core.fate.ZooStore; +import org.apache.accumulo.core.fate.zookeeper.ZooUtil; +import org.apache.accumulo.core.fate.zookeeper.ZooUtil.NodeMissingPolicy; +import org.apache.accumulo.core.zookeeper.ZooSession; +import org.apache.accumulo.server.AccumuloDataVersion; +import org.apache.accumulo.server.ServerContext; +import org.apache.accumulo.server.cli.ServerUtilOpts; +import org.apache.accumulo.start.spi.KeywordExecutable; +import org.apache.zookeeper.KeeperException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class PreUpgradeCheck implements KeywordExecutable { + + private static final Logger LOG = LoggerFactory.getLogger(PreUpgradeCheck.class); + + @Override + public String keyword() { + return "pre-upgrade"; + } + + @Override + public String description() { + return "Utility that prepares an instance to be upgraded to a new version." + + " This utility must be run before starting any servers."; Review Comment: Would be nice to squeeze in some bigger picture guidance here. ```suggestion + " This utility must be run before starting any servers. This utility fits into a bigger picture of upgrade in which these steps should be followed : in the old version of Accumulo run prep-upgrade, run pre-upgrade (this utility) using the new Accumulo version, and then start the new version of the servers."; ``` ########## server/base/src/main/java/org/apache/accumulo/server/util/ZooZap.java: ########## @@ -115,13 +123,77 @@ public void zap(SiteConfiguration siteConf, String... args) { SecurityUtil.serverLogin(siteConf); } - String volDir = VolumeConfiguration.getVolumeUris(siteConf).iterator().next(); - Path instanceDir = new Path(volDir, "instance_id"); - InstanceId iid = VolumeManager.getInstanceIDFromHdfs(instanceDir, new Configuration()); - var zrw = zk.asReaderWriter(); + final String volDir = VolumeConfiguration.getVolumeUris(siteConf).iterator().next(); + final Path instanceDir = new Path(volDir, "instance_id"); + final InstanceId iid = VolumeManager.getInstanceIDFromHdfs(instanceDir, new Configuration()); + final String zkRoot = ZooUtil.getRoot(iid); + final var zrw = zk.asReaderWriter(); + final String upgradePath = zkRoot + Constants.ZPREPARE_FOR_UPGRADE; + + if (opts.upgrade) { + + try { + if (zrw.exists(upgradePath)) { + if (!opts.forceUpgradePrep) { + throw new IllegalStateException( + "'ZooZap -prepare-for-upgrade' must have already been run." + + " To run again use the 'ZooZap -prepare-for-upgrade -force'"); + } else { + zrw.delete(upgradePath); + } + } + } catch (KeeperException | InterruptedException e) { + throw new IllegalStateException("Error creating or checking for " + upgradePath + + " node in zookeeper: " + e.getMessage(), e); + } + + log.info("Upgrade specified, validating that Manager is stopped"); + AdminUtil<Admin> admin = new AdminUtil<>(false); + if (!admin.checkGlobalLock(zk, ServiceLock.path(zkRoot + Constants.ZMANAGER_LOCK))) { + throw new IllegalStateException( + "Manager is running, shut it down and retry this operation"); + } + + log.info("Checking for existing fate transactions"); + try { + final String fatePath = zkRoot + Constants.ZFATE; + // Adapted from UpgradeCoordinator.abortIfFateTransactions + final ReadOnlyTStore<ZooZap> fate = new ZooStore<>(fatePath, zk); + if (!fate.list().isEmpty()) { + throw new IllegalStateException("Cannot complete upgrade preparation" + + " because FATE transactions exist. You can start a tserver, but" + + " not the Manager, then use the shell to delete completed" + + " transactions and fail pending or in-progress transactions." Review Comment: This guidance does not seems like it will work. If a fate op is failed the manager would need to be up to execute the rollback. We could advise when restarting the manager to make sure any client process that will initiate table operations like compact or bulk import are stopped. Alternatively this process could create another zookeeper flag that the manager checks on startup that disables creating new fate ops and only allows ones already started to run. Then all the old servers could be started and the fates cleaned up or allowed to complete w/o worrying about some random process in the system starting new fate ops. The prep-upgrade could delete this flag. ########## server/base/src/main/java/org/apache/accumulo/server/AbstractServer.java: ########## @@ -72,6 +73,21 @@ protected AbstractServer(String appName, ConfigOpts opts, String[] args) { this.hostname = siteConfig.get(Property.GENERAL_PROCESS_BIND_ADDRESS); SecurityUtil.serverLogin(siteConfig); context = new ServerContext(siteConfig); + + final String upgradePrepNode = context.getZooKeeperRoot() + Constants.ZPREPARE_FOR_UPGRADE; Review Comment: We could add an IT that veifies servers will not start when this exists. The IT could stop all servers, create the ZK flag, and then start server processes and verify they exited. Maybe the error message could be verified. ########## server/base/src/main/java/org/apache/accumulo/server/util/ZooZap.java: ########## @@ -80,6 +84,10 @@ static class Opts extends Help { boolean zapScanServers = false; @Parameter(names = "-verbose", description = "print out messages about progress") boolean verbose = false; + @Parameter(names = "-prepare-for-upgrade", description = "prepare ZooKeeper for an upgrade") Review Comment: ```suggestion @Parameter(names = "-prepare-for-upgrade", description = "prepare Accumulo for an upgrade to the next non bug fix release") ``` ########## server/base/src/main/java/org/apache/accumulo/server/util/upgrade/PreUpgradeCheck.java: ########## @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.accumulo.server.util.upgrade; + +import java.util.List; +import java.util.Set; + +import org.apache.accumulo.core.Constants; +import org.apache.accumulo.core.fate.ReadOnlyTStore; +import org.apache.accumulo.core.fate.ZooStore; +import org.apache.accumulo.core.fate.zookeeper.ZooUtil; +import org.apache.accumulo.core.fate.zookeeper.ZooUtil.NodeMissingPolicy; +import org.apache.accumulo.core.zookeeper.ZooSession; +import org.apache.accumulo.server.AccumuloDataVersion; +import org.apache.accumulo.server.ServerContext; +import org.apache.accumulo.server.cli.ServerUtilOpts; +import org.apache.accumulo.start.spi.KeywordExecutable; +import org.apache.zookeeper.KeeperException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class PreUpgradeCheck implements KeywordExecutable { + + private static final Logger LOG = LoggerFactory.getLogger(PreUpgradeCheck.class); + + @Override + public String keyword() { + return "pre-upgrade"; + } + + @Override + public String description() { + return "Utility that prepares an instance to be upgraded to a new version." + + " This utility must be run before starting any servers."; + } + + @Override + public void execute(String[] args) throws Exception { + + ServerUtilOpts opts = new ServerUtilOpts(); + opts.parseArgs(PreUpgradeCheck.class.getName(), args); + + final ServerContext context = opts.getServerContext(); + + final int persistentVersion = AccumuloDataVersion.getCurrentVersion(context); + final int thisVersion = AccumuloDataVersion.get(); + if (persistentVersion == thisVersion) { + throw new IllegalStateException("Running this utility is unnecessary, this instance" + + " has already been upgraded to version " + thisVersion); + } + + final String zkRoot = context.getZooKeeperRoot(); + final ZooSession zs = context.getZooSession(); + final String prepUpgradePath = context.getZooKeeperRoot() + Constants.ZPREPARE_FOR_UPGRADE; + + if (!zs.asReader().exists(prepUpgradePath)) { + LOG.info("{} node not found in ZooKeeper, 'ZooZap -prepare-for-upgrade' was likely" + + " not run when after shutting down instance for upgrade. Removing" + + " server locks and checking for fate transactions."); + + try { + final String fatePath = zkRoot + Constants.ZFATE; + // Adapted from UpgradeCoordinator.abortIfFateTransactions + final ReadOnlyTStore<PreUpgradeCheck> fate = new ZooStore<>(fatePath, zs); Review Comment: For 4.0 will probably need to copy a minimal amount of code from 2.1/3.1 for reading the transaction ids as the persisted format has changed a lot. These changes are fine for 3.1, the 3.1 code should be able to read the 2.1 fate ids. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
