anoopsjohn commented on a change in pull request #528: HBASE-22890 Verify the files when RegionServer is starting and BucketCache is in file mode URL: https://github.com/apache/hbase/pull/528#discussion_r324573285
########## File path: hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileIOEngine.java ########## @@ -19,46 +19,96 @@ package org.apache.hadoop.hbase.io.hfile.bucket; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; +import java.io.ObjectInputStream; import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.channels.ClosedByInterruptException; import java.nio.channels.ClosedChannelException; import java.nio.channels.FileChannel; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; import java.util.Arrays; import java.util.concurrent.locks.ReentrantLock; import com.google.common.annotations.VisibleForTesting; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.StringUtils; /** * IO engine that stores data to a file on the local file system. */ @InterfaceAudience.Private -public class FileIOEngine implements IOEngine { +public class FileIOEngine implements PersistentIOEngine { private static final Log LOG = LogFactory.getLog(FileIOEngine.class); public static final String FILE_DELIMITER = ","; + private static final DuFileCommand DU = new DuFileCommand(new String[] {"du", ""}); + private final String[] filePaths; private final FileChannel[] fileChannels; private final RandomAccessFile[] rafs; private final ReentrantLock[] channelLocks; private final long sizePerFile; private final long capacity; + private final String algorithmName; + private boolean oldVersion; private FileReadAccessor readAccessor = new FileReadAccessor(); private FileWriteAccessor writeAccessor = new FileWriteAccessor(); - public FileIOEngine(long capacity, String... 
filePaths) throws IOException { + public FileIOEngine(String algorithmName, String persistentPath, + long capacity, String... filePaths) throws IOException { this.sizePerFile = capacity / filePaths.length; this.capacity = this.sizePerFile * filePaths.length; this.filePaths = filePaths; this.fileChannels = new FileChannel[filePaths.length]; this.rafs = new RandomAccessFile[filePaths.length]; this.channelLocks = new ReentrantLock[filePaths.length]; + this.algorithmName = algorithmName; + verifyFileIntegrity(persistentPath); + init(); + } + + /** + * Verify cache files's integrity + * @param persistentPath the backingMap persistent path + */ + @Override + public void verifyFileIntegrity(String persistentPath) { + if (persistentPath != null) { + byte[] persistentChecksum = readPersistentChecksum(persistentPath); + if (!oldVersion) { + try { + byte[] calculateChecksum = calculateChecksum(); + if (!Bytes.equals(persistentChecksum, calculateChecksum)) { + LOG.warn("The persistent checksum is " + Bytes.toString(persistentChecksum) + + ", but the calculate checksum is " + Bytes.toString(calculateChecksum)); + throw new IOException(); Review comment: Actually, if the checksums do not match, we can still continue with RS operation. We cannot regain the old cached data. But now, as this IOE is thrown during construction of the FileIOEngine, we can no longer use the IOEngine itself. That is wrong. One more reason not to do this verification as part of the constructor, but at a later time as part of retrieving from the persisted metadata. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services