anoopsjohn commented on a change in pull request #528: HBASE-22890 Verify the files when RegionServer is starting and BucketCache is in file mode URL: https://github.com/apache/hbase/pull/528#discussion_r324975888
########## File path: hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java ########## @@ -1021,41 +1033,48 @@ void doDrain(final List<RAMQueueEntry> entries) throws InterruptedException { private void persistToFile() throws IOException { assert !cacheEnabled; - FileOutputStream fos = null; - ObjectOutputStream oos = null; - try { + try (ObjectOutputStream oos = new ObjectOutputStream( + new FileOutputStream(persistencePath, false))){ if (!ioEngine.isPersistent()) { throw new IOException("Attempt to persist non-persistent cache mappings!"); } - fos = new FileOutputStream(persistencePath, false); - oos = new ObjectOutputStream(fos); + if (ioEngine instanceof PersistentIOEngine) { + oos.write(ProtobufUtil.PB_MAGIC); + byte[] checksum = ((PersistentIOEngine) ioEngine).calculateChecksum(); + oos.writeInt(checksum.length); + oos.write(checksum); + } oos.writeLong(cacheCapacity); oos.writeUTF(ioEngine.getClass().getName()); oos.writeUTF(backingMap.getClass().getName()); oos.writeObject(deserialiserMap); oos.writeObject(backingMap); - } finally { - if (oos != null) oos.close(); - if (fos != null) fos.close(); + } catch (NoSuchAlgorithmException e) { + LOG.error("No such algorithm : " + algorithm + "! Failed to persist data on exit",e); } } @SuppressWarnings("unchecked") - private void retrieveFromFile(int[] bucketSizes) throws IOException, BucketAllocatorException, + private void retrieveFromFile(int[] bucketSizes) throws IOException, ClassNotFoundException { File persistenceFile = new File(persistencePath); if (!persistenceFile.exists()) { return; } assert !cacheEnabled; - FileInputStream fis = null; - ObjectInputStream ois = null; - try { + try (ObjectInputStream ois = new ObjectInputStream(new FileInputStream(persistencePath))){ if (!ioEngine.isPersistent()) throw new IOException( "Attempt to restore non-persistent cache mappings!"); - fis = new FileInputStream(persistencePath); - ois = new ObjectInputStream(fis); + // for backward compatibility + if (ioEngine instanceof PersistentIOEngine && + !((PersistentIOEngine) ioEngine).isOldVersion()) { + byte[] PBMagic = new byte[ProtobufUtil.PB_MAGIC.length]; + ois.read(PBMagic); + int length = ois.readInt(); + byte[] persistenceChecksum = new byte[length]; + ois.read(persistenceChecksum); Review comment: May be we need to close and reopen for the read. My biggest worry is where we do this verify. See my below comment. Now we are doing it while creation of FileIOE. If the verify fails, we are not allowing the FileIOE to be created and do its future work. My point is this. We should create the FileIOE. And then the Bucket Cache is trying to retrieve the already cached data from persistent store and for that its recreating the cache meta. At that step we should be doing the verification right. First see whether the checksum for verify is present already and if so verify. If verify ok and then try to recreate the cache meta data. Or else just forget abt that existing persisted cache data and may be do the necessary cleanup. All these work of Bucket Cache. It can ask the FileIOE to do actual verify. But should be initiated by the BucketCache. You get my mind clearly now? Sorry for not saying in detail at 1st step itself. Ya may be we need close the file and reopen in case of old style with no pb magic. Or else consider it as 4 bytes and read next 4 bytes and make out the 8 bytes long number. But that may be having challenges wrt the platform. I dont think it is an issue to just reopen the file if no pbmagic. Comments. @Reidddddd ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services