anoopsjohn commented on a change in pull request #528: HBASE-22890 Verify the files when RegionServer is starting and BucketCache is in file mode URL: https://github.com/apache/hbase/pull/528#discussion_r324572000
########## File path: hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java ########## @@ -1021,41 +1033,48 @@ void doDrain(final List<RAMQueueEntry> entries) throws InterruptedException { private void persistToFile() throws IOException { assert !cacheEnabled; - FileOutputStream fos = null; - ObjectOutputStream oos = null; - try { + try (ObjectOutputStream oos = new ObjectOutputStream( + new FileOutputStream(persistencePath, false))){ if (!ioEngine.isPersistent()) { throw new IOException("Attempt to persist non-persistent cache mappings!"); } - fos = new FileOutputStream(persistencePath, false); - oos = new ObjectOutputStream(fos); + if (ioEngine instanceof PersistentIOEngine) { + oos.write(ProtobufUtil.PB_MAGIC); + byte[] checksum = ((PersistentIOEngine) ioEngine).calculateChecksum(); + oos.writeInt(checksum.length); + oos.write(checksum); + } oos.writeLong(cacheCapacity); oos.writeUTF(ioEngine.getClass().getName()); oos.writeUTF(backingMap.getClass().getName()); oos.writeObject(deserialiserMap); oos.writeObject(backingMap); - } finally { - if (oos != null) oos.close(); - if (fos != null) fos.close(); + } catch (NoSuchAlgorithmException e) { + LOG.error("No such algorithm : " + algorithm + "! 
Failed to persist data on exit",e); } } @SuppressWarnings("unchecked") - private void retrieveFromFile(int[] bucketSizes) throws IOException, BucketAllocatorException, + private void retrieveFromFile(int[] bucketSizes) throws IOException, ClassNotFoundException { File persistenceFile = new File(persistencePath); if (!persistenceFile.exists()) { return; } assert !cacheEnabled; - FileInputStream fis = null; - ObjectInputStream ois = null; - try { + try (ObjectInputStream ois = new ObjectInputStream(new FileInputStream(persistencePath))){ if (!ioEngine.isPersistent()) throw new IOException( "Attempt to restore non-persistent cache mappings!"); - fis = new FileInputStream(persistencePath); - ois = new ObjectInputStream(fis); + // for backward compatibility + if (ioEngine instanceof PersistentIOEngine && + !((PersistentIOEngine) ioEngine).isOldVersion()) { + byte[] PBMagic = new byte[ProtobufUtil.PB_MAGIC.length]; + ois.read(PBMagic); + int length = ois.readInt(); + byte[] persistenceChecksum = new byte[length]; + ois.read(persistenceChecksum); Review comment: Actually we are reading persistentChecksum twice in this flow: at FileIOE create time as part of the verify call, and here too. Here we are only reading it in order to skip over it. So why can't we do it here only? We have verifyFileIntegrity() in the PersistentIOEngine interface and we can call that from here? It looks a bit odd. The oldVersion check can be done here also, based on the PBMagic matching; the isOldVersion() API itself is not needed in the FileIOE. We process the persisted meta info here and based on that recreate the backingMap etc. here in BC. So knowing whether the checksum was also persisted, and if so verifying it, can all be done here. I mean the actual verify implementation can be in FileIOE, but the call to it should be from here. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org. With regards, Apache Git Services