Github user uncleGen commented on a diff in the pull request: https://github.com/apache/spark/pull/16972#discussion_r102390214 --- Diff: core/src/main/scala/org/apache/spark/storage/DiskStore.scala --- @@ -73,17 +81,52 @@ private[spark] class DiskStore(conf: SparkConf, diskManager: DiskBlockManager) e } def putBytes(blockId: BlockId, bytes: ChunkedByteBuffer): Unit = { + val bytesToStore = if (serializerManager.encryptionEnabled) { + try { + val data = bytes.toByteBuffer + val in = new ByteBufferInputStream(data, true) + val byteBufOut = new ByteBufferOutputStream(data.remaining()) + val out = CryptoStreamUtils.createCryptoOutputStream(byteBufOut, conf, + serializerManager.encryptionKey.get) + try { + ByteStreams.copy(in, out) + } finally { + in.close() + out.close() + } + new ChunkedByteBuffer(byteBufOut.toByteBuffer) + } finally { + bytes.dispose() + } + } else { + bytes + } + put(blockId) { fileOutputStream => val channel = fileOutputStream.getChannel Utils.tryWithSafeFinally { - bytes.writeFully(channel) + bytesToStore.writeFully(channel) } { channel.close() } } } def getBytes(blockId: BlockId): ChunkedByteBuffer = { + val bytes = readBytes(blockId) + + val in = serializerManager.wrapForEncryption(bytes.toInputStream(dispose = true)) + new ChunkedByteBuffer(ByteBuffer.wrap(IOUtils.toByteArray(in))) --- End diff -- @vanzin After taking some time to think about it, I find it may complicate the issue if we separate `MemoryStore` with un-encrypted data from `DiskStore` with encrypted data. When we get data from a remote, we will encrypt the data if it is stored in memory in un-encrypted style. Besides, when we `maybeCacheDiskBytesInMemory`, we will decrypt them again. I've thought about caching disk data into memory in encrypted style and then decrypting it lazily when used, but that makes things much more complicated. Maybe it is better to keep the original style, i.e. keep data encrypted (if possible) both in memory and on disk. We should narrow this problem. Any suggestions?
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org