cloud-fan commented on a change in pull request #27786: [SPARK-31034][CORE] ShuffleBlockFetcherIterator should always create request for last block group URL: https://github.com/apache/spark/pull/27786#discussion_r387524638
########## File path: core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala ########## @@ -341,32 +341,84 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT assert(blockManager.hostLocalDirManager.get.getCachedHostLocalDirs().size === 1) } - test("fetch continuous blocks in batch respects maxSize and maxBlocks") { + test("fetch continuous blocks in batch should respect maxBytesInFlight") { val blockManager = mock(classOf[BlockManager]) val localBmId = BlockManagerId("test-client", "test-local-host", 1) doReturn(localBmId).when(blockManager).blockManagerId // Make sure remote blocks would return the merged block - val remoteBmId = BlockManagerId("test-client-1", "test-client-1", 2) - val remoteBlocks = Seq[BlockId]( + val remoteBmId1 = BlockManagerId("test-client-1", "test-client-1", 1) + val remoteBmId2 = BlockManagerId("test-client-2", "test-client-2", 2) + val remoteBlocks1 = (0 until 15).map(ShuffleBlockId(0, 3, _)) + val remoteBlocks2 = Seq[BlockId](ShuffleBlockId(0, 4, 0), ShuffleBlockId(0, 4, 1)) + val mergedRemoteBlocks = Map[BlockId, ManagedBuffer]( + ShuffleBlockBatchId(0, 3, 0, 3) -> createMockManagedBuffer(), + ShuffleBlockBatchId(0, 3, 3, 6) -> createMockManagedBuffer(), + ShuffleBlockBatchId(0, 3, 6, 9) -> createMockManagedBuffer(), + ShuffleBlockBatchId(0, 3, 9, 12) -> createMockManagedBuffer(), + ShuffleBlockBatchId(0, 3, 12, 15) -> createMockManagedBuffer(), + ShuffleBlockBatchId(0, 4, 0, 2) -> createMockManagedBuffer()) + val transfer = createMockTransfer(mergedRemoteBlocks) + + val blocksByAddress = Seq[(BlockManagerId, Seq[(BlockId, Long, Int)])]( + (remoteBmId1, remoteBlocks1.map(blockId => (blockId, 100L, 1))), + (remoteBmId2, remoteBlocks2.map(blockId => (blockId, 100L, 1)))).toIterator + + val taskContext = TaskContext.empty() + val metrics = taskContext.taskMetrics.createTempShuffleReadMetrics() + val iterator = new ShuffleBlockFetcherIterator( + taskContext, + transfer, + 
blockManager, + blocksByAddress, + (_, in) => in, + 1500, + Int.MaxValue, + Int.MaxValue, + Int.MaxValue, + true, + false, + metrics, + true) + + var numResults = 0 + // After initialize(), there will be 6 FetchRequests, and each of the first 5 + // includes 3 merged blocks and the last one has 1 merged block. So, only the Review comment: or do you mean shuffle blocks? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org