Github user rxin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/1907#discussion_r16134921
  
    --- Diff: 
core/src/main/scala/org/apache/spark/storage/BlockFetcherIterator.scala ---
    @@ -262,77 +271,49 @@ object BlockFetcherIterator {
           readMetrics: ShuffleReadMetrics)
         extends BasicBlockFetcherIterator(blockManager, blocksByAddress, 
serializer, readMetrics) {
     
    -    import blockManager._
    +    override protected def sendRequest(req: FetchRequest) {
    +      logDebug("Sending request for %d blocks (%s) from %s".format(
    +        req.blocks.size, Utils.bytesToString(req.size), 
req.address.hostPort))
    +      val cmId = new ConnectionManagerId(req.address.host, 
req.address.port)
     
    -    val fetchRequestsSync = new LinkedBlockingQueue[FetchRequest]
    -
    -    private def startCopiers(numCopiers: Int): List[_ <: Thread] = {
    -      (for ( i <- Range(0,numCopiers) ) yield {
    -        val copier = new Thread {
    -          override def run(){
    -            try {
    -              while(!isInterrupted && !fetchRequestsSync.isEmpty) {
    -                sendRequest(fetchRequestsSync.take())
    -              }
    -            } catch {
    -              case x: InterruptedException => logInfo("Copier Interrupted")
    -              // case _ => throw new SparkException("Exception Throw in 
Shuffle Copier")
    +      bytesInFlight += req.size
    +      val sizeMap = req.blocks.toMap // so we can look up the size of each 
blockID
    +      val client = 
blockManager.diskBlockManager.nettyBlockClientFactory.createClient(
    +        cmId.host, req.address.nettyPort)
    +      val blocks = req.blocks.map(_._1.toString)
    +
    +      client.fetchBlocks(
    +        blocks,
    +        (blockId: String, refBuf: ReferenceCountedBuffer) => {
    +          // Increment the reference count so the buffer won't be recycled.
    +          refBuf.retain()
    +          val buf = refBuf.byteBuffer()
    +          val blockSize = buf.remaining()
    +          val bid = BlockId(blockId)
    +
    +          // TODO: remove code duplication between here and 
BlockManager.dataDeserialization.
    +          results.put(new FetchResult(bid, sizeMap(bid), () => {
    +            def createIterator: Iterator[Any] = {
    +              val stream = blockManager.wrapForCompression(bid, 
refBuf.inputStream())
    +              serializer.newInstance().deserializeStream(stream).asIterator
                 }
    +            new LazyInitIterator(createIterator) {
    +              // Release the buffer when we are done traversing it.
    +              override def close(): Unit = refBuf.release()
    --- End diff --
    
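    Note to self: this leaks memory in the error case. If the iterator is never
    exhausted, the close() override above is not reached, so the buffer retained
    via refBuf.retain() is never released.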


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to