Repository: spark
Updated Branches:
  refs/heads/branch-2.0 bcf0c51b6 -> 4018c4800


[SPARK-15742][SQL] Reduce temp collections allocations in TreeNode transform 
methods

In Catalyst's TreeNode transform methods we end up calling
`productIterator.map(...).toArray` in a number of places, which is slightly
inefficient because it needs to allocate an `ArrayBuilder` and repeatedly grow a
temporary array. Since we already know the size of the final output
(`productArity`), we can simply allocate an array up-front and use a while loop
to consume the iterator and populate the array.

For most workloads, this performance difference is negligible but it does make 
a measurable difference in optimizer performance for queries that operate over 
very wide schemas (such as the benchmark queries in #13456).

### Perf results (from #13456 benchmarks)

**Before**

```
Java HotSpot(TM) 64-Bit Server VM 1.8.0_66-b17 on Mac OS X 10.10.5
Intel(R) Core(TM) i7-4960HQ CPU  2.60GHz

parsing large select:                    Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------
1 select expressions                            19 /   22          0.0    19119858.0       1.0X
10 select expressions                           23 /   25          0.0    23208774.0       0.8X
100 select expressions                          55 /   73          0.0    54768402.0       0.3X
1000 select expressions                        229 /  259          0.0   228606373.0       0.1X
2500 select expressions                        530 /  554          0.0   529938178.0       0.0X
```
```

**After**

```
parsing large select:                    Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------
1 select expressions                            15 /   21          0.0    14978203.0       1.0X
10 select expressions                           22 /   27          0.0    22492262.0       0.7X
100 select expressions                          48 /   64          0.0    48449834.0       0.3X
1000 select expressions                        189 /  208          0.0   189346428.0       0.1X
2500 select expressions                        429 /  449          0.0   428943897.0       0.0X
```

###

Author: Josh Rosen <joshro...@databricks.com>

Closes #13484 from JoshRosen/treenode-productiterator-map.

(cherry picked from commit e526913989d6099064886ea3ed3f6a2a0376a4f8)
Signed-off-by: Josh Rosen <joshro...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4018c480
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4018c480
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4018c480

Branch: refs/heads/branch-2.0
Commit: 4018c48009ab479b996ba0ec0a3d0a47c97388ed
Parents: bcf0c51
Author: Josh Rosen <joshro...@databricks.com>
Authored: Fri Jun 3 13:53:02 2016 -0700
Committer: Josh Rosen <joshro...@databricks.com>
Committed: Fri Jun 3 13:53:49 2016 -0700

----------------------------------------------------------------------
 .../spark/sql/catalyst/plans/QueryPlan.scala    |  6 ++---
 .../spark/sql/catalyst/trees/TreeNode.scala     | 26 +++++++++++++++-----
 2 files changed, 23 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/4018c480/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
index 6784c3a..3de15a9 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
@@ -172,7 +172,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] 
extends TreeNode[PlanT
       case null => null
     }
 
-    val newArgs = productIterator.map(recursiveTransform).toArray
+    val newArgs = mapProductIterator(recursiveTransform)
 
     if (changed) makeCopy(newArgs).asInstanceOf[this.type] else this
   }
@@ -206,7 +206,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] 
extends TreeNode[PlanT
       case null => null
     }
 
-    val newArgs = productIterator.map(recursiveTransform).toArray
+    val newArgs = mapProductIterator(recursiveTransform)
 
     if (changed) makeCopy(newArgs).asInstanceOf[this.type] else this
   }
@@ -318,7 +318,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] 
extends TreeNode[PlanT
       case other => other
     }
 
-    productIterator.map {
+    mapProductIterator {
       case s: Option[_] => s.map(cleanArg)
       case s: Seq[_] => s.map(cleanArg)
       case m: Map[_, _] => m.mapValues(cleanArg)

http://git-wip-us.apache.org/repos/asf/spark/blob/4018c480/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
index 50481cd..22d82c6 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
@@ -21,6 +21,7 @@ import java.util.UUID
 
 import scala.collection.Map
 import scala.collection.mutable.Stack
+import scala.reflect.ClassTag
 
 import org.apache.commons.lang.ClassUtils
 import org.json4s.JsonAST._
@@ -169,11 +170,24 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] 
extends Product {
   }
 
   /**
+   * Efficient alternative to `productIterator.map(f).toArray`.
+   */
+  protected def mapProductIterator[B: ClassTag](f: Any => B): Array[B] = {
+    val arr = Array.ofDim[B](productArity)
+    var i = 0
+    while (i < arr.length) {
+      arr(i) = f(productElement(i))
+      i += 1
+    }
+    arr
+  }
+
+  /**
    * Returns a copy of this node where `f` has been applied to all the nodes 
children.
    */
   def mapChildren(f: BaseType => BaseType): BaseType = {
     var changed = false
-    val newArgs = productIterator.map {
+    val newArgs = mapProductIterator {
       case arg: TreeNode[_] if containsChild(arg) =>
         val newChild = f(arg.asInstanceOf[BaseType])
         if (newChild fastEquals arg) {
@@ -184,7 +198,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] 
extends Product {
         }
       case nonChild: AnyRef => nonChild
       case null => null
-    }.toArray
+    }
     if (changed) makeCopy(newArgs) else this
   }
 
@@ -197,7 +211,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] 
extends Product {
     var changed = false
     val remainingNewChildren = newChildren.toBuffer
     val remainingOldChildren = children.toBuffer
-    val newArgs = productIterator.map {
+    val newArgs = mapProductIterator {
       case s: StructType => s // Don't convert struct types to some other type 
of Seq[StructField]
       // Handle Seq[TreeNode] in TreeNode parameters.
       case s: Seq[_] => s.map {
@@ -237,7 +251,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] 
extends Product {
         }
       case nonChild: AnyRef => nonChild
       case null => null
-    }.toArray
+    }
 
     if (changed) makeCopy(newArgs) else this
   }
@@ -302,7 +316,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] 
extends Product {
       rule: PartialFunction[BaseType, BaseType],
       nextOperation: (BaseType, PartialFunction[BaseType, BaseType]) => 
BaseType): BaseType = {
     var changed = false
-    val newArgs = productIterator.map {
+    val newArgs = mapProductIterator {
       case arg: TreeNode[_] if containsChild(arg) =>
         val newChild = nextOperation(arg.asInstanceOf[BaseType], rule)
         if (!(newChild fastEquals arg)) {
@@ -353,7 +367,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] 
extends Product {
       }
       case nonChild: AnyRef => nonChild
       case null => null
-    }.toArray
+    }
     if (changed) makeCopy(newArgs) else this
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to